[COMMON] sched: ehmp: support EHMP(Exynos HMP)
author Park Bumgyu <bumgyu.park@samsung.com>
Tue, 16 Jan 2018 10:01:05 +0000 (19:01 +0900)
committer Chungwoo Park <cww.park@samsung.com>
Mon, 21 May 2018 08:26:29 +0000 (17:26 +0900)
Change-Id: Ie7ee8a84ed0fdc3a62d10a5b55488477edcdba7f
Signed-off-by: Park Bumgyu <bumgyu.park@samsung.com>
12 files changed:
include/linux/ehmp.h [new file with mode: 0644]
include/linux/sched.h
include/trace/events/ehmp.h [new file with mode: 0644]
include/trace/events/sched.h
init/Kconfig
kernel/sched/Makefile
kernel/sched/ehmp.c [new file with mode: 0644]
kernel/sched/fair.c
kernel/sched/features.h
kernel/sched/sched.h
kernel/sched/tune.c
kernel/sched/tune.h

diff --git a/include/linux/ehmp.h b/include/linux/ehmp.h
new file mode 100644 (file)
index 0000000..2494855
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/plist.h>
+#include <linux/sched/idle.h>
+
+#ifdef CONFIG_SCHED_TUNE
+/*
+ * Symbolic indices for the schedtune groups. STUNE_GROUP_COUNT is the last
+ * enumerator, so its value equals the number of named groups above it.
+ */
+enum stune_group {
+       STUNE_ROOT,
+       STUNE_FOREGROUND,
+       STUNE_BACKGROUND,
+       STUNE_TOPAPP,
+       STUNE_GROUP_COUNT,
+};
+#endif
+
+/*
+ * One global-boost QoS request.
+ * @node:   priority-list linkage; node.prio holds the requested value
+ * @name:   label identifying the requester
+ * @active: NOTE(review): presumably true once the request has been queued;
+ *          confirm against gb_qos_update_request()
+ */
+struct gb_qos_request {
+       struct plist_node node;
+       char *name;
+       bool active;
+};
+
+#ifdef CONFIG_SCHED_EHMP
+extern void exynos_init_entity_util_avg(struct sched_entity *se);
+extern int exynos_need_active_balance(enum cpu_idle_type idle,
+               struct sched_domain *sd, int src_cpu, int dst_cpu);
+
+extern unsigned long global_boost(void);
+extern int find_second_max_cap(void);
+
+extern int exynos_select_cpu(struct task_struct *p, int prev_cpu,
+                                       int sync, int sd_flag);
+
+extern void ontime_migration(void);
+extern int ontime_can_migration(struct task_struct *p, int cpu);
+extern void ontime_update_load_avg(u64 delta, int cpu, unsigned long weight,
+                                               struct sched_avg *sa);
+extern void ontime_new_entity_load(struct task_struct *parent,
+                                       struct sched_entity *se);
+extern void ontime_trace_task_info(struct task_struct *p);
+extern void ehmp_update_max_cpu_capacity(int cpu, unsigned long val);
+
+extern void ehmp_update_overutilized(int cpu, unsigned long capacity);
+extern bool ehmp_trigger_lb(int src_cpu, int dst_cpu);
+
+extern void gb_qos_update_request(struct gb_qos_request *req, u32 new_value);
+
+extern void request_kernel_prefer_perf(int grp_idx, int enable);
+#else
+/* No-op stand-ins used when CONFIG_SCHED_EHMP is disabled. */
+static inline void exynos_init_entity_util_avg(struct sched_entity *se) { }
+static inline int exynos_need_active_balance(enum cpu_idle_type idle,
+               struct sched_domain *sd, int src_cpu, int dst_cpu) { return 0; }
+
+static inline unsigned long global_boost(void) { return 0; }
+static inline int find_second_max_cap(void) { return -EINVAL; }
+
+/* Signature must mirror the four-argument prototype of the enabled build. */
+static inline int exynos_select_cpu(struct task_struct *p, int prev_cpu,
+                                       int sync, int sd_flag) { return -EINVAL; }
+
+static inline void ontime_migration(void) { }
+static inline int ontime_can_migration(struct task_struct *p, int cpu) { return 1; }
+static inline void ontime_update_load_avg(u64 delta, int cpu, unsigned long weight,
+                                                       struct sched_avg *sa) { }
+static inline void ontime_new_entity_load(struct task_struct *p,
+                                       struct sched_entity *se) { }
+static inline void ontime_trace_task_info(struct task_struct *p) { }
+
+static inline void ehmp_update_max_cpu_capacity(int cpu, unsigned long val) { }
+
+static inline void ehmp_update_overutilized(int cpu, unsigned long capacity) { }
+static inline bool ehmp_trigger_lb(int src_cpu, int dst_cpu) { return false; }
+
+static inline void gb_qos_update_request(struct gb_qos_request *req, u32 new_value) { }
+
+static inline void request_kernel_prefer_perf(int grp_idx, int enable) { }
+#endif /* CONFIG_SCHED_EHMP */
index 5d9f33742aab7af40b327587c7a1f4a9362ce3d7..7d60db84e0fbc9b61eb650fd014e4011ac4e85f7 100644 (file)
@@ -347,6 +347,25 @@ struct sched_avg {
        unsigned long                   util_avg;
 };
 
+#ifdef CONFIG_SCHED_EHMP
+/* Ontime state values; each one is a distinct single bit. */
+#define NOT_ONTIME             1
+#define ONTIME_MIGRATING       2
+#define ONTIME                 4
+
+/*
+ * Ontime load bookkeeping for one scheduling entity. load_avg and load_sum
+ * are the values reported by the ehmp_ontime_* tracepoints.
+ */
+struct ontime_avg {
+       u64 ontime_migration_time;
+       u64 load_sum;
+       u32 period_contrib;
+       unsigned long load_avg;
+};
+
+/*
+ * Ontime state embedded in struct sched_entity as the "ontime" member.
+ * NOTE(review): flags presumably holds the NOT_ONTIME/ONTIME_MIGRATING/
+ * ONTIME values above; the meaning of cpu is not visible here - confirm.
+ */
+struct ontime_entity {
+       struct ontime_avg avg;
+       int flags;
+       int cpu;
+};
+#endif
+
 struct sched_statistics {
 #ifdef CONFIG_SCHEDSTATS
        u64                             wait_start;
@@ -417,6 +436,9 @@ struct sched_entity {
         */
        struct sched_avg                avg ____cacheline_aligned_in_smp;
 #endif
+#ifdef CONFIG_SCHED_EHMP
+       struct ontime_entity            ontime;
+#endif
 };
 
 #ifdef CONFIG_SCHED_WALT
diff --git a/include/trace/events/ehmp.h b/include/trace/events/ehmp.h
new file mode 100644 (file)
index 0000000..cd99ba3
--- /dev/null
@@ -0,0 +1,340 @@
+/*
+ *  Copyright (C) 2017 Park Bumgyu <bumgyu.park@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ehmp
+
+#if !defined(_TRACE_EHMP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EHMP_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+/*
+ * Tracepoint for selection of boost cpu
+ */
+TRACE_EVENT(ehmp_select_boost_cpu,
+
+       TP_PROTO(struct task_struct *p, int cpu, int trigger, char *state),
+
+       TP_ARGS(p, cpu, trigger, state),
+
+       TP_STRUCT__entry(
+               __array(        char,           comm,   TASK_COMM_LEN   )
+               __field(        pid_t,          pid                     )
+               __field(        int,            cpu                     )
+               __field(        int,            trigger                 )
+               __array(        char,           state,          64      )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->cpu            = cpu;
+               __entry->trigger        = trigger;
+               /*
+                * @state is a NUL-terminated label that may be shorter than
+                * the 64-byte field, so copy it as a string instead of
+                * reading a fixed 64 bytes from the source.
+                */
+               strlcpy(__entry->state, state, sizeof(__entry->state));
+       ),
+
+       TP_printk("comm=%s pid=%d target_cpu=%d trigger=%d state=%s",
+                 __entry->comm, __entry->pid, __entry->cpu,
+                 __entry->trigger, __entry->state)
+);
+
+/*
+ * Tracepoint for selection of group balancer
+ */
+TRACE_EVENT(ehmp_select_group_boost,
+
+       TP_PROTO(struct task_struct *p, int cpu, char *state),
+
+       TP_ARGS(p, cpu, state),
+
+       TP_STRUCT__entry(
+               __array(        char,           comm,   TASK_COMM_LEN   )
+               __field(        pid_t,          pid                     )
+               __field(        int,            cpu                     )
+               __array(        char,           state,          64      )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->cpu            = cpu;
+               /*
+                * Bounded string copy: @state may be shorter than 64 bytes,
+                * so a fixed-size memcpy would read past its end.
+                */
+               strlcpy(__entry->state, state, sizeof(__entry->state));
+       ),
+
+       TP_printk("comm=%s pid=%d target_cpu=%d state=%s",
+                 __entry->comm, __entry->pid, __entry->cpu, __entry->state)
+);
+
+/*
+ * Tracepoint for a global boost request: records the requester's name and
+ * the boost value passed by the caller.
+ */
+TRACE_EVENT(ehmp_global_boost,
+
+       TP_PROTO(char *name, unsigned long boost),
+
+       TP_ARGS(name, boost),
+
+       TP_STRUCT__entry(
+               __array(        char,           name,           64      )
+               __field(        unsigned long,  boost                   )
+       ),
+
+       TP_fast_assign(
+               /*
+                * Request names are short NUL-terminated strings; copy them
+                * as strings rather than reading 64 bytes from the source.
+                */
+               strlcpy(__entry->name, name, sizeof(__entry->name));
+               __entry->boost          = boost;
+       ),
+
+       /* boost is unsigned long, so print it with %lu */
+       TP_printk("name=%s global_boost_value=%lu", __entry->name, __entry->boost)
+);
+
+/*
+ * Tracepoint for prefer idle
+ *
+ * Records the task (comm, pid), the original and target CPU, and the
+ * task_util, new_util and idle values supplied by the caller.
+ */
+TRACE_EVENT(ehmp_prefer_idle,
+
+       TP_PROTO(struct task_struct *p, int orig_cpu, int target_cpu,
+               unsigned long task_util, unsigned long new_util, int idle),
+
+       TP_ARGS(p, orig_cpu, target_cpu, task_util, new_util, idle),
+
+       TP_STRUCT__entry(
+               __array(        char,           comm,   TASK_COMM_LEN   )
+               __field(        pid_t,          pid                     )
+               __field(        int,            orig_cpu                )
+               __field(        int,            target_cpu              )
+               __field(        unsigned long,  task_util               )
+               __field(        unsigned long,  new_util                )
+               __field(        int,            idle                    )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->orig_cpu       = orig_cpu;
+               __entry->target_cpu     = target_cpu;
+               __entry->task_util      = task_util;
+               __entry->new_util       = new_util;
+               __entry->idle           = idle;
+       ),
+
+       TP_printk("comm=%s pid=%d orig_cpu=%d target_cpu=%d task_util=%lu new_util=%lu idle=%d",
+               __entry->comm, __entry->pid, __entry->orig_cpu, __entry->target_cpu,
+               __entry->task_util, __entry->new_util, __entry->idle)
+);
+
+/*
+ * Tracepoint for the prefer-idle CPU choice: records the task (comm, pid)
+ * and the CPU passed by the caller, printed as target_cpu.
+ */
+TRACE_EVENT(ehmp_prefer_idle_cpu_select,
+
+       TP_PROTO(struct task_struct *p, int cpu),
+
+       TP_ARGS(p, cpu),
+
+       TP_STRUCT__entry(
+               __array(        char,           comm,   TASK_COMM_LEN   )
+               __field(        pid_t,          pid                     )
+               __field(        int,            cpu                     )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->cpu            = cpu;
+       ),
+
+       TP_printk("comm=%s pid=%d target_cpu=%d",
+                 __entry->comm, __entry->pid, __entry->cpu)
+);
+
+/*
+ * Tracepoint for cpu selection
+ */
+TRACE_EVENT(ehmp_find_best_target_stat,
+
+       TP_PROTO(int cpu, unsigned long cap, unsigned long util, unsigned long target_util),
+
+       TP_ARGS(cpu, cap, util, target_util),
+
+       TP_STRUCT__entry(
+               __field( int,           cpu     )
+               __field( unsigned long, cap     )
+               __field( unsigned long, util    )
+               __field( unsigned long, target_util     )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu = cpu;
+               __entry->cap = cap;
+               __entry->util = util;
+               __entry->target_util = target_util;
+       ),
+
+       /* No trailing newline: the trace output code appends one itself. */
+       TP_printk("find_best : [cpu%d] capacity %lu, util %lu, target_util %lu",
+               __entry->cpu, __entry->cap, __entry->util, __entry->target_util)
+);
+
+/* Tracepoint recording a CPU considered as an energy candidate. */
+TRACE_EVENT(ehmp_find_best_target_candi,
+
+       TP_PROTO(unsigned int cpu),
+
+       TP_ARGS(cpu),
+
+       TP_STRUCT__entry(
+               __field( unsigned int, cpu      )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu = cpu;
+       ),
+
+       /*
+        * cpu is unsigned, hence %u; no trailing newline because the trace
+        * output code appends one itself.
+        */
+       TP_printk("find_best: energy candidate cpu %u", __entry->cpu)
+);
+
+/* Tracepoint recording the selected target CPU and its target_util. */
+TRACE_EVENT(ehmp_find_best_target_cpu,
+
+       TP_PROTO(unsigned int cpu, unsigned long target_util),
+
+       TP_ARGS(cpu, target_util),
+
+       TP_STRUCT__entry(
+               __field( unsigned int, cpu      )
+               __field( unsigned long, target_util     )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu = cpu;
+               __entry->target_util = target_util;
+       ),
+
+       /*
+        * cpu is unsigned, hence %u; no trailing newline because the trace
+        * output code appends one itself.
+        */
+       TP_printk("find_best: target_cpu %u, target_util %lu", __entry->cpu, __entry->target_util)
+);
+
+/*
+ * Tracepoint for ontime migration
+ *
+ * Records the task (comm, pid), the load value passed by the caller
+ * (printed as ontime_load_avg), the source and destination CPU, and the
+ * boost_migration argument (stored in the "bm" field).
+ */
+TRACE_EVENT(ehmp_ontime_migration,
+
+       TP_PROTO(struct task_struct *p, unsigned long load,
+               int src_cpu, int dst_cpu, int boost_migration),
+
+       TP_ARGS(p, load, src_cpu, dst_cpu, boost_migration),
+
+       TP_STRUCT__entry(
+               __array(        char,           comm,   TASK_COMM_LEN   )
+               __field(        pid_t,          pid                     )
+               __field(        unsigned long,  load                    )
+               __field(        int,            src_cpu                 )
+               __field(        int,            dst_cpu                 )
+               __field(        int,            bm                      )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->load           = load;
+               __entry->src_cpu        = src_cpu;
+               __entry->dst_cpu        = dst_cpu;
+               __entry->bm             = boost_migration;
+       ),
+
+       TP_printk("comm=%s pid=%d ontime_load_avg=%lu src_cpu=%d dst_cpu=%d boost_migration=%d",
+               __entry->comm, __entry->pid, __entry->load,
+               __entry->src_cpu, __entry->dst_cpu, __entry->bm)
+);
+
+/*
+ * Tracepoint for the ontime load of a new entity.
+ *
+ * Records the task (comm, pid), its CPU as returned by task_cpu(), and the
+ * load_avg and load_sum read from @avg.
+ */
+TRACE_EVENT(ehmp_ontime_new_entity_load,
+
+       TP_PROTO(struct task_struct *tsk, struct ontime_avg *avg),
+
+       TP_ARGS(tsk, avg),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,   TASK_COMM_LEN           )
+               __field( pid_t,         pid                             )
+               __field( int,           cpu                             )
+               __field( unsigned long, load_avg                        )
+               __field( u64,           load_sum                        )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid                    = tsk->pid;
+               __entry->cpu                    = task_cpu(tsk);
+               __entry->load_avg               = avg->load_avg;
+               __entry->load_sum               = avg->load_sum;
+       ),
+       TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu load_sum=%llu",
+                 __entry->comm,
+                 __entry->pid,
+                 __entry->cpu,
+                 __entry->load_avg,
+                 (u64)__entry->load_sum)
+);
+
+/*
+ * Tracepoint for accounting ontime load averages for tasks.
+ *
+ * Records the task (comm, pid), its CPU as returned by task_cpu(), the
+ * load_avg and load_sum read from @avg, and the caller-supplied
+ * ontime_flag.
+ */
+TRACE_EVENT(ehmp_ontime_load_avg_task,
+
+       TP_PROTO(struct task_struct *tsk, struct ontime_avg *avg, int ontime_flag),
+
+       TP_ARGS(tsk, avg, ontime_flag),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,   TASK_COMM_LEN           )
+               __field( pid_t,         pid                             )
+               __field( int,           cpu                             )
+               __field( unsigned long, load_avg                        )
+               __field( u64,           load_sum                        )
+               __field( int,           ontime_flag                     )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid                    = tsk->pid;
+               __entry->cpu                    = task_cpu(tsk);
+               __entry->load_avg               = avg->load_avg;
+               __entry->load_sum               = avg->load_sum;
+               __entry->ontime_flag            = ontime_flag;
+       ),
+       TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu load_sum=%llu ontime_flag=%d",
+                 __entry->comm, __entry->pid, __entry->cpu, __entry->load_avg,
+                 (u64)__entry->load_sum, __entry->ontime_flag)
+);
+
+/*
+ * Tracepoint for the ontime migration check: records the task (comm, pid),
+ * the CPU and migrate values passed by the caller, and a textual label
+ * printed as the reason.
+ */
+TRACE_EVENT(ehmp_ontime_check_migrate,
+
+       TP_PROTO(struct task_struct *tsk, int cpu, int migrate, char *label),
+
+       TP_ARGS(tsk, cpu, migrate, label),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,   TASK_COMM_LEN   )
+               __field( pid_t,         pid                     )
+               __field( int,           cpu                     )
+               __field( int,           migrate                 )
+               __array( char,          label,  64              )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid                    = tsk->pid;
+               __entry->cpu                    = cpu;
+               __entry->migrate                = migrate;
+               /*
+                * strlcpy always NUL-terminates; strncpy would leave the
+                * field unterminated for a label of 64 or more characters.
+                */
+               strlcpy(__entry->label, label, sizeof(__entry->label));
+       ),
+
+       TP_printk("comm=%s pid=%d target_cpu=%d migrate=%d reason=%s",
+               __entry->comm, __entry->pid, __entry->cpu,
+               __entry->migrate, __entry->label)
+);
+
+#endif /* _TRACE_EHMP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index 7909097a1e5edf35b95064d323e747fc072bd52c..7cf6918c558b9ea82d0abe75bc3b45dfeab60c20 100644 (file)
@@ -885,6 +885,67 @@ TRACE_EVENT(sched_tune_tasks_update,
                __entry->boost, __entry->max_boost)
 );
 
+/*
+ * Tracepoint for schedtune_grouputil_update
+ *
+ * Records the group index, the total and accumulated values, the group
+ * utilization, and the pid/comm/utilization of the heaviest task.
+ *
+ * NOTE(review): @heaviest_p is dereferenced unconditionally in
+ * TP_fast_assign; callers presumably never pass NULL - confirm.
+ */
+TRACE_EVENT(sched_tune_grouputil_update,
+
+       TP_PROTO(int idx, int total, int accumulated, unsigned long group_util,
+                       struct task_struct *heaviest_p, unsigned long biggest_util),
+
+       TP_ARGS(idx, total, accumulated, group_util, heaviest_p, biggest_util),
+
+       TP_STRUCT__entry(
+               __field( int,           idx             )
+               __field( int,           total           )
+               __field( int,           accumulated     )
+               __field( unsigned long, group_util      )
+               __field( pid_t,         pid             )
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( unsigned long, biggest_util    )
+       ),
+
+       TP_fast_assign(
+               __entry->idx            = idx;
+               __entry->total          = total;
+               __entry->accumulated    = accumulated;
+               __entry->group_util     = group_util;
+               __entry->pid            = heaviest_p->pid;
+               memcpy(__entry->comm, heaviest_p->comm, TASK_COMM_LEN);
+               __entry->biggest_util   = biggest_util;
+       ),
+
+       TP_printk("idx=%d total=%d accumulated=%d group_util=%lu "
+                       "heaviest task(pid=%d comm=%s util=%lu)",
+               __entry->idx, __entry->total, __entry->accumulated, __entry->group_util,
+               __entry->pid, __entry->comm, __entry->biggest_util)
+);
+
+/*
+ * Tracepoint for checking group balancing
+ *
+ * Records the group index, the imbalance count (ib_count) and whether
+ * balancing is reported as active by the caller.
+ */
+TRACE_EVENT(sched_tune_check_group_balance,
+
+       TP_PROTO(int idx, int ib_count, bool balancing),
+
+       TP_ARGS(idx, ib_count, balancing),
+
+       TP_STRUCT__entry(
+               __field( int,           idx             )
+               __field( int,           ib_count        )
+               __field( bool,          balancing       )
+       ),
+
+       TP_fast_assign(
+               __entry->idx            = idx;
+               __entry->ib_count       = ib_count;
+               __entry->balancing      = balancing;
+       ),
+
+       TP_printk("idx=%d imbalance_count=%d balancing=%d",
+               __entry->idx, __entry->ib_count, __entry->balancing)
+);
+
 /*
  * Tracepoint for schedtune_boostgroup_update
  */
index cc828a1d223254be35018be72cc89f48b421ccdc..185e800e08be494b68c38a3f036756a035bd521e 100644 (file)
@@ -991,6 +991,18 @@ config SCHED_TUNE
 
          If unsure, say N.
 
+config SCHED_EHMP
+       bool "Exynos scheduler for Heterogeneous Multi-Processor"
+       depends on SMP
+       help
+         This option enables the Exynos scheduler for HMP architectures. It
+         is designed to compensate for the limitations of the energy aware
+         scheduler. It provides independent boosting functions such as
+         global boost and on-time migration, as well as prefer_perf and
+         enhanced prefer_idle, which work in conjunction with SCHEDTUNE.
+
+         If unsure, say N.
+
 config DEFAULT_USE_ENERGY_AWARE
        bool "Default to enabling the Energy Aware Scheduler feature"
        default n
index b9207a9caa8616bde019367347305ce2a9de73f1..00aba22d914b14b1cfdd0847273dbb45770ad406 100644 (file)
@@ -20,6 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o
 obj-y += wait.o wait_bit.o swait.o completion.o idle.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
+obj-$(CONFIG_SCHED_EHMP) += ehmp.o
 obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += energy.o
 obj-$(CONFIG_SCHED_WALT) += walt.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
diff --git a/kernel/sched/ehmp.c b/kernel/sched/ehmp.c
new file mode 100644 (file)
index 0000000..73b1692
--- /dev/null
@@ -0,0 +1,1670 @@
+/*
+ * Exynos scheduler for Heterogeneous Multi-Processing (HMP)
+ *
+ * Copyright (C) 2017 Samsung Electronics Co., Ltd
+ * Park Bumgyu <bumgyu.park@samsung.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/cpuidle.h>
+#include <linux/pm_qos.h>
+#include <linux/ehmp.h>
+#include <linux/sched_energy.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/ehmp.h>
+
+#include "sched.h"
+#include "tune.h"
+
+/* Return the util_avg tracked in the scheduling entity of task @p. */
+static unsigned long task_util(struct task_struct *p)
+{
+       struct sched_avg *sa = &p->se.avg;
+
+       return sa->util_avg;
+}
+
+/* Map a scheduling entity back to the task_struct that embeds it as ->se. */
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+       struct task_struct *owner = container_of(se, struct task_struct, se);
+
+       return owner;
+}
+
+/* Map a sched_avg back to the sched_entity that embeds it as ->avg. */
+static inline struct sched_entity *se_of(struct sched_avg *sa)
+{
+       struct sched_entity *owner = container_of(sa, struct sched_entity, avg);
+
+       return owner;
+}
+
+/*
+ * An entity with a non-NULL my_q is treated as a cfs_rq entity, one without
+ * as a task. The argument is parenthesized so that any pointer expression
+ * can be passed safely.
+ */
+#define entity_is_cfs_rq(se)   ((se)->my_q)
+#define entity_is_task(se)     (!(se)->my_q)
+#define LOAD_AVG_MAX           47742
+
+/*
+ * Last values recorded by ehmp_update_max_cpu_capacity().
+ * NOTE(review): the initial values (capacity 1024 on CPU 7) look like
+ * platform-specific defaults - confirm.
+ */
+static unsigned long maxcap_val = 1024;
+static int maxcap_cpu = 7;
+
+/*
+ * Record @cpu and its capacity @val in maxcap_cpu/maxcap_val.
+ * The two stores are plain assignments; no locking is done here.
+ */
+void ehmp_update_max_cpu_capacity(int cpu, unsigned long val)
+{
+       maxcap_cpu = cpu;
+       maxcap_val = val;
+}
+
+/*
+ * Look up the "/cpus/ehmp" device tree node; returns NULL when absent.
+ * NOTE(review): of_find_node_by_path() is expected to return the node with
+ * a reference held, so callers should of_node_put() it - confirm.
+ */
+static inline struct device_node *get_ehmp_node(void)
+{
+       return of_find_node_by_path("/cpus/ehmp");
+}
+
+/* Return the CPU mask stored in sched group @sg. */
+static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
+{
+       struct cpumask *span = to_cpumask(sg->cpumask);
+
+       return span;
+}
+
+/* Report the overutilized flag kept in the shared data of domain @sd. */
+static bool sd_overutilized(struct sched_domain *sd)
+{
+       bool over = sd->shared->overutilized;
+
+       return over;
+}
+
+/* Pointer to the cpus_allowed mask of task @tsk. */
+#define tsk_cpus_allowed(tsk)  (&(tsk)->cpus_allowed)
+
+/**********************************************************************
+ * task initialization                                                *
+ **********************************************************************/
+/*
+ * exynos_init_entity_util_avg - seed the utilization of a new entity
+ * @se: scheduling entity to initialise
+ *
+ * Half of the spare original capacity of the entity's CPU is the upper
+ * bound. If there is no spare capacity, @se is left untouched. Otherwise
+ * util_avg becomes the cfs_rq utilization scaled by the entity's weight
+ * share (clamped to the bound), or a quarter of the original capacity when
+ * the cfs_rq has no utilization yet. util_sum is derived from util_avg.
+ */
+void exynos_init_entity_util_avg(struct sched_entity *se)
+{
+       struct cfs_rq *cfs_rq = se->cfs_rq;
+       struct sched_avg *sa = &se->avg;
+       int cpu = cpu_of(cfs_rq->rq);
+       unsigned long cap_org = capacity_orig_of(cpu);
+       long cap = (long)(cap_org - cfs_rq->avg.util_avg) / 2;
+
+       /* Nothing to seed when the CPU has no spare capacity. */
+       if (cap <= 0)
+               return;
+
+       if (cfs_rq->avg.util_avg == 0) {
+               sa->util_avg = cap_org >> 2;
+       } else {
+               sa->util_avg = (cfs_rq->avg.util_avg * se->load.weight) /
+                                       (cfs_rq->avg.load_avg + 1);
+
+               if (sa->util_avg > cap)
+                       sa->util_avg = cap;
+       }
+
+       /*
+        * If we wish to restore tuning via setting initial util,
+        * this is where we should do it.
+        */
+       sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
+}
+
+/**********************************************************************
+ * load balance                                                       *
+ **********************************************************************/
+bool cpu_overutilized(int cpu);
+
+/*
+ * True when @sd has a parent domain whose group list contains more than
+ * one group. The argument is parenthesized so that any pointer expression
+ * can be passed safely.
+ */
+#define lb_sd_parent(sd) \
+       ((sd)->parent && (sd)->parent->groups != (sd)->parent->groups->next)
+
+/*
+ * Return non-zero when the current capacity of @rq, scaled by the domain's
+ * imbalance_pct, is below its original capacity scaled by 100.
+ */
+static inline int
+check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
+{
+       unsigned long scaled_cur = rq->cpu_capacity * sd->imbalance_pct;
+       unsigned long scaled_org = rq->cpu_capacity_orig * 100;
+
+       return scaled_cur < scaled_org;
+}
+
+unsigned long global_boost(void);
+/*
+ * exynos_need_active_balance - decide whether active balancing is needed
+ * @idle:    idle type of the balancing CPU
+ * @sd:      sched domain being balanced
+ * @src_cpu: CPU the task would be pulled from
+ * @dst_cpu: CPU the task would be moved to
+ *
+ * Returns 1 when one of the capacity/overutilization conditions below
+ * holds; otherwise returns whether nr_balance_failed exceeds
+ * cache_nice_tries + 2.
+ */
+int exynos_need_active_balance(enum cpu_idle_type idle, struct sched_domain *sd,
+                                       int src_cpu, int dst_cpu)
+{
+       /* Without a multi-group parent domain both factors collapse to 1. */
+       unsigned int src_imb_pct = lb_sd_parent(sd) ? sd->imbalance_pct : 1;
+       unsigned int dst_imb_pct = lb_sd_parent(sd) ? 100 : 1;
+       unsigned long src_cap = capacity_of(src_cpu);
+       unsigned long dst_cap = capacity_of(dst_cpu);
+
+       /* Balancing CPU is not busy and the source runs exactly one CFS task. */
+       if ((idle != CPU_NOT_IDLE) &&
+           (cpu_rq(src_cpu)->cfs.h_nr_running == 1)) {
+               /* Source capacity is reduced and clearly below the destination's. */
+               if ((check_cpu_capacity(cpu_rq(src_cpu), sd)) &&
+                   (src_cap * sd->imbalance_pct < dst_cap * 100)) {
+                       return 1;
+               }
+
+               /*
+                * No multi-group parent domain: move to the bigger CPU when the
+                * source is overutilized or a global boost is in effect.
+                */
+               if (!lb_sd_parent(sd) && src_cap < dst_cap)
+                       if (cpu_overutilized(src_cpu) || global_boost())
+                               return 1;
+       }
+
+       /*
+        * Single task on an overutilized, smaller source while the
+        * destination is not overutilized.
+        */
+       if ((src_cap * src_imb_pct < dst_cap * dst_imb_pct) &&
+                       cpu_rq(src_cpu)->cfs.h_nr_running == 1 &&
+                       cpu_overutilized(src_cpu) &&
+                       !cpu_overutilized(dst_cpu)) {
+               return 1;
+       }
+
+       /* Fall back to the repeated-failure criterion. */
+       return unlikely(sd->nr_balance_failed > sd->cache_nice_tries + 2);
+}
+
+/**********************************************************************
+ * load balance_trigger                                                      *
+ **********************************************************************/
+/* Per-CPU overutilization threshold used by the load balance trigger. */
+struct lbt_overutil {
+       /*
+        * overutil_ratio means
+        * N < 0  : disable user_overutilized
+        * N == 0 : Always overutilized
+        * N > 0  : overutil_cap = org_capacity * overutil_ratio / 100
+        */
+       unsigned long overutil_cap;
+       int overutil_ratio;
+};
+
+/* "bot" and "top" threshold tables, one entry per CPU. */
+DEFINE_PER_CPU(struct lbt_overutil, ehmp_bot_overutil);
+DEFINE_PER_CPU(struct lbt_overutil, ehmp_top_overutil);
+/* Ratio value marking a threshold as disabled; parenthesized for safe expansion. */
+#define DISABLE_OU     (-1)
+
+/*
+ * cpu_overutilized - check @cpu against its top overutilization threshold
+ * @cpu: CPU to check
+ *
+ * Any negative ratio counts as "disabled", matching the "N < 0" rule
+ * documented on struct lbt_overutil (previously only -1 was recognised).
+ *
+ * Returns true when the CPU's utilization exceeds the applicable limit.
+ */
+bool cpu_overutilized(int cpu)
+{
+       struct lbt_overutil *ou = &per_cpu(ehmp_top_overutil, cpu);
+
+       /*
+        * If top overutil is disabled, use main stream condition
+        * in the fair.c
+        */
+       if (ou->overutil_ratio < 0)
+               return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * 1280);
+
+       return cpu_util(cpu) > ou->overutil_cap;
+}
+
+/*
+ * Top-level overutilized check: currently only reports whether the
+ * ENERGY_AWARE scheduler feature is enabled. @cpu is unused; an earlier
+ * check of the root domain's overutilized flag had been commented out and
+ * is dropped here.
+ */
+static inline bool lbt_top_overutilized(int cpu)
+{
+       return sched_feat(ENERGY_AWARE);
+}
+
+/*
+ * Check @cpu against its bot overutilization threshold.
+ *
+ * Any negative ratio counts as "disabled", matching the "N < 0" rule
+ * documented on struct lbt_overutil (previously only -1 was recognised).
+ */
+static inline bool lbt_bot_overutilized(int cpu)
+{
+       struct lbt_overutil *ou = &per_cpu(ehmp_bot_overutil, cpu);
+
+       /* if bot overutil is disabled, return false */
+       if (ou->overutil_ratio < 0)
+               return false;
+
+       return cpu_util(cpu) > ou->overutil_cap;
+}
+
+/*
+ * Recompute the overutilization cap of @cpu from its configured ratio.
+ * @cpu:      CPU whose threshold entry is updated
+ * @capacity: capacity the ratio is applied to
+ * @top:      true selects the top table, false the bot table
+ *
+ * A negative ratio means "disabled" and yields a cap of 0. Testing for
+ * "< 0" instead of "== -1" keeps other negative ratios from being
+ * converted to a huge unsigned value in the multiplication below.
+ */
+static inline void lbt_update_overutilized(int cpu,
+                       unsigned long capacity, bool top)
+{
+       struct lbt_overutil *ou;
+       ou = top ? &per_cpu(ehmp_top_overutil, cpu) :
+                       &per_cpu(ehmp_bot_overutil, cpu);
+
+       if (ou->overutil_ratio < 0)
+               ou->overutil_cap = 0;
+       else
+               ou->overutil_cap = (capacity * ou->overutil_ratio) / 100;
+}
+
+/*
+ * Recompute both the top and the bot overutilization caps of @cpu for the
+ * given @capacity.
+ */
+void ehmp_update_overutilized(int cpu, unsigned long capacity)
+{
+       lbt_update_overutilized(cpu, capacity, true);
+       lbt_update_overutilized(cpu, capacity, false);
+}
+
+/*
+ * Return true when @dst_cpu belongs to the first sched group of
+ * @src_cpu's sd_ea domain; false when that domain is not set.
+ *
+ * NOTE(review): rcu_dereference() is used without taking rcu_read_lock()
+ * here; callers presumably already hold it - confirm.
+ */
+static bool lbt_is_same_group(int src_cpu, int dst_cpu)
+{
+       struct sched_domain *sd  = rcu_dereference(per_cpu(sd_ea, src_cpu));
+       struct sched_group *sg;
+
+       if (!sd)
+               return false;
+
+       sg = sd->groups;
+       return cpumask_test_cpu(dst_cpu, sched_group_cpus(sg));
+}
+
+/*
+ * Combine the top and bot overutilized checks for a src/dst CPU pair.
+ *
+ * When the two CPUs are not in the same group only the top-level result
+ * counts; when they are, either the bot or the top result is sufficient.
+ */
+static bool lbt_overutilized(int src_cpu, int dst_cpu)
+{
+       bool top = lbt_top_overutilized(src_cpu);
+
+       /* different groups: the top-level condition decides alone */
+       if (!lbt_is_same_group(src_cpu, dst_cpu))
+               return top;
+
+       /* same group: the bot-level condition can trigger as well */
+       return lbt_bot_overutilized(src_cpu) || top;
+}
+
+/*
+ * _show_overutil - format the overutil table for a sysfs read
+ * @buf: output buffer supplied by sysfs (PAGE_SIZE bytes)
+ * @top: true prints the top table, false the bot table
+ *
+ * Walks every sched group of CPU0's sd_ea domain and emits one line per
+ * active CPU. Returns the number of bytes written, or 0 when the domain is
+ * not set.
+ */
+static ssize_t _show_overutil(char *buf, bool top)
+{
+       struct sched_domain *sd;
+       struct sched_group *sg;
+       struct lbt_overutil *ou;
+       int cpu, ret = 0;
+
+       rcu_read_lock();
+
+       sd = rcu_dereference(per_cpu(sd_ea, 0));
+       if (!sd) {
+               rcu_read_unlock();
+               return ret;
+       }
+
+       sg = sd->groups;
+       do {
+               for_each_cpu_and(cpu, sched_group_cpus(sg), cpu_active_mask) {
+                       ou = top ? &per_cpu(ehmp_top_overutil, cpu) :
+                                               &per_cpu(ehmp_bot_overutil, cpu);
+                       /* bounded: never write past the PAGE_SIZE sysfs buffer */
+                       ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+                                       "cpu%d ratio:%3d cap:%4lu\n",
+                                       cpu, ou->overutil_ratio, ou->overutil_cap);
+
+               }
+       } while (sg = sg->next, sg != sd->groups);
+
+       rcu_read_unlock();
+       return ret;
+}
+
+/*
+ * _store_overutil - parse overutil ratios from a sysfs write
+ * @buf:   user input: integers separated by ' ' or ':'
+ * @count: length of the input, returned unchanged
+ * @top:   true updates the top table, false the bot table
+ *
+ * One integer is consumed per sched group of CPU0's sd_ea domain, in group
+ * order, and applied to every active CPU of that group. Parsing stops when
+ * no further separator is found, leaving the remaining groups untouched.
+ * Always returns @count, also when the domain is not set.
+ */
+static ssize_t _store_overutil(const char *buf,
+                               size_t count, bool top)
+{
+       struct sched_domain *sd;
+       struct sched_group *sg;
+       struct lbt_overutil *ou;
+       unsigned long capacity;
+       int cpu;
+       const char *cp = buf;
+       int tokenized_data;
+
+       rcu_read_lock();
+
+       sd = rcu_dereference(per_cpu(sd_ea, 0));
+       if (!sd) {
+               rcu_read_unlock();
+               return count;
+       }
+
+       sg = sd->groups;
+       do {
+               /* an unparsable token falls back to -1 */
+               if (sscanf(cp, "%d", &tokenized_data) != 1)
+                       tokenized_data = -1;
+
+               for_each_cpu_and(cpu, sched_group_cpus(sg), cpu_active_mask) {
+                       ou = top ? &per_cpu(ehmp_top_overutil, cpu) :
+                                       &per_cpu(ehmp_bot_overutil, cpu);
+                       ou->overutil_ratio = tokenized_data;
+
+                       /* recomputes both the top and the bot cap of this CPU */
+                       capacity = arch_scale_cpu_capacity(sd, cpu);
+                       ehmp_update_overutilized(cpu, capacity);
+               }
+
+               /* advance to the token after the next ' ' or ':' */
+               cp = strpbrk(cp, " :");
+               if (!cp)
+                       break;
+               cp++;
+       } while (sg = sg->next, sg != sd->groups);
+
+       rcu_read_unlock();
+       return count;
+}
+
+static ssize_t show_top_overutil(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       return _show_overutil(buf, true);
+}
+static ssize_t store_top_overutil(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf,
+               size_t count)
+{
+       return _store_overutil(buf, count, true);
+}
+static ssize_t show_bot_overutil(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       return _show_overutil(buf, false);
+}
+/* sysfs store handler for "bot_overutil": delegates to _store_overutil(top=false). */
+static ssize_t store_bot_overutil(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf,
+               size_t count)
+{
+       return _store_overutil(buf, count, false);
+}
+
+/* Read/write (0644) sysfs attributes exposing the top and bottom overutil ratios. */
+static struct kobj_attribute top_overutil_attr =
+__ATTR(top_overutil, 0644, show_top_overutil, store_top_overutil);
+static struct kobj_attribute bot_overutil_attr =
+__ATTR(bot_overutil, 0644, show_bot_overutil, store_bot_overutil);
+
+/*
+ * Load the initial per-cpu overutil ratios from the "top-overutil" and
+ * "bot-overutil" properties of the EHMP device-tree node.
+ *
+ * Both properties are read as NR_CPUS-element arrays. When the node is
+ * missing or either read fails, nothing is applied. Always returns 0.
+ */
+static int __init init_lbt(void)
+{
+       int top_ou[NR_CPUS] = {-1, }, bot_ou[NR_CPUS] = {-1, };
+       struct device_node *dn = get_ehmp_node();
+       int cpu;
+
+       if (!dn)
+               return 0;
+
+       if (of_property_read_u32_array(dn, "top-overutil", top_ou, NR_CPUS) < 0 ||
+           of_property_read_u32_array(dn, "bot-overutil", bot_ou, NR_CPUS) < 0)
+               return 0;
+
+       for_each_possible_cpu(cpu) {
+               per_cpu(ehmp_top_overutil, cpu).overutil_ratio = top_ou[cpu];
+               per_cpu(ehmp_bot_overutil, cpu).overutil_ratio = bot_ou[cpu];
+       }
+
+       return 0;
+}
+pure_initcall(init_lbt);
+
+/*
+ * Report whether load balancing from @src_cpu to @dst_cpu should be
+ * triggered; currently this is exactly the lbt_overutilized() result.
+ */
+bool ehmp_trigger_lb(int src_cpu, int dst_cpu)
+{
+       /* check overutilized condition */
+       return lbt_overutilized(src_cpu, dst_cpu);
+}
+
+/**********************************************************************
+ * Global boost                                                       *
+ **********************************************************************/
+/* Boost currently in effect: gb_max_value scaled by the last gb_list entry (in percent). */
+static unsigned long gb_value = 0;
+/* Upper bound of the boost; set by init_global_boost(). */
+static unsigned long gb_max_value = 0;
+/* Request updated through the "global_boost" sysfs file. */
+static struct gb_qos_request gb_req_user =
+{
+       .name = "ehmp_gb_req_user",
+};
+
+/* Priority-sorted list of active boost requests. */
+static struct plist_head gb_list = PLIST_HEAD_INIT(gb_list);
+
+/* Taken by gb_qos_update_request() around gb_list and gb_value updates. */
+static DEFINE_SPINLOCK(gb_lock);
+
+/*
+ * Return the prio of the last node of gb_list.
+ * NOTE(review): plist_last() is only meaningful on a non-empty list; the
+ * visible caller invokes this right after plist_add().
+ */
+static int gb_qos_max_value(void)
+{
+       return plist_last(&gb_list)->prio;
+}
+
+/* Return the value currently requested by @req (stored as its plist prio). */
+static int gb_qos_req_value(struct gb_qos_request *req)
+{
+       return req->node.prio;
+}
+
+/*
+ * Set the global boost request @req to @new_value.
+ *
+ * Nothing happens when the request already carries @new_value. Otherwise
+ * the request is (re)inserted into gb_list with the new priority and
+ * gb_value is recomputed from the last list entry as a percentage of
+ * gb_max_value. List and value are updated under gb_lock.
+ */
+void gb_qos_update_request(struct gb_qos_request *req, u32 new_value)
+{
+       unsigned long flags;
+
+       if (req->node.prio == new_value)
+               return;
+
+       spin_lock_irqsave(&gb_lock, flags);
+
+       /* First use only marks the request active; later uses unlink it first. */
+       if (!req->active)
+               req->active = 1;
+       else
+               plist_del(&req->node, &gb_list);
+
+       plist_node_init(&req->node, new_value);
+       plist_add(&req->node, &gb_list);
+
+       gb_value = gb_max_value * gb_qos_max_value() / 100;
+       trace_ehmp_global_boost(req->name, new_value);
+
+       spin_unlock_irqrestore(&gb_lock, flags);
+}
+
+/*
+ * sysfs show handler for "global_boost": print every request on gb_list
+ * as "<name> : <value>", one per line, and return the number of bytes
+ * written to @buf.
+ *
+ * The list is walked under gb_lock because gb_qos_update_request() unlinks
+ * and re-adds nodes under that lock. scnprintf() bounded by the space left
+ * in the PAGE_SIZE sysfs buffer keeps the running offset equal to the
+ * bytes actually written, which snprintf() does not guarantee on
+ * truncation.
+ */
+static ssize_t show_global_boost(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       struct gb_qos_request *req;
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&gb_lock, flags);
+
+       plist_for_each_entry(req, &gb_list, node)
+               ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s : %d\n",
+                               req->name, gb_qos_req_value(req));
+
+       spin_unlock_irqrestore(&gb_lock, flags);
+
+       return ret;
+}
+
+/*
+ * sysfs store handler for "global_boost": parse an unsigned decimal value
+ * from @buf and apply it to the user boost request.
+ *
+ * Returns @count on success, or -EINVAL when no number can be parsed or
+ * the value does not fit the int priority that plist stores.
+ */
+static ssize_t store_global_boost(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf,
+               size_t count)
+{
+       unsigned int input;
+
+       /* %u matches the unsigned type of input; require one conversion. */
+       if (sscanf(buf, "%u", &input) != 1)
+               return -EINVAL;
+
+       /* The value ends up in plist_node.prio, which is a signed int. */
+       if (input > INT_MAX)
+               return -EINVAL;
+
+       gb_qos_update_request(&gb_req_user, input);
+
+       return count;
+}
+
+/* Read/write (0644) sysfs attribute for listing and setting global boost requests. */
+static struct kobj_attribute global_boost_attr =
+__ATTR(global_boost, 0644, show_global_boost, store_global_boost);
+
+#define BOOT_BOOST_DURATION 40000000   /* microseconds */
+/*
+ * Return the global boost value to apply right now.
+ *
+ * While ktime_get() is still below BOOT_BOOST_DURATION the maximum boost
+ * is returned unconditionally; afterwards the value derived from the
+ * registered requests (gb_value) is returned.
+ */
+unsigned long global_boost(void)
+{
+       u64 uptime_us = ktime_to_us(ktime_get());
+
+       return (uptime_us < BOOT_BOOST_DURATION) ? gb_max_value : gb_value;
+}
+
+/*
+ * Return the second largest distinct cpu_capacity_orig among the CPUs of
+ * CPU0's sd_ea domain, or 0 when the domain is not available or all CPUs
+ * share one capacity.
+ *
+ * The domain pointer is RCU-protected, so the walk runs inside an RCU
+ * read-side critical section. A capacity below the current maximum is
+ * also considered for second place; otherwise the result would depend on
+ * the order in which groups are visited, and would stay 0 when the
+ * biggest group comes first.
+ */
+int find_second_max_cap(void)
+{
+       struct sched_domain *sd;
+       struct sched_group *sg;
+       int max_cap = 0, second_max_cap = 0;
+
+       rcu_read_lock();
+
+       sd = rcu_dereference(per_cpu(sd_ea, 0));
+       if (!sd)
+               goto unlock;
+
+       sg = sd->groups;
+       do {
+               int i;
+
+               for_each_cpu(i, sched_group_cpus(sg)) {
+                       unsigned long cap = cpu_rq(i)->cpu_capacity_orig;
+
+                       if (cap > max_cap) {
+                               /* New maximum: the old one becomes second. */
+                               second_max_cap = max_cap;
+                               max_cap = cap;
+                       } else if (cap < max_cap && cap > second_max_cap) {
+                               /* Strictly between second and maximum. */
+                               second_max_cap = cap;
+                       }
+               }
+       } while (sg = sg->next, sg != sd->groups);
+
+unlock:
+       rcu_read_unlock();
+       return second_max_cap;
+}
+
+/*
+ * Initialise gb_max_value to one more than the second largest CPU
+ * capacity reported by find_second_max_cap(). Always returns 0.
+ */
+static int __init init_global_boost(void)
+{
+       gb_max_value = find_second_max_cap() + 1;
+
+       return 0;
+}
+pure_initcall(init_global_boost);
+
+/**********************************************************************
+ * Boost cpu selection (global boost, schedtune.prefer_perf)          *
+ **********************************************************************/
+/* True when @cpu holds a valid (non-negative) cpu number. */
+#define cpu_selected(cpu)      ((cpu) >= 0)
+
+int kernel_prefer_perf(int grp_idx);
+/*
+ * sysfs show handler for "kernel_prefer_perf": print the
+ * kernel_prefer_perf() value of every schedtune group on one line,
+ * separated by spaces and terminated by a newline.
+ */
+static ssize_t show_prefer_perf(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       int grp;
+       int len = 0;
+
+       for (grp = 0; grp < STUNE_GROUP_COUNT; grp++)
+               len += snprintf(buf + len, 10, "%d ", kernel_prefer_perf(grp));
+
+       len += snprintf(buf + len, 10, "\n");
+
+       return len;
+}
+
+/* Read-only (0444) sysfs attribute; no store handler is provided. */
+static struct kobj_attribute prefer_perf_attr =
+__ATTR(kernel_prefer_perf, 0444, show_prefer_perf, NULL);
+
+/* Reason a boost was triggered, as recorded in struct boost_trigger. */
+enum {
+       BT_PREFER_PERF = 0,     /* schedtune_prefer_perf(p) > 0 */
+       BT_GROUP_BALANCE,       /* schedtune_need_group_balance(p) > 0 */
+       BT_GLOBAL_BOOST,        /* global_boost() is non-zero */
+};
+
+/* Result of check_boost_trigger(): why a task is boosted and by how much. */
+struct boost_trigger {
+       int trigger;            /* one of the BT_* values */
+       int boost_val;          /* margin added to utilization when picking a cpu */
+};
+
+/*
+ * Decide whether task @p is in a boost state and fill @bt accordingly.
+ *
+ * Checked in order: schedtune prefer_perf, schedtune group balance (both
+ * only with CONFIG_SCHED_TUNE), then the global boost. Returns 1 with @bt
+ * filled in when a trigger applies, 0 otherwise (@bt is left untouched).
+ */
+static int check_boost_trigger(struct task_struct *p, struct boost_trigger *bt)
+{
+       int gb;
+
+#ifdef CONFIG_SCHED_TUNE
+       if (schedtune_prefer_perf(p) > 0) {
+               bt->boost_val = schedtune_perf_threshold();
+               bt->trigger = BT_PREFER_PERF;
+               return 1;
+       }
+
+       if (schedtune_need_group_balance(p) > 0) {
+               bt->boost_val = schedtune_perf_threshold();
+               bt->trigger = BT_GROUP_BALANCE;
+               return 1;
+       }
+#endif
+
+       gb = global_boost();
+       if (!gb)
+               return 0;       /* not boost state */
+
+       bt->boost_val = gb;
+       bt->trigger = BT_GLOBAL_BOOST;
+       return 1;
+}
+
+/*
+ * Pick one cpu out of @target_cpus for task @p.
+ *
+ * Returns -1 for an empty mask, the task's current cpu when it is part of
+ * the mask, and otherwise the last cpu set in the mask.
+ */
+static int boost_select_cpu(struct task_struct *p, struct cpumask *target_cpus)
+{
+       int prev_cpu;
+       int iter, last = 0;
+
+       if (cpumask_empty(target_cpus))
+               return -1;
+
+       prev_cpu = task_cpu(p);
+       if (cpumask_test_cpu(prev_cpu, target_cpus))
+               return prev_cpu;
+
+       /* Walk the whole mask so that the final iteration wins. */
+       for_each_cpu(iter, target_cpus)
+               last = iter;
+
+       return last;
+}
+
+/*
+ * Track the set of cpus sitting in the shallowest idle state seen so far.
+ *
+ * @cpu:              candidate cpu
+ * @min_exit_latency: smallest exit latency recorded so far (updated)
+ * @shallowest_cpus:  cpus sharing that smallest exit latency (updated)
+ *
+ * A cpu without an idle state (e.g. before cpuidle is enabled) is always
+ * added. A cpu in a deeper state than the recorded minimum is ignored, and
+ * a cpu in a shallower state replaces everything collected before it.
+ */
+static void mark_shallowest_cpu(int cpu, unsigned int *min_exit_latency,
+                                               struct cpumask *shallowest_cpus)
+{
+       struct cpuidle_state *state = idle_get_state(cpu_rq(cpu));
+
+       if (state) {
+               /* Deeper than the best known state: not a candidate. */
+               if (state->exit_latency > *min_exit_latency)
+                       return;
+
+               /* Shallower than the best known state: start over. */
+               if (state->exit_latency < *min_exit_latency) {
+                       cpumask_clear(shallowest_cpus);
+                       *min_exit_latency = state->exit_latency;
+               }
+       }
+
+       cpumask_set_cpu(cpu, shallowest_cpus);
+}
+/* Return non-zero when @p's load-tracking last_update_time is 0. */
+static int check_migration_task(struct task_struct *p)
+{
+       return !p->se.avg.last_update_time;
+}
+
+/*
+ * Utilization of @cpu with the contribution of waking task @p removed.
+ *
+ * When @p is not on @cpu, or check_migration_task() reports it has no
+ * update time yet, the plain cpu_util() is returned. Otherwise the task's
+ * utilization is subtracted, and the result is clamped to the range
+ * [0, capacity_orig_of(cpu)].
+ */
+static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
+{
+       unsigned long capacity;
+       unsigned long remaining;
+
+       /* Task has no contribution or is new */
+       if (cpu != task_cpu(p) || check_migration_task(p))
+               return cpu_util(cpu);
+
+       capacity = capacity_orig_of(cpu);
+       remaining = max_t(long, cpu_util(cpu) - task_util(p), 0);
+
+       if (remaining >= capacity)
+               return capacity;
+
+       return remaining;
+}
+
+/*
+ * Select a cpu for @p inside the first sched group of maxcap_cpu's sd_ea
+ * domain (group-balance boost).
+ *
+ * Preference order: the task's current cpu if it is in that group and
+ * idle; the first allowed idle cpu that reports no idle state; the allowed
+ * idle cpu with the smallest exit latency; finally the allowed cpu with
+ * the lowest cpu_util_wake(). Returns -1 when nothing qualifies.
+ *
+ * NOTE(review): rcu_dereference() is used without taking rcu_read_lock()
+ * here, so the caller is presumably expected to hold it - confirm.
+ */
+static int find_group_boost_target(struct task_struct *p)
+{
+       struct sched_domain *sd;
+       int shallowest_cpu = -1;
+       int lowest_cpu = -1;
+       unsigned int min_exit_latency = UINT_MAX;
+       unsigned long lowest_util = ULONG_MAX;
+       int target_cpu = -1;
+       int cpu;
+       char state[30] = "fail";        /* reason string for the trace event */
+
+       sd = rcu_dereference(per_cpu(sd_ea, maxcap_cpu));
+       if (!sd)
+               return target_cpu;
+
+       /* Staying on an idle current cpu avoids a migration. */
+       if (cpumask_test_cpu(task_cpu(p), sched_group_cpus(sd->groups))) {
+               if (idle_cpu(task_cpu(p))) {
+                       target_cpu = task_cpu(p);
+                       strcpy(state, "current idle");
+                       goto find_target;
+               }
+       }
+
+       for_each_cpu_and(cpu, tsk_cpus_allowed(p), sched_group_cpus(sd->groups)) {
+               unsigned long util = cpu_util_wake(cpu, p);
+
+               if (idle_cpu(cpu)) {
+                       struct cpuidle_state *idle;
+
+                       idle = idle_get_state(cpu_rq(cpu));
+                       /* Idle cpu without an idle state: take it at once. */
+                       if (!idle) {
+                               target_cpu = cpu;
+                               strcpy(state, "idle wakeup");
+                               goto find_target;
+                       }
+
+                       if (idle->exit_latency < min_exit_latency) {
+                               min_exit_latency = idle->exit_latency;
+                               shallowest_cpu = cpu;
+                               continue;
+                       }
+               }
+
+               /* Once an idle cpu is known, utilization no longer matters. */
+               if (cpu_selected(shallowest_cpu))
+                       continue;
+
+               if (util < lowest_util) {
+                       lowest_cpu = cpu;
+                       lowest_util = util;
+               }
+       }
+
+       if (cpu_selected(shallowest_cpu)) {
+               target_cpu = shallowest_cpu;
+               strcpy(state, "shallowest idle");
+               goto find_target;
+       }
+
+       if (cpu_selected(lowest_cpu)) {
+               target_cpu = lowest_cpu;
+               strcpy(state, "lowest util");
+       }
+
+find_target:
+       trace_ehmp_select_group_boost(p, target_cpu, state);
+
+       return target_cpu;
+}
+
+/*
+ * Select a cpu for boosted task @p.
+ *
+ * @sd:       sched domain whose groups are scanned in list order
+ * @p:        task being placed
+ * @min_util: lower bound applied to the estimated utilization
+ * @bt:       boost trigger and boost value from check_boost_trigger()
+ *
+ * A BT_GROUP_BALANCE trigger is handed to find_group_boost_target().
+ * Otherwise each group is scanned: a cpu "fits" when its estimated
+ * utilization plus the boost (capped at maxcap_val) does not exceed its
+ * original capacity. Fitting cpus with no running task are the primary
+ * candidates, non-fitting cpus with no running task are backups, and the
+ * least utilized busy fitting cpu is remembered as a last resort.
+ *
+ * boost_candidates starts with all bits set as a "nothing found yet"
+ * marker; it is cleared when the first fitting cpu is seen.
+ *
+ * NOTE(review): when go_up is set but no busy fitting cpu was recorded,
+ * lowest_cpu is still -1 and that value is returned - confirm callers
+ * handle a negative result.
+ */
+static int
+find_boost_target(struct sched_domain *sd, struct task_struct *p,
+                       unsigned long min_util, struct boost_trigger *bt)
+{
+       struct sched_group *sg;
+       int boost = bt->boost_val;
+       unsigned long max_capacity;
+       struct cpumask boost_candidates;
+       struct cpumask backup_boost_candidates;
+       unsigned int min_exit_latency = UINT_MAX;
+       unsigned int backup_min_exit_latency = UINT_MAX;
+       int target_cpu;
+       bool go_up = false;
+       unsigned long lowest_util = ULONG_MAX;
+       int lowest_cpu = -1;
+       char state[30] = "fail";        /* reason string for the trace event */
+
+       if (bt->trigger == BT_GROUP_BALANCE)
+               return find_group_boost_target(p);
+
+       cpumask_setall(&boost_candidates);
+       cpumask_clear(&backup_boost_candidates);
+
+       max_capacity = maxcap_val;
+
+       sg = sd->groups;
+
+       do {
+               int i;
+
+               for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
+                       unsigned long new_util, wake_util;
+
+                       if (!cpu_online(i))
+                               continue;
+
+                       wake_util = cpu_util_wake(i, p);
+                       new_util = wake_util + task_util(p);
+                       new_util = max(min_util, new_util);
+
+                       /* Boosted utilization does not fit on this cpu. */
+                       if (min(new_util + boost, max_capacity) > capacity_orig_of(i)) {
+                               if (!cpu_rq(i)->nr_running)
+                                       mark_shallowest_cpu(i, &backup_min_exit_latency,
+                                                       &backup_boost_candidates);
+                               else if (cpumask_test_cpu(task_cpu(p), sched_group_cpus(sg)))
+                                       go_up = true;
+
+                               continue;
+                       }
+
+                       /* First fitting cpu: drop the all-ones marker. */
+                       if (cpumask_weight(&boost_candidates) >= nr_cpu_ids)
+                               cpumask_clear(&boost_candidates);
+
+                       if (!cpu_rq(i)->nr_running) {
+                               mark_shallowest_cpu(i, &min_exit_latency, &boost_candidates);
+                               continue;
+                       }
+
+                       if (wake_util < lowest_util) {
+                               lowest_util = wake_util;
+                               lowest_cpu = i;
+                       }
+               }
+
+               /* Marker still intact: no fitting cpu yet, try the next group. */
+               if (cpumask_weight(&boost_candidates) >= nr_cpu_ids)
+                       continue;
+
+               target_cpu = boost_select_cpu(p, &boost_candidates);
+               if (cpu_selected(target_cpu)) {
+                       strcpy(state, "big idle");
+                       goto out;
+               }
+
+               target_cpu = boost_select_cpu(p, &backup_boost_candidates);
+               if (cpu_selected(target_cpu)) {
+                       strcpy(state, "little idle");
+                       goto out;
+               }
+       } while (sg = sg->next, sg != sd->groups);
+
+       if (go_up) {
+               strcpy(state, "lowest big cpu");
+               target_cpu = lowest_cpu;
+               goto out;
+       }
+
+       strcpy(state, "current cpu");
+       target_cpu = task_cpu(p);
+
+out:
+       trace_ehmp_select_boost_cpu(p, target_cpu, bt->trigger, state);
+       return target_cpu;
+}
+
+/**********************************************************************
+ * schedtune.prefer_idle                                              *
+ **********************************************************************/
+/*
+ * Record @cpu as the least utilized cpu seen so far.
+ *
+ * @lowest_cpu and @lowest_util are only updated when @new_util is
+ * strictly below the utilization already recorded.
+ */
+static void mark_lowest_cpu(int cpu, unsigned long new_util,
+                       int *lowest_cpu, unsigned long *lowest_util)
+{
+       if (new_util < *lowest_util) {
+               *lowest_util = new_util;
+               *lowest_cpu = cpu;
+       }
+}
+
+/*
+ * Select a cpu for a prefer-idle task @p.
+ *
+ * @sd:       sched domain whose groups are scanned in list order
+ * @p:        task being placed
+ * @min_util: lower bound applied to the estimated utilization
+ *
+ * Within each group, cpus whose estimated utilization fits their original
+ * capacity are considered first: the task's own cpu if idle, else the
+ * least utilized idle cpu, else the least utilized busy cpu. The scan
+ * stops at the first group that yields a target. If no group does, an
+ * idle cpu that is over capacity is used (the task's own cpu when
+ * possible). Returns -1 when no cpu is found.
+ */
+static int find_prefer_idle_target(struct sched_domain *sd,
+                       struct task_struct *p, unsigned long min_util)
+{
+       struct sched_group *sg;
+       int target_cpu = -1;
+       int lowest_cpu = -1;
+       int lowest_idle_cpu = -1;
+       int overcap_cpu = -1;
+       unsigned long lowest_util = ULONG_MAX;
+       unsigned long lowest_idle_util = ULONG_MAX;
+       unsigned long overcap_util = ULONG_MAX;
+       struct cpumask idle_candidates;
+       struct cpumask overcap_idle_candidates;
+
+       cpumask_clear(&idle_candidates);
+       cpumask_clear(&overcap_idle_candidates);
+
+       sg = sd->groups;
+
+       do {
+               int i;
+
+               for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
+                       unsigned long new_util, wake_util;
+
+                       if (!cpu_online(i))
+                               continue;
+
+                       wake_util = cpu_util_wake(i, p);
+                       new_util = wake_util + task_util(p);
+                       new_util = max(min_util, new_util);
+
+                       trace_ehmp_prefer_idle(p, task_cpu(p), i, task_util(p),
+                                                       new_util, idle_cpu(i));
+
+                       /* Over capacity: only idle cpus are kept, as fallback. */
+                       if (new_util > capacity_orig_of(i)) {
+                               if (idle_cpu(i)) {
+                                       cpumask_set_cpu(i, &overcap_idle_candidates);
+                                       mark_lowest_cpu(i, new_util,
+                                               &overcap_cpu, &overcap_util);
+                               }
+
+                               continue;
+                       }
+
+                       if (idle_cpu(i)) {
+                               /* An idle current cpu wins immediately. */
+                               if (task_cpu(p) == i) {
+                                       target_cpu = i;
+                                       break;
+                               }
+
+                               cpumask_set_cpu(i, &idle_candidates);
+                               mark_lowest_cpu(i, new_util,
+                                       &lowest_idle_cpu, &lowest_idle_util);
+
+                               continue;
+                       }
+
+                       mark_lowest_cpu(i, new_util, &lowest_cpu, &lowest_util);
+               }
+
+               if (cpu_selected(target_cpu))
+                       break;
+
+               if (cpumask_weight(&idle_candidates)) {
+                       target_cpu = lowest_idle_cpu;
+                       break;
+               }
+
+               if (cpu_selected(lowest_cpu)) {
+                       target_cpu = lowest_cpu;
+                       break;
+               }
+
+       } while (sg = sg->next, sg != sd->groups);
+
+       if (cpu_selected(target_cpu))
+               goto out;
+
+       /* Nothing fits: fall back to an idle cpu that is over capacity. */
+       if (cpumask_weight(&overcap_idle_candidates)) {
+               if (cpumask_test_cpu(task_cpu(p), &overcap_idle_candidates))
+                       target_cpu = task_cpu(p);
+               else
+                       target_cpu = overcap_cpu;
+
+               goto out;
+       }
+
+out:
+       trace_ehmp_prefer_idle_cpu_select(p, target_cpu);
+
+       return target_cpu;
+}
+
+/**********************************************************************
+ * On-time migration                                                  *
+ **********************************************************************/
+/* On-time load at or above which a task counts as heavy (sysfs clamps to 0..1024). */
+static unsigned long up_threshold;
+/* On-time load below which an on-time task may be released (sysfs clamps to 0..1024). */
+static unsigned long down_threshold;
+/* Minimum time an on-time task is kept in place before it may be released. */
+static unsigned int min_residency_us;
+
+/*
+ * sysfs show handler for "min_residency": print min_residency_us followed
+ * by a newline and return the number of bytes written.
+ *
+ * The value is unsigned, so it is printed with %u. scnprintf() over the
+ * PAGE_SIZE sysfs buffer is used so that large values are not truncated
+ * and the return value never exceeds what was written.
+ */
+static ssize_t show_min_residency(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       return scnprintf(buf, PAGE_SIZE, "%u\n", min_residency_us);
+}
+
+/*
+ * sysfs store handler for "min_residency": parse a decimal integer from
+ * @buf, raise negative values to 0 and store it in min_residency_us.
+ * Returns @count, or -EINVAL when sscanf() converts nothing.
+ */
+static ssize_t store_min_residency(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf,
+               size_t count)
+{
+       int val;
+
+       if (!sscanf(buf, "%d", &val))
+               return -EINVAL;
+
+       if (val < 0)
+               val = 0;
+
+       min_residency_us = val;
+
+       return count;
+}
+
+/* Read/write (0644) sysfs attribute for min_residency_us. */
+static struct kobj_attribute min_residency_attr =
+__ATTR(min_residency, 0644, show_min_residency, store_min_residency);
+
+/*
+ * sysfs show handler for "up_threshold": print up_threshold followed by a
+ * newline and return the number of bytes written.
+ *
+ * up_threshold is an unsigned long, so %lu is the matching conversion;
+ * scnprintf() bounds the write to the PAGE_SIZE sysfs buffer.
+ */
+static ssize_t show_up_threshold(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       return scnprintf(buf, PAGE_SIZE, "%lu\n", up_threshold);
+}
+
+/*
+ * sysfs store handler for "up_threshold": parse a decimal integer from
+ * @buf, clamp it to the range 0..1024 and store it in up_threshold.
+ * Returns @count, or -EINVAL when sscanf() converts nothing.
+ */
+static ssize_t store_up_threshold(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf,
+               size_t count)
+{
+       long val;
+
+       if (!sscanf(buf, "%ld", &val))
+               return -EINVAL;
+
+       if (val < 0)
+               val = 0;
+       else if (val > 1024)
+               val = 1024;
+
+       up_threshold = val;
+
+       return count;
+}
+
+/* Read/write (0644) sysfs attribute for up_threshold. */
+static struct kobj_attribute up_threshold_attr =
+__ATTR(up_threshold, 0644, show_up_threshold, store_up_threshold);
+
+/*
+ * sysfs show handler for "down_threshold": print down_threshold followed
+ * by a newline and return the number of bytes written.
+ *
+ * down_threshold is an unsigned long, so %lu is the matching conversion;
+ * scnprintf() bounds the write to the PAGE_SIZE sysfs buffer.
+ */
+static ssize_t show_down_threshold(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       return scnprintf(buf, PAGE_SIZE, "%lu\n", down_threshold);
+}
+
+/*
+ * sysfs store handler for "down_threshold": parse a decimal integer from
+ * @buf, clamp it to the range 0..1024 and store it in down_threshold.
+ * Returns @count, or -EINVAL when sscanf() converts nothing.
+ */
+static ssize_t store_down_threshold(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf,
+               size_t count)
+{
+       long val;
+
+       if (!sscanf(buf, "%ld", &val))
+               return -EINVAL;
+
+       if (val < 0)
+               val = 0;
+       else if (val > 1024)
+               val = 1024;
+
+       down_threshold = val;
+
+       return count;
+}
+
+/* Read/write (0644) sysfs attribute for down_threshold. */
+static struct kobj_attribute down_threshold_attr =
+__ATTR(down_threshold, 0644, show_down_threshold, store_down_threshold);
+
+/* Lvalue accessors for the on-time state embedded in a task's sched entity. */
+#define ontime_flag(p)                 (ontime_of(p)->flags)
+#define ontime_migration_time(p)       (ontime_of(p)->avg.ontime_migration_time)
+#define ontime_load_avg(p)             (ontime_of(p)->avg.load_avg)
+
+/* Return the on-time entity of task @p (p->se.ontime). */
+static inline struct ontime_entity *ontime_of(struct task_struct *p)
+{
+       return &p->se.ontime;
+}
+
+/* Mark @p as an on-time task and timestamp the moment it became one. */
+static inline void include_ontime_task(struct task_struct *p)
+{
+       ontime_flag(p) = ONTIME;
+
+       /* Manage time based on clock task of boot cpu(cpu0) */
+       ontime_migration_time(p) = cpu_rq(0)->clock_task;
+}
+
+/* Clear @p's on-time state: reset the timestamp and flag it NOT_ONTIME. */
+static inline void exclude_ontime_task(struct task_struct *p)
+{
+       ontime_migration_time(p) = 0;
+       ontime_flag(p) = NOT_ONTIME;
+}
+
+/*
+ * Pick the destination cpu for an on-time migration.
+ *
+ * Only cpus in both @sg and @mask that are idle and not already the
+ * target of an on-time migration are considered. An idle cpu that reports
+ * no idle state is returned at once; otherwise the cpu with the smallest
+ * idle exit latency wins. Returns -1 when there is no candidate.
+ */
+static int
+ontime_select_target_cpu(struct sched_group *sg, const struct cpumask *mask)
+{
+       unsigned int best_latency = UINT_MAX;
+       int best_cpu = -1;
+       int cpu;
+
+       for_each_cpu_and(cpu, sched_group_cpus(sg), mask) {
+               struct cpuidle_state *state;
+
+               if (!idle_cpu(cpu))
+                       continue;
+
+               if (cpu_rq(cpu)->ontime_migrating)
+                       continue;
+
+               state = idle_get_state(cpu_rq(cpu));
+               if (!state)
+                       return cpu;
+
+               if (state->exit_latency < best_latency) {
+                       best_latency = state->exit_latency;
+                       best_cpu = cpu;
+               }
+       }
+
+       return best_cpu;
+}
+
+/* Maximum number of queued entities inspected per runqueue. */
+#define TASK_TRACK_COUNT       5
+
+extern struct sched_entity *__pick_next_entity(struct sched_entity *se);
+/*
+ * Choose the task to migrate away from a runqueue.
+ *
+ * @se:              entity of the task currently running on the cfs_rq
+ * @dst_cpus:        cpus of the destination group
+ * @boost_migration: set non-zero when the pick is due to a boost rather
+ *                   than to the task's on-time load
+ *
+ * The current task is considered first, then up to TASK_TRACK_COUNT
+ * entities of se->cfs_rq. A task with schedtune prefer_perf set is taken
+ * immediately. Otherwise the task with the highest on-time load that may
+ * run on @dst_cpus wins; without global boost only tasks at or above
+ * up_threshold qualify. Returns NULL when no task qualifies.
+ *
+ * NOTE(review): the current task and a prefer_perf task are accepted
+ * without checking their allowed cpus against @dst_cpus here.
+ */
+static struct task_struct *
+ontime_pick_heavy_task(struct sched_entity *se, struct cpumask *dst_cpus,
+                                               int *boost_migration)
+{
+       struct task_struct *heaviest_task = NULL;
+       struct task_struct *p;
+       unsigned int max_util_avg = 0;
+       int task_count = 0;
+       int boosted = !!global_boost();
+
+       /*
+        * Since current task does not exist in entity list of cfs_rq,
+        * check first that current task is heavy.
+        */
+       if (boosted || ontime_load_avg(task_of(se)) >= up_threshold) {
+               heaviest_task = task_of(se);
+               max_util_avg = ontime_load_avg(task_of(se));
+               if (boosted)
+                       *boost_migration = 1;
+       }
+
+       se = __pick_first_entity(se->cfs_rq);
+       while (se && task_count < TASK_TRACK_COUNT) {
+               /* Skip non-task entity */
+               if (entity_is_cfs_rq(se))
+                       goto next_entity;
+
+               p = task_of(se);
+               /* A prefer_perf task overrides any load-based choice. */
+               if (schedtune_prefer_perf(p)) {
+                       heaviest_task = p;
+                       *boost_migration = 1;
+                       break;
+               }
+
+               if (!boosted && ontime_load_avg(p) < up_threshold)
+                       goto next_entity;
+
+               if (ontime_load_avg(p) > max_util_avg &&
+                   cpumask_intersects(dst_cpus, tsk_cpus_allowed(p))) {
+                       heaviest_task = p;
+                       max_util_avg = ontime_load_avg(p);
+                       *boost_migration = boosted;
+               }
+
+next_entity:
+               se = __pick_next_entity(se);
+               task_count++;
+       }
+
+       return heaviest_task;
+}
+
+/*
+ * Initialise the on-time load of a new task entity @se from @parent.
+ *
+ * The load sum and average are inherited from the parent; the migration
+ * timestamp is cleared, period_contrib starts at 1023 and the entity is
+ * flagged NOT_ONTIME. Group (cfs_rq) entities are left untouched.
+ */
+void ontime_new_entity_load(struct task_struct *parent, struct sched_entity *se)
+{
+       struct ontime_entity *child;
+       struct ontime_entity *from;
+
+       if (entity_is_cfs_rq(se))
+               return;
+
+       child = &se->ontime;
+       from = ontime_of(parent);
+
+       child->avg.load_sum = from->avg.load_sum;
+       child->avg.load_avg = from->avg.load_avg;
+       child->avg.ontime_migration_time = 0;
+       child->avg.period_contrib = 1023;
+       child->flags = NOT_ONTIME;
+
+       trace_ehmp_ontime_new_entity_load(task_of(se), &child->avg);
+}
+
+/*
+ * Structure of ontime migration environment.
+ * One instance exists per source cpu; it is filled in by
+ * ontime_migration() and consumed by ontime_migration_cpu_stop().
+ */
+struct ontime_env {
+       struct rq               *dst_rq;        /* set by the stopper from dst_cpu */
+       int                     dst_cpu;        /* cpu the task is moved to */
+       struct rq               *src_rq;        /* runqueue the task is taken from */
+       int                     src_cpu;        /* set by the stopper from src_rq */
+       struct task_struct      *target_task;   /* task to migrate (reference held) */
+       int                     boost_migration; /* non-zero: boost-driven pick */
+};
+DEFINE_PER_CPU(struct ontime_env, ontime_env);
+
+/*
+ * Return 1 when @p may be moved to env->dst_cpu: the destination must be
+ * in the task's allowed cpus and the task must not be running on the
+ * source runqueue. Returns 0 otherwise.
+ */
+static int can_migrate(struct task_struct *p, struct ontime_env *env)
+{
+       int allowed = cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p));
+
+       if (allowed && !task_running(env->src_rq, p))
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Move @p from env->src_rq to env->dst_rq.
+ *
+ * The task is marked TASK_ON_RQ_MIGRATING while it is dequeued, retargeted
+ * and enqueued again, then marked TASK_ON_RQ_QUEUED, and the destination
+ * runqueue is checked for preemption.
+ * NOTE(review): the caller visible in this file holds both runqueue locks.
+ */
+static void move_task(struct task_struct *p, struct ontime_env *env)
+{
+       p->on_rq = TASK_ON_RQ_MIGRATING;
+       deactivate_task(env->src_rq, p, 0);
+       set_task_cpu(p, env->dst_cpu);
+
+       activate_task(env->dst_rq, p, 0);
+       p->on_rq = TASK_ON_RQ_QUEUED;
+       check_preempt_curr(env->dst_rq, p, 0);
+}
+
+/*
+ * Migrate @target if it is still queued on env->src_rq and may move.
+ *
+ * The source runqueue's cfs task list is searched for @target; when it is
+ * found and can_migrate() allows it, the task is moved and 1 is returned.
+ * Returns 0 when the task is not on the list or cannot be migrated.
+ */
+static int move_specific_task(struct task_struct *target, struct ontime_env *env)
+{
+       struct task_struct *cur, *tmp;
+
+       list_for_each_entry_safe(cur, tmp, &env->src_rq->cfs_tasks, se.group_node) {
+               if (cur != target || !can_migrate(cur, env))
+                       continue;
+
+               move_task(cur, env);
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Stopper callback that performs one on-time migration.
+ *
+ * @data: the struct ontime_env prepared by ontime_migration()
+ *
+ * With the source runqueue locked, the request is re-validated (task
+ * still flagged ONTIME_MIGRATING, not exiting, still on this cpu's
+ * runqueue, more than one runnable task). If a sched domain of the
+ * destination cpu spans the source cpu, the task is moved under both
+ * runqueue locks. A successful boost migration leaves the task
+ * NOT_ONTIME, a successful load-driven one marks it ONTIME; any failure
+ * resets it to NOT_ONTIME. In all cases the active_balance and
+ * ontime_migrating markers are cleared and the task reference taken by
+ * ontime_migration() is dropped. Always returns 0.
+ */
+static int ontime_migration_cpu_stop(void *data)
+{
+       struct ontime_env *env = data;
+       struct rq *src_rq, *dst_rq;
+       int src_cpu, dst_cpu;
+       struct task_struct *p;
+       struct sched_domain *sd;
+       int boost_migration;
+
+       /* Initialize environment data */
+       src_rq = env->src_rq;
+       dst_rq = env->dst_rq = cpu_rq(env->dst_cpu);
+       src_cpu = env->src_cpu = env->src_rq->cpu;
+       dst_cpu = env->dst_cpu;
+       p = env->target_task;
+       boost_migration = env->boost_migration;
+
+       raw_spin_lock_irq(&src_rq->lock);
+
+       if (!(ontime_flag(p) & ONTIME_MIGRATING))
+               goto out_unlock;
+
+       if (p->exit_state)
+               goto out_unlock;
+
+       if (unlikely(src_cpu != smp_processor_id()))
+               goto out_unlock;
+
+       if (src_rq->nr_running <= 1)
+               goto out_unlock;
+
+       if (src_rq != task_rq(p))
+               goto out_unlock;
+
+       BUG_ON(src_rq == dst_rq);
+
+       double_lock_balance(src_rq, dst_rq);
+
+       rcu_read_lock();
+       /* Look for a domain of dst_cpu that also contains src_cpu. */
+       for_each_domain(dst_cpu, sd)
+               if (cpumask_test_cpu(src_cpu, sched_domain_span(sd)))
+                       break;
+
+       if (likely(sd) && move_specific_task(p, env)) {
+               if (boost_migration) {
+                       /* boost task is not classified as ontime task */
+                       exclude_ontime_task(p);
+               } else
+                       include_ontime_task(p);
+
+               rcu_read_unlock();
+               double_unlock_balance(src_rq, dst_rq);
+
+               trace_ehmp_ontime_migration(p, ontime_of(p)->avg.load_avg,
+                                       src_cpu, dst_cpu, boost_migration);
+               goto success_unlock;
+       }
+
+       rcu_read_unlock();
+       double_unlock_balance(src_rq, dst_rq);
+
+out_unlock:
+       /* Migration did not happen: drop the ONTIME_MIGRATING state. */
+       exclude_ontime_task(p);
+
+success_unlock:
+       src_rq->active_balance = 0;
+       dst_rq->ontime_migrating = 0;
+
+       raw_spin_unlock_irq(&src_rq->lock);
+       put_task_struct(p);
+
+       return 0;
+}
+
+/* Per-cpu stopper work item used to queue ontime_migration_cpu_stop(). */
+DEFINE_PER_CPU(struct cpu_stop_work, ontime_migration_work);
+
+/* Allows only one ontime_migration() pass at a time (taken with trylock). */
+static DEFINE_SPINLOCK(om_lock);
+
+/*
+ * Scan for heavy tasks and push them to the next sched group.
+ *
+ * Returns immediately if another pass holds om_lock. Otherwise the groups
+ * of CPU0's sd_ea domain are walked, each one paired with its successor
+ * as destination; the loop condition stops before the last group becomes
+ * a source. For every active source cpu that is running a cfs task and
+ * has no active balance pending, an idle destination cpu and a heavy task
+ * are looked up under the runqueue lock. When both exist, the per-cpu
+ * ontime_env is filled in and the move is queued on the source cpu's
+ * stopper.
+ */
+void ontime_migration(void)
+{
+       struct sched_domain *sd;
+       struct sched_group *src_sg, *dst_sg;
+       int cpu;
+
+       if (!spin_trylock(&om_lock))
+               return;
+
+       rcu_read_lock();
+
+       sd = rcu_dereference(per_cpu(sd_ea, 0));
+       if (!sd)
+               goto ontime_migration_exit;
+
+       src_sg = sd->groups;
+
+       do {
+               dst_sg = src_sg->next;
+               for_each_cpu_and(cpu, sched_group_cpus(src_sg), cpu_active_mask) {
+                       unsigned long flags;
+                       struct rq *rq;
+                       struct sched_entity *se;
+                       struct task_struct *p;
+                       int dst_cpu;
+                       struct ontime_env *env = &per_cpu(ontime_env, cpu);
+                       int boost_migration = 0;
+
+                       rq = cpu_rq(cpu);
+                       raw_spin_lock_irqsave(&rq->lock, flags);
+
+                       /*
+                        * Ontime migration is not performed when active balance
+                        * is in progress.
+                        */
+                       if (rq->active_balance) {
+                               raw_spin_unlock_irqrestore(&rq->lock, flags);
+                               continue;
+                       }
+
+                       /*
+                        * No need to migration if source cpu does not have cfs
+                        * tasks.
+                        */
+                       if (!rq->cfs.curr) {
+                               raw_spin_unlock_irqrestore(&rq->lock, flags);
+                               continue;
+                       }
+
+                       se = rq->cfs.curr;
+
+                       /*
+                        * Find task entity if entity is cfs_rq.
+                        * NOTE(review): assumes every nested cfs_rq on the
+                        * way down has a non-NULL curr - confirm.
+                        */
+                       if (entity_is_cfs_rq(se)) {
+                               struct cfs_rq *cfs_rq;
+
+                               cfs_rq = se->my_q;
+                               while (cfs_rq) {
+                                       se = cfs_rq->curr;
+                                       cfs_rq = se->my_q;
+                               }
+                       }
+
+                       /*
+                        * Select cpu to migrate the task to. Return negative number
+                        * if there is no idle cpu in sg.
+                        */
+                       dst_cpu = ontime_select_target_cpu(dst_sg, cpu_active_mask);
+                       if (dst_cpu < 0) {
+                               raw_spin_unlock_irqrestore(&rq->lock, flags);
+                               continue;
+                       }
+
+                       /*
+                        * Pick task to be migrated. Return NULL if there is no
+                        * heavy task in rq.
+                        */
+                       p = ontime_pick_heavy_task(se, sched_group_cpus(dst_sg),
+                                                       &boost_migration);
+                       if (!p) {
+                               raw_spin_unlock_irqrestore(&rq->lock, flags);
+                               continue;
+                       }
+
+                       /* The reference is dropped by the stopper callback. */
+                       ontime_flag(p) = ONTIME_MIGRATING;
+                       get_task_struct(p);
+
+                       /* Set environment data */
+                       env->dst_cpu = dst_cpu;
+                       env->src_rq = rq;
+                       env->target_task = p;
+                       env->boost_migration = boost_migration;
+
+                       /* Prevent active balance to use stopper for migration */
+                       rq->active_balance = 1;
+
+                       /* Keep other passes from choosing the same destination. */
+                       cpu_rq(dst_cpu)->ontime_migrating = 1;
+
+                       raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+                       /* Migrate task through stopper */
+                       stop_one_cpu_nowait(cpu,
+                               ontime_migration_cpu_stop, env,
+                               &per_cpu(ontime_migration_work, cpu));
+               }
+       } while (src_sg = src_sg->next, src_sg->next != sd->groups);
+
+ontime_migration_exit:
+       rcu_read_unlock();
+       spin_unlock(&om_lock);
+}
+
+/*
+ * Tell the load balancer whether task @p may be moved to @dst_cpu.
+ *
+ * Returns true for tasks that are not on-time tasks and for moves into
+ * the core group of maxcap_cpu. Returns false while an on-time migration
+ * of the task is in flight, while the task has not yet stayed for
+ * min_residency_us, and while it is still heavy (load at or above
+ * down_threshold) on a cpu it has to itself. When the move is allowed
+ * because the cpu is busy or the task became light, the task's on-time
+ * state is released first.
+ */
+int ontime_can_migration(struct task_struct *p, int dst_cpu)
+{
+       u64 delta;
+
+       if (ontime_flag(p) & NOT_ONTIME) {
+               trace_ehmp_ontime_check_migrate(p, dst_cpu, true, "not ontime");
+               return true;
+       }
+
+       if (ontime_flag(p) & ONTIME_MIGRATING) {
+               trace_ehmp_ontime_check_migrate(p, dst_cpu, false, "migrating");
+               return false;
+       }
+
+       if (cpumask_test_cpu(dst_cpu, cpu_coregroup_mask(maxcap_cpu))) {
+               trace_ehmp_ontime_check_migrate(p, dst_cpu, true, "ontime on big");
+               return true;
+       }
+
+       /*
+        * At this point, task is "ontime task" and running on big
+        * and load balancer is trying to migrate task to LITTLE.
+        */
+       delta = cpu_rq(0)->clock_task - ontime_migration_time(p);
+       /* Divide by 1024; presumably a cheap ns -> us conversion - confirm. */
+       delta = delta >> 10;
+       if (delta <= min_residency_us) {
+               trace_ehmp_ontime_check_migrate(p, dst_cpu, false, "min residency");
+               return false;
+       }
+
+       if (cpu_rq(task_cpu(p))->nr_running > 1) {
+               trace_ehmp_ontime_check_migrate(p, dst_cpu, true, "big is busy");
+               goto release;
+       }
+
+       if (ontime_load_avg(p) >= down_threshold) {
+               trace_ehmp_ontime_check_migrate(p, dst_cpu, false, "heavy task");
+               return false;
+       }
+
+       trace_ehmp_ontime_check_migrate(p, dst_cpu, true, "ontime_release");
+release:
+       exclude_ontime_task(p);
+
+       return true;
+}
+
+/*
+ * ontime_task_wakeup - choose a wakeup cpu for @p from its ontime state.
+ *
+ * Returns -1 when ontime placement does not apply, task_cpu(p) when an
+ * ontime task's current cpu is idle, and otherwise whatever
+ * ontime_select_target_cpu() returned.  The caller must hold the rcu read
+ * lock because the sd_ea domain pointer is dereferenced here.
+ */
+static int ontime_task_wakeup(struct task_struct *p)
+{
+       struct sched_domain *sd;
+       int target_cpu = -1;
+       u64 delta;
+
+       /* A task that is not ontime must reach up_threshold first. */
+       if ((ontime_flag(p) & NOT_ONTIME) && ontime_load_avg(p) < up_threshold)
+               return -1;
+
+       if (ontime_flag(p) & ONTIME) {
+               delta = (cpu_rq(0)->clock_task - ontime_migration_time(p)) >> 10;
+
+               /* Residency served and load below down_threshold: release. */
+               if (delta > min_residency_us &&
+                   ontime_load_avg(p) < down_threshold) {
+                       exclude_ontime_task(p);
+                       return -1;
+               }
+
+               /* The task's current cpu is idle, so simply keep it there. */
+               if (idle_cpu(task_cpu(p)))
+                       return task_cpu(p);
+       }
+
+       /* caller must hold rcu for sched domain */
+       sd = rcu_dereference(per_cpu(sd_ea, maxcap_cpu));
+       if (!sd)
+               return -1;
+
+       target_cpu = ontime_select_target_cpu(sd->groups, tsk_cpus_allowed(p));
+       if (cpu_selected(target_cpu)) {
+               /* Found a target: start tracking the task if it was not. */
+               if (ontime_flag(p) & NOT_ONTIME)
+                       include_ontime_task(p);
+       } else if (ontime_flag(p) & ONTIME) {
+               /* No target: a tracked task loses its ontime status. */
+               exclude_ontime_task(p);
+       }
+
+       return target_cpu;
+}
+
+/*
+ * Pull the next balance of the currently executing cpu forward to "now"
+ * when @oa has reached up_threshold on a @cpu outside the maxcap_cpu
+ * coregroup.
+ */
+static void ontime_update_next_balance(int cpu, struct ontime_avg *oa)
+{
+       /* Nothing to do for cpus in the maxcap_cpu coregroup or light loads. */
+       if (cpumask_test_cpu(cpu, cpu_coregroup_mask(maxcap_cpu)) ||
+           oa->load_avg < up_threshold)
+               return;
+
+       /*
+        * The running cpu (not necessarily @cpu) is updated because its
+        * tick is most likely to occur first.
+        */
+       cpu_rq(smp_processor_id())->next_balance = jiffies;
+}
+
+#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
+
+extern u64 decay_load(u64 val, u64 n);
+
+/*
+ * Sum the three segments of a contribution that spans @periods periods:
+ * the decayed remainder @d1 of the first period, the fully elapsed
+ * periods in between, and the portion @d3 of the current period.
+ */
+static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
+{
+       u32 head = decay_load((u64)d1, periods);
+       u32 middle = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
+
+       return head + middle + d3;
+}
+
+/*
+ * ontime_update_load_avg : load tracking for ontime-migration
+ *
+ * @delta : elapsed time since last update
+ * @cpu : cpu whose frequency and capacity scale factors are applied
+ * @weight : load weight; the contribution is accumulated only if non-zero
+ * @sa : sched_avg of the entity whose ontime average is updated
+ *
+ * load_avg is recomputed (and the next balance possibly pulled forward)
+ * only when at least one full period has elapsed since the last update.
+ */
+void ontime_update_load_avg(u64 delta, int cpu, unsigned long weight, struct sched_avg *sa)
+{
+       struct ontime_avg *oa = &se_of(sa)->ontime.avg;
+       unsigned long scale_freq, scale_cpu;
+       u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
+       u64 periods;
+
+       scale_freq = arch_scale_freq_capacity(NULL, cpu);
+       scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+
+       delta += oa->period_contrib;
+       periods = delta / 1024; /* A period is 1024us (~1ms) */
+
+       if (periods) {
+               oa->load_sum = decay_load(oa->load_sum, periods);
+
+               delta %= 1024;
+               contrib = __accumulate_pelt_segments(periods,
+                               1024 - oa->period_contrib, delta);
+       }
+       oa->period_contrib = delta;
+
+       if (weight) {
+               contrib = cap_scale(contrib, scale_freq);
+               oa->load_sum += contrib * scale_cpu;
+       }
+
+       if (!periods)
+               return;
+
+       oa->load_avg = div_u64(oa->load_sum, LOAD_AVG_MAX - 1024 + oa->period_contrib);
+       ontime_update_next_balance(cpu, oa);
+}
+
+/* Emit the ehmp_ontime_load_avg_task tracepoint for @p. */
+void ontime_trace_task_info(struct task_struct *p)
+{
+       trace_ehmp_ontime_load_avg_task(p,
+                                       &ontime_of(p)->avg,
+                                       ontime_flag(p));
+}
+
+/* Capacity of the first (index 0) capacity state of @cpu at SD_LEVEL0. */
+static inline unsigned long mincap_of(int cpu)
+{
+       unsigned long cap = sge_array[cpu][SD_LEVEL0]->cap_states[0].cap;
+
+       return cap;
+}
+
+/*
+ * init_ontime - initialise the ontime migration tunables.
+ *
+ * Computes built-in defaults and then lets the ehmp device tree node
+ * override each of them.  A property that is absent or unreadable leaves
+ * the corresponding default untouched.  Does nothing when there is no
+ * ehmp node.  Always returns 0.
+ */
+static int __init init_ontime(void)
+{
+       struct device_node *dn;
+       u32 prop;
+
+       dn = get_ehmp_node();
+       if (!dn)
+               return 0;
+
+       /*
+        * Initialize default values:
+        *   up_threshold       = 40% of LITTLE maximum capacity
+        *   down_threshold     = 50% of big minimum capacity
+        *   min_residency      = 8ms
+        */
+       up_threshold = capacity_orig_of(0) * 40 / 100;
+       down_threshold = mincap_of(maxcap_cpu) * 50 / 100;
+       min_residency_us = 8192;
+
+       /*
+        * of_property_read_u32() returns 0 only on success and does not
+        * write @prop otherwise, so only apply values that were really read.
+        */
+       if (!of_property_read_u32(dn, "up-threshold", &prop))
+               up_threshold = prop;
+
+       if (!of_property_read_u32(dn, "down-threshold", &prop))
+               down_threshold = prop;
+
+       if (!of_property_read_u32(dn, "min-residency-us", &prop))
+               min_residency_us = prop;
+
+       return 0;
+}
+pure_initcall(init_ontime);
+
+/**********************************************************************
+ * cpu selection                                                      *
+ **********************************************************************/
+extern unsigned long boosted_task_util(struct task_struct *task);
+extern unsigned long capacity_curr_of(int cpu);
+extern struct energy_env *get_eenv(struct task_struct *p, int prev_cpu);
+extern int select_energy_cpu_idx(struct energy_env *eenv);
+extern int find_best_target(struct task_struct *p, int *backup_cpu,
+                                  bool boosted, bool prefer_idle);
+
+#define EAS_CPU_PRV    0
+#define EAS_CPU_NXT    1
+#define EAS_CPU_BKP    2
+
+/*
+ * select_energy_cpu - pick the most energy-efficient cpu for @p.
+ *
+ * Fills the NXT/BKP candidate slots of the energy environment through
+ * find_best_target() and lets select_energy_cpu_idx() choose among them.
+ * Returns the chosen cpu id, or -1 when there is no candidate besides
+ * prev_cpu or the energy computation fails.  @sd is not used.
+ */
+static int select_energy_cpu(struct sched_domain *sd, struct task_struct *p,
+                               int prev_cpu, bool boosted)
+{
+       struct energy_env *eenv = get_eenv(p, prev_cpu);
+
+       if (eenv->max_cpu_count < 2)
+               return -1;
+
+       eenv->max_cpu_count = EAS_CPU_BKP + 1;
+
+       /* Find a cpu with sufficient capacity */
+       eenv->cpu[EAS_CPU_NXT].cpu_id = find_best_target(p,
+                       &eenv->cpu[EAS_CPU_BKP].cpu_id, boosted, 0);
+
+       /* take note if no backup was found */
+       if (eenv->cpu[EAS_CPU_BKP].cpu_id < 0)
+               eenv->max_cpu_count = EAS_CPU_BKP;
+
+       /*
+        * No target either: there is no energy-awareness candidate beyond
+        * prev_cpu, so fall back to the regular slow-path.
+        */
+       if (eenv->cpu[EAS_CPU_NXT].cpu_id < 0) {
+               eenv->max_cpu_count = EAS_CPU_NXT;
+               return -1;
+       }
+
+       /* find most energy-efficient CPU */
+       if (select_energy_cpu_idx(eenv) < 0)
+               return -1;
+
+       return eenv->cpu[eenv->next_idx].cpu_id;
+}
+
+/*
+ * exynos_select_cpu - EHMP wakeup cpu selection.
+ *
+ * Tries, in order: ontime placement, boost-trigger placement, the waking
+ * cpu when a sync wakeup hint is enabled and allowed, a prefer-idle target
+ * and finally the energy-aware search.  The first step that yields a
+ * selected cpu wins.
+ *
+ * Returns the selected cpu, or the last (unselected) result, initially -1,
+ * when no step found one.  @sd_flag is not used.
+ */
+int exynos_select_cpu(struct task_struct *p, int prev_cpu, int sync, int sd_flag)
+{
+       struct sched_domain *sd;
+       int target_cpu = -1;
+       bool boosted, prefer_idle;
+       unsigned long min_util;
+       struct boost_trigger trigger = {
+               .trigger = 0,
+               .boost_val = 0
+       };
+
+       rcu_read_lock();
+
+       target_cpu = ontime_task_wakeup(p);
+       if (cpu_selected(target_cpu))
+               goto unlock;
+
+       /* Find target cpu from lowest capacity domain(cpu0) */
+       sd = rcu_dereference(per_cpu(sd_ea, 0));
+       if (!sd)
+               goto unlock;
+
+       /* Read the boost once so both hints agree with each other. */
+       boosted = schedtune_task_boost(p) > 0;
+       prefer_idle = sched_feat(EAS_PREFER_IDLE) ? boosted : 0;
+
+       min_util = boosted_task_util(p);
+
+       if (check_boost_trigger(p, &trigger)) {
+               target_cpu = find_boost_target(sd, p, min_util, &trigger);
+               if (cpu_selected(target_cpu))
+                       goto unlock;
+       }
+
+       if (sysctl_sched_sync_hint_enable && sync) {
+               int cpu = smp_processor_id();
+
+               if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) {
+                       target_cpu = cpu;
+                       goto unlock;
+               }
+       }
+
+       if (prefer_idle) {
+               target_cpu = find_prefer_idle_target(sd, p, min_util);
+               if (cpu_selected(target_cpu))
+                       goto unlock;
+       }
+
+       /*
+        * NOTE(review): the energy-aware search runs only while sd is
+        * overutilized; confirm that this polarity is intended.
+        */
+       if (sched_feat(ENERGY_AWARE) && sd_overutilized(sd))
+               target_cpu = select_energy_cpu(sd, p, prev_cpu, boosted);
+
+unlock:
+       rcu_read_unlock();
+
+       return target_cpu;
+}
+
+/**********************************************************************
+ * Sysfs                                                              *
+ **********************************************************************/
+/*
+ * Attributes exposed through the ehmp attribute group; the list is
+ * NULL-terminated.
+ */
+static struct attribute *ehmp_attrs[] = {
+       &global_boost_attr.attr,
+       &min_residency_attr.attr,
+       &up_threshold_attr.attr,
+       &down_threshold_attr.attr,
+       &top_overutil_attr.attr,
+       &bot_overutil_attr.attr,
+       &prefer_perf_attr.attr,
+       NULL,
+};
+
+static const struct attribute_group ehmp_group = {
+       .attrs = ehmp_attrs,
+};
+
+static struct kobject *ehmp_kobj;
+
+/*
+ * init_sysfs - create the "ehmp" kobject under kernel_kobj and populate
+ * it with the ehmp attribute group.
+ *
+ * Returns 0 on success, -ENOMEM when the kobject cannot be created, or
+ * the error reported by sysfs_create_group().  On failure the kobject is
+ * released again so no half-initialised directory is left behind.
+ */
+static int __init init_sysfs(void)
+{
+       int ret;
+
+       ehmp_kobj = kobject_create_and_add("ehmp", kernel_kobj);
+       if (!ehmp_kobj)
+               return -ENOMEM;
+
+       ret = sysfs_create_group(ehmp_kobj, &ehmp_group);
+       if (ret) {
+               kobject_put(ehmp_kobj);
+               ehmp_kobj = NULL;
+       }
+
+       return ret;
+}
+late_initcall(init_sysfs);
index 337b954a91b3fde1d997b0e5387148ced5f4a820..4ac11857ffcadf19401d89e33b75ace53a10ac0c 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/mempolicy.h>
 #include <linux/migrate.h>
 #include <linux/task_work.h>
+#include <linux/ehmp.h>
 
 #include <trace/events/sched.h>
 
@@ -609,7 +610,7 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
        return rb_entry(left, struct sched_entity, run_node);
 }
 
-static struct sched_entity *__pick_next_entity(struct sched_entity *se)
+struct sched_entity *__pick_next_entity(struct sched_entity *se)
 {
        struct rb_node *next = rb_next(&se->run_node);
 
@@ -757,6 +758,8 @@ void init_entity_runnable_average(struct sched_entity *se)
        sa->util_avg = 0;
        sa->util_sum = 0;
        /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
+
+       ontime_new_entity_load(current, se);
 }
 
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
@@ -793,6 +796,11 @@ void post_init_entity_util_avg(struct sched_entity *se)
        struct sched_avg *sa = &se->avg;
        long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
 
+       if (sched_feat(EXYNOS_HMP)) {
+               exynos_init_entity_util_avg(se);
+               goto util_init_done;
+       }
+
        if (cap > 0) {
                if (cfs_rq->avg.util_avg != 0) {
                        sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
@@ -806,6 +814,7 @@ void post_init_entity_util_avg(struct sched_entity *se)
                sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
        }
 
+util_init_done:
        if (entity_is_task(se)) {
                struct task_struct *p = task_of(se);
                if (p->sched_class != &fair_sched_class) {
@@ -2840,7 +2849,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
  * Approximate:
  *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
  */
-static u64 decay_load(u64 val, u64 n)
+u64 decay_load(u64 val, u64 n)
 {
        unsigned int local_n;
 
@@ -3025,6 +3034,9 @@ ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
        if (!weight)
                running = 0;
 
+       if (!cfs_rq && !rt_rq)
+               ontime_update_load_avg(delta, cpu, weight, sa);
+
        /*
         * Now we know we crossed measurement unit boundaries. The *_avg
         * accrues by two steps:
@@ -3458,6 +3470,9 @@ static inline void update_load_avg(struct sched_entity *se, int flags)
 
        if (decayed && (flags & UPDATE_TG))
                update_tg_load_avg(cfs_rq, 0);
+
+       if (entity_is_task(se))
+               ontime_trace_task_info(task_of(se));
 }
 
 /**
@@ -5550,85 +5565,6 @@ static unsigned long __cpu_norm_util(unsigned long util, unsigned long capacity)
  * Hence - be careful when enabling DEBUG_EENV_DECISIONS
  * expecially if WALT is the task signal.
  */
-/*#define DEBUG_EENV_DECISIONS*/
-
-#ifdef DEBUG_EENV_DECISIONS
-/* max of 8 levels of sched groups traversed */
-#define EAS_EENV_DEBUG_LEVELS 16
-
-struct _eenv_debug {
-       unsigned long cap;
-       unsigned long norm_util;
-       unsigned long cap_energy;
-       unsigned long idle_energy;
-       unsigned long this_energy;
-       unsigned long this_busy_energy;
-       unsigned long this_idle_energy;
-       cpumask_t group_cpumask;
-       unsigned long cpu_util[1];
-};
-#endif
-
-struct eenv_cpu {
-       /* CPU ID, must be in cpus_mask */
-       int     cpu_id;
-
-       /*
-        * Index (into sched_group_energy::cap_states) of the OPP the
-        * CPU needs to run at if the task is placed on it.
-        * This includes the both active and blocked load, due to
-        * other tasks on this CPU,  as well as the task's own
-        * utilization.
-       */
-       int     cap_idx;
-       int     cap;
-
-       /* Estimated system energy */
-       unsigned long energy;
-
-       /* Estimated energy variation wrt EAS_CPU_PRV */
-       long nrg_delta;
-
-#ifdef DEBUG_EENV_DECISIONS
-       struct _eenv_debug *debug;
-       int debug_idx;
-#endif /* DEBUG_EENV_DECISIONS */
-};
-
-struct energy_env {
-       /* Utilization to move */
-       struct task_struct      *p;
-       unsigned long           util_delta;
-       unsigned long           util_delta_boosted;
-
-       /* Mask of CPUs candidates to evaluate */
-       cpumask_t               cpus_mask;
-
-       /* CPU candidates to evaluate */
-       struct eenv_cpu *cpu;
-       int eenv_cpu_count;
-
-#ifdef DEBUG_EENV_DECISIONS
-       /* pointer to the memory block reserved
-        * for debug on this CPU - there will be
-        * sizeof(struct _eenv_debug) *
-        *  (EAS_CPU_CNT * EAS_EENV_DEBUG_LEVELS)
-        * bytes allocated here.
-        */
-       struct _eenv_debug *debug;
-#endif
-       /*
-        * Index (into energy_env::cpu) of the morst energy efficient CPU for
-        * the specified energy_env::task
-        */
-       int     next_idx;
-       int     max_cpu_count;
-
-       /* Support data */
-       struct sched_group      *sg_top;
-       struct sched_group      *sg_cap;
-       struct sched_group      *sg;
-};
 
 static int cpu_util_wake(int cpu, struct task_struct *p);
 
@@ -6014,7 +5950,7 @@ static void dump_eenv_debug(struct energy_env *eenv)
  * A value greater than zero means that the most energy efficient CPU is the
  * one represented by eenv->cpu[eenv->next_idx].cpu_id.
  */
-static inline int select_energy_cpu_idx(struct energy_env *eenv)
+int select_energy_cpu_idx(struct energy_env *eenv)
 {
        int last_cpu_idx = eenv->max_cpu_count - 1;
        struct sched_domain *sd;
@@ -6295,7 +6231,7 @@ boosted_cpu_util(int cpu)
        return util + margin;
 }
 
-static inline unsigned long
+unsigned long
 boosted_task_util(struct task_struct *task)
 {
        unsigned long util = task_util(task);
@@ -6882,7 +6818,7 @@ static int start_cpu(bool boosted)
        return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu;
 }
 
-static inline int find_best_target(struct task_struct *p, int *backup_cpu,
+int find_best_target(struct task_struct *p, int *backup_cpu,
                                   bool boosted, bool prefer_idle)
 {
        unsigned long best_idle_min_cap_orig = ULONG_MAX;
@@ -7277,7 +7213,7 @@ static inline void reset_eenv(struct energy_env *eenv)
  * filled in here. Callers are responsible for adding
  * other CPU candidates up to eenv->max_cpu_count.
  */
-static inline struct energy_env *get_eenv(struct task_struct *p, int prev_cpu)
+struct energy_env *get_eenv(struct task_struct *p, int prev_cpu)
 {
        struct energy_env *eenv;
        cpumask_t cpumask_possible_cpus;
@@ -7495,6 +7431,14 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
                              cpumask_test_cpu(cpu, &p->cpus_allowed);
        }
 
+       if (sched_feat(EXYNOS_HMP)) {
+               int selected_cpu;
+
+               selected_cpu = exynos_select_cpu(p, prev_cpu, sync, sd_flag);
+               if (selected_cpu >= 0)
+                       return selected_cpu;
+       }
+
        for_each_domain(cpu, tmp) {
                if (!(tmp->flags & SD_LOAD_BALANCE))
                        break;
@@ -8268,6 +8212,11 @@ static inline int migrate_degrades_locality(struct task_struct *p,
 }
 #endif
 
+/* True when @cpu has a strictly smaller original capacity than @ref. */
+static inline bool smaller_cpu_capacity(int cpu, int ref)
+{
+       return capacity_orig_of(ref) > capacity_orig_of(cpu);
+}
+
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
@@ -8280,11 +8229,21 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 
        /*
         * We do not migrate tasks that are:
+        * 0) cannot be migrated to smaller capacity cpu due to schedtune.prefer_perf, or
         * 1) throttled_lb_pair, or
         * 2) cannot be migrated to this CPU due to cpus_allowed, or
         * 3) running (obviously), or
         * 4) are cache-hot on their current CPU.
         */
+       if (!ontime_can_migration(p, env->dst_cpu))
+               return 0;
+
+#ifdef CONFIG_SCHED_TUNE
+       if (smaller_cpu_capacity(env->dst_cpu, env->src_cpu) &&
+           schedtune_prefer_perf(p))
+               return 0;
+#endif
+
        if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
                return 0;
 
@@ -9762,6 +9721,9 @@ static int need_active_balance(struct lb_env *env)
                        return 1;
        }
 
+       if (sched_feat(EXYNOS_HMP))
+               return exynos_need_active_balance(env->idle, sd, env->src_cpu, env->dst_cpu);
+
        /*
         * The dst_cpu is idle and the src_cpu CPU has only 1 CFS task.
         * It's worth migrating the task if the src_cpu's capacity is reduced
@@ -10820,6 +10782,9 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
 #else
        rebalance_domains(this_rq, idle);
 #endif
+
+       ontime_migration();
+       schedtune_group_util_update();
 }
 
 /*
index 306333beea5f28935a2586a7bbe498fe8f2b83e1..524fdaee49321843a3e40dee8892d81d89fd5f8d 100644 (file)
@@ -113,3 +113,9 @@ SCHED_FEAT(ENERGY_AWARE, false)
 SCHED_FEAT(EAS_PREFER_IDLE, true)
 SCHED_FEAT(FIND_BEST_TARGET, true)
 SCHED_FEAT(FBT_STRICT_ORDER, true)
+
+#ifdef CONFIG_SCHED_EHMP
+SCHED_FEAT(EXYNOS_HMP, true)
+#else
+SCHED_FEAT(EXYNOS_HMP, false)
+#endif
index 453ce4530dd9f0cf1821237eff2053e8a7f5dc0d..5d4480705485919c36bef7ab6cea654a520b9a77 100644 (file)
@@ -788,6 +788,9 @@ struct rq {
        u64 cum_window_demand;
 #endif /* CONFIG_SCHED_WALT */
 
+#ifdef CONFIG_SCHED_EHMP
+       bool ontime_migrating;
+#endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
        u64 prev_irq_time;
@@ -2140,6 +2143,85 @@ extern void nohz_balance_exit_idle(unsigned int cpu);
 static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 #endif
 
+/*#define DEBUG_EENV_DECISIONS*/
+
+#ifdef DEBUG_EENV_DECISIONS
+/* max of 8 levels of sched groups traversed */
+#define EAS_EENV_DEBUG_LEVELS 16
+
+struct _eenv_debug {
+       unsigned long cap;
+       unsigned long norm_util;
+       unsigned long cap_energy;
+       unsigned long idle_energy;
+       unsigned long this_energy;
+       unsigned long this_busy_energy;
+       unsigned long this_idle_energy;
+       cpumask_t group_cpumask;
+       unsigned long cpu_util[1];
+};
+#endif
+
+struct eenv_cpu {
+       /* CPU ID, must be in cpus_mask */
+       int     cpu_id;
+
+       /*
+        * Index (into sched_group_energy::cap_states) of the OPP the
+        * CPU needs to run at if the task is placed on it.
+        * This includes both the active and blocked load, due to
+        * other tasks on this CPU, as well as the task's own
+        * utilization.
+        */
+       int     cap_idx;
+       int     cap;
+
+       /* Estimated system energy */
+       unsigned long energy;
+
+       /* Estimated energy variation wrt EAS_CPU_PRV */
+       long nrg_delta;
+
+#ifdef DEBUG_EENV_DECISIONS
+       struct _eenv_debug *debug;
+       int debug_idx;
+#endif /* DEBUG_EENV_DECISIONS */
+};
+
+struct energy_env {
+       /* Utilization to move */
+       struct task_struct      *p;
+       unsigned long           util_delta;
+       unsigned long           util_delta_boosted;
+
+       /* Mask of CPUs candidates to evaluate */
+       cpumask_t               cpus_mask;
+
+       /* CPU candidates to evaluate */
+       struct eenv_cpu *cpu;
+       int eenv_cpu_count;
+
+#ifdef DEBUG_EENV_DECISIONS
+       /* pointer to the memory block reserved
+        * for debug on this CPU - there will be
+        * sizeof(struct _eenv_debug) *
+        *  (EAS_CPU_CNT * EAS_EENV_DEBUG_LEVELS)
+        * bytes allocated here.
+        */
+       struct _eenv_debug *debug;
+#endif
+       /*
+        * Index (into energy_env::cpu) of the most energy efficient CPU for
+        * the specified energy_env::p
+        */
+       int     next_idx;
+       int     max_cpu_count;
+
+       /* Support data */
+       struct sched_group      *sg_top;
+       struct sched_group      *sg_cap;
+       struct sched_group      *sg;
+};
 
 #ifdef CONFIG_SMP
 
index 2e6ef5faaad0964938ad75ea1a02bc3083e01c66..94d5a7140bd34688253538a95f45900b3148812a 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/printk.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
+#include <linux/ehmp.h>
 
 #include <trace/events/sched.h>
 
 bool schedtune_initialized = false;
 extern struct reciprocal_value schedtune_spc_rdiv;
 
+static int perf_threshold = 0;
+
+/* Returns the file-local perf_threshold value plus one. */
+int schedtune_perf_threshold(void)
+{
+       return 1 + perf_threshold;
+}
+
+struct group_balancer {
+       /* sum of task utilization in group */
+       unsigned long util;
+
+       /* group balancing threshold, compared against util */
+       unsigned long threshold;
+
+       /* imbalance ratio by heaviest task, in percent (writes clamp to 100) */
+       unsigned int imbalance_ratio;
+
+       /* balance ratio by heaviest task, in percent (writes clamp to 100) */
+       unsigned int balance_ratio;
+
+       /* heaviest task utilization in group */
+       unsigned long heaviest_util;
+
+       /* group utilization update interval in ns; 0 disables the update */
+       unsigned long update_interval;
+
+       /* next group utilization update time, on the cpu_rq(0) task clock */
+       unsigned long next_update_time;
+
+       /*
+        * group imbalance time = imbalance_count * update_interval
+        * imbalance_count >= imbalance_duration -> need balance
+        */
+       unsigned int imbalance_duration;
+       unsigned int imbalance_count;
+
+       /* utilization tracking window size in ns */
+       unsigned long window;
+
+       /* group balancer locking */
+       raw_spinlock_t lock;
+
+       /* need group balancing? */
+       bool need_balance;
+};
+
 /*
  * EAS scheduler tunables for task groups.
  */
@@ -32,6 +79,13 @@ struct schedtune {
        /* Hint to bias scheduling of tasks on that SchedTune CGroup
         * towards idle CPUs */
        int prefer_idle;
+
+       /* Hint to bias scheduling of tasks on that SchedTune CGroup
+        * towards high performance CPUs */
+       int prefer_perf;
+
+       /* SchedTune group balancer */
+       struct group_balancer gb;
 };
 
 static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
@@ -62,6 +116,7 @@ static struct schedtune
 root_schedtune = {
        .boost  = 0,
        .prefer_idle = 0,
+       .prefer_perf = 0,
 };
 
 /*
@@ -380,6 +435,337 @@ int schedtune_prefer_idle(struct task_struct *p)
        return prefer_idle;
 }
 
+#ifdef CONFIG_SCHED_EHMP
+static atomic_t kernel_prefer_perf_req[BOOSTGROUPS_COUNT];
+/*
+ * kernel_prefer_perf - outstanding in-kernel prefer_perf requests.
+ *
+ * Returns the current request count for boost group @grp_idx, or -EINVAL
+ * when @grp_idx is outside [0, BOOSTGROUPS_COUNT).
+ */
+int kernel_prefer_perf(int grp_idx)
+{
+       /* grp_idx is signed: reject negative indices as well. */
+       if (grp_idx < 0 || grp_idx >= BOOSTGROUPS_COUNT)
+               return -EINVAL;
+
+       return atomic_read(&kernel_prefer_perf_req[grp_idx]);
+}
+
+/*
+ * request_kernel_prefer_perf - take or drop an in-kernel prefer_perf
+ * request on boost group @grp_idx.
+ *
+ * A non-zero @enable increments the group's request count, zero
+ * decrements it.  Dropping more requests than were taken is a BUG.
+ * Out-of-range group indices are silently ignored.
+ */
+void request_kernel_prefer_perf(int grp_idx, int enable)
+{
+       /* grp_idx is signed: reject negative indices as well. */
+       if (grp_idx < 0 || grp_idx >= BOOSTGROUPS_COUNT)
+               return;
+
+       if (enable)
+               atomic_inc(&kernel_prefer_perf_req[grp_idx]);
+       else
+               BUG_ON(atomic_dec_return(&kernel_prefer_perf_req[grp_idx]) < 0);
+}
+#else
+static inline int kernel_prefer_perf(int grp_idx) { return 0; }
+#endif
+
+/*
+ * schedtune_prefer_perf - effective prefer_perf value for @p.
+ *
+ * Returns the larger of the task group's own prefer_perf setting and the
+ * in-kernel request value for that group, or 0 before schedtune has been
+ * initialized.
+ */
+int schedtune_prefer_perf(struct task_struct *p)
+{
+       struct schedtune *st;
+       int group_val, kernel_val;
+
+       if (unlikely(!schedtune_initialized))
+               return 0;
+
+       /* Sample both sources under rcu, compare afterwards. */
+       rcu_read_lock();
+       st = task_schedtune(p);
+       group_val = st->prefer_perf;
+       kernel_val = kernel_prefer_perf(st->idx);
+       rcu_read_unlock();
+
+       return max(group_val, kernel_val);
+}
+
+/*
+ * Returns the need_balance flag of @p's schedtune group, or 0 before
+ * schedtune has been initialized.
+ */
+int schedtune_need_group_balance(struct task_struct *p)
+{
+       bool need;
+
+       if (unlikely(!schedtune_initialized))
+               return 0;
+
+       rcu_read_lock();
+       need = task_schedtune(p)->gb.need_balance;
+       rcu_read_unlock();
+
+       return need;
+}
+
+/*
+ * Re-evaluate gb->need_balance from the current group utilization.
+ *
+ * Balancing is requested once the group has been at or above its
+ * threshold with the heaviest task's share above imbalance_ratio for
+ * imbalance_duration consecutive evaluations.  It is dropped again when
+ * the group falls below the threshold or the heaviest share falls below
+ * balance_ratio.  The result is always traced.
+ */
+static inline void
+check_need_group_balance(int group_idx, struct group_balancer *gb)
+{
+       int heaviest_ratio;
+
+       if (!gb->util) {
+               /* Empty group: nothing to balance. */
+               gb->imbalance_count = 0;
+               gb->need_balance = false;
+       } else {
+               heaviest_ratio = gb->heaviest_util * 100 / gb->util;
+
+               if (gb->need_balance) {
+                       /* Already balancing: only check the exit condition. */
+                       if (gb->util < gb->threshold ||
+                           heaviest_ratio < gb->balance_ratio) {
+                               gb->imbalance_count = 0;
+                               gb->need_balance = false;
+                       }
+               } else if (gb->util >= gb->threshold &&
+                          heaviest_ratio > gb->imbalance_ratio) {
+                       gb->imbalance_count++;
+
+                       if (gb->imbalance_count >= gb->imbalance_duration)
+                               gb->need_balance = true;
+               } else {
+                       gb->imbalance_count = 0;
+               }
+       }
+
+       trace_sched_tune_check_group_balance(group_idx,
+                               gb->imbalance_count, gb->need_balance);
+}
+
+/*
+ * Recompute the utilization sum and the heaviest task utilization of the
+ * schedtune group @st, then re-evaluate whether it needs balancing.
+ *
+ * The update is skipped when the group balancer lock is contended, when
+ * the update interval is 0, or when the next update time has not been
+ * reached yet.  Tasks whose load average was last updated more than
+ * gb->window ago are counted but do not contribute.
+ */
+static void __schedtune_group_util_update(struct schedtune *st)
+{
+       struct group_balancer *gb = &st->gb;
+       unsigned long now = cpu_rq(0)->clock_task;
+       struct task_struct *heaviest_p = NULL;
+       struct task_struct *p;
+       struct css_task_iter it;
+       unsigned long heaviest_util = 0;
+       unsigned long util_sum = 0;
+       unsigned int total = 0, accumulated = 0;
+
+       /* Never wait for the lock; a skipped round is harmless. */
+       if (!raw_spin_trylock(&gb->lock))
+               return;
+
+       if (!gb->update_interval || time_before(now, gb->next_update_time))
+               goto out;
+
+       css_task_iter_start(&st->css, 0, &it);
+       for (p = css_task_iter_next(&it); p; p = css_task_iter_next(&it)) {
+               unsigned long clock_task, delta, util;
+
+               total++;
+
+               /* Ignore tasks not updated within the tracking window. */
+               clock_task = task_rq(p)->clock_task;
+               delta = clock_task - p->se.avg.last_update_time;
+               if (p->se.avg.last_update_time && delta > gb->window)
+                       continue;
+
+               util = p->se.avg.util_avg;
+               if (util > heaviest_util) {
+                       heaviest_util = util;
+                       heaviest_p = p;
+               }
+
+               util_sum += p->se.avg.util_avg;
+               accumulated++;
+       }
+       css_task_iter_end(&it);
+
+       gb->util = util_sum;
+       gb->heaviest_util = heaviest_util;
+       gb->next_update_time = now + gb->update_interval;
+
+       /* heaviest_p stays NULL when no task contributed any utilization */
+       if (heaviest_p)
+               trace_sched_tune_grouputil_update(st->idx, total, accumulated,
+                               gb->util, heaviest_p, gb->heaviest_util);
+
+       check_need_group_balance(st->idx, gb);
+out:
+       raw_spin_unlock(&gb->lock);
+}
+
+/*
+ * Update the group utilization of every allocated schedtune group except
+ * index 0.  Does nothing before schedtune has been initialized.
+ */
+void schedtune_group_util_update(void)
+{
+       struct schedtune *st;
+       int idx;
+
+       if (unlikely(!schedtune_initialized))
+               return;
+
+       rcu_read_lock();
+
+       for (idx = 1; idx < BOOSTGROUPS_COUNT; idx++) {
+               st = allocated_group[idx];
+               if (st)
+                       __schedtune_group_util_update(st);
+       }
+
+       rcu_read_unlock();
+}
+
+/* cgroup read handler: current utilization sum of the group. */
+static u64
+gb_util_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       return css_st(css)->gb.util;
+}
+
+/*
+ * cgroup read handler: share of the group utilization contributed by the
+ * heaviest task, in percent.  Returns 0 while the group utilization is 0.
+ *
+ * The balancer updates these fields concurrently and this reader does not
+ * take gb->lock, so the divisor is sampled exactly once; re-reading it
+ * after the zero check could divide by zero.
+ */
+static u64
+gb_heaviest_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       struct schedtune *st = css_st(css);
+       unsigned long util = READ_ONCE(st->gb.util);
+
+       if (!util)
+               return 0;
+
+       return READ_ONCE(st->gb.heaviest_util) * 100 / util;
+}
+
+/* cgroup read handler: group balancing threshold. */
+static u64
+gb_threshold_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       return css_st(css)->gb.threshold;
+}
+
+/*
+ * cgroup write handler: set the group balancing threshold and re-evaluate
+ * the balance state under the group balancer lock.  Always returns 0.
+ */
+static int
+gb_threshold_write(struct cgroup_subsys_state *css, struct cftype *cft,
+           u64 threshold)
+{
+       struct schedtune *tune = css_st(css);
+
+       raw_spin_lock(&tune->gb.lock);
+       tune->gb.threshold = threshold;
+       check_need_group_balance(tune->idx, &tune->gb);
+       raw_spin_unlock(&tune->gb.lock);
+
+       return 0;
+}
+
+/* cgroup read handler: imbalance ratio of the group balancer. */
+static u64
+gb_imbalance_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       return css_st(css)->gb.imbalance_ratio;
+}
+
+/*
+ * cgroup write handler: set the imbalance ratio (clamped to at most 100)
+ * and re-evaluate the balance state under the group balancer lock.
+ * Always returns 0.
+ */
+static int
+gb_imbalance_ratio_write(struct cgroup_subsys_state *css, struct cftype *cft,
+           u64 ratio)
+{
+       struct schedtune *tune = css_st(css);
+       u64 clamped = min_t(u64, ratio, 100);
+
+       raw_spin_lock(&tune->gb.lock);
+       tune->gb.imbalance_ratio = clamped;
+       check_need_group_balance(tune->idx, &tune->gb);
+       raw_spin_unlock(&tune->gb.lock);
+
+       return 0;
+}
+
+/* cgroup read handler: balance ratio of the group balancer. */
+static u64
+gb_balance_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       return css_st(css)->gb.balance_ratio;
+}
+
+/*
+ * cgroup write handler: set the balance ratio (clamped to at most 100)
+ * and re-evaluate the balance state under the group balancer lock.
+ * Always returns 0.
+ */
+static int
+gb_balance_ratio_write(struct cgroup_subsys_state *css, struct cftype *cft,
+           u64 ratio)
+{
+       struct schedtune *tune = css_st(css);
+       u64 clamped = min_t(u64, ratio, 100);
+
+       raw_spin_lock(&tune->gb.lock);
+       tune->gb.balance_ratio = clamped;
+       check_need_group_balance(tune->idx, &tune->gb);
+       raw_spin_unlock(&tune->gb.lock);
+
+       return 0;
+}
+
+/* cgroup read handler: update interval converted from ns to us. */
+static u64
+gb_interval_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       return css_st(css)->gb.update_interval / NSEC_PER_USEC;
+}
+
+/*
+ * cgroup write handler: set the group utilization update interval.
+ *
+ * @interval_us is given in microseconds and stored in nanoseconds.
+ * Writing 0 disables the periodic update and resets the whole balancing
+ * state, including the imbalance streak; a stale imbalance_count would
+ * otherwise let need_balance trip on the first sample after re-enabling,
+ * bypassing imbalance_duration.  Always returns 0.
+ */
+static int
+gb_interval_write(struct cgroup_subsys_state *css, struct cftype *cft,
+           u64 interval_us)
+{
+       struct schedtune *st = css_st(css);
+       struct group_balancer *gb = &st->gb;
+
+       raw_spin_lock(&gb->lock);
+       gb->update_interval = interval_us * NSEC_PER_USEC;
+       if (!interval_us) {
+               gb->util = 0;
+               gb->heaviest_util = 0;
+               gb->imbalance_count = 0;
+               gb->need_balance = false;
+       }
+       raw_spin_unlock(&gb->lock);
+
+       return 0;
+}
+
+/* cgroup read handler: imbalance duration of the group balancer. */
+static u64
+gb_duration_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       return css_st(css)->gb.imbalance_duration;
+}
+
+/*
+ * cgroup write handler: set the number of consecutive imbalanced
+ * evaluations required before balancing is requested, then re-evaluate
+ * the balance state.  Always returns 0.
+ *
+ * imbalance_duration is an unsigned int, so the u64 input is clamped to
+ * UINT_MAX instead of being silently truncated (a value such as 1 << 32
+ * would otherwise be stored as 0).
+ */
+static int
+gb_duration_write(struct cgroup_subsys_state *css, struct cftype *cft,
+           u64 duration)
+{
+       struct schedtune *st = css_st(css);
+       struct group_balancer *gb = &st->gb;
+
+       duration = min_t(u64, duration, UINT_MAX);
+
+       raw_spin_lock(&gb->lock);
+       gb->imbalance_duration = duration;
+       check_need_group_balance(st->idx, gb);
+       raw_spin_unlock(&gb->lock);
+
+       return 0;
+}
+
+/* cgroup read handler: tracking window converted from ns to ms. */
+static u64
+gb_window_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       return css_st(css)->gb.window / NSEC_PER_MSEC;
+}
+
+/*
+ * cgroup write handler: set the utilization tracking window.  @window is
+ * given in milliseconds and stored in nanoseconds.  Always returns 0.
+ */
+static int
+gb_window_write(struct cgroup_subsys_state *css, struct cftype *cft,
+           u64 window)
+{
+       struct group_balancer *balancer = &css_st(css)->gb;
+
+       raw_spin_lock(&balancer->lock);
+       balancer->window = window * NSEC_PER_MSEC;
+       raw_spin_unlock(&balancer->lock);
+
+       return 0;
+}
+
 static u64
 prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
@@ -398,6 +784,24 @@ prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
        return 0;
 }
 
+static u64  /* read_u64 handler wired to the "prefer_perf" cgroup file */
+prefer_perf_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       struct schedtune *st = css_st(css);
+
+       return st->prefer_perf;  /* returned as stored */
+}
+
+static int  /* write_u64 handler wired to the "prefer_perf" cgroup file; always returns 0 */
+prefer_perf_write(struct cgroup_subsys_state *css, struct cftype *cft,
+           u64 prefer_perf)
+{
+       struct schedtune *st = css_st(css);
+       st->prefer_perf = prefer_perf;  /* NOTE(review): stored without locking or range check */
+
+       return 0;
+}
+
 static s64
 boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
@@ -434,6 +838,49 @@ static struct cftype files[] = {
                .read_u64 = prefer_idle_read,
                .write_u64 = prefer_idle_write,
        },
+       {
+               .name = "prefer_perf",
+               .read_u64 = prefer_perf_read,
+               .write_u64 = prefer_perf_write,
+       },
+       {
+               .name = "gb_util",
+               .read_u64 = gb_util_read,
+       },
+       {
+               .name = "gb_heaviest_ratio",
+               .read_u64 = gb_heaviest_ratio_read,
+       },
+       {
+               .name = "gb_threshold",
+               .read_u64 = gb_threshold_read,
+               .write_u64 = gb_threshold_write,
+       },
+       {
+               .name = "gb_imbalance_ratio",
+               .read_u64 = gb_imbalance_ratio_read,
+               .write_u64 = gb_imbalance_ratio_write,
+       },
+       {
+               .name = "gb_balance_ratio",
+               .read_u64 = gb_balance_ratio_read,
+               .write_u64 = gb_balance_ratio_write,
+       },
+       {
+               .name = "gb_interval_us",
+               .read_u64 = gb_interval_read,
+               .write_u64 = gb_interval_write,
+       },
+       {
+               .name = "gb_duration",
+               .read_u64 = gb_duration_read,
+               .write_u64 = gb_duration_write,
+       },
+       {
+               .name = "gb_window_ms",
+               .read_u64 = gb_window_read,
+               .write_u64 = gb_window_write,
+       },
        { }     /* terminate */
 };
 
@@ -457,6 +904,22 @@ schedtune_boostgroup_init(struct schedtune *st)
        return 0;
 }
 
+static void  /* set the initial group balancer state of @st; called from schedtune_css_alloc() */
+schedtune_group_balancer_init(struct schedtune *st)
+{  /* NOTE(review): balance_ratio, util and need_balance are not set here; presumably st is zero-allocated - confirm */
+       raw_spin_lock_init(&st->gb.lock);
+
+       st->gb.threshold = ULONG_MAX;  /* start at the maximum unsigned long value */
+       st->gb.imbalance_ratio = 0;                             /* 0% */
+       st->gb.update_interval = 0;                             /* disable update */
+       st->gb.next_update_time = cpu_rq(0)->clock_task;  /* seeded from CPU 0's runqueue clock_task */
+
+       st->gb.imbalance_duration = 0;
+       st->gb.imbalance_count = 0;
+
+       st->gb.window = 100 * NSEC_PER_MSEC;            /* 100ms */
+}
+
 static struct cgroup_subsys_state *
 schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -486,6 +949,8 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
        if (!st)
                goto out;
 
+       schedtune_group_balancer_init(st);
+
        /* Initialize per CPUs boost group support */
        st->idx = idx;
        if (schedtune_boostgroup_init(st))
@@ -554,6 +1019,9 @@ schedtune_init(void)
 {
        schedtune_spc_rdiv = reciprocal_value(100);
        schedtune_init_cgroups();
+
+       perf_threshold = find_second_max_cap();
+
        return 0;
 }
 postcore_initcall(schedtune_init);
index e79e1b1989219e8349239f56057e7cd3f4c439b4..1588ba24bff9a72719dc404bb0b83228ae97dd4f 100644 (file)
@@ -15,7 +15,13 @@ struct target_nrg {
 int schedtune_cpu_boost(int cpu);
 int schedtune_task_boost(struct task_struct *tsk);
 
+void schedtune_group_util_update(void);
+int schedtune_need_group_balance(struct task_struct *p);
+
+int schedtune_perf_threshold(void);
+
 int schedtune_prefer_idle(struct task_struct *tsk);
+int schedtune_prefer_perf(struct task_struct *tsk);
 
 void schedtune_enqueue_task(struct task_struct *p, int cpu);
 void schedtune_dequeue_task(struct task_struct *p, int cpu);
@@ -25,7 +31,13 @@ void schedtune_dequeue_task(struct task_struct *p, int cpu);
 #define schedtune_cpu_boost(cpu)  0
 #define schedtune_task_boost(tsk) 0
 
+#define schedtune_group_util_update() do { } while (0)
+#define schedtune_need_group_balance(task) 0
+
+#define schedtune_perf_threshold() 0
+
 #define schedtune_prefer_idle(tsk) 0
+#define schedtune_prefer_perf(tsk) 0
 
 #define schedtune_enqueue_task(task, cpu) do { } while (0)
 #define schedtune_dequeue_task(task, cpu) do { } while (0)