+static unsigned long __read_mostly max_load_balance_interval = HZ/10;
+
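+/*
+ * LBF_ALL_PINNED: every task examined was pinned to the source CPU.
+ * LBF_NEED_BREAK: the migration loop should drop the rq locks and resume.
+ * LBF_SOME_PINNED: some task could not move to dst_cpu but may run on
+ * another CPU of the destination group (recorded in new_dst_cpu).
+ */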
+#define LBF_ALL_PINNED 0x01
+#define LBF_NEED_BREAK 0x02
+#define LBF_SOME_PINNED 0x04
+
+struct lb_env {
+ struct sched_domain *sd;
+
+ struct rq *src_rq;
+ int src_cpu;
+
+ int dst_cpu;
+ struct rq *dst_rq;
+
+ struct cpumask *dst_grpmask;
+ int new_dst_cpu;
+ enum cpu_idle_type idle;
+ long imbalance;
+ /* The set of CPUs under consideration for load-balancing */
+ struct cpumask *cpus;
+
+ unsigned int flags;
+
+ unsigned int loop;
+ unsigned int loop_break;
+ unsigned int loop_max;
+#ifdef CONFIG_MT_LOAD_BALANCE_ENHANCEMENT
+ int mt_check_cache_in_idle;
+#endif
+#ifdef CONFIG_MT_LOAD_BALANCE_PROFILER
+ unsigned int fail_reason;
+#endif
+};
+
+/*
+ * move_task - move a task from one runqueue to another runqueue.
+ * Both runqueues must be locked.
+ */
+static void move_task(struct task_struct *p, struct lb_env *env)
+{
+ deactivate_task(env->src_rq, p, 0);
+ set_task_cpu(p, env->dst_cpu);
+ activate_task(env->dst_rq, p, 0);
+ check_preempt_curr(env->dst_rq, p, 0);
+
+#ifdef CONFIG_HMP_POWER_AWARE_CONTROLLER
+ if (PA_MON_ENABLE && (strcmp(p->comm, PA_MON) == 0)) {
+ printk(KERN_EMERG "[PA] %s Balance From CPU%d to CPU%d\n",
+ p->comm, env->src_rq->cpu, env->dst_rq->cpu);
+ }
+#endif /* CONFIG_HMP_POWER_AWARE_CONTROLLER */
+}
+
+/*
+ * Is this task likely cache-hot:
+ */
+#if defined(CONFIG_MT_LOAD_BALANCE_ENHANCEMENT)
+static int
+task_hot(struct task_struct *p, u64 now, struct sched_domain *sd, int mt_check_cache_in_idle)
+#else
+static int
+task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
+#endif
+{
+ s64 delta;
+
+ if (p->sched_class != &fair_sched_class)
+ return 0;
+
+ if (unlikely(p->policy == SCHED_IDLE))
+ return 0;
+
+ /*
+ * Buddy candidates are cache hot:
+ */
+#ifdef CONFIG_MT_LOAD_BALANCE_ENHANCEMENT
+ if (!mt_check_cache_in_idle) {
+ if (!this_rq()->nr_running && (task_rq(p)->nr_running >= 2))
+ return 0;
+ }
+#endif
+ if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
+ (&p->se == cfs_rq_of(&p->se)->next ||
+ &p->se == cfs_rq_of(&p->se)->last))
+ return 1;
+
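+ /*
+ * sysctl_sched_migration_cost acts as a switch: -1 treats every task
+ * as cache hot, 0 treats none as hot; otherwise a task is hot while
+ * it has executed within the last migration_cost nanoseconds.
+ */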
+ if (sysctl_sched_migration_cost == -1)
+ return 1;
+ if (sysctl_sched_migration_cost == 0)
+ return 0;
+
+ delta = now - p->se.exec_start;
+
+ return delta < (s64)sysctl_sched_migration_cost;
+}
+
+/*
+ * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
+ */
+static
+int can_migrate_task(struct task_struct *p, struct lb_env *env)
+{
+ int tsk_cache_hot = 0;
+ /*
+ * We do not migrate tasks that are:
+ * 1) throttled_lb_pair, or
+ * 2) cannot be migrated to this CPU due to cpus_allowed, or
+ * 3) running (obviously), or
+ * 4) are cache-hot on their current CPU.
+ */
+ if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
+ return 0;
+
+ if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
+ int cpu;
+
+ schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+#ifdef CONFIG_MT_LOAD_BALANCE_PROFILER
+ mt_lbprof_stat_or(env->fail_reason, MT_LBPROF_AFFINITY);
+ if (mt_lbprof_lt(env->sd->mt_lbprof_nr_balance_failed, MT_LBPROF_NR_BALANCED_FAILED_UPPER_BOUND)) {
+ char strings[128] = "";
+ snprintf(strings, 128, "%d:balance fail:affinity:%d:%d:%s:0x%lx",
+ env->dst_cpu, env->src_cpu, p->pid, p->comm, p->cpus_allowed.bits[0]);
+ trace_sched_lbprof_log(strings);
+ }
+#endif
+
+ /*
+ * Remember if this task can be migrated to any other cpu in
+ * our sched_group. We may want to revisit it if we couldn't
+ * meet load balance goals by pulling other tasks on src_cpu.
+ *
+ * Also avoid computing new_dst_cpu if we have already computed
+ * one in current iteration.
+ */
+ if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED))
+ return 0;
+
+ /* Avoid re-selecting dst_cpu via env's cpus */
+ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
+ if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) {
+ env->flags |= LBF_SOME_PINNED;
+ env->new_dst_cpu = cpu;
+ break;
+ }
+ }
+
+ return 0;
+ }
+
+ /* Record that we found at least one task that could run on dst_cpu */
+ env->flags &= ~LBF_ALL_PINNED;
+
+ if (task_running(env->src_rq, p)) {
+ schedstat_inc(p, se.statistics.nr_failed_migrations_running);
+#ifdef CONFIG_MT_LOAD_BALANCE_PROFILER
+ mt_lbprof_stat_or(env->fail_reason, MT_LBPROF_RUNNING);
+ if (mt_lbprof_lt(env->sd->mt_lbprof_nr_balance_failed, MT_LBPROF_NR_BALANCED_FAILED_UPPER_BOUND)) {
+ char strings[128] = "";
+ snprintf(strings, 128, "%d:balance fail:running:%d:%d:%s",
+ env->dst_cpu, env->src_cpu, p->pid, p->comm);
+ trace_sched_lbprof_log(strings);
+ }
+#endif
+ return 0;
+ }
+
+ /*
+ * Aggressive migration if:
+ * 1) task is cache cold, or
+ * 2) too many balance attempts have failed.
+ */
+#if defined(CONFIG_MT_LOAD_BALANCE_ENHANCEMENT)
+ tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd, env->mt_check_cache_in_idle);
+#else
+ tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
+#endif
+ if (!tsk_cache_hot ||
+ env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
+
+ if (tsk_cache_hot) {
+ schedstat_inc(env->sd, lb_hot_gained[env->idle]);
+ schedstat_inc(p, se.statistics.nr_forced_migrations);
+ }
+
+ return 1;
+ }
+
+ schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
+#ifdef CONFIG_MT_LOAD_BALANCE_PROFILER
+ mt_lbprof_stat_or(env->fail_reason, MT_LBPROF_CACHEHOT);
+ if (mt_lbprof_lt(env->sd->mt_lbprof_nr_balance_failed, MT_LBPROF_NR_BALANCED_FAILED_UPPER_BOUND)) {
+ char strings[128] = "";
+ snprintf(strings, 128, "%d:balance fail:cache hot:%d:%d:%s",
+ env->dst_cpu, env->src_cpu, p->pid, p->comm);
+ trace_sched_lbprof_log(strings);
+ }
+#endif
+ return 0;
+}
+
+/*
+ * move_one_task tries to move exactly one task from busiest to this_rq, as
+ * part of active balancing operations within "domain".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_one_task(struct lb_env *env)
+{
+ struct task_struct *p, *n;
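+ /*
+ * Presumably: keep the cache-hot check in task_hot() active even on
+ * this active-balance path (mt_check_cache_in_idle == 1 skips the
+ * idle-time bypass there).
+ */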
+#ifdef CONFIG_MT_LOAD_BALANCE_ENHANCEMENT
+ env->mt_check_cache_in_idle = 1;
+#endif
+#ifdef CONFIG_MT_LOAD_BALANCE_PROFILER
+ mt_lbprof_stat_set(env->fail_reason, MT_LBPROF_NO_TRIGGER);
+#endif
+
+ list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
+#if defined(CONFIG_MTK_SCHED_CMP_LAZY_BALANCE) && !defined(CONFIG_HMP_LAZY_BALANCE)
+ if (need_lazy_balance(env->dst_cpu, env->src_cpu, p))
+ continue;
+#endif
+ if (!can_migrate_task(p, env))
+ continue;
+
+ move_task(p, env);
+ /*
+ * Right now, this is only the second place move_task()
+ * is called, so we can safely collect move_task()
+ * stats here rather than inside move_task().
+ */
+ schedstat_inc(env->sd, lb_gained[env->idle]);
+ return 1;
+ }
+ return 0;
+}
+
+static unsigned long task_h_load(struct task_struct *p);
+
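+/* move_tasks() takes a breather (drops the rq locks) after scanning this many tasks */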
+static const unsigned int sched_nr_migrate_break = 32;
+
+/*
+ * In the second round of load balancing, migrate a heavy (high load_weight)
+ * task as long as RT tasks exist on the busy CPU.
+ */
+#ifdef CONFIG_MT_LOAD_BALANCE_ENHANCEMENT
+ #define over_imbalance(lw, im) \
+ ((((lw) / 2) > (im)) && \
+ ((env->mt_check_cache_in_idle == 1) || \
+ (env->src_rq->rt.rt_nr_running == 0) || \
+ (pulled > 0)))
+#else
+ #define over_imbalance(lw, im) (((lw) / 2) > (im))
+#endif
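+/*
+ * A task is "over imbalance" when its weight is more than twice the
+ * remaining imbalance. With the MT enhancement, such a heavy task is still
+ * pulled when nothing has been pulled yet, RT tasks occupy the busy CPU,
+ * and the idle cache check is off; this implements the note above.
+ */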
+
+/*
+ * move_tasks tries to move up to imbalance weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd".
+ * Returns the number of tasks moved.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_tasks(struct lb_env *env)
+{
+ struct list_head *tasks = &env->src_rq->cfs_tasks;
+ struct task_struct *p;
+ unsigned long load;
+ int pulled = 0;
+
+ if (env->imbalance <= 0)
+ return 0;
+
+ mt_sched_printf("move_tasks start ");
+
+ while (!list_empty(tasks)) {
+ p = list_first_entry(tasks, struct task_struct, se.group_node);
+
+ env->loop++;
+ /* We've more or less seen every task there is, call it quits */
+ if (env->loop > env->loop_max)
+ break;
+
+ /* take a breather every nr_migrate tasks */
+ if (env->loop > env->loop_break) {
+ env->loop_break += sched_nr_migrate_break;
+ env->flags |= LBF_NEED_BREAK;
+ break;
+ }
+#if defined(CONFIG_MTK_SCHED_CMP_LAZY_BALANCE) && !defined(CONFIG_HMP_LAZY_BALANCE)
+ if (need_lazy_balance(env->dst_cpu, env->src_cpu, p))
+ goto next;
+#endif
+ if (!can_migrate_task(p, env))
+ goto next;
+
+ load = task_h_load(p);
+
+ if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed)
+ goto next;
+
+ if (over_imbalance(load, env->imbalance))
+ goto next;
+
+ move_task(p, env);
+ pulled++;
+ env->imbalance -= load;
+
+#ifdef CONFIG_PREEMPT
+ /*
+ * NEWIDLE balancing is a source of latency, so preemptible
+ * kernels will stop after the first task is pulled to minimize
+ * the critical section.
+ */
+ if (env->idle == CPU_NEWLY_IDLE)
+ break;
+#endif
+
+ /*
+ * We only want to steal up to the prescribed amount of
+ * weighted load.
+ */
+ if (env->imbalance <= 0)
+ break;
+
+ continue;
+next:
+ list_move_tail(&p->se.group_node, tasks);
+ }
+
+ /*
+ * Right now, this is one of only two places move_task() is called,
+ * so we can safely collect move_task() stats here rather than
+ * inside move_task().
+ */
+ schedstat_add(env->sd, lb_gained[env->idle], pulled);
+
+ mt_sched_printf("move_tasks end");
+
+ return pulled;
+}
+
+#ifdef CONFIG_MTK_SCHED_CMP
+#ifdef CONFIG_MTK_SCHED_CMP_TGS
+static int cmp_can_migrate_task(struct task_struct *p, struct lb_env *env)
+{
+ struct sched_domain *sd = env->sd;
+
+ BUG_ON(sd == NULL);
+
+ if (!(sd->flags & SD_BALANCE_TG))
+ return 0;
+
+ if (arch_is_multi_cluster()) {
+ int src_clid, dst_clid;
+ int src_nr_cpus;
+ struct thread_group_info_t *src_tginfo, *dst_tginfo;
+
+ src_clid = get_cluster_id(env->src_cpu);
+ dst_clid = get_cluster_id(env->dst_cpu);
+ BUG_ON(dst_clid == -1 || src_clid == -1);
+ BUG_ON(p == NULL || p->group_leader == NULL);
+ src_tginfo = &p->group_leader->thread_group_info[src_clid];
+ dst_tginfo = &p->group_leader->thread_group_info[dst_clid];
+ src_nr_cpus = nr_cpus_in_cluster(src_clid, false);
+
+#ifdef CONFIG_MT_SCHED_INFO
+ mt_sched_printf("check rule0: pid=%d comm=%s load=%ld src:clid=%d tginfo->nr_running=%ld nr_cpus=%d load_avg_ratio=%ld",
+ p->pid, p->comm, p->se.avg.load_avg_ratio,
+ src_clid, src_tginfo->nr_running, src_nr_cpus,
+ src_tginfo->load_avg_ratio);
+#endif
+#ifdef CONFIG_MTK_SCHED_CMP_TGS_WAKEUP
+ if (!thread_group_empty(p) &&
+ (src_tginfo->nr_running <= src_nr_cpus) &&
+ (src_tginfo->nr_running > dst_tginfo->nr_running)) {
+ mt_sched_printf("hit ruleA: bypass pid=%d comm=%s src:nr_running=%lu nr_cpus=%d dst:nr_running=%lu",
+ p->pid, p->comm, src_tginfo->nr_running, src_nr_cpus, dst_tginfo->nr_running);
+ return 0;
+ }
+#endif
+ }
+ return 1;
+}
+
+static int need_migrate_task_immediately(struct task_struct *p,
+ struct lb_env *env, struct clb_env *clbenv)
+{
+ struct sched_domain *sd = env->sd;
+
+ BUG_ON(sd == NULL);
+
+ if (arch_is_big_little()) {
+ mt_sched_printf("[%s] b.L arch", __func__);
+#ifdef CONFIG_MT_SCHED_INFO
+ mt_sched_printf("check rule0: pid=%d comm=%s src=%d dst=%d p->prio=%d p->se.avg.load_avg_ratio=%ld",
+ p->pid, p->comm, env->src_cpu, env->dst_cpu, p->prio, p->se.avg.load_avg_ratio);
+#endif
+ /* from LITTLE to big */
+ if (arch_cpu_is_little(env->src_cpu) && arch_cpu_is_big(env->dst_cpu)) {
+ BUG_ON(env->src_cpu != clbenv->ltarget);
+ if (p->se.avg.load_avg_ratio >= clbenv->bstats.threshold)
+ return 1;
+
+ /* from big to LITTLE */
+ } else if (arch_cpu_is_big(env->src_cpu) && arch_cpu_is_little(env->dst_cpu)) {
+ BUG_ON(env->src_cpu != clbenv->btarget);
+ if (p->se.avg.load_avg_ratio < clbenv->lstats.threshold)
+ return 1;
+ }
+ return 0;
+ }
+
+ if (arch_is_multi_cluster() && (sd->flags & SD_BALANCE_TG)) {
+ int src_clid, dst_clid;
+ int src_nr_cpus;
+ struct thread_group_info_t *src_tginfo, *dst_tginfo;
+
+ src_clid = get_cluster_id(env->src_cpu);
+ dst_clid = get_cluster_id(env->dst_cpu);
+ BUG_ON(dst_clid == -1 || src_clid == -1);
+ BUG_ON(p == NULL || p->group_leader == NULL);
+ src_tginfo = &p->group_leader->thread_group_info[src_clid];
+ dst_tginfo = &p->group_leader->thread_group_info[dst_clid];
+ src_nr_cpus = nr_cpus_in_cluster(src_clid, false);
+ mt_sched_printf("[%s] L.L arch", __func__);
+
+ if ((p->se.avg.load_avg_ratio * 4 >= NICE_0_LOAD * 3) &&
+ src_tginfo->nr_running > src_nr_cpus &&
+ src_tginfo->load_avg_ratio * 10 > NICE_0_LOAD * src_nr_cpus * 9) {
+ /*
+ * pr_warn("[%s] hit rule0, candidate_load_move/load_move (%ld/%ld)\n",
+ * __func__, candidate_load_move, env->imbalance);
+ */
+ return 1;
+ }
+ }
+
+ return 0;
+}
+#endif
+
+/*
+ * cmp_move_tasks tries to move up to imbalance weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd", applying the
+ * thread-group (TGS) rules: tasks hitting rule0 migrate immediately, rule1/2
+ * candidates are queued on tg_tasks, everything else on other_tasks, and the
+ * queues are drained afterwards while imbalance remains.
+ * Returns the number of tasks moved.
+ *
+ * Called with both runqueues locked.
+ */
+static int cmp_move_tasks(struct sched_domain *sd, struct lb_env *env)
+{
+ struct list_head *tasks = &env->src_rq->cfs_tasks;
+ struct task_struct *p;
+ unsigned long load = 0;
+ int pulled = 0;
+
+ long tg_load_move, other_load_move;
+ struct list_head tg_tasks, other_tasks;
+ int src_clid, dst_clid;
+#ifdef CONFIG_MTK_SCHED_CMP_TGS_WAKEUP
+ struct cpumask tmp, *cpus = &tmp;
+#endif
+#ifdef MTK_QUICK
+ int flag = 0;
+#endif
+ struct clb_env clbenv;
+ struct cpumask srcmask, dstmask;
+
+ if (env->imbalance <= 0)
+ return 0;
+
+ other_load_move = env->imbalance;
+ INIT_LIST_HEAD(&other_tasks);
+
+// if (sd->flags & SD_BALANCE_TG) {
+ tg_load_move = env->imbalance;
+ INIT_LIST_HEAD(&tg_tasks);
+ src_clid = get_cluster_id(env->src_cpu);
+ dst_clid = get_cluster_id(env->dst_cpu);
+ BUG_ON(dst_clid == -1 || src_clid == -1);
+
+#ifdef CONFIG_MTK_SCHED_CMP_TGS_WAKEUP
+ get_cluster_cpus(cpus, src_clid, true);
+#endif
+ mt_sched_printf("move_tasks_tg start: src:cpu=%d clid=%d runnable_load=%lu dst:cpu=%d clid=%d runnable_load=%lu imbalance=%ld curr->on_rq=%d",
+ env->src_cpu, src_clid, cpu_rq(env->src_cpu)->cfs.runnable_load_avg,
+ env->dst_cpu, dst_clid, cpu_rq(env->dst_cpu)->cfs.runnable_load_avg,
+ env->imbalance, env->dst_rq->curr->on_rq);
+// }
+
+ mt_sched_printf("max=%d busiest->nr_running=%d",
+ env->loop_max, cpu_rq(env->src_cpu)->nr_running);
+
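+ /*
+ * For big.LITTLE, build an HMP balance environment for this src/dst
+ * pair: ltarget/btarget are the LITTLE and big CPUs involved,
+ * lcpus/bcpus their cluster masks, so the up/down thresholds in
+ * clbenv reflect both clusters.
+ */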
+ if (arch_is_big_little()) {
+ get_cluster_cpus(&srcmask, src_clid, true);
+ get_cluster_cpus(&dstmask, dst_clid, true);
+ memset(&clbenv, 0, sizeof(clbenv));
+ clbenv.flags |= HMP_LB;
+ clbenv.ltarget = arch_cpu_is_little(env->src_cpu) ? env->src_cpu : env->dst_cpu;
+ clbenv.btarget = arch_cpu_is_big(env->src_cpu) ? env->src_cpu : env->dst_cpu;
+ clbenv.lcpus = arch_cpu_is_little(env->src_cpu) ? &srcmask : &dstmask;
+ clbenv.bcpus = arch_cpu_is_big(env->src_cpu) ? &srcmask : &dstmask;
+ sched_update_clbstats(&clbenv);
+ }
+
+ while (!list_empty(tasks)) {
+ struct thread_group_info_t *src_tginfo, *dst_tginfo;
+
+ p = list_first_entry(tasks, struct task_struct, se.group_node);
+
+#ifdef CONFIG_MT_SCHED_INFO
+ mt_sched_printf("check: pid=%d comm=%s load_avg_contrib=%lu h_load=%lu runnable_load_avg=%lu loop=%d, env->imbalance=%ld tg_load_move=%ld",
+ p->pid, p->comm, p->se.avg.load_avg_contrib,
+ task_cfs_rq(p)->h_load, task_cfs_rq(p)->runnable_load_avg,
+ env->loop, env->imbalance, tg_load_move);
+#endif
+ env->loop++;
+ /* We've more or less seen every task there is, call it quits */
+ if (env->loop > env->loop_max)
+ break;
+
+#if 0 // TO check
+ /* take a breather every nr_migrate tasks */
+ if (env->loop > env->loop_break) {
+ env->loop_break += sched_nr_migrate_break;
+ env->flags |= LBF_NEED_BREAK;
+ break;
+ }
+#endif
+ BUG_ON(p == NULL || p->group_leader == NULL);
+ src_tginfo = &p->group_leader->thread_group_info[src_clid];
+ dst_tginfo = &p->group_leader->thread_group_info[dst_clid];
+
+#if defined(CONFIG_MTK_SCHED_CMP_LAZY_BALANCE) && !defined(CONFIG_HMP_LAZY_BALANCE)
+ /* lazy balance for small tasks (hoisted here; its old spot below the
+ * if/else was unreachable since every branch continues or jumps) */
+ if (need_lazy_balance(env->dst_cpu, env->src_cpu, p))
+ goto next;
+#endif
+
+ /* rule0 */
+ if (!can_migrate_task(p, env)) {
+ mt_sched_printf("can not migrate: pid=%d comm=%s",
+ p->pid, p->comm);
+ goto next;
+ }
+
+ load = task_h_load(p);
+
+ if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed) {
+ mt_sched_printf("can not migrate: pid=%d comm=%s sched_feat",
+ p->pid, p->comm);
+ goto next;
+ }
+
+ if (over_imbalance(load, env->imbalance)) {
+ mt_sched_printf("can not migrate: pid=%d comm=%s load=%ld imbalance=%ld",
+ p->pid, p->comm, load, env->imbalance);
+ goto next;
+ }
+
+ /* meets rule0: migrate immediately */
+ if (need_migrate_task_immediately(p, env, &clbenv)) {
+ pulled++;
+ env->imbalance -= load;
+ tg_load_move -= load;
+ other_load_move -= load;
+ mt_sched_printf("hit rule0: pid=%d comm=%s load=%ld imbalance=%ld tg_imbalance=%ld other_load_move=%ld",
+ p->pid, p->comm, load, env->imbalance, tg_load_move, other_load_move);
+ move_task(p, env);
+ if (env->imbalance <= 0)
+ break;
+ continue;
+ }
+
+ /* for TGS (thread group scheduling) */
+ if (!cmp_can_migrate_task(p, env))
+ goto next;
+
+ if (sd->flags & SD_BALANCE_TG) {
+ if (over_imbalance(load, tg_load_move)) {
+ mt_sched_printf("can not migrate: pid=%d comm=%s load=%ld imbalance=%ld",
+ p->pid, p->comm, load, tg_load_move);
+ goto next;
+ }
+
+#ifdef MTK_QUICK
+ if (candidate_load_move <= 0) {
+ mt_sched_printf("check: pid=%d comm=%s candidate_load_move=%d",
+ p->pid, p->comm, candidate_load_move);
+ goto next;
+ }
+#endif
+
+ /* rule1: a single-threaded process is deferred to tg_tasks */
+#ifdef CONFIG_MT_SCHED_INFO
+ mt_sched_printf("check rule1: pid=%d p->comm=%s thread_group_cnt=%lu thread_group_empty(p)=%d",
+ p->pid, p->comm,
+ p->group_leader->thread_group_info[0].nr_running +
+ p->group_leader->thread_group_info[1].nr_running,
+ thread_group_empty(p));
+#endif
+
+ if (thread_group_empty(p)) {
+ list_move_tail(&p->se.group_node, &tg_tasks);
+ tg_load_move -= load;
+ other_load_move -= load;
+ mt_sched_printf("hit rule1: pid=%d p->comm=%s load=%ld tg_imbalance=%ld",
+ p->pid, p->comm, load, tg_load_move);
+ continue;
+ }
+
+ /*
+ * rule2: defer a thread whose load is at or below its group's
+ * per-thread average when the group already runs more threads on
+ * the destination cluster.
+ */
+#ifdef CONFIG_MT_SCHED_INFO
+ mt_sched_printf("check rule2: pid=%d p->comm=%s %ld, %ld, %ld, %ld, %ld",
+ p->pid, p->comm, src_tginfo->nr_running, src_tginfo->cfs_nr_running, dst_tginfo->nr_running,
+ p->se.avg.load_avg_ratio, src_tginfo->load_avg_ratio);
+#endif
+ if ((src_tginfo->nr_running < dst_tginfo->nr_running) &&
+ ((p->se.avg.load_avg_ratio * src_tginfo->cfs_nr_running) <=
+ src_tginfo->load_avg_ratio)) {
+ list_move_tail(&p->se.group_node, &tg_tasks);
+ tg_load_move -= load;
+ other_load_move -= load;
+ mt_sched_printf("hit rule2: pid=%d p->comm=%s load=%ld tg_imbalance=%ld",
+ p->pid, p->comm, load, tg_load_move);
+ continue;
+ }
+
+ if (over_imbalance(load, other_load_move))
+ goto next;
+/*
+ if (other_load_move <= 0)
+ goto next;
+*/
+
+ list_move_tail(&p->se.group_node, &other_tasks);
+ other_load_move -= load;
+ continue;
+ } else {
+ list_move_tail(&p->se.group_node, &other_tasks);
+ other_load_move -= load;
+ continue;
+ }
+
+
+next:
+ /* original rule */
+ list_move_tail(&p->se.group_node, tasks);
+ } /* end of while() */
+
+ if (sd->flags & SD_BALANCE_TG) {
+ while (!list_empty(&tg_tasks)) {
+ p = list_first_entry(&tg_tasks, struct task_struct, se.group_node);
+ list_move_tail(&p->se.group_node, tasks);
+
+ if (env->imbalance > 0) {
+ load = task_h_load(p);
+ if (over_imbalance(load, env->imbalance)) {
+ mt_sched_printf("overload rule1,2: pid=%d p->comm=%s load=%ld imbalance=%ld",
+ p->pid, p->comm, load, env->imbalance);
+#ifdef MTK_QUICK
+ flag = 1;
+#endif
+ continue;
+ }
+
+ move_task(p, env);
+ env->imbalance -= load;
+ pulled++;
+
+ mt_sched_printf("migrate hit rule1,2: pid=%d p->comm=%s load=%ld imbalance=%ld",
+ p->pid, p->comm, load, env->imbalance);
+ }
+ }
+ }
+
+ mt_sched_printf("move_tasks_tg finish rule migrate");
+
+ while (!list_empty(&other_tasks)) {
+ p = list_first_entry(&other_tasks, struct task_struct, se.group_node);
+ list_move_tail(&p->se.group_node, tasks);
+
+#ifdef MTK_QUICK
+ if (!flag && (env->imbalance > 0)) {
+#else
+ if (env->imbalance > 0) {
+#endif
+ load = task_h_load(p);
+
+ if (over_imbalance(load, env->imbalance)) {
+ mt_sched_printf("overload others: pid=%d p->comm=%s load=%ld imbalance=%ld",
+ p->pid, p->comm, load, env->imbalance);
+ continue;
+ }
+
+ move_task(p, env);
+ env->imbalance -= load;
+ pulled++;
+
+ mt_sched_printf("migrate others: pid=%d p->comm=%s load=%ld imbalance=%ld",
+ p->pid, p->comm, load, env->imbalance);
+ }
+ }
+
+ /*
+ * Right now, this is one of only two places move_task() is called,
+ * so we can safely collect move_task() stats here rather than
+ * inside move_task().
+ */
+ schedstat_add(env->sd, lb_gained[env->idle], pulled);
+
+ mt_sched_printf("move_tasks_tg finish pulled=%d imbalance=%ld", pulled, env->imbalance);
+
+ return pulled;
+}
+
+#endif /* CONFIG_MTK_SCHED_CMP */
+
+
+#if defined (CONFIG_MTK_SCHED_CMP_LAZY_BALANCE) && !defined(CONFIG_HMP_LAZY_BALANCE)
+static int need_lazy_balance(int dst_cpu, int src_cpu, struct task_struct *p)
+{
+ /*
+ * Lazy balance for small tasks:
+ * 1. src cpu is a buddy cpu
+ * 2. src cpu is not busy
+ * 3. p is a light task
+ */
+#ifdef CONFIG_MTK_SCHED_CMP_POWER_AWARE_CONTROLLER
+ if (PA_ENABLE && cpumask_test_cpu(src_cpu, &buddy_cpu_map) &&
+ !is_buddy_busy(src_cpu) && is_light_task(p)) {
+#else
+ if (cpumask_test_cpu(src_cpu, &buddy_cpu_map) &&
+ !is_buddy_busy(src_cpu) && is_light_task(p)) {
+#endif
+#ifdef CONFIG_MTK_SCHED_CMP_POWER_AWARE_CONTROLLER
+ unsigned int i;
+ AVOID_LOAD_BALANCE_FROM_CPUX_TO_CPUY_COUNT[src_cpu][dst_cpu]++;
+ mt_sched_printf("[PA]pid=%d, Lazy balance from CPU%d to CPU%d\n)\n", p->pid, src_cpu, dst_cpu);
+ for(i=0;i<4;i++) {
+ if(PA_MON_ENABLE && (strcmp(p->comm, &PA_MON[i][0]) == 0)) {
+ printk(KERN_EMERG "[PA] %s Lazy balance from CPU%d to CPU%d\n", p->comm, src_cpu, dst_cpu);
+ // printk(KERN_EMERG "[PA] src_cpu RQ Usage = %u, Period = %u, NR = %u\n",
+ // per_cpu(BUDDY_CPU_RQ_USAGE, src_cpu),
+ // per_cpu(BUDDY_CPU_RQ_PERIOD, src_cpu),
+ // per_cpu(BUDDY_CPU_RQ_NR, src_cpu));
+ // printk(KERN_EMERG "[PA] Task Usage = %u, Period = %u\n",
+ // p->se.avg.usage_avg_sum,
+ // p->se.avg.runnable_avg_period);
+ }
+ }
+#endif
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * update tg->load_weight by folding this cpu's load_avg
+ */
+static void __update_blocked_averages_cpu(struct task_group *tg, int cpu)
+{
+ struct sched_entity *se = tg->se[cpu];
+ struct cfs_rq *cfs_rq = tg->cfs_rq[cpu];
+
+ /* throttled entities do not contribute to load */
+ if (throttled_hierarchy(cfs_rq))
+ return;
+
+ update_cfs_rq_blocked_load(cfs_rq, 1);
+
+ if (se) {
+ update_entity_load_avg(se, 1);
+ /*
+ * We pivot on our runnable average having decayed to zero for
+ * list removal. This generally implies that all our children
+ * have also been removed (modulo rounding error or bandwidth
+ * control); however, such cases are rare and we can fix these
+ * at enqueue.
+ *
+ * TODO: fix up out-of-order children on enqueue.
+ */
+ if (!se->avg.runnable_avg_sum && !cfs_rq->nr_running)
+ list_del_leaf_cfs_rq(cfs_rq);
+ } else {
+ struct rq *rq = rq_of(cfs_rq);
+ update_rq_runnable_avg(rq, rq->nr_running);
+ }
+}
+
+static void update_blocked_averages(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ struct cfs_rq *cfs_rq;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ update_rq_clock(rq);
+ /*
+ * Iterates the task_group tree in a bottom up fashion, see
+ * list_add_leaf_cfs_rq() for details.
+ */
+ for_each_leaf_cfs_rq(rq, cfs_rq) {
+ /*
+ * Note: We may want to consider periodically releasing
+ * rq->lock about these updates so that creating many task
+ * groups does not result in continually extending hold time.
+ */
+ __update_blocked_averages_cpu(cfs_rq->tg, rq->cpu);
+ }
+
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+ unsigned long load;
+ long cpu = (long)data;
+
+ if (!tg->parent) {
+ /*
+ * rq's sched_avg is not updated accordingly; adopt the rq's
+ * corresponding cfs_rq runnable loading instead
+ * (a003a25b "sched: Consider runnable load average..."), rather than:
+ *
+ * load = cpu_rq(cpu)->avg.load_avg_contrib;
+ */
+ load = cpu_rq(cpu)->cfs.runnable_load_avg;
+ } else {
+ load = tg->parent->cfs_rq[cpu]->h_load;
+ load = div64_ul(load * tg->se[cpu]->avg.load_avg_contrib,
+ tg->parent->cfs_rq[cpu]->runnable_load_avg + 1);
+ }
+
+ tg->cfs_rq[cpu]->h_load = load;
+
+ return 0;
+}
+
+static void update_h_load(long cpu)
+{
+ rcu_read_lock();
+ walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+ rcu_read_unlock();
+}
+
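+/*
+ * A task's hierarchical load: its own load contribution scaled by the share
+ * it represents of its cfs_rq's hierarchical load; the +1 avoids dividing
+ * by zero when the cfs_rq carries no runnable load.
+ */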
+static unsigned long task_h_load(struct task_struct *p)
+{
+ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+ return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
+ cfs_rq->runnable_load_avg + 1);
+}
+#else
+static inline void update_blocked_averages(int cpu)
+{
+}
+
+static inline void update_h_load(long cpu)
+{
+}
+
+static unsigned long task_h_load(struct task_struct *p)
+{
+ return p->se.avg.load_avg_contrib;
+}
+#endif
+
+/********** Helpers for find_busiest_group ************************/
+/*
+ * sd_lb_stats - Structure to store the statistics of a sched_domain
+ * during load balancing.
+ */
+struct sd_lb_stats {
+ struct sched_group *busiest; /* Busiest group in this sd */
+ struct sched_group *this; /* Local group in this sd */
+ unsigned long total_load; /* Total load of all groups in sd */
+ unsigned long total_pwr; /* Total power of all groups in sd */
+ unsigned long avg_load; /* Average load across all groups in sd */
+
+ /* Statistics of this group */
+ unsigned long this_load;
+ unsigned long this_load_per_task;
+ unsigned long this_nr_running;
+ unsigned long this_has_capacity;
+ unsigned int this_idle_cpus;
+
+ /* Statistics of the busiest group */
+ unsigned int busiest_idle_cpus;
+ unsigned long max_load;
+ unsigned long busiest_load_per_task;
+ unsigned long busiest_nr_running;
+ unsigned long busiest_group_capacity;
+ unsigned long busiest_has_capacity;
+ unsigned int busiest_group_weight;
+
+ int group_imb; /* Is there imbalance in this sd */
+};
+
+/*
+ * sg_lb_stats - stats of a sched_group required for load_balancing
+ */
+struct sg_lb_stats {
+ unsigned long avg_load; /* Avg load across the CPUs of the group */
+ unsigned long group_load; /* Total load over the CPUs of the group */
+ unsigned long sum_nr_running; /* Nr tasks running in the group */
+ unsigned long sum_weighted_load; /* Weighted load of group's tasks */
+ unsigned long group_capacity;
+ unsigned long idle_cpus;
+ unsigned long group_weight;
+ int group_imb; /* Is there an imbalance in the group ? */
+ int group_has_capacity; /* Is there extra capacity in the group? */
+};
+
+/**
+ * get_sd_load_idx - Obtain the load index for a given sched domain.
+ * @sd: The sched_domain whose load_idx is to be obtained.
+ * @idle: The idle status of the CPU for which the sd's load_idx is obtained.
+ */
+static inline int get_sd_load_idx(struct sched_domain *sd,
+ enum cpu_idle_type idle)
+{
+ int load_idx;
+
+ switch (idle) {
+ case CPU_NOT_IDLE:
+ load_idx = sd->busy_idx;
+ break;
+
+ case CPU_NEWLY_IDLE:
+ load_idx = sd->newidle_idx;
+ break;
+ default:
+ load_idx = sd->idle_idx;
+ break;
+ }
+
+ return load_idx;
+}
+
+static unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return SCHED_POWER_SCALE;
+}
+
+unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return default_scale_freq_power(sd, cpu);
+}
+
+static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+{
+ unsigned long weight = sd->span_weight;
+ unsigned long smt_gain = sd->smt_gain;
+
+ smt_gain /= weight;
+
+ return smt_gain;
+}
+
+unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
+{
+ return default_scale_smt_power(sd, cpu);
+}
+
+static unsigned long scale_rt_power(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ u64 total, available, age_stamp, avg;
+
+ /*
+ * Since we're reading these variables without serialization make sure
+ * we read them once before doing sanity checks on them.
+ */
+ age_stamp = ACCESS_ONCE(rq->age_stamp);
+ avg = ACCESS_ONCE(rq->rt_avg);
+
+ total = sched_avg_period() + (rq->clock - age_stamp);
+
+ if (unlikely(total < avg)) {
+ /* Ensures that power won't end up being negative */
+ available = 0;
+ } else {
+ available = total - avg;
+ }
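+ /*
+ * Capacity left for CFS after RT time, as a fraction of
+ * SCHED_POWER_SCALE: available / (total >> SCHED_POWER_SHIFT).
+ */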
+
+ if (unlikely((s64)total < SCHED_POWER_SCALE))
+ total = SCHED_POWER_SCALE;
+
+ total >>= SCHED_POWER_SHIFT;
+
+ return div_u64(available, total);
+}
+
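+/*
+ * A cpu's power folds together up to three factors, each nominally in
+ * [0, SCHED_POWER_SCALE]: SMT capacity sharing, arch/frequency scaling,
+ * and the time left over after RT activity (scale_rt_power()); the result
+ * is floored at 1 so later load calculations can divide by it.
+ */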
+static void update_cpu_power(struct sched_domain *sd, int cpu)
+{
+ unsigned long weight = sd->span_weight;
+ unsigned long power = SCHED_POWER_SCALE;
+ struct sched_group *sdg = sd->groups;
+
+ if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
+ if (sched_feat(ARCH_POWER))
+ power *= arch_scale_smt_power(sd, cpu);
+ else
+ power *= default_scale_smt_power(sd, cpu);
+
+ power >>= SCHED_POWER_SHIFT;
+ }
+
+ sdg->sgp->power_orig = power;
+
+ if (sched_feat(ARCH_POWER))
+ power *= arch_scale_freq_power(sd, cpu);
+ else
+ power *= default_scale_freq_power(sd, cpu);
+
+ power >>= SCHED_POWER_SHIFT;
+
+ power *= scale_rt_power(cpu);
+ power >>= SCHED_POWER_SHIFT;
+
+ if (!power)
+ power = 1;
+
+ cpu_rq(cpu)->cpu_power = power;
+ sdg->sgp->power = power;
+}
+
+void update_group_power(struct sched_domain *sd, int cpu)
+{
+ struct sched_domain *child = sd->child;
+ struct sched_group *group, *sdg = sd->groups;
+ unsigned long power;
+ unsigned long interval;
+
+ interval = msecs_to_jiffies(sd->balance_interval);
+ interval = clamp(interval, 1UL, max_load_balance_interval);
+ sdg->sgp->next_update = jiffies + interval;
+
+ if (!child) {
+ update_cpu_power(sd, cpu);
+ return;
+ }
+
+ power = 0;
+
+ if (child->flags & SD_OVERLAP) {
+ /*
+ * SD_OVERLAP domains cannot assume that child groups
+ * span the current group.
+ */
+
+ for_each_cpu(cpu, sched_group_cpus(sdg))
+ power += power_of(cpu);
+ } else {
+ /*
+ * !SD_OVERLAP domains can assume that child groups
+ * span the current group.
+ */
+
+ group = child->groups;
+ do {
+ power += group->sgp->power;
+ group = group->next;
+ } while (group != child->groups);
+ }
+
+ sdg->sgp->power_orig = sdg->sgp->power = power;
+}
+
+/*
+ * Try and fix up capacity for tiny siblings, this is needed when
+ * things like SD_ASYM_PACKING need f_b_g to select another sibling
+ * which on its own isn't powerful enough.
+ *
+ * See update_sd_pick_busiest() and check_asym_packing().
+ */
+static inline int
+fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+{
+ /*
+ * Only siblings can have significantly less than SCHED_POWER_SCALE
+ */
+ if (!(sd->flags & SD_SHARE_CPUPOWER))
+ return 0;
+
+ /*
+ * If ~90% of the cpu_power is still there (29/32 ~= 90.6%), we're good.
+ */
+ if (group->sgp->power * 32 > group->sgp->power_orig * 29)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ * @env: The load balancing environment.
+ * @group: sched_group whose statistics are to be updated.
+ * @load_idx: Load index of sched_domain of this_cpu for load calc.
+ * @local_group: Does group contain this_cpu.
+ * @balance: Should we balance.
+ * @sgs: variable to hold the statistics for this group.
+ */
+static inline void update_sg_lb_stats(struct lb_env *env,
+ struct sched_group *group, int load_idx,
+ int local_group, int *balance, struct sg_lb_stats *sgs)
+{
+ unsigned long nr_running, max_nr_running, min_nr_running;
+ unsigned long load, max_cpu_load, min_cpu_load;
+ unsigned int balance_cpu = -1, first_idle_cpu = 0;
+ unsigned long avg_load_per_task = 0;
+ int i;
+
+ if (local_group)
+ balance_cpu = group_balance_cpu(group);
+
+ /* Tally up the load of all CPUs in the group */
+ max_cpu_load = 0;
+ min_cpu_load = ~0UL;
+ max_nr_running = 0;
+ min_nr_running = ~0UL;
+
+ for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
+ struct rq *rq = cpu_rq(i);
+
+ nr_running = rq->nr_running;
+
+ /* Bias balancing toward cpus of our domain */
+ if (local_group) {
+ if (idle_cpu(i) && !first_idle_cpu &&
+ cpumask_test_cpu(i, sched_group_mask(group))) {
+ first_idle_cpu = 1;
+ balance_cpu = i;
+ }
+
+ load = target_load(i, load_idx);
+ } else {
+ load = source_load(i, load_idx);
+ if (load > max_cpu_load)
+ max_cpu_load = load;
+ if (min_cpu_load > load)
+ min_cpu_load = load;