Merge commit '8700c95adb03' into timers/nohz
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 25aaf93281deb81ae3c10764ca391e1bfdc2dba4..c61a614465c8ebf13b5f71aabf23a78c9e8533a6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3896,12 +3896,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
        int tsk_cache_hot = 0;
        /*
         * We do not migrate tasks that are:
-        * 1) running (obviously), or
+        * 1) throttled_lb_pair, or
         * 2) cannot be migrated to this CPU due to cpus_allowed, or
-        * 3) are cache-hot on their current CPU.
+        * 3) running (obviously), or
+        * 4) are cache-hot on their current CPU.
         */
+       if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
+               return 0;
+
        if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
-               int new_dst_cpu;
+               int cpu;
 
                schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
 
@@ -3916,12 +3920,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
                if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED))
                        return 0;
 
-               new_dst_cpu = cpumask_first_and(env->dst_grpmask,
-                                               tsk_cpus_allowed(p));
-               if (new_dst_cpu < nr_cpu_ids) {
-                       env->flags |= LBF_SOME_PINNED;
-                       env->new_dst_cpu = new_dst_cpu;
+               /* Prevent to re-select dst_cpu via env's cpus */
+               for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
+                       if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) {
+                               env->flags |= LBF_SOME_PINNED;
+                               env->new_dst_cpu = cpu;
+                               break;
+                       }
                }
+
                return 0;
        }
 
@@ -3967,9 +3974,6 @@ static int move_one_task(struct lb_env *env)
        struct task_struct *p, *n;
 
        list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
-               if (throttled_lb_pair(task_group(p), env->src_rq->cpu, env->dst_cpu))
-                       continue;
-
                if (!can_migrate_task(p, env))
                        continue;
 
@@ -4021,7 +4025,7 @@ static int move_tasks(struct lb_env *env)
                        break;
                }
 
-               if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
+               if (!can_migrate_task(p, env))
                        goto next;
 
                load = task_h_load(p);
@@ -4032,9 +4036,6 @@ static int move_tasks(struct lb_env *env)
                if ((load / 2) > env->imbalance)
                        goto next;
 
-               if (!can_migrate_task(p, env))
-                       goto next;
-
                move_task(p, env);
                pulled++;
                env->imbalance -= load;
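
Note on the hunks above: the two can_migrate_task() changes move the throttled_lb_pair() test to the top of the function, which is why move_one_task() and move_tasks() can drop their duplicated copies of that check, and move_tasks() now applies the full can_migrate_task() filter before it bothers computing task_h_load(). The affinity fallback also changes from a single cpumask_first_and() lookup to a scan restricted to env->cpus, so a destination CPU that has already been tried (and cleared from the working mask, see the load_balance() hunk further down) cannot be picked again. Below is a minimal user-space sketch of that fallback scan; it uses plain bitmasks in place of struct cpumask, and pick_new_dst() is a hypothetical name, not a kernel function.

#include <stdio.h>

/* Simplified stand-in for the kernel's struct cpumask: one bit per CPU. */
typedef unsigned long mask_t;
#define CPU_BIT(c) (1UL << (c))

/*
 * Hypothetical model of the fallback in can_migrate_task(): the task is not
 * allowed on dst_cpu, so look for another CPU that is in the destination
 * group (dst_grpmask), still eligible in this balance pass (env_cpus), and
 * permitted by the task's affinity (allowed).  Returns -1 if none exists.
 */
static int pick_new_dst(mask_t dst_grpmask, mask_t env_cpus, mask_t allowed,
			int nr_cpus)
{
	for (int cpu = 0; cpu < nr_cpus; cpu++) {
		if (dst_grpmask & env_cpus & allowed & CPU_BIT(cpu))
			return cpu;	/* corresponds to env->new_dst_cpu */
	}
	return -1;
}

int main(void)
{
	/* Task allowed on CPUs 2-3; group is CPUs 0-3; CPU 3 was already tried. */
	mask_t allowed     = CPU_BIT(2) | CPU_BIT(3);
	mask_t dst_grpmask = CPU_BIT(0) | CPU_BIT(1) | CPU_BIT(2) | CPU_BIT(3);
	mask_t env_cpus    = dst_grpmask & ~CPU_BIT(3);

	printf("fallback dst_cpu = %d\n",
	       pick_new_dst(dst_grpmask, env_cpus, allowed, 4));	/* prints 2 */
	return 0;
}
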
@@ -4979,7 +4980,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 #define MAX_PINNED_INTERVAL    512
 
 /* Working cpumask for load_balance and load_balance_newidle. */
-DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 
 static int need_active_balance(struct lb_env *env)
 {
@@ -5010,11 +5011,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                        int *balance)
 {
        int ld_moved, cur_ld_moved, active_balance = 0;
-       int lb_iterations, max_lb_iterations;
        struct sched_group *group;
        struct rq *busiest;
        unsigned long flags;
-       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
+       struct cpumask *cpus = __get_cpu_var(load_balance_mask);
 
        struct lb_env env = {
                .sd             = sd,
@@ -5026,8 +5026,14 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                .cpus           = cpus,
        };
 
+       /*
+        * For NEWLY_IDLE load_balancing, we don't need to consider
+        * other cpus in our group
+        */
+       if (idle == CPU_NEWLY_IDLE)
+               env.dst_grpmask = NULL;
+
        cpumask_copy(cpus, cpu_active_mask);
-       max_lb_iterations = cpumask_weight(env.dst_grpmask);
 
        schedstat_inc(sd, lb_count[idle]);
 
@@ -5053,7 +5059,6 @@ redo:
        schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
        ld_moved = 0;
-       lb_iterations = 1;
        if (busiest->nr_running > 1) {
                /*
                 * Attempt to move tasks. If find_busiest_group has found
@@ -5110,14 +5115,17 @@ more_balance:
                 * moreover subsequent load balance cycles should correct the
                 * excess load moved.
                 */
-               if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
-                               lb_iterations++ < max_lb_iterations) {
+               if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) {
 
                        env.dst_rq       = cpu_rq(env.new_dst_cpu);
                        env.dst_cpu      = env.new_dst_cpu;
                        env.flags       &= ~LBF_SOME_PINNED;
                        env.loop         = 0;
                        env.loop_break   = sched_nr_migrate_break;
+
+                       /* Prevent to re-select dst_cpu via env's cpus */
+                       cpumask_clear_cpu(env.dst_cpu, env.cpus);
+
                        /*
                         * Go back to "more_balance" rather than "redo" since we
                         * need to continue with same src_cpu.
@@ -5347,7 +5355,7 @@ out_unlock:
        return 0;
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * idle load balancing details
  * - When one of the busy CPUs notice that there may be an idle rebalancing
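
Note on the #ifdef change in this hunk and in the later hunks that make the same rename: the nohz code shared by idle dynticks and full dynticks moved under a new NO_HZ_COMMON symbol, which both NO_HZ_IDLE and NO_HZ_FULL select, so code that used to key off CONFIG_NO_HZ now keys off CONFIG_NO_HZ_COMMON. Restated as a preprocessor sketch purely for illustration (the real relationship is expressed in kernel/time/Kconfig, not in C):

/*
 * Illustration only: NO_HZ_IDLE and NO_HZ_FULL both "select NO_HZ_COMMON"
 * in Kconfig, so either of them enables the code guarded above.
 */
#if defined(CONFIG_NO_HZ_IDLE) || defined(CONFIG_NO_HZ_FULL)
# define CONFIG_NO_HZ_COMMON 1
#endif
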
@@ -5412,13 +5420,16 @@ static inline void set_cpu_sd_state_busy(void)
        struct sched_domain *sd;
        int cpu = smp_processor_id();
 
-       if (!test_bit(NOHZ_IDLE, nohz_flags(cpu)))
-               return;
-       clear_bit(NOHZ_IDLE, nohz_flags(cpu));
-
        rcu_read_lock();
-       for_each_domain(cpu, sd)
+       sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
+
+       if (!sd || !sd->nohz_idle)
+               goto unlock;
+       sd->nohz_idle = 0;
+
+       for (; sd; sd = sd->parent)
                atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+unlock:
        rcu_read_unlock();
 }
 
@@ -5427,13 +5438,16 @@ void set_cpu_sd_state_idle(void)
        struct sched_domain *sd;
        int cpu = smp_processor_id();
 
-       if (test_bit(NOHZ_IDLE, nohz_flags(cpu)))
-               return;
-       set_bit(NOHZ_IDLE, nohz_flags(cpu));
-
        rcu_read_lock();
-       for_each_domain(cpu, sd)
+       sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
+
+       if (!sd || sd->nohz_idle)
+               goto unlock;
+       sd->nohz_idle = 1;
+
+       for (; sd; sd = sd->parent)
                atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+unlock:
        rcu_read_unlock();
 }
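
Note on the set_cpu_sd_state_busy()/set_cpu_sd_state_idle() hunks above: the per-CPU NOHZ_IDLE flag bit is replaced by a nohz_idle flag on the CPU's base sched_domain, checked and flipped under rcu_read_lock(), which keeps the flag tied to the same domain data as the nr_busy_cpus counters it guards. The sketch below models only the idempotence aspect: the first idle-to-busy transition walks up the parent chain and bumps each group's busy count, and repeated calls do nothing. Types and names (fake_domain, cpu_goes_busy) are made up for illustration.

#include <stdio.h>

/* Minimal stand-ins for the sched_domain fields touched above (hypothetical). */
struct fake_group { int nr_busy_cpus; };
struct fake_domain {
	int nohz_idle;			/* was a per-cpu NOHZ_IDLE bit before */
	struct fake_group *group;
	struct fake_domain *parent;
};

/* Model of set_cpu_sd_state_busy(): only the first transition is accounted. */
static void cpu_goes_busy(struct fake_domain *sd)
{
	if (!sd || !sd->nohz_idle)
		return;			/* already counted as busy: nothing to do */
	sd->nohz_idle = 0;
	for (; sd; sd = sd->parent)
		sd->group->nr_busy_cpus++;
}

int main(void)
{
	struct fake_group core = {0}, pkg = {0};
	struct fake_domain top  = { .nohz_idle = 1, .group = &pkg,  .parent = NULL };
	struct fake_domain base = { .nohz_idle = 1, .group = &core, .parent = &top };

	cpu_goes_busy(&base);
	cpu_goes_busy(&base);		/* second call is a no-op thanks to the flag */
	printf("core=%d pkg=%d\n", core.nr_busy_cpus, pkg.nr_busy_cpus); /* 1 1 */
	return 0;
}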
 
@@ -5523,10 +5537,11 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
                if (time_after_eq(jiffies, sd->last_balance + interval)) {
                        if (load_balance(cpu, rq, sd, idle, &balance)) {
                                /*
-                                * We've pulled tasks over so either we're no
-                                * longer idle.
+                                * The LBF_SOME_PINNED logic could have changed
+                                * env->dst_cpu, so we can't know our idle
+                                * state even if we migrated tasks. Update it.
                                 */
-                               idle = CPU_NOT_IDLE;
+                               idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
                        }
                        sd->last_balance = jiffies;
                }
@@ -5557,9 +5572,9 @@ out:
                rq->next_balance = next_balance;
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
- * In CONFIG_NO_HZ case, the idle balance kickee will do the
+ * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
  * rebalancing for all the cpus for whom scheduler ticks are stopped.
  */
 static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
@@ -5702,7 +5717,7 @@ void trigger_load_balance(struct rq *rq, int cpu)
        if (time_after_eq(jiffies, rq->next_balance) &&
            likely(!on_null_domain(cpu)))
                raise_softirq(SCHED_SOFTIRQ);
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
        if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
                nohz_balancer_kick(cpu);
 #endif
@@ -6172,7 +6187,7 @@ __init void init_sched_fair_class(void)
 #ifdef CONFIG_SMP
        open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
        nohz.next_balance = jiffies;
        zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
        cpu_notifier(sched_ilb_notifier, 0);