sched: remove precise CPU load calculations #2

[GitHub/mt8127/android_kernel_alcatel_ttab.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index a40ab657ad193cc7972b64833628ca2597b948bf..f6a81061fd502bd90dd07585ee85e210eb5ec9f0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,6 +61,7 @@
  #include <linux/delayacct.h>
  #include <linux/reciprocal_div.h>
  #include <linux/unistd.h>
+#include <linux/pagemap.h>
  
  #include <asm/tlb.h>
  
@@ -172,8 +173,6 @@ struct rt_prio_array {
  
  struct load_stat {
         struct load_weight load;
-       u64 load_update_start, load_update_last;
-       unsigned long delta_fair, delta_exec, delta_stat;
  };
  
  /* CFS-related fields in a runqueue */
@@ -668,7 +667,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
  /*
   * Shift right and round:
   */
-#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
  
  static unsigned long
  calc_delta_mine(unsigned long delta_exec, unsigned long weight,
@@ -684,10 +683,10 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
          * Check whether we'd overflow the 64-bit multiplication:
          */
         if (unlikely(tmp > WMULT_CONST))
-               tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+               tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
                         WMULT_SHIFT/2);
         else
-               tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
+               tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
  
         return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
  }
@@ -792,15 +791,6 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
  
  #define sched_class_highest (&rt_sched_class)
  
-static void __update_curr_load(struct rq *rq, struct load_stat *ls)
-{
-       if (rq->curr != rq->idle && ls->load.weight) {
-               ls->delta_exec += ls->delta_stat;
-               ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load);
-               ls->delta_stat = 0;
-       }
-}
-
  /*
   * Update delta_exec, delta_fair fields for rq.
   *
@@ -816,31 +806,13 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
   * This function is called /before/ updating rq->ls.load
   * and when switching tasks.
   */
-static void update_curr_load(struct rq *rq)
-{
-       struct load_stat *ls = &rq->ls;
-       u64 start;
-
-       start = ls->load_update_start;
-       ls->load_update_start = rq->clock;
-       ls->delta_stat += rq->clock - start;
-       /*
-        * Stagger updates to ls->delta_fair. Very frequent updates
-        * can be expensive.
-        */
-       if (ls->delta_stat >= sysctl_sched_stat_granularity)
-               __update_curr_load(rq, ls);
-}
-
  static inline void inc_load(struct rq *rq, const struct task_struct *p)
  {
-       update_curr_load(rq);
         update_load_add(&rq->ls.load, p->se.load.weight);
  }
  
  static inline void dec_load(struct rq *rq, const struct task_struct *p)
  {
-       update_curr_load(rq);
         update_load_sub(&rq->ls.load, p->se.load.weight);
  }
  
@@ -858,7 +830,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq)
  
  static void set_load_weight(struct task_struct *p)
  {
-       task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
         p->se.wait_runtime = 0;
  
         if (task_has_rt_policy(p)) {
@@ -1587,9 +1558,7 @@ static void __sched_fork(struct task_struct *p)
         p->se.wait_start_fair           = 0;
         p->se.exec_start                = 0;
         p->se.sum_exec_runtime          = 0;
-       p->se.delta_exec                = 0;
-       p->se.delta_fair_run            = 0;
-       p->se.delta_fair_sleep          = 0;
+       p->se.prev_sum_exec_runtime     = 0;
         p->se.wait_runtime              = 0;
         p->se.sleep_start_fair          = 0;
  
@@ -1602,6 +1571,7 @@ static void __sched_fork(struct task_struct *p)
         p->se.sleep_max                 = 0;
         p->se.block_max                 = 0;
         p->se.exec_max                  = 0;
+       p->se.slice_max                 = 0;
         p->se.wait_max                  = 0;
         p->se.wait_runtime_overruns     = 0;
         p->se.wait_runtime_underruns    = 0;
@@ -1656,12 +1626,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
         put_cpu();
  }
  
-/*
- * After fork, child runs first. (default) If set to 0 then
- * parent will (try to) run first.
- */
-unsigned int __read_mostly sysctl_sched_child_runs_first = 1;
-
  /*
   * wake_up_new_task - wake up a newly created task for the first time.
   *
@@ -1682,10 +1646,13 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  
         p->prio = effective_prio(p);
  
-       if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
-                       (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
-                       !current->se.on_rq) {
+       if (rt_prio(p->prio))
+               p->sched_class = &rt_sched_class;
+       else
+               p->sched_class = &fair_sched_class;
  
+       if (task_cpu(p) != this_cpu || !p->sched_class->task_new ||
+                                                       !current->se.on_rq) {
                 activate_task(rq, p, 0);
         } else {
                 /*
@@ -1976,42 +1943,10 @@ unsigned long nr_active(void)
   */
  static void update_cpu_load(struct rq *this_rq)
  {
-       u64 fair_delta64, exec_delta64, idle_delta64, sample_interval64, tmp64;
-       unsigned long total_load = this_rq->ls.load.weight;
-       unsigned long this_load =  total_load;
-       struct load_stat *ls = &this_rq->ls;
+       unsigned long this_load = this_rq->ls.load.weight;
         int i, scale;
  
         this_rq->nr_load_updates++;
-       if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD)))
-               goto do_avg;
-
-       /* Update delta_fair/delta_exec fields first */
-       update_curr_load(this_rq);
-
-       fair_delta64 = ls->delta_fair + 1;
-       ls->delta_fair = 0;
-
-       exec_delta64 = ls->delta_exec + 1;
-       ls->delta_exec = 0;
-
-       sample_interval64 = this_rq->clock - ls->load_update_last;
-       ls->load_update_last = this_rq->clock;
-
-       if ((s64)sample_interval64 < (s64)TICK_NSEC)
-               sample_interval64 = TICK_NSEC;
-
-       if (exec_delta64 > sample_interval64)
-               exec_delta64 = sample_interval64;
-
-       idle_delta64 = sample_interval64 - exec_delta64;
-
-       tmp64 = div64_64(SCHED_LOAD_SCALE * exec_delta64, fair_delta64);
-       tmp64 = div64_64(tmp64 * exec_delta64, sample_interval64);
-
-       this_load = (unsigned long)tmp64;
-
-do_avg:
  
         /* Update our load: */
         for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
@@ -2021,7 +1956,13 @@ do_avg:
  
                 old_load = this_rq->cpu_load[i];
                 new_load = this_load;
-
+               /*
+                * Round up the averaging division if load is increasing. This
+                * prevents us from getting stuck on 9 if the load is 10, for
+                * example.
+                */
+               if (new_load > old_load)
+                       new_load += scale-1;
                 this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
         }
  }
@@ -2511,7 +2452,7 @@ group_next:
          * a think about bumping its value to force at least one task to be
          * moved
          */
-       if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) {
+       if (*imbalance < busiest_load_per_task) {
                 unsigned long tmp, pwr_now, pwr_move;
                 unsigned int imbn;
  
@@ -2563,10 +2504,8 @@ small_imbalance:
                 pwr_move /= SCHED_LOAD_SCALE;
  
                 /* Move if we gain throughput */
-               if (pwr_move <= pwr_now)
-                       goto out_balanced;
-
-               *imbalance = busiest_load_per_task;
+               if (pwr_move > pwr_now)
+                       *imbalance = busiest_load_per_task;
         }
  
         return busiest;
@@ -3632,10 +3571,9 @@ EXPORT_SYMBOL(default_wake_function);
  static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
                              int nr_exclusive, int sync, void *key)
  {
-       struct list_head *tmp, *next;
+       wait_queue_t *curr, *next;
  
-       list_for_each_safe(tmp, next, &q->task_list) {
-               wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
+       list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
                 unsigned flags = curr->flags;
  
                 if (curr->func(curr, mode, sync, key) &&
@@ -4552,10 +4490,7 @@ asmlinkage long sys_sched_yield(void)
         struct rq *rq = this_rq_lock();
  
         schedstat_inc(rq, yld_cnt);
-       if (unlikely(rq->nr_running == 1))
-               schedstat_inc(rq, yld_act_empty);
-       else
-               current->sched_class->yield_task(rq, current);
+       current->sched_class->yield_task(rq, current);
  
         /*
          * Since we are going to call schedule() anyway, there's
@@ -4899,32 +4834,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
   */
  cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  
-/*
- * Increase the granularity value when there are more CPUs,
- * because with more CPUs the 'effective latency' as visible
- * to users decreases. But the relationship is not linear,
- * so pick a second-best guess by going with the log2 of the
- * number of CPUs.
- *
- * This idea comes from the SD scheduler of Con Kolivas:
- */
-static inline void sched_init_granularity(void)
-{
-       unsigned int factor = 1 + ilog2(num_online_cpus());
-       const unsigned long limit = 100000000;
-
-       sysctl_sched_min_granularity *= factor;
-       if (sysctl_sched_min_granularity > limit)
-               sysctl_sched_min_granularity = limit;
-
-       sysctl_sched_latency *= factor;
-       if (sysctl_sched_latency > limit)
-               sysctl_sched_latency = limit;
-
-       sysctl_sched_runtime_limit = sysctl_sched_latency * 5;
-       sysctl_sched_wakeup_granularity = sysctl_sched_latency / 2;
-}
-
  #ifdef CONFIG_SMP
  /*
   * This is how migration works:
@@ -6492,12 +6401,10 @@ void __init sched_init_smp(void)
         /* Move init over to a non-isolated CPU */
         if (set_cpus_allowed(current, non_isolated_cpus) < 0)
                 BUG();
-       sched_init_granularity();
  }
  #else
  void __init sched_init_smp(void)
  {
-       sched_init_granularity();
  }
  #endif /* CONFIG_SMP */
  
@@ -6522,7 +6429,6 @@ static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
  
  void __init sched_init(void)
  {
-       u64 now = sched_clock();
         int highest_cpu = 0;
         int i, j;
  
@@ -6547,8 +6453,6 @@ void __init sched_init(void)
                 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
                 list_add(&rq->cfs.leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
  #endif
-               rq->ls.load_update_last = now;
-               rq->ls.load_update_start = now;
  
                 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
                         rq->cpu_load[j] = 0;