sched: Keep at least 1 tick per second for active dynticks tasks

[GitHub/mt8127/android_kernel_alcatel_ttab.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index cb49b2ab0e16de90ef5626f29ee7a331df7737d2..3bdf986a091a503f4d30e39baebae7e08a82d98a 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -512,11 +512,6 @@ static inline void init_hrtick(void)
   * the target CPU.
   */
  #ifdef CONFIG_SMP
-
-#ifndef tsk_is_polling
-#define tsk_is_polling(t) 0
-#endif
-
  void resched_task(struct task_struct *p)
  {
         int cpu;
@@ -549,7 +544,7 @@ void resched_cpu(int cpu)
         raw_spin_unlock_irqrestore(&rq->lock, flags);
  }
  
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
  /*
   * In the semi idle case, use the nearest busy cpu for migrating timers
   * from an idle cpu.  This is good for power-savings.
@@ -587,7 +582,7 @@ unlock:
   * account when the CPU goes back to idle and evaluates the timer
   * wheel for the next timer event.
   */
-void wake_up_idle_cpu(int cpu)
+static void wake_up_idle_cpu(int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
  
@@ -617,20 +612,56 @@ void wake_up_idle_cpu(int cpu)
                 smp_send_reschedule(cpu);
  }
  
+static bool wake_up_full_nohz_cpu(int cpu)
+{
+       if (tick_nohz_full_cpu(cpu)) {
+               if (cpu != smp_processor_id() ||
+                   tick_nohz_tick_stopped())
+                       smp_send_reschedule(cpu);
+               return true; /* full-nohz CPU: handled here, IPI sent if needed */
+       }
+
+       return false; /* not a full-nohz CPU: caller must handle the wakeup */
+}
+
+void wake_up_nohz_cpu(int cpu)
+{
+       if (!wake_up_full_nohz_cpu(cpu)) /* false: fall back to the idle path */
+               wake_up_idle_cpu(cpu);
+}
+
  static inline bool got_nohz_idle_kick(void)
  {
         int cpu = smp_processor_id();
         return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
  }
  
-#else /* CONFIG_NO_HZ */
+#else /* CONFIG_NO_HZ_COMMON */
  
  static inline bool got_nohz_idle_kick(void)
  {
         return false;
  }
  
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
+
+#ifdef CONFIG_NO_HZ_FULL
+bool sched_can_stop_tick(void)
+{
+       struct rq *rq;
+
+       rq = this_rq();
+
+       /* Make sure the rq->nr_running update is visible after the IPI */
+       smp_rmb();
+
+       /* More than one running task needs preemption */
+       if (rq->nr_running > 1)
+               return false;
+
+       return true;
+}
+#endif /* CONFIG_NO_HZ_FULL */
  
  void sched_avg_update(struct rq *rq)
  {
@@ -1362,7 +1393,8 @@ static void sched_ttwu_pending(void)
  
  void scheduler_ipi(void)
  {
-       if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
+       if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()
+           && !tick_nohz_full_cpu(smp_processor_id()))
                 return;
  
         /*
@@ -1379,6 +1411,7 @@ void scheduler_ipi(void)
          * somewhat pessimize the simple resched case.
          */
         irq_enter();
+       tick_nohz_full_check();
         sched_ttwu_pending();
  
         /*
@@ -1498,8 +1531,10 @@ static void try_to_wake_up_local(struct task_struct *p)
  {
         struct rq *rq = task_rq(p);
  
-       BUG_ON(rq != this_rq());
-       BUG_ON(p == current);
+       if (WARN_ON_ONCE(rq != this_rq()) ||
+           WARN_ON_ONCE(p == current))
+               return;
+
         lockdep_assert_held(&rq->lock);
  
         if (!raw_spin_trylock(&p->pi_lock)) {
@@ -1858,6 +1893,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
                 kprobe_flush_task(prev);
                 put_task_struct(prev);
         }
+
+       tick_nohz_task_switch(current);
  }
  
  #ifdef CONFIG_SMP
@@ -2121,7 +2158,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
         return load >> FSHIFT;
  }
  
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
  /*
   * Handle NO_HZ for the global load-average.
   *
@@ -2347,12 +2384,12 @@ static void calc_global_nohz(void)
         smp_wmb();
         calc_load_idx++;
  }
-#else /* !CONFIG_NO_HZ */
+#else /* !CONFIG_NO_HZ_COMMON */
  
  static inline long calc_load_fold_idle(void) { return 0; }
  static inline void calc_global_nohz(void) { }
  
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
  
  /*
   * calc_load - update the avenrun load estimates 10 ticks after the
@@ -2512,7 +2549,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
         sched_avg_update(this_rq);
  }
  
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
  /*
   * There is no sane way to deal with nohz on smp when using jiffies because the
   * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
@@ -2572,7 +2609,7 @@ void update_cpu_load_nohz(void)
         }
         raw_spin_unlock(&this_rq->lock);
  }
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
  
  /*
   * Called from scheduler_tick()
@@ -2699,7 +2736,34 @@ void scheduler_tick(void)
         rq->idle_balance = idle_cpu(cpu);
         trigger_load_balance(rq, cpu);
  #endif
+       rq_last_tick_reset(rq);
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+/**
+ * scheduler_tick_max_deferment - longest time the tick may still be deferred
+ *
+ * Keep at least one tick per second when a single active task is
+ * running, because the scheduler doesn't yet completely support a full
+ * dynticks environment. This keeps uptime, CFS vruntime, load balancing,
+ * etc. moving forward, even with a very low granularity.
+ *
+ * Return: nanoseconds left until rq->last_sched_tick + HZ, or 0 if that
+ * deadline has already passed. Widened to u64 before scaling to ns.
+ */
+u64 scheduler_tick_max_deferment(void)
+{
+       struct rq *rq = this_rq();
+       unsigned long next, now = ACCESS_ONCE(jiffies);
+
+       next = rq->last_sched_tick + HZ;
+
+       if (time_before_eq(next, now))
+               return 0;
+
+       return (u64)jiffies_to_usecs(next - now) * NSEC_PER_USEC;
  }
+#endif
  
  notrace unsigned long get_parent_ip(unsigned long addr)
  {
@@ -2997,51 +3061,6 @@ void __sched schedule_preempt_disabled(void)
         preempt_disable();
  }
  
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-       if (lock->owner != owner)
-               return false;
-
-       /*
-        * Ensure we emit the owner->on_cpu, dereference _after_ checking
-        * lock->owner still matches owner, if that fails, owner might
-        * point to free()d memory, if it still matches, the rcu_read_lock()
-        * ensures the memory stays valid.
-        */
-       barrier();
-
-       return owner->on_cpu;
-}
-
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
-{
-       if (!sched_feat(OWNER_SPIN))
-               return 0;
-
-       rcu_read_lock();
-       while (owner_running(lock, owner)) {
-               if (need_resched())
-                       break;
-
-               arch_mutex_cpu_relax();
-       }
-       rcu_read_unlock();
-
-       /*
-        * We break out the loop above on need_resched() and when the
-        * owner changed, which is a sign for heavy contention. Return
-        * success only when lock->owner is NULL.
-        */
-       return lock->owner == NULL;
-}
-#endif
-
  #ifdef CONFIG_PREEMPT
  /*
   * this is the entry point to schedule() from in-kernel preemption
@@ -4130,6 +4149,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
         get_task_struct(p);
         rcu_read_unlock();
  
+       if (p->flags & PF_NO_SETAFFINITY) {
+               retval = -EINVAL;
+               goto out_put_task;
+       }
         if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
                 retval = -ENOMEM;
                 goto out_put_task;
@@ -4777,11 +4800,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
                 goto out;
         }
  
-       if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
-               ret = -EINVAL;
-               goto out;
-       }
-
         do_set_cpus_allowed(p, new_mask);
  
         /* Can the task run on the task's current CPU? If so, we're done */
@@ -5003,7 +5021,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
  }
  
  static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
  
  static void
  set_table_entry(struct ctl_table *entry,
@@ -6999,9 +7017,12 @@ void __init sched_init(void)
                 INIT_LIST_HEAD(&rq->cfs_tasks);
  
                 rq_attach_root(rq, &def_root_domain);
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
                 rq->nohz_flags = 0;
  #endif
+#ifdef CONFIG_NO_HZ_FULL
+               rq->last_sched_tick = 0;
+#endif
  #endif
                 init_rq_hrtick(rq);
                 atomic_set(&rq->nr_iowait, 0);