From c58d25f371f5e4b2dfbec3a7bd6f3c24dd79095b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 May 2016 09:19:59 +0200 Subject: [PATCH] sched/fair: Move record_wakee() Since I want to make ->task_woken() conditional on the task getting migrated, we cannot use it to call record_wakee(). Move it to select_task_rq_fair(), which gets called in almost all the same conditions. The only exception is if the woken task (@p) is CPU-bound (as per the nr_cpus_allowed test in select_task_rq()). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Hunter Cc: Ben Segall Cc: Linus Torvalds Cc: Matt Fleming Cc: Mike Galbraith Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Paul Turner Cc: Pavan Kondeti Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: byungchul.park@lge.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 61 ++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 84e465ae7c63..48633a1c3b46 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4810,24 +4810,6 @@ static unsigned long cpu_avg_load_per_task(int cpu) return 0; } -static void record_wakee(struct task_struct *p) -{ - /* - * Rough decay (wiping) for cost saving, don't worry - * about the boundary, really active task won't care - * about the loss. 
- */ - if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) { - current->wakee_flips >>= 1; - current->wakee_flip_decay_ts = jiffies; - } - - if (current->last_wakee != p) { - current->last_wakee = p; - current->wakee_flips++; - } -} - static void task_waking_fair(struct task_struct *p) { struct sched_entity *se = &p->se; @@ -4847,7 +4829,6 @@ static void task_waking_fair(struct task_struct *p) #endif se->vruntime -= min_vruntime; - record_wakee(p); } #ifdef CONFIG_FAIR_GROUP_SCHED @@ -4965,17 +4946,39 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) #endif +static void record_wakee(struct task_struct *p) +{ + /* + * Only decay a single time; tasks that have less than 1 wakeup per + * jiffy will not have built up many flips. + */ + if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) { + current->wakee_flips >>= 1; + current->wakee_flip_decay_ts = jiffies; + } + + if (current->last_wakee != p) { + current->last_wakee = p; + current->wakee_flips++; + } +} + /* * Detect M:N waker/wakee relationships via a switching-frequency heuristic. + * * A waker of many should wake a different task than the one last awakened - * at a frequency roughly N times higher than one of its wakees. In order - * to determine whether we should let the load spread vs consolodating to - * shared cache, we look for a minimum 'flip' frequency of llc_size in one - * partner, and a factor of lls_size higher frequency in the other. With - * both conditions met, we can be relatively sure that the relationship is - * non-monogamous, with partner count exceeding socket size. Waker/wakee - * being client/server, worker/dispatcher, interrupt source or whatever is - * irrelevant, spread criteria is apparent partner count exceeds socket size. + * at a frequency roughly N times higher than one of its wakees. 
+ * + * In order to determine whether we should let the load spread vs consolidating + * to shared cache, we look for a minimum 'flip' frequency of llc_size in one + * partner, and a factor of llc_size higher frequency in the other. + * + * With both conditions met, we can be relatively sure that the relationship is + * non-monogamous, with partner count exceeding socket size. + * + * Waker/wakee being client/server, worker/dispatcher, interrupt source or + * whatever is irrelevant, spread criteria is apparent partner count exceeds + * socket size. */ static int wake_wide(struct task_struct *p) { @@ -5280,8 +5283,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f int want_affine = 0; int sync = wake_flags & WF_SYNC; - if (sd_flag & SD_BALANCE_WAKE) + if (sd_flag & SD_BALANCE_WAKE) { + record_wakee(p); want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + } rcu_read_lock(); for_each_domain(cpu, tmp) { -- 2.20.1