From 2cc3df5e1c7be786e3089c7882716a89bd8c0d1b Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann
Date: Sat, 26 Sep 2015 18:19:54 +0100
Subject: [PATCH] ANDROID: sched: Update max cpu capacity in case of max
 frequency constraints

Wakeup balancing uses cpu capacity awareness and needs to know the
system-wide maximum cpu capacity.

Patch "sched: Store system-wide maximum cpu capacity in root domain"
finds the system-wide maximum cpu capacity during scheduler domain
hierarchy setup. This is sufficient as long as maximum frequency
invariance is not enabled.

If it is enabled, the system-wide maximum cpu capacity can change
between scheduler domain hierarchy setups due to frequency capping.

The cpu capacity is changed in update_cpu_capacity(), which is called
in load balance on the lowest scheduler domain hierarchy level.

To know whether a change in the cpu capacity of a certain cpu also
affects the system-wide maximum cpu capacity, it would normally be
necessary to iterate over all cpus, which is far too costly. That is
why this patch follows a different approach.

The unsigned long max_cpu_capacity value in struct root_domain is
replaced with a struct max_cpu_capacity, containing val (the maximum
cpu capacity) and cpu (the index of the cpu providing that capacity).

The system-wide maximum cpu capacity and the cpu index are updated if:

 1. system-wide maximum cpu capacity < cpu capacity
 2. system-wide maximum cpu capacity > cpu capacity and cpu index == cpu

In all other cases the system-wide maximum cpu capacity is left
unchanged.

Atomic read and write access to the pair (max_cpu_capacity.val,
max_cpu_capacity.cpu) is enforced by max_cpu_capacity.lock. The access
to max_cpu_capacity.val in task_fits_max() is still performed without
taking the max_cpu_capacity.lock.

The code to set the max cpu capacity in build_sched_domains() has been
removed, since the whole functionality is now provided by
update_cpu_capacity() instead.

This approach can temporarily introduce errors, e.g. when the cpu
currently providing the maximum cpu capacity has its capacity lowered
due to frequency capping and calls update_cpu_capacity() before any
cpu that could now provide the maximum capacity has done so.
Signed-off-by: Dietmar Eggemann
Signed-off-by: Ionela Voinescu
Change-Id: Idaa7a16723001e222e476de34df332558e48dd13
---
 kernel/sched/fair.c     | 32 +++++++++++++++++++++++++++++++-
 kernel/sched/sched.h    | 10 +++++++++-
 kernel/sched/topology.c | 13 ++-----------
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 469521870371..45dfaec611b2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7236,7 +7236,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 		return 0;
 
 	min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
-	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
+	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
 
 	/* Minimum capacity is close to max, no need to abort wake_affine */
 	if (max_cap - min_cap < max_cap >> 3)
@@ -8834,16 +8834,46 @@ static unsigned long scale_rt_capacity(int cpu)
 	return 1;
 }
 
+void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
+{
+	raw_spin_lock_init(&mcc->lock);
+	mcc->val = 0;
+	mcc->cpu = -1;
+}
+
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
 	struct sched_group *sdg = sd->groups;
+	struct max_cpu_capacity *mcc;
+	unsigned long max_capacity;
+	int max_cap_cpu;
+	unsigned long flags;
 
 	cpu_rq(cpu)->cpu_capacity_orig = capacity;
 
 	capacity *= arch_scale_max_freq_capacity(sd, cpu);
 	capacity >>= SCHED_CAPACITY_SHIFT;
 
+	mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
+
+	raw_spin_lock_irqsave(&mcc->lock, flags);
+	max_capacity = mcc->val;
+	max_cap_cpu = mcc->cpu;
+
+	if ((max_capacity > capacity && max_cap_cpu == cpu) ||
+	    max_capacity < capacity) {
+		mcc->val = capacity;
+		mcc->cpu = cpu;
+#ifdef CONFIG_SCHED_DEBUG
+		raw_spin_unlock_irqrestore(&mcc->lock, flags);
+		pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity);
+		goto skip_unlock;
+#endif
+	}
+	raw_spin_unlock_irqrestore(&mcc->lock, flags);
+
+skip_unlock: __attribute__ ((unused));
 	capacity *= scale_rt_capacity(cpu);
 	capacity >>= SCHED_CAPACITY_SHIFT;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c7956c4c0844..235e519caf55 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -612,6 +612,12 @@ static inline bool sched_asym_prefer(int a, int b)
 	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
 }
 
+struct max_cpu_capacity {
+	raw_spinlock_t lock;
+	unsigned long val;
+	int cpu;
+};
+
 /*
  * We add the notion of a root-domain which will be used to define per-domain
  * variables. Each exclusive cpuset essentially defines an island domain by
@@ -663,7 +669,8 @@ struct root_domain {
 	cpumask_var_t rto_mask;
 	struct cpupri cpupri;
 
-	unsigned long max_cpu_capacity;
+	/* Maximum cpu capacity in the system */
+	struct max_cpu_capacity max_cpu_capacity;
 
 	/* First cpu with maximum and minimum original capacity */
 	int max_cap_orig_cpu, min_cap_orig_cpu;
@@ -673,6 +680,7 @@ extern struct root_domain def_root_domain;
 extern struct mutex sched_domains_mutex;
 
 extern void init_defrootdomain(void);
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
 extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
 extern void sched_get_rd(struct root_domain *rd);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8a9be7d4d286..9e967e88676b 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -304,6 +304,8 @@ static int init_rootdomain(struct root_domain *rd)
 
 	rd->max_cap_orig_cpu = rd->min_cap_orig_cpu = -1;
 
+	init_max_cpu_capacity(&rd->max_cpu_capacity);
+
 	return 0;
 
 free_cpudl:
@@ -1818,7 +1820,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 	enum s_alloc alloc_state;
 	struct sched_domain *sd;
 	struct s_data d;
-	struct rq *rq = NULL;
 	int i, ret = -ENOMEM;
 
 	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -1873,13 +1874,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 		int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
 		int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu);
 
-		rq = cpu_rq(i);
 		sd = *per_cpu_ptr(d.sd, i);
 
-		/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
-		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
-			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
-
 		if ((max_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig >
 		    cpu_rq(max_cpu)->cpu_capacity_orig))
 			WRITE_ONCE(d.rd->max_cap_orig_cpu, i);
@@ -1895,11 +1891,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 	if (!cpumask_empty(cpu_map))
 		update_asym_cpucapacity(cpumask_first(cpu_map));
 
-	if (rq && sched_debug_enabled) {
-		pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
-			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
-	}
-
 	ret = 0;
 error:
 	__free_domain_allocs(&d, alloc_state, cpu_map);
-- 
2.20.1
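
[Editorial sketch, not part of the patch: the update rule in the commit
message is compact enough to model outside the kernel. The userspace C
program below reproduces the O(1) tracking logic and the transient
staleness the last paragraph of the commit message admits. A pthread
mutex stands in for raw_spinlock_t, and update_max_capacity() plus the
capacity numbers in main() are made-up illustrations.]

#include <pthread.h>
#include <stdio.h>

/* Mirrors the kernel struct; pthread mutex stands in for the spinlock. */
struct max_cpu_capacity {
	pthread_mutex_t lock;
	unsigned long val;	/* system-wide maximum cpu capacity */
	int cpu;		/* cpu currently providing that maximum */
};

static struct max_cpu_capacity mcc = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.val  = 0,
	.cpu  = -1,
};

/*
 * O(1) update, called whenever @cpu recomputes its own capacity:
 *  1. take over the maximum if this cpu now exceeds it, or
 *  2. follow the maximum down if this cpu *is* the recorded max cpu.
 * All other cases leave the recorded maximum untouched, so no
 * iteration over all cpus is ever needed.
 */
static void update_max_capacity(int cpu, unsigned long capacity)
{
	pthread_mutex_lock(&mcc.lock);
	if ((mcc.val > capacity && mcc.cpu == cpu) ||
	    mcc.val < capacity) {
		mcc.val = capacity;
		mcc.cpu = cpu;
	}
	pthread_mutex_unlock(&mcc.lock);
}

int main(void)
{
	update_max_capacity(0, 512);	/* little cpu: becomes the max */
	update_max_capacity(4, 1024);	/* big cpu: takes over the max */
	update_max_capacity(4, 400);	/* big cpu capped: max follows it
					 * down, briefly below cpu0's 512 --
					 * the temporary error the commit
					 * message describes */
	update_max_capacity(0, 512);	/* cpu0's next update corrects it */
	printf("max = %lu on cpu%d\n", mcc.val, mcc.cpu);
	return 0;
}

Built with "cc -pthread", this prints "max = 512 on cpu0": after cpu4 is
capped to 400 the recorded maximum is momentarily stale, and the next
update from any higher-capacity cpu heals it, trading that short window
of staleness for constant-time updates.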