sched/core: Introduce 'struct sched_domain_shared'
authorPeter Zijlstra <peterz@infradead.org>
Mon, 9 May 2016 08:37:59 +0000 (10:37 +0200)
committerIngo Molnar <mingo@kernel.org>
Fri, 30 Sep 2016 08:54:06 +0000 (10:54 +0200)
Since struct sched_domain is strictly per cpu; introduce a structure
that is shared between all 'identical' sched_domains.

Limit to SD_SHARE_PKG_RESOURCES domains for now, as we'll only use it
for shared cache state; if another use comes up later we can easily
relax this.

While the sched_group's are normally shared between CPUs, these are
not natural to use when we need some shared state on a domain level --
since that would require the domain to have a parent, which is not a
given.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/sched.h
kernel/sched/core.c

index b99fcd1b341e6a00d6c616529cc557c933bcaaea..8a878b9649a163c50acc59ed7125c2778bc89cea 100644 (file)
@@ -1067,6 +1067,10 @@ extern int sched_domain_level_max;
 
 struct sched_group;
 
+struct sched_domain_shared {
+       atomic_t        ref;
+};
+
 struct sched_domain {
        /* These fields must be setup */
        struct sched_domain *parent;    /* top domain must be null terminated */
@@ -1135,6 +1139,7 @@ struct sched_domain {
                void *private;          /* used during construction */
                struct rcu_head rcu;    /* used during destruction */
        };
+       struct sched_domain_shared *shared;
 
        unsigned int span_weight;
        /*
@@ -1168,6 +1173,7 @@ typedef int (*sched_domain_flags_f)(void);
 
 struct sd_data {
        struct sched_domain **__percpu sd;
+       struct sched_domain_shared **__percpu sds;
        struct sched_group **__percpu sg;
        struct sched_group_capacity **__percpu sgc;
 };
index 662d08d7b1dfd158e695a2686f4d7defffe0bffb..97d3c18d00bf8b2a5bab48ec825d42f71e35b41f 100644 (file)
@@ -5947,6 +5947,8 @@ static void destroy_sched_domain(struct sched_domain *sd)
                kfree(sd->groups->sgc);
                kfree(sd->groups);
        }
+       if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
+               kfree(sd->shared);
        kfree(sd);
 }
 
@@ -6385,6 +6387,9 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
        WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
        *per_cpu_ptr(sdd->sd, cpu) = NULL;
 
+       if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
+               *per_cpu_ptr(sdd->sds, cpu) = NULL;
+
        if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
                *per_cpu_ptr(sdd->sg, cpu) = NULL;
 
@@ -6429,10 +6434,12 @@ static int sched_domains_curr_level;
 
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl,
+       const struct cpumask *cpu_map,
        struct sched_domain *child, int cpu)
 {
-       struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-       int sd_weight, sd_flags = 0;
+       struct sd_data *sdd = &tl->data;
+       struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+       int sd_id, sd_weight, sd_flags = 0;
 
 #ifdef CONFIG_NUMA
        /*
@@ -6487,6 +6494,9 @@ sd_init(struct sched_domain_topology_level *tl,
 #endif
        };
 
+       cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+       sd_id = cpumask_first(sched_domain_span(sd));
+
        /*
         * Convert topological properties into behaviour.
         */
@@ -6529,7 +6539,16 @@ sd_init(struct sched_domain_topology_level *tl,
                sd->idle_idx = 1;
        }
 
-       sd->private = &tl->data;
+       /*
+        * For all levels sharing cache; connect a sched_domain_shared
+        * instance.
+        */
+       if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+               sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
+               atomic_inc(&sd->shared->ref);
+       }
+
+       sd->private = sdd;
 
        return sd;
 }
@@ -6839,6 +6858,10 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                if (!sdd->sd)
                        return -ENOMEM;
 
+               sdd->sds = alloc_percpu(struct sched_domain_shared *);
+               if (!sdd->sds)
+                       return -ENOMEM;
+
                sdd->sg = alloc_percpu(struct sched_group *);
                if (!sdd->sg)
                        return -ENOMEM;
@@ -6849,6 +6872,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
                for_each_cpu(j, cpu_map) {
                        struct sched_domain *sd;
+                       struct sched_domain_shared *sds;
                        struct sched_group *sg;
                        struct sched_group_capacity *sgc;
 
@@ -6859,6 +6883,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
                        *per_cpu_ptr(sdd->sd, j) = sd;
 
+                       sds = kzalloc_node(sizeof(struct sched_domain_shared),
+                                       GFP_KERNEL, cpu_to_node(j));
+                       if (!sds)
+                               return -ENOMEM;
+
+                       *per_cpu_ptr(sdd->sds, j) = sds;
+
                        sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
                                        GFP_KERNEL, cpu_to_node(j));
                        if (!sg)
@@ -6898,6 +6929,8 @@ static void __sdt_free(const struct cpumask *cpu_map)
                                kfree(*per_cpu_ptr(sdd->sd, j));
                        }
 
+                       if (sdd->sds)
+                               kfree(*per_cpu_ptr(sdd->sds, j));
                        if (sdd->sg)
                                kfree(*per_cpu_ptr(sdd->sg, j));
                        if (sdd->sgc)
@@ -6905,6 +6938,8 @@ static void __sdt_free(const struct cpumask *cpu_map)
                }
                free_percpu(sdd->sd);
                sdd->sd = NULL;
+               free_percpu(sdd->sds);
+               sdd->sds = NULL;
                free_percpu(sdd->sg);
                sdd->sg = NULL;
                free_percpu(sdd->sgc);
@@ -6916,9 +6951,8 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
                const struct cpumask *cpu_map, struct sched_domain_attr *attr,
                struct sched_domain *child, int cpu)
 {
-       struct sched_domain *sd = sd_init(tl, child, cpu);
+       struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
 
-       cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
        if (child) {
                sd->level = child->level + 1;
                sched_domain_level_max = max(sched_domain_level_max, sd->level);