sched/core, x86/topology: Fix NUMA in package topology bug
authorTim Chen <tim.c.chen@linux.intel.com>
Wed, 21 Sep 2016 19:19:03 +0000 (12:19 -0700)
committerIngo Molnar <mingo@kernel.org>
Fri, 30 Sep 2016 08:53:18 +0000 (10:53 +0200)
Current code can call set_cpu_sibling_map() and invoke sched_set_topology()
more than once (e.g. on CPU hot plug).  When this happens after
sched_init_smp() has been called, we lose the NUMA topology extension to
sched_domain_topology in sched_init_numa().  This results in incorrect
topology when the sched domain is rebuilt.

This patch fixes the bug and issues warning if we call sched_set_topology()
after sched_init_smp().

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@suse.de
Cc: jolsa@redhat.com
Cc: rjw@rjwysocki.net
Link: http://lkml.kernel.org/r/1474485552-141429-2-git-send-email-srinivas.pandruvada@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/smpboot.c
kernel/sched/core.c

index 4296beb8fdd3b2d91a9644c9843d66dd2132c2ad..7137ec4eea9a8da6ef127976e3da3d1260ccfbe7 100644 (file)
@@ -471,7 +471,7 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
        return false;
 }
 
-static struct sched_domain_topology_level numa_inside_package_topology[] = {
+static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
 #ifdef CONFIG_SCHED_SMT
        { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 #endif
@@ -480,22 +480,23 @@ static struct sched_domain_topology_level numa_inside_package_topology[] = {
 #endif
        { NULL, },
 };
+
+static struct sched_domain_topology_level x86_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+       { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+       { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+       { NULL, },
+};
+
 /*
- * set_sched_topology() sets the topology internal to a CPU.  The
- * NUMA topologies are layered on top of it to build the full
- * system topology.
- *
- * If NUMA nodes are observed to occur within a CPU package, this
- * function should be called.  It forces the sched domain code to
- * only use the SMT level for the CPU portion of the topology.
- * This essentially falls back to relying on NUMA information
- * from the SRAT table to describe the entire system topology
- * (except for hyperthreads).
+ * Set if a package/die has multiple NUMA nodes inside.
+ * AMD Magny-Cours and Intel Cluster-on-Die have this.
  */
-static void primarily_use_numa_for_topology(void)
-{
-       set_sched_topology(numa_inside_package_topology);
-}
+static bool x86_has_numa_in_package;
 
 void set_cpu_sibling_map(int cpu)
 {
@@ -558,7 +559,7 @@ void set_cpu_sibling_map(int cpu)
                                c->booted_cores = cpu_data(i).booted_cores;
                }
                if (match_die(c, o) && !topology_same_node(c, o))
-                       primarily_use_numa_for_topology();
+                       x86_has_numa_in_package = true;
        }
 
        threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -1304,6 +1305,16 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
                zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
        }
+
+       /*
+        * Set 'default' x86 topology, this matches default_topology() in that
+        * it has NUMA nodes as a topology level. See also
+        * native_smp_cpus_done().
+        *
+        * Must be done before set_cpus_sibling_map() is ran.
+        */
+       set_sched_topology(x86_topology);
+
        set_cpu_sibling_map(0);
 
        switch (smp_sanity_check(max_cpus)) {
@@ -1370,6 +1381,9 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 {
        pr_debug("Boot done\n");
 
+       if (x86_has_numa_in_package)
+               set_sched_topology(x86_numa_in_package_topology);
+
        nmi_selftest();
        impress_friends();
        setup_ioapic_dest();
index 8bae0cd09e9eb3eeb1ab4659ca9ad56e536ddf7a..af5d4d7962df948e07b9d25ae884bc554577ade9 100644 (file)
@@ -6552,6 +6552,9 @@ static struct sched_domain_topology_level *sched_domain_topology =
 
 void set_sched_topology(struct sched_domain_topology_level *tl)
 {
+       if (WARN_ON_ONCE(sched_smp_initialized))
+               return;
+
        sched_domain_topology = tl;
 }