sched/debug: Optimize sched_domain sysctl generation
author		Peter Zijlstra <peterz@infradead.org>	Thu, 10 Aug 2017 15:10:26 +0000 (17:10 +0200)
committer	Ingo Molnar <mingo@kernel.org>	Fri, 25 Aug 2017 09:12:20 +0000 (11:12 +0200)
Currently we unconditionally destroy all sysctl bits and regenerate
them after we've rebuilt the domains (even if that rebuild is a
no-op).

And since we unconditionally (re)build the sysctl tables for all
possible CPUs, onlining all CPUs gets us O(n^2) time. Instead, change
this to only rebuild the bits for CPUs we've actually installed new
domains on.

Reported-by: Ofer Levi(SW) <oferle@mellanox.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
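
The core of the change is a dirty-set pattern: rather than regenerating every
possible CPU's sysctl table on each domain rebuild, mark only the CPUs whose
domains were actually replaced and rebuild just those entries the next time
the tables are registered. Below is a minimal userspace C sketch of that
pattern, not kernel code; the names (mark_dirty, regenerate_dirty, the fixed
NR_CPUS constant) are illustrative assumptions, not kernel API.

/* Sketch only: a boolean "dirty" set standing in for sd_sysctl_cpus. */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static bool dirty[NR_CPUS];            /* stands in for sd_sysctl_cpus */
static int entry_generation[NR_CPUS];  /* stands in for the per-CPU ctl_table */

/* Analogue of dirty_sched_domain_sysctl(): a CPU just got a new domain. */
static void mark_dirty(int cpu)
{
	dirty[cpu] = true;
}

/* Analogue of register_sched_domain_sysctl(): rebuild only dirty entries. */
static void regenerate_dirty(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!dirty[cpu])
			continue;
		entry_generation[cpu]++;   /* "free old table, allocate new one" */
		dirty[cpu] = false;
	}
}

int main(void)
{
	/* Onlining CPUs one by one now touches one entry per step, not all. */
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		mark_dirty(cpu);
		regenerate_dirty();
	}
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu%d rebuilt %d time(s)\n", cpu, entry_generation[cpu]);
	return 0;
}

In the sketch each step rebuilds exactly one entry, so every per-CPU table ends
up rebuilt once. In the patch the corresponding win is that
register_sched_domain_sysctl() no longer walks and reallocates tables for every
possible CPU, only for those flagged in sd_sysctl_cpus by
dirty_sched_domain_sysctl().
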
kernel/sched/debug.c
kernel/sched/sched.h
kernel/sched/topology.c

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index cfd84f79e0755f69de11aa560c58b82b1c42d283..4a23bbc3111bd287ce4437e6fa2de530f9a56406 100644
@@ -327,38 +327,78 @@ static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
        return table;
 }
 
+static cpumask_var_t sd_sysctl_cpus;
 static struct ctl_table_header *sd_sysctl_header;
+
 void register_sched_domain_sysctl(void)
 {
-       int i, cpu_num = num_possible_cpus();
-       struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+       static struct ctl_table *cpu_entries;
+       static struct ctl_table **cpu_idx;
        char buf[32];
+       int i;
 
-       WARN_ON(sd_ctl_dir[0].child);
-       sd_ctl_dir[0].child = entry;
+       if (!cpu_entries) {
+               cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
+               if (!cpu_entries)
+                       return;
 
-       if (entry == NULL)
-               return;
+               WARN_ON(sd_ctl_dir[0].child);
+               sd_ctl_dir[0].child = cpu_entries;
+       }
 
-       for_each_possible_cpu(i) {
-               snprintf(buf, 32, "cpu%d", i);
-               entry->procname = kstrdup(buf, GFP_KERNEL);
-               entry->mode = 0555;
-               entry->child = sd_alloc_ctl_cpu_table(i);
-               entry++;
+       if (!cpu_idx) {
+               struct ctl_table *e = cpu_entries;
+
+               cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
+               if (!cpu_idx)
+                       return;
+
+               /* deal with sparse possible map */
+               for_each_possible_cpu(i) {
+                       cpu_idx[i] = e;
+                       e++;
+               }
+       }
+
+       if (!cpumask_available(sd_sysctl_cpus)) {
+               if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
+                       return;
+
+               /* init to possible to not have holes in @cpu_entries */
+               cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
+       }
+
+       for_each_cpu(i, sd_sysctl_cpus) {
+               struct ctl_table *e = cpu_idx[i];
+
+               if (e->child)
+                       sd_free_ctl_entry(&e->child);
+
+               if (!e->procname) {
+                       snprintf(buf, 32, "cpu%d", i);
+                       e->procname = kstrdup(buf, GFP_KERNEL);
+               }
+               e->mode = 0555;
+               e->child = sd_alloc_ctl_cpu_table(i);
+
+               __cpumask_clear_cpu(i, sd_sysctl_cpus);
        }
 
        WARN_ON(sd_sysctl_header);
        sd_sysctl_header = register_sysctl_table(sd_ctl_root);
 }
 
+void dirty_sched_domain_sysctl(int cpu)
+{
+       if (cpumask_available(sd_sysctl_cpus))
+               __cpumask_set_cpu(cpu, sd_sysctl_cpus);
+}
+
 /* may be called multiple times per register */
 void unregister_sched_domain_sysctl(void)
 {
        unregister_sysctl_table(sd_sysctl_header);
        sd_sysctl_header = NULL;
-       if (sd_ctl_dir[0].child)
-               sd_free_ctl_entry(&sd_ctl_dir[0].child);
 }
 #endif /* CONFIG_SYSCTL */
 #endif /* CONFIG_SMP */
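
One subtlety in the hunk above: cpu_entries is sized by num_possible_cpus() and
filled densely, while CPU numbers in a sparse possible mask may have holes,
which is why cpu_idx[] (sized nr_cpu_ids) maps a CPU number to its slot. A
small hedged sketch of that dense-over-sparse indexing, using a made-up
possible set (the array names and the 8-CPU layout are illustrative only):

#include <stdio.h>

#define NR_CPU_IDS 8

struct entry { int dummy; };

int main(void)
{
	/* Hypothetical sparse "possible" set: only CPUs 0, 2, 3 and 5 exist. */
	int possible[NR_CPU_IDS] = { 1, 0, 1, 1, 0, 1, 0, 0 };
	struct entry entries[4];               /* dense: one slot per possible CPU */
	struct entry *idx[NR_CPU_IDS] = { 0 }; /* sparse: indexed by CPU number */
	struct entry *e = entries;

	/* Same walk as the for_each_possible_cpu() loop filling cpu_idx[]. */
	for (int cpu = 0; cpu < NR_CPU_IDS; cpu++) {
		if (!possible[cpu])
			continue;
		idx[cpu] = e++;
	}

	printf("cpu5 maps to dense slot %ld\n", (long)(idx[5] - entries));
	return 0;
}

With possible CPUs {0, 2, 3, 5}, cpu 5 lands in dense slot 3, mirroring how the
patch's cpu_idx[i] = e; e++ walk skips the holes in the possible map.
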
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index eeef1a3086d1e74af034782cd780bbf7e44333b2..25e5cb1107f3f4ebaca68432b5eed90b00eef3a7 100644
@@ -1120,11 +1120,15 @@ extern int group_balance_cpu(struct sched_group *sg);
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
 void register_sched_domain_sysctl(void);
+void dirty_sched_domain_sysctl(int cpu);
 void unregister_sched_domain_sysctl(void);
 #else
 static inline void register_sched_domain_sysctl(void)
 {
 }
+static inline void dirty_sched_domain_sysctl(int cpu)
+{
+}
 static inline void unregister_sched_domain_sysctl(void)
 {
 }
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 727daa2a0abe962d3bbd9bcde62a099360d75d41..6f7b43982f735d340948c6d2c1be41460ea72806 100644
@@ -459,6 +459,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        rq_attach_root(rq, rd);
        tmp = rq->sd;
        rcu_assign_pointer(rq->sd, sd);
+       dirty_sched_domain_sysctl(cpu);
        destroy_sched_domains(tmp);
 
        update_top_cache_domain(cpu);