rcu: Bind grace-period kthreads to non-NO_HZ_FULL CPUs
author Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Wed, 4 Jun 2014 20:46:03 +0000 (13:46 -0700)
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Wed, 9 Jul 2014 16:15:02 +0000 (09:15 -0700)
Binding the grace-period kthreads to the timekeeping CPU resulted in
significant performance decreases for some workloads.  For more detail,
see:

https://lkml.org/lkml/2014/6/3/395 for benchmark numbers

https://lkml.org/lkml/2014/6/4/218 for CPU statistics

It turns out that it is necessary to bind the grace-period kthreads
to the timekeeping CPU in only two cases: when every CPU other than
CPU 0 is a nohz_full CPU, or when CONFIG_NO_HZ_FULL_SYSIDLE=y.
In other cases, it suffices to bind the grace-period kthreads to the
set of non-nohz_full CPUs.

This commit therefore creates a tick_nohz_not_full_mask that is the
complement of tick_nohz_full_mask, and then binds the grace-period
kthread to the set of CPUs indicated by this new mask, which covers
the CONFIG_NO_HZ_FULL_SYSIDLE=n case.  The CONFIG_NO_HZ_FULL_SYSIDLE=y
case still binds the grace-period kthreads to the timekeeping CPU.
This commit also includes the tick_nohz_full_enabled() check suggested
by Frederic Weisbecker.

Reported-by: Jet Chen <jet.chen@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Created housekeeping_affine() and housekeeping_mask per
  fweisbec feedback. ]

include/linux/tick.h
kernel/rcu/tree_plugin.h
kernel/time/tick-sched.c

index b84773cb9f4c1f3fc480a460ba3c37a2121e1557..06cc093ab7ad37771f7facae78cc95d40555ccce 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/hrtimer.h>
 #include <linux/context_tracking_state.h>
 #include <linux/cpumask.h>
+#include <linux/sched.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -162,6 +163,7 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
 #ifdef CONFIG_NO_HZ_FULL
 extern bool tick_nohz_full_running;
 extern cpumask_var_t tick_nohz_full_mask;
+extern cpumask_var_t housekeeping_mask;
 
 static inline bool tick_nohz_full_enabled(void)
 {
@@ -194,6 +196,24 @@ static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
 #endif
 
+static inline bool is_housekeeping_cpu(int cpu)
+{
+#ifdef CONFIG_NO_HZ_FULL
+       if (tick_nohz_full_enabled())
+               return cpumask_test_cpu(cpu, housekeeping_mask);
+#endif
+       return true;
+}
+
+static inline void housekeeping_affine(struct task_struct *t)
+{
+#ifdef CONFIG_NO_HZ_FULL
+       if (tick_nohz_full_enabled())
+               set_cpus_allowed_ptr(t, housekeeping_mask);
+
+#endif
+}
+
 static inline void tick_nohz_full_check(void)
 {
        if (tick_nohz_full_enabled())
index 719587af7b101aac93ba0627718ccfaae7ac237c..b39ba7239bd64c9c7fb427ad75cd4d219ff929f4 100644 (file)
@@ -2846,12 +2846,16 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
  */
 static void rcu_bind_gp_kthread(void)
 {
-#ifdef CONFIG_NO_HZ_FULL
-       int cpu = tick_do_timer_cpu;
+       int __maybe_unused cpu;
 
-       if (cpu < 0 || cpu >= nr_cpu_ids)
+       if (!tick_nohz_full_enabled())
                return;
-       if (raw_smp_processor_id() != cpu)
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+       cpu = tick_do_timer_cpu;
+       if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu)
                set_cpus_allowed_ptr(current, cpumask_of(cpu));
-#endif /* #ifdef CONFIG_NO_HZ_FULL */
+#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+       if (!is_housekeeping_cpu(raw_smp_processor_id()))
+               housekeeping_affine(current);
+#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
index 6558b7ac112d2e1293886721a22d020ea78a3e4d..f784d83e29f1dde2ea4a2f69db51845ef3b83014 100644 (file)
@@ -154,6 +154,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
+cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
 
 static bool can_stop_full_tick(void)
@@ -281,6 +282,7 @@ static int __init tick_nohz_full_setup(char *str)
        int cpu;
 
        alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+       alloc_bootmem_cpumask_var(&housekeeping_mask);
        if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
                pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
                return 1;
@@ -291,6 +293,8 @@ static int __init tick_nohz_full_setup(char *str)
                pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
                cpumask_clear_cpu(cpu, tick_nohz_full_mask);
        }
+       cpumask_andnot(housekeeping_mask,
+                      cpu_possible_mask, tick_nohz_full_mask);
        tick_nohz_full_running = true;
 
        return 1;
@@ -332,9 +336,15 @@ static int tick_nohz_init_all(void)
                pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
                return err;
        }
+       if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) {
+               pr_err("NO_HZ: Can't allocate not-full dynticks cpumask\n");
+               return err;
+       }
        err = 0;
        cpumask_setall(tick_nohz_full_mask);
        cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
+       cpumask_clear(housekeeping_mask);
+       cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
        tick_nohz_full_running = true;
 #endif
        return err;