rcu: Control grace-period duration from sysfs
authorPaul E. McKenney <paul.mckenney@linaro.org>
Wed, 27 Jun 2012 03:45:57 +0000 (20:45 -0700)
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>
Sun, 23 Sep 2012 14:41:54 +0000 (07:41 -0700)
Although almost everyone is well-served by the defaults, some uses of RCU
benefit from shorter grace periods, while others benefit more from the
greater efficiency provided by longer grace periods.  Situations requiring
a large number of grace periods to elapse (and wireshark startup has
been called out as an example of this) are helped by lower-latency
grace periods.  Furthermore, in some embedded applications, people are
willing to accept a small degradation in update efficiency (due to there
being more of the shorter grace-period operations) in order to gain the
lower latency.

In contrast, those few systems with thousands of CPUs need longer grace
periods because the CPU overhead of a grace period rises roughly
linearly with the number of CPUs.  Such systems normally do not make
much use of facilities that require large numbers of grace periods to
elapse, so this is a good tradeoff.

Therefore, this commit allows the durations to be controlled from sysfs.
There are two sysfs parameters, one named "jiffies_till_first_fqs" that
specifies the delay in jiffies from the end of grace-period initialization
until the first attempt to force quiescent states, and the other named
"jiffies_till_next_fqs" that specifies the delay (again in jiffies)
between subsequent attempts to force quiescent states.  They both default
to three jiffies, which is compatible with the old hard-coded behavior.

At some future time, it may be possible to automatically increase the
grace-period length with the number of CPUs, but we do not yet have
sufficient data to do a good job.  Preliminary data indicates that we
should add an addiitonal jiffy to each of the delays for every 200 CPUs
in the system, but more experimentation is needed.  For now, the number
of systems with more than 1,000 CPUs is small enough that this can be
relegated to boot-time hand tuning.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Documentation/kernel-parameters.txt
kernel/rcutree.c

index ad7e2e5088c126ce48e0362d1f3296f3e8b55d2b..55ada0471f9343e3db7fa01f03791970c9b806dc 100644 (file)
@@ -2385,6 +2385,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
                        Set timeout for RCU CPU stall warning messages.
 
+       rcutree.jiffies_till_first_fqs= [KNL,BOOT]
+                       Set delay from grace-period initialization to
+                       first attempt to force quiescent states.
+                       Units are jiffies, minimum value is zero,
+                       and maximum value is HZ.
+
+       rcutree.jiffies_till_next_fqs= [KNL,BOOT]
+                       Set delay between subsequent attempts to force
+                       quiescent states.  Units are jiffies, minimum
+                       value is one, and maximum value is HZ.
+
        rcutorture.fqs_duration= [KNL,BOOT]
                        Set duration of force_quiescent_state bursts.
 
index 43d57a17fcc51042eee22dd09a260c9f202301dc..c0d3d56f040416e347856a0ed166fdb1253d0e68 100644 (file)
@@ -226,6 +226,12 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
 module_param(rcu_cpu_stall_suppress, int, 0644);
 module_param(rcu_cpu_stall_timeout, int, 0644);
 
+static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
+static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
+
+module_param(jiffies_till_first_fqs, ulong, 0644);
+module_param(jiffies_till_next_fqs, ulong, 0644);
+
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
@@ -1177,6 +1183,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 static int __noreturn rcu_gp_kthread(void *arg)
 {
        int fqs_state;
+       unsigned long j;
        int ret;
        struct rcu_state *rsp = arg;
        struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1197,14 +1204,18 @@ static int __noreturn rcu_gp_kthread(void *arg)
 
                /* Handle quiescent-state forcing. */
                fqs_state = RCU_SAVE_DYNTICK;
+               j = jiffies_till_first_fqs;
+               if (j > HZ) {
+                       j = HZ;
+                       jiffies_till_first_fqs = HZ;
+               }
                for (;;) {
-                       rsp->jiffies_force_qs = jiffies +
-                                               RCU_JIFFIES_TILL_FORCE_QS;
+                       rsp->jiffies_force_qs = jiffies + j;
                        ret = wait_event_interruptible_timeout(rsp->gp_wq,
                                        (rsp->gp_flags & RCU_GP_FLAG_FQS) ||
                                        (!ACCESS_ONCE(rnp->qsmask) &&
                                         !rcu_preempt_blocked_readers_cgp(rnp)),
-                                       RCU_JIFFIES_TILL_FORCE_QS);
+                                       j);
                        /* If grace period done, leave loop. */
                        if (!ACCESS_ONCE(rnp->qsmask) &&
                            !rcu_preempt_blocked_readers_cgp(rnp))
@@ -1218,6 +1229,14 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                cond_resched();
                                flush_signals(current);
                        }
+                       j = jiffies_till_next_fqs;
+                       if (j > HZ) {
+                               j = HZ;
+                               jiffies_till_next_fqs = HZ;
+                       } else if (j < 1) {
+                               j = 1;
+                               jiffies_till_next_fqs = 1;
+                       }
                }
 
                /* Handle grace-period end. */