rcu: Remove expedited GP funnel-lock bypass
author Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Sun, 31 Jan 2016 01:23:19 +0000 (17:23 -0800)
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Thu, 31 Mar 2016 20:34:07 +0000 (13:34 -0700)
Commit #cdacbe1f91264 ("rcu: Add fastpath bypassing funnel locking")
turns out to be a pessimization at high load because it forces a tree
full of tasks to wait for an expedited grace period that they probably
do not need.  This commit therefore removes this optimization.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Documentation/RCU/trace.txt
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_trace.c

index ec6998b1b6d04f3139ed6c066537cc059c89838d..00a3a38b375ae9946425fc2ea94fa0c2383e867c 100644 (file)
@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
 
 The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
 
-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
 
 These fields are as follows:
 
 o      "s" is the sequence number, with an odd number indicating that
        an expedited grace period is in progress.
 
-o      "wd0", "wd1", "wd2", and "wd3" are the number of times that an
-       attempt to start an expedited grace period found that someone
-       else had completed an expedited grace period that satisfies the
-       attempted request.  "Our work is done."
+o      "wd1", "wd2", and "wd3" are the number of times that an attempt
+       to start an expedited grace period found that someone else had
+       completed an expedited grace period that satisfies the attempted
+       request.  "Our work is done."
 
 o      "n" is number of times that a concurrent CPU-hotplug operation
        forced a fallback to a normal grace period.
index 524026fd9dd7f2167be8ee7829c084298b987ad3..62e73e0a929f92dbb5be6c5a638b3e6ed22c08ec 100644 (file)
@@ -3616,25 +3616,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
        struct rcu_node *rnp0;
        struct rcu_node *rnp1 = NULL;
 
-       /*
-        * First try directly acquiring the root lock in order to reduce
-        * latency in the common case where expedited grace periods are
-        * rare.  We check mutex_is_locked() to avoid pathological levels of
-        * memory contention on ->exp_funnel_mutex in the heavy-load case.
-        */
-       rnp0 = rcu_get_root(rsp);
-       if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
-               if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
-                       trace_rcu_exp_funnel_lock(rsp->name, rnp0->level,
-                                                 rnp0->grplo, rnp0->grphi,
-                                                 TPS("acq"));
-                       if (sync_exp_work_done(rsp, rnp0, NULL,
-                                              &rdp->expedited_workdone0, s))
-                               return NULL;
-                       return rnp0;
-               }
-       }
-
        /*
         * Each pass through the following loop works its way
         * up the rcu_node tree, returning if others have done the
index df668c0f9e64991346dd94a872ee67e16e97d735..ac9a7b0c36aea3fb2734429b8e5a3e4af071b5d8 100644 (file)
@@ -388,7 +388,6 @@ struct rcu_data {
        struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
        struct mutex exp_funnel_mutex;
-       atomic_long_t expedited_workdone0;      /* # done by others #0. */
        atomic_long_t expedited_workdone1;      /* # done by others #1. */
        atomic_long_t expedited_workdone2;      /* # done by others #2. */
        atomic_long_t expedited_workdone3;      /* # done by others #3. */
index 1088e64f01ad84f98143b95c549bf77ad9c655ab..d149c412a4e5170871261bd6c92c38a4ee1ec31d 100644 (file)
@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
        int cpu;
        struct rcu_state *rsp = (struct rcu_state *)m->private;
        struct rcu_data *rdp;
-       unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+       unsigned long s1 = 0, s2 = 0, s3 = 0;
 
        for_each_possible_cpu(cpu) {
                rdp = per_cpu_ptr(rsp->rda, cpu);
-               s0 += atomic_long_read(&rdp->expedited_workdone0);
                s1 += atomic_long_read(&rdp->expedited_workdone1);
                s2 += atomic_long_read(&rdp->expedited_workdone2);
                s3 += atomic_long_read(&rdp->expedited_workdone3);
        }
-       seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-                  rsp->expedited_sequence, s0, s1, s2, s3,
+       seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+                  rsp->expedited_sequence, s1, s2, s3,
                   atomic_long_read(&rsp->expedited_normal),
                   atomic_read(&rsp->expedited_need_qs),
                   rsp->expedited_sequence / 2);