Merge tag 'trace-v4.14-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rosted...
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 4 Oct 2017 15:34:01 +0000 (08:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 4 Oct 2017 15:34:01 +0000 (08:34 -0700)
Pull tracing fixlets from Steven Rostedt:
 "Two updates:

   - A memory fix for leftover code from splitting out ftrace_ops and
     the function graph tracer, where the function graph tracer could
     reset the trampoline pointer, leaving the old trampoline unfreed
     (a memory leak).

   - A follow-up to Paul's patch that added an unnecessary READ_ONCE().
     This removes the unnecessary READ_ONCE() rather than rebasing the
     branch to fix up the patch that added it (the plain-read pattern
     behind this is sketched below, after the shortlog)"

* tag 'trace-v4.14-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  rcu: Remove extraneous READ_ONCE()s from rcu_irq_{enter,exit}()
  ftrace: Fix kmemleak in unregister_ftrace_graph

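For background on the second item: the READ_ONCE()s being dropped guarded
reads of the per-CPU ->dynticks_nmi_nesting counter in rcu_irq_enter() and
rcu_irq_exit(), visible in the diff below.  What follows is a minimal,
hypothetical sketch of the general pattern only, not the merged code; the
names my_nmi_nesting and my_irq_exit_check are invented for illustration,
and the rationale stated in the comments is the usual argument for such
removals, assumed here rather than quoted from the patch.

/*
 * Illustrative sketch only -- not the code being merged.  The names
 * my_nmi_nesting and my_irq_exit_check are hypothetical.  The pattern:
 * a per-CPU counter that is written only by its owning CPU and is read
 * with interrupts disabled cannot change between the load and its use,
 * so a plain read suffices and READ_ONCE() adds no protection.
 */
#include <linux/percpu.h>
#include <linux/irqflags.h>
#include <linux/bug.h>

static DEFINE_PER_CPU(long, my_nmi_nesting);

static void my_irq_exit_check(void)
{
        WARN_ON_ONCE(!irqs_disabled());  /* callers run with irqs off */

        /*
         * Plain load: only this CPU writes my_nmi_nesting, and with
         * interrupts off the value is stable across this check, so
         * READ_ONCE() would not be wrong here, just unnecessary.
         */
        if (__this_cpu_read(my_nmi_nesting))
                return;

        /* ... the real function does its exit bookkeeping here ... */
}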
kernel/rcu/tree.c

diff --combined kernel/rcu/tree.c
index 0c44c7b42e6d50bbb67ae47cbe257a734020b16c,c03152f7e458f8e48d8e9a8a7d9dab25f6043f65..b0ad62b0e7b8f8028d20694d79e144e4e07ec9c0
@@@ -97,6 -97,9 +97,6 @@@ struct rcu_state sname##_state = { 
        .gp_state = RCU_GP_IDLE, \
        .gpnum = 0UL - 300UL, \
        .completed = 0UL - 300UL, \
 -      .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
 -      .orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
 -      .orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
        .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
        .name = RCU_STATE_NAME(sname), \
        .abbr = sabbr, \
@@@ -840,9 -843,13 +840,9 @@@ static void rcu_eqs_enter(bool user
   */
  void rcu_idle_enter(void)
  {
 -      unsigned long flags;
 -
 -      local_irq_save(flags);
 +      RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!");
        rcu_eqs_enter(false);
 -      local_irq_restore(flags);
  }
 -EXPORT_SYMBOL_GPL(rcu_idle_enter);
  
  #ifdef CONFIG_NO_HZ_FULL
  /**
   */
  void rcu_user_enter(void)
  {
 -      rcu_eqs_enter(1);
 +      RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!");
 +      rcu_eqs_enter(true);
  }
  #endif /* CONFIG_NO_HZ_FULL */
  
@@@ -884,7 -890,7 +884,7 @@@ void rcu_irq_exit(void
        rdtp = this_cpu_ptr(&rcu_dynticks);
  
        /* Page faults can happen in NMI handlers, so check... */
-       if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+       if (rdtp->dynticks_nmi_nesting)
                return;
  
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
@@@ -954,10 -960,8 +954,10 @@@ static void rcu_eqs_exit(bool user
        if (oldval & DYNTICK_TASK_NEST_MASK) {
                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
        } else {
 +              __this_cpu_inc(disable_rcu_irq_enter);
                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
                rcu_eqs_exit_common(oldval, user);
 +              __this_cpu_dec(disable_rcu_irq_enter);
        }
  }
  
@@@ -980,6 -984,7 +980,6 @@@ void rcu_idle_exit(void
        rcu_eqs_exit(false);
        local_irq_restore(flags);
  }
 -EXPORT_SYMBOL_GPL(rcu_idle_exit);
  
  #ifdef CONFIG_NO_HZ_FULL
  /**
@@@ -1022,7 -1027,7 +1022,7 @@@ void rcu_irq_enter(void
        rdtp = this_cpu_ptr(&rcu_dynticks);
  
        /* Page faults can happen in NMI handlers, so check... */
-       if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+       if (rdtp->dynticks_nmi_nesting)
                return;
  
        oldval = rdtp->dynticks_nesting;
@@@ -1363,13 -1368,12 +1363,13 @@@ static void rcu_check_gp_kthread_starva
        j = jiffies;
        gpa = READ_ONCE(rsp->gp_activity);
        if (j - gpa > 2 * HZ) {
 -              pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
 +              pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
                       rsp->name, j - gpa,
                       rsp->gpnum, rsp->completed,
                       rsp->gp_flags,
                       gp_state_getname(rsp->gp_state), rsp->gp_state,
 -                     rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
 +                     rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
 +                     rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
                if (rsp->gp_kthread) {
                        sched_show_task(rsp->gp_kthread);
                        wake_up_process(rsp->gp_kthread);
@@@ -2073,8 -2077,8 +2073,8 @@@ static bool rcu_gp_init(struct rcu_stat
  }
  
  /*
 - * Helper function for wait_event_interruptible_timeout() wakeup
 - * at force-quiescent-state time.
 + * Helper function for swait_event_idle() wakeup at force-quiescent-state
 + * time.
   */
  static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
  {
@@@ -2212,8 -2216,9 +2212,8 @@@ static int __noreturn rcu_gp_kthread(vo
                                               READ_ONCE(rsp->gpnum),
                                               TPS("reqwait"));
                        rsp->gp_state = RCU_GP_WAIT_GPS;
 -                      swait_event_interruptible(rsp->gp_wq,
 -                                               READ_ONCE(rsp->gp_flags) &
 -                                               RCU_GP_FLAG_INIT);
 +                      swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
 +                                                   RCU_GP_FLAG_INIT);
                        rsp->gp_state = RCU_GP_DONE_GPS;
                        /* Locking provides needed memory barrier. */
                        if (rcu_gp_init(rsp))
                                               READ_ONCE(rsp->gpnum),
                                               TPS("fqswait"));
                        rsp->gp_state = RCU_GP_WAIT_FQS;
 -                      ret = swait_event_interruptible_timeout(rsp->gp_wq,
 +                      ret = swait_event_idle_timeout(rsp->gp_wq,
                                        rcu_gp_fqs_check_wake(rsp, &gf), j);
                        rsp->gp_state = RCU_GP_DOING_FQS;
                        /* Locking provides needed memory barriers. */
@@@ -2414,8 -2419,6 +2414,8 @@@ rcu_report_qs_rnp(unsigned long mask, s
                        return;
                }
                WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
 +              WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
 +                           rcu_preempt_blocked_readers_cgp(rnp));
                rnp->qsmask &= ~mask;
                trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
                                                 mask, rnp->qsmask, rnp->level,
@@@ -2569,6 -2572,85 +2569,6 @@@ rcu_check_quiescent_state(struct rcu_st
        rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
  }
  
 -/*
 - * Send the specified CPU's RCU callbacks to the orphanage.  The
 - * specified CPU must be offline, and the caller must hold the
 - * ->orphan_lock.
 - */
 -static void
 -rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 -                        struct rcu_node *rnp, struct rcu_data *rdp)
 -{
 -      lockdep_assert_held(&rsp->orphan_lock);
 -
 -      /* No-CBs CPUs do not have orphanable callbacks. */
 -      if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
 -              return;
 -
 -      /*
 -       * Orphan the callbacks.  First adjust the counts.  This is safe
 -       * because _rcu_barrier() excludes CPU-hotplug operations, so it
 -       * cannot be running now.  Thus no memory barrier is required.
 -       */
 -      rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
 -      rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
 -
 -      /*
 -       * Next, move those callbacks still needing a grace period to
 -       * the orphanage, where some other CPU will pick them up.
 -       * Some of the callbacks might have gone partway through a grace
 -       * period, but that is too bad.  They get to start over because we
 -       * cannot assume that grace periods are synchronized across CPUs.
 -       */
 -      rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
 -
 -      /*
 -       * Then move the ready-to-invoke callbacks to the orphanage,
 -       * where some other CPU will pick them up.  These will not be
 -       * required to pass though another grace period: They are done.
 -       */
 -      rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
 -
 -      /* Finally, disallow further callbacks on this CPU.  */
 -      rcu_segcblist_disable(&rdp->cblist);
 -}
 -
 -/*
 - * Adopt the RCU callbacks from the specified rcu_state structure's
 - * orphanage.  The caller must hold the ->orphan_lock.
 - */
 -static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 -{
 -      struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 -
 -      lockdep_assert_held(&rsp->orphan_lock);
 -
 -      /* No-CBs CPUs are handled specially. */
 -      if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
 -          rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
 -              return;
 -
 -      /* Do the accounting first. */
 -      rdp->n_cbs_adopted += rsp->orphan_done.len;
 -      if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
 -              rcu_idle_count_callbacks_posted();
 -      rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
 -
 -      /*
 -       * We do not need a memory barrier here because the only way we
 -       * can get here if there is an rcu_barrier() in flight is if
 -       * we are the task doing the rcu_barrier().
 -       */
 -
 -      /* First adopt the ready-to-invoke callbacks, then the done ones. */
 -      rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
 -      WARN_ON_ONCE(rsp->orphan_done.head);
 -      rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
 -      WARN_ON_ONCE(rsp->orphan_pend.head);
 -      WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
 -                   !rcu_segcblist_n_cbs(&rdp->cblist));
 -}
 -
  /*
   * Trace the fact that this CPU is going offline.
   */
@@@ -2632,12 -2714,14 +2632,12 @@@ static void rcu_cleanup_dead_rnp(struc
  
  /*
   * The CPU has been completely removed, and some other CPU is reporting
 - * this fact from process context.  Do the remainder of the cleanup,
 - * including orphaning the outgoing CPU's RCU callbacks, and also
 - * adopting them.  There can only be one CPU hotplug operation at a time,
 - * so no other CPU can be attempting to update rcu_cpu_kthread_task.
 + * this fact from process context.  Do the remainder of the cleanup.
 + * There can only be one CPU hotplug operation at a time, so no need for
 + * explicit locking.
   */
  static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
  {
 -      unsigned long flags;
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
        struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
  
  
        /* Adjust any no-longer-needed kthreads. */
        rcu_boost_kthread_setaffinity(rnp, -1);
 -
 -      /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 -      raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
 -      rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
 -      rcu_adopt_orphan_cbs(rsp, flags);
 -      raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
 -
 -      WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
 -                !rcu_segcblist_empty(&rdp->cblist),
 -                "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
 -                cpu, rcu_segcblist_n_cbs(&rdp->cblist),
 -                rcu_segcblist_first_cb(&rdp->cblist));
  }
  
  /*
@@@ -3483,11 -3579,10 +3483,11 @@@ static void rcu_barrier_callback(struc
        struct rcu_state *rsp = rdp->rsp;
  
        if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
 -              _rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence);
 +              _rcu_barrier_trace(rsp, TPS("LastCB"), -1,
 +                                 rsp->barrier_sequence);
                complete(&rsp->barrier_completion);
        } else {
 -              _rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence);
 +              _rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence);
        }
  }
  
@@@ -3499,15 -3594,14 +3499,15 @@@ static void rcu_barrier_func(void *type
        struct rcu_state *rsp = type;
        struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
  
 -      _rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
 +      _rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence);
        rdp->barrier_head.func = rcu_barrier_callback;
        debug_rcu_head_queue(&rdp->barrier_head);
        if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
                atomic_inc(&rsp->barrier_cpu_count);
        } else {
                debug_rcu_head_unqueue(&rdp->barrier_head);
 -              _rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
 +              _rcu_barrier_trace(rsp, TPS("IRQNQ"), -1,
 +                                 rsp->barrier_sequence);
        }
  }
  
@@@ -3521,15 -3615,14 +3521,15 @@@ static void _rcu_barrier(struct rcu_sta
        struct rcu_data *rdp;
        unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
  
 -      _rcu_barrier_trace(rsp, "Begin", -1, s);
 +      _rcu_barrier_trace(rsp, TPS("Begin"), -1, s);
  
        /* Take mutex to serialize concurrent rcu_barrier() requests. */
        mutex_lock(&rsp->barrier_mutex);
  
        /* Did someone else do our work for us? */
        if (rcu_seq_done(&rsp->barrier_sequence, s)) {
 -              _rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence);
 +              _rcu_barrier_trace(rsp, TPS("EarlyExit"), -1,
 +                                 rsp->barrier_sequence);
                smp_mb(); /* caller's subsequent code after above check. */
                mutex_unlock(&rsp->barrier_mutex);
                return;
  
        /* Mark the start of the barrier operation. */
        rcu_seq_start(&rsp->barrier_sequence);
 -      _rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence);
 +      _rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence);
  
        /*
         * Initialize the count to one rather than to zero in order to
                rdp = per_cpu_ptr(rsp->rda, cpu);
                if (rcu_is_nocb_cpu(cpu)) {
                        if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
 -                              _rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
 +                              _rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu,
                                                   rsp->barrier_sequence);
                        } else {
 -                              _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
 +                              _rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu,
                                                   rsp->barrier_sequence);
                                smp_mb__before_atomic();
                                atomic_inc(&rsp->barrier_cpu_count);
                                           rcu_barrier_callback, rsp, cpu, 0);
                        }
                } else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
 -                      _rcu_barrier_trace(rsp, "OnlineQ", cpu,
 +                      _rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu,
                                           rsp->barrier_sequence);
                        smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
                } else {
 -                      _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
 +                      _rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu,
                                           rsp->barrier_sequence);
                }
        }
        wait_for_completion(&rsp->barrier_completion);
  
        /* Mark the end of the barrier operation. */
 -      _rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence);
 +      _rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence);
        rcu_seq_end(&rsp->barrier_sequence);
  
        /* Other rcu_barrier() invocations can now safely proceed. */
@@@ -3694,6 -3787,8 +3694,6 @@@ rcu_init_percpu_data(int cpu, struct rc
         */
        rnp = rdp->mynode;
        raw_spin_lock_rcu_node(rnp);            /* irqs already disabled. */
 -      if (!rdp->beenonline)
 -              WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
        rdp->beenonline = true;  /* We have now been online. */
        rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
        rdp->completed = rnp->completed;
@@@ -3797,8 -3892,6 +3797,8 @@@ void rcu_cpu_starting(unsigned int cpu
  {
        unsigned long flags;
        unsigned long mask;
 +      int nbits;
 +      unsigned long oldmask;
        struct rcu_data *rdp;
        struct rcu_node *rnp;
        struct rcu_state *rsp;
                mask = rdp->grpmask;
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                rnp->qsmaskinitnext |= mask;
 +              oldmask = rnp->expmaskinitnext;
                rnp->expmaskinitnext |= mask;
 +              oldmask ^= rnp->expmaskinitnext;
 +              nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
 +              /* Allow lockless access for expedited grace periods. */
 +              smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
 +      smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
@@@ -3860,50 -3947,6 +3860,50 @@@ void rcu_report_dead(unsigned int cpu
        for_each_rcu_flavor(rsp)
                rcu_cleanup_dying_idle_cpu(cpu, rsp);
  }
 +
 +/* Migrate the dead CPU's callbacks to the current CPU. */
 +static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
 +{
 +      unsigned long flags;
 +      struct rcu_data *my_rdp;
 +      struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 +      struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
 +
 +      if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
 +              return;  /* No callbacks to migrate. */
 +
 +      local_irq_save(flags);
 +      my_rdp = this_cpu_ptr(rsp->rda);
 +      if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
 +              local_irq_restore(flags);
 +              return;
 +      }
 +      raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
 +      rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
 +      rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
 +      rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
 +      WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
 +                   !rcu_segcblist_n_cbs(&my_rdp->cblist));
 +      raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
 +      WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
 +                !rcu_segcblist_empty(&rdp->cblist),
 +                "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
 +                cpu, rcu_segcblist_n_cbs(&rdp->cblist),
 +                rcu_segcblist_first_cb(&rdp->cblist));
 +}
 +
 +/*
 + * The outgoing CPU has just passed through the dying-idle state,
 + * and we are being invoked from the CPU that was IPIed to continue the
 + * offline operation.  We need to migrate the outgoing CPU's callbacks.
 + */
 +void rcutree_migrate_callbacks(int cpu)
 +{
 +      struct rcu_state *rsp;
 +
 +      for_each_rcu_flavor(rsp)
 +              rcu_migrate_callbacks(cpu, rsp);
 +}
  #endif
  
  /*
@@@ -4101,7 -4144,7 +4101,7 @@@ static void __init rcu_init_geometry(vo
        if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
            nr_cpu_ids == NR_CPUS)
                return;
 -      pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
 +      pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
                rcu_fanout_leaf, nr_cpu_ids);
  
        /*