locking: Introduce smp_mb__after_spinlock()
author     Peter Zijlstra <peterz@infradead.org>
           Mon, 5 Sep 2016 09:37:53 +0000 (11:37 +0200)
committer  Ingo Molnar <mingo@kernel.org>
           Thu, 10 Aug 2017 10:29:02 +0000 (12:29 +0200)
Since its inception, our understanding of ACQUIRE, especially as applied to
spinlocks, has changed somewhat. Moreover, with a simple change, we can make
it provide more.

The problem with the existing smp_mb__before_spinlock() comment is that the
STORE done by spin_lock() isn't itself ordered by the ACQUIRE, and therefore
a later LOAD can pass it and cross with any prior STORE, rendering the
default smp_wmb() insufficient (pointed out by Alan).
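
To illustrate (a sketch, not part of the patch; X and Y are ordinary shared
variables and 'lock' is a plain spinlock):

	CPU0				CPU1

	WRITE_ONCE(X, 1);		WRITE_ONCE(Y, 1);
	spin_lock(&lock);		smp_mb();
	r0 = READ_ONCE(Y);		r1 = READ_ONCE(X);

Even with an smp_wmb() issued before the spin_lock(), the outcome
r0 == 0 && r1 == 0 remains possible: neither ACQUIRE nor smp_wmb() orders
CPU0's earlier STORE to X against its later LOAD of Y. Only a full barrier
between the two forbids that outcome.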

Now, this is only really a problem on PowerPC and ARM64, both of which
already define smp_mb__before_spinlock() as smp_mb().

At the same time, we can get a much stronger construct if we place
that same barrier _inside_ the spin_lock(). In that case we upgrade
the RCpc spinlock to an RCsc one.  That would make all schedule() calls
fully transitive against one another.
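
Concretely, that means issuing the new barrier right after taking the lock
(an illustrative sketch; the real call sites are in the diff below):

	/* prior STOREs */
	raw_spin_lock(&lock);
	smp_mb__after_spinlock();	/* upgrade the ACQUIRE to a full barrier */
	/* later LOADs can no longer pass the prior STOREs */

The generic definition is a no-op, since most architectures' ACQUIRE already
implies a full barrier (see the include/linux/spinlock.h comment below);
arm64 and powerpc override it with smp_mb().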

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/arm64/include/asm/spinlock.h
arch/powerpc/include/asm/spinlock.h
include/linux/atomic.h
include/linux/spinlock.h
kernel/sched/core.c

index cae331d553f81b8ab649aa4e65c6da35038c5f97..b103888b694a2f14db9978e8abf82873ed2965de 100644 (file)
@@ -367,5 +367,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
  * smp_mb__before_spinlock() can restore the required ordering.
  */
 #define smp_mb__before_spinlock()      smp_mb()
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()       smp_mb()
 
 #endif /* __ASM_SPINLOCK_H */
index 8c1b913de6d72ccb2a54759570fc8a511a4ea4e9..c1b1ec94b06cb0f97cdd3a25170ff0dcc382ee3a 100644 (file)
@@ -342,5 +342,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)  __rw_yield(lock)
 #define arch_write_relax(lock) __rw_yield(lock)
 
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()   smp_mb()
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SPINLOCK_H */
index c56be74101305f74524bc816d9c8fdb67ccb895d..40d6bfec0e0d0fb8ef40de6a6bb37a9d1905cf93 100644 (file)
@@ -38,6 +38,9 @@
  * Besides, if an arch has a special barrier for acquire/release, it could
  * implement its own __atomic_op_* and use the same framework for building
  * variants
+ *
+ * If an architecture overrides __atomic_op_acquire() it will probably want
+ * to define smp_mb__after_spinlock().
  */
 #ifndef __atomic_op_acquire
 #define __atomic_op_acquire(op, args...)                               \
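
For context: the generic __atomic_op_acquire() builds the ACQUIRE variant
from the relaxed operation plus a barrier, roughly as follows (a sketch of
the generic code of this era, not part of this patch):

	#define __atomic_op_acquire(op, args...)			\
	({								\
		typeof(op##_relaxed(args)) __ret = op##_relaxed(args);	\
		smp_mb__after_atomic();					\
		__ret;							\
	})

An architecture that overrides this with something weaker than a full
barrier is exactly the kind of architecture that probably also needs a
non-empty smp_mb__after_spinlock().
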
index d9510e8522d4d33d11bc7e1f9e699f9c353a28ef..840281095933e41d41acdf16ff4e7fa7dae5e06d 100644 (file)
@@ -130,6 +130,42 @@ do {                                                               \
 #define smp_mb__before_spinlock()      smp_wmb()
 #endif
 
+/*
+ * This barrier must provide two things:
+ *
+ *   - it must guarantee a STORE before the spin_lock() is ordered against a
+ *     LOAD after it, see the comments at its two usage sites.
+ *
+ *   - it must ensure the critical section is RCsc.
+ *
+ * The latter is important for cases where we observe values written by other
+ * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *
+ * CPU0                        CPU1                    CPU2
+ *
+ *                     for (;;) {
+ *                       if (READ_ONCE(X))
+ *                         break;
+ *                     }
+ * X=1
+ *                     <sched-out>
+ *                                             <sched-in>
+ *                                             r = X;
+ *
+ * without transitivity it could be that CPU1 observes X!=0 and breaks the loop,
+ * we get migrated and CPU2 sees X==0.
+ *
+ * Since most load-store architectures implement ACQUIRE with an smp_mb() after
+ * the LL/SC loop, they need no further barriers. Similarly all our TSO
+ * architectures imply an smp_mb() for each atomic instruction and equally don't
+ * need more.
+ *
+ * Architectures that can implement ACQUIRE without a full barrier need to take care.
+ */
+#ifndef smp_mb__after_spinlock
+#define smp_mb__after_spinlock()       do { } while (0)
+#endif
+
 /**
  * raw_spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
index 0869b20fba81f6f1a7f5f73bde65fadc483f0fa2..9fece583a1f0d6b9ab19b74d35d0a079856fb455 100644 (file)
@@ -1967,8 +1967,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         * reordered with p->state check below. This pairs with mb() in
         * set_current_state() the waiting thread does.
         */
-       smp_mb__before_spinlock();
        raw_spin_lock_irqsave(&p->pi_lock, flags);
+       smp_mb__after_spinlock();
        if (!(p->state & state))
                goto out;
 
@@ -3281,8 +3281,8 @@ static void __sched notrace __schedule(bool preempt)
         * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
         * done by the caller to avoid the race with signal_wake_up().
         */
-       smp_mb__before_spinlock();
        rq_lock(rq, &rf);
+       smp_mb__after_spinlock();
 
        /* Promote REQ to ACT */
        rq->clock_update_flags <<= 1;
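
For reference, these two sites pair with the smp_mb() implied by
set_current_state() in the task going to sleep. A sketch of the idiom the
try_to_wake_up() hunk above relies on ('cond' is an illustrative condition
variable, not from the patch):

	/* sleeper */
	set_current_state(TASK_UNINTERRUPTIBLE);	/* implies smp_mb() */
	if (!READ_ONCE(cond))
		schedule();

	/* waker, i.e. the try_to_wake_up() path above */
	WRITE_ONCE(cond, 1);
	raw_spin_lock_irqsave(&p->pi_lock, flags);
	smp_mb__after_spinlock();	/* order the cond STORE against the p->state LOAD */
	if (!(p->state & state))
		goto out;		/* the sleeper is guaranteed to observe cond */

With the full barrier in place, either the waker observes the sleeping state
and issues the wakeup, or the sleeper observes cond and never blocks. Without
it, the waker's LOAD of p->state could be satisfied before its STORE to cond
became visible, and both sides could miss each other.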