arch: Introduce smp_load_acquire(), smp_store_release()
author    Peter Zijlstra <peterz@infradead.org>
          Wed, 6 Nov 2013 13:57:36 +0000 (14:57 +0100)
committer Ingo Molnar <mingo@kernel.org>
          Sun, 12 Jan 2014 09:37:17 +0000 (10:37 +0100)
A number of situations currently require the heavyweight smp_mb(),
even though there is no need to order prior stores against later
loads.  Many architectures have much cheaper ways to handle these
situations, but the Linux kernel currently has no portable way
to make use of them.

This commit therefore supplies smp_load_acquire() and
smp_store_release() to remedy this situation.  The new
smp_load_acquire() primitive orders the specified load against
any subsequent reads or writes, while the new smp_store_release()
primitive orders the specified store against any prior reads or
writes.  These primitives allow array-based circular FIFOs to be
implemented without an smp_mb(), and also allow a theoretical
hole in rcu_assign_pointer() to be closed at no additional
expense on most architectures.
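
As an illustrative sketch of the FIFO use-case (the ring, field and
function names below are hypothetical, not part of this patch), a
single-producer/single-consumer ring can publish slots with
smp_store_release() and consume them under smp_load_acquire(), with
no smp_mb() on either side:

  /*
   * Illustrative SPSC ring; a power-of-two size keeps the index
   * arithmetic correct across counter wraparound.
   */
  #define RING_SIZE 16

  struct ring {
          unsigned int head;              /* written by producer */
          unsigned int tail;              /* written by consumer */
          int buf[RING_SIZE];
  };

  static bool ring_put(struct ring *r, int v)
  {
          unsigned int head = r->head;

          if (head - smp_load_acquire(&r->tail) >= RING_SIZE)
                  return false;                          /* full */
          r->buf[head % RING_SIZE] = v;                  /* fill slot... */
          smp_store_release(&r->head, head + 1);         /* ...then publish */
          return true;
  }

  static bool ring_get(struct ring *r, int *v)
  {
          unsigned int tail = r->tail;

          if (smp_load_acquire(&r->head) == tail)
                  return false;                          /* empty */
          *v = r->buf[tail % RING_SIZE];                 /* read slot... */
          smp_store_release(&r->tail, tail + 1);         /* ...then retire */
          return true;
  }

The release in ring_put() guarantees the slot contents are visible
before the new head, and the acquire in ring_get() guarantees the
slot is read only after the head value that published it.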

In addition, the RCU experience of transitioning from explicit
smp_read_barrier_depends() and smp_wmb() to rcu_dereference()
and rcu_assign_pointer(), respectively, resulted in substantial
improvements in readability.  It therefore seems likely that
replacing other explicit barriers with smp_load_acquire() and
smp_store_release() will provide similar benefits.  It appears
that roughly half of the explicit barriers in core kernel code
might be so replaced.
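
For instance, an explicit-barrier pairing such as the following (a
hypothetical example of the pattern, not code from this patch) could
be rewritten so that the accessors themselves carry the ordering
intent:

  /* Before: the barriers say "fence here", but not what pairs
   * with what.
   */
  msg->payload = p;                       /* producer */
  smp_wmb();
  ACCESS_ONCE(msg->ready) = 1;

  if (ACCESS_ONCE(msg->ready)) {          /* consumer */
          smp_rmb();
          use(msg->payload);
  }

  /* After: the release/acquire pair documents the handoff
   * directly, and is if anything slightly stronger than the
   * wmb/rmb pairing it replaces.
   */
  msg->payload = p;                       /* producer */
  smp_store_release(&msg->ready, 1);

  if (smp_load_acquire(&msg->ready))      /* consumer */
          use(msg->payload);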

[Changelog by PaulMck]

Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Victor Kaplansky <VICTORK@il.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20131213150640.908486364@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/arm/include/asm/barrier.h
arch/arm64/include/asm/barrier.h
arch/ia64/include/asm/barrier.h
arch/metag/include/asm/barrier.h
arch/mips/include/asm/barrier.h
arch/powerpc/include/asm/barrier.h
arch/s390/include/asm/barrier.h
arch/sparc/include/asm/barrier_64.h
arch/x86/include/asm/barrier.h
include/asm-generic/barrier.h
include/linux/compiler.h

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 60f15e274e6d461814eb467cc84dc8b0f0395163..2f59f74433964016003167007badbeffcbba465a 100644
 #define smp_wmb()      dmb(ishst)
 #endif
 
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ___p1;                                                          \
+})
+
 #define read_barrier_depends()         do { } while(0)
 #define smp_read_barrier_depends()     do { } while(0)
 
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index d4a63338a53c49a1085c4c72ce7330f207e3b3bf..78e20ba8806b8b86338ee1ccafc1c3aa97b5a034 100644
 #define smp_mb()       barrier()
 #define smp_rmb()      barrier()
 #define smp_wmb()      barrier()
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ___p1;                                                          \
+})
+
 #else
+
 #define smp_mb()       asm volatile("dmb ish" : : : "memory")
 #define smp_rmb()      asm volatile("dmb ishld" : : : "memory")
 #define smp_wmb()      asm volatile("dmb ishst" : : : "memory")
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       switch (sizeof(*p)) {                                           \
+       case 4:                                                         \
+               asm volatile ("stlr %w1, %0"                            \
+                               : "=Q" (*p) : "r" (v) : "memory");      \
+               break;                                                  \
+       case 8:                                                         \
+               asm volatile ("stlr %1, %0"                             \
+                               : "=Q" (*p) : "r" (v) : "memory");      \
+               break;                                                  \
+       }                                                               \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1;                                               \
+       compiletime_assert_atomic_type(*p);                             \
+       switch (sizeof(*p)) {                                           \
+       case 4:                                                         \
+               asm volatile ("ldar %w0, %1"                            \
+                       : "=r" (___p1) : "Q" (*p) : "memory");          \
+               break;                                                  \
+       case 8:                                                         \
+               asm volatile ("ldar %0, %1"                             \
+                       : "=r" (___p1) : "Q" (*p) : "memory");          \
+               break;                                                  \
+       }                                                               \
+       ___p1;                                                          \
+})
+
 #endif
 
 #define read_barrier_depends()         do { } while(0)
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 60576e06b6fb4b3205292f621c727626a6232c92..d0a69aa35e27decc3eacb83e299a910945221480 100644
 # define smp_rmb()     rmb()
 # define smp_wmb()     wmb()
 # define smp_read_barrier_depends()    read_barrier_depends()
+
 #else
+
 # define smp_mb()      barrier()
 # define smp_rmb()     barrier()
 # define smp_wmb()     barrier()
 # define smp_read_barrier_depends()    do { } while(0)
+
 #endif
 
+/*
+ * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq,
+ * so no asm trickery is needed!
+ */
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       barrier();                                                      \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       barrier();                                                      \
+       ___p1;                                                          \
+})
+
 /*
  * XXX check on this ---I suspect what Linus really wants here is
  * acquire vs release semantics but we can't discuss this stuff with
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index c90bfc6bf64892c76867fc2540183c02684afc3e..5d6b4b407ddab29b677a7aa5328715127dfdad56 100644
@@ -82,4 +82,19 @@ static inline void fence(void)
 #define smp_read_barrier_depends()     do { } while (0)
 #define set_mb(var, value) do { var = value; smp_mb(); } while (0)
 
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ___p1;                                                          \
+})
+
 #endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index f26d8e1bf3c37575b4b42144282587caaa1c6a3f..e1aa4e4c2984230e2353760a4b9863387d90ad24 100644
 #define nudge_writes() mb()
 #endif
 
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ___p1;                                                          \
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index ae782254e731bbcd03c8152379b4adf6ddea01a7..f89da808ce310e1f373da55e96fc52d58bd4ccc9 100644
 #    define SMPWMB      eieio
 #endif
 
+#define __lwsync()     __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+
 #define smp_mb()       mb()
-#define smp_rmb()      __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define smp_rmb()      __lwsync()
 #define smp_wmb()      __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
 #define smp_read_barrier_depends()     read_barrier_depends()
 #else
+#define __lwsync()     barrier()
+
 #define smp_mb()       barrier()
 #define smp_rmb()      barrier()
 #define smp_wmb()      barrier()
 #define data_barrier(x)        \
        asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
 
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       __lwsync();                                                     \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       __lwsync();                                                     \
+       ___p1;                                                          \
+})
+
 #endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 16760eeb79b09ec5ea6da10274157491bb5a5d0f..578680f6207acb62ccc8f52c321c349f510f73a6 100644
 
 #define set_mb(var, value)             do { var = value; mb(); } while (0)
 
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       barrier();                                                      \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       barrier();                                                      \
+       ___p1;                                                          \
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 95d45986f908d4224d6b5bf6ec4d5e2a4d697ceb..b5aad964558e756bbf6c3f3c3e29616dceabde94 100644
@@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt     %%xcc, 1f\n\t" \
 
 #define smp_read_barrier_depends()     do { } while(0)
 
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       barrier();                                                      \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       barrier();                                                      \
+       ___p1;                                                          \
+})
+
 #endif /* !(__SPARC64_BARRIER_H) */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index c6cd358a1eec806a779371a631a97a8de2d28912..04a48903b2eb31973080d60d36cfd93b3fc68a5f 100644
 #endif
 #define smp_read_barrier_depends()     read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
+#else /* !SMP */
 #define smp_mb()       barrier()
 #define smp_rmb()      barrier()
 #define smp_wmb()      barrier()
 #define smp_read_barrier_depends()     do { } while (0)
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif /* SMP */
+
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+/*
+ * With either of these options, x86 doesn't have a strong TSO memory
+ * model and we should fall back to full barriers.
+ */
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ___p1;                                                          \
+})
+
+#else /* regular x86 TSO memory ordering */
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       barrier();                                                      \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       barrier();                                                      \
+       ___p1;                                                          \
+})
+
 #endif
 
 /*
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index d12a90f93689b54df868c529499e887c1b8f0684..6f692f8ac664565eda7f0e2f7a551d1f5ebc1df4 100644
 #define set_mb(var, value)  do { (var) = (value); mb(); } while (0)
 #endif
 
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ACCESS_ONCE(*p) = (v);                                          \
+} while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
+       compiletime_assert_atomic_type(*p);                             \
+       smp_mb();                                                       \
+       ___p1;                                                          \
+})
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 92669cd182a6daca2550e2de8e4b8c7e2fd65a59..fe7a686dfd8dc3b87dcc35d0bdd3e50a47324014 100644
@@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 #endif
 
+/* Is this type a native word size -- useful for atomic operations */
+#ifndef __native_word
+# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+#endif
+
 /* Compile time object size, -1 for unknown */
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
@@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #define compiletime_assert(condition, msg) \
        _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
 
+#define compiletime_assert_atomic_type(t)                              \
+       compiletime_assert(__native_word(t),                            \
+               "Need native word sized stores/loads for atomicity.")
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
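
The compiletime_assert_atomic_type() added above rejects operands
that cannot be loaded or stored in a single native-word access.  A
minimal sketch of what it catches (the struct and function names are
hypothetical):

  struct foo {
          char flag;      /* one byte: not a native word */
          long state;     /* sizeof(long): native word   */
  };

  void bar(struct foo *f)
  {
          smp_store_release(&f->state, 1);        /* OK */
          smp_store_release(&f->flag, 1);         /* build error:
                                                     "Need native word sized
                                                      stores/loads for
                                                      atomicity." */
  }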