powerpc: atomic: Implement acquire/release/relaxed variants for cmpxchg
authorBoqun Feng <boqun.feng@gmail.com>
Tue, 15 Dec 2015 14:24:17 +0000 (22:24 +0800)
committerMichael Ellerman <mpe@ellerman.id.au>
Wed, 17 Feb 2016 13:11:39 +0000 (00:11 +1100)
Implement cmpxchg{,64}_relaxed and atomic{,64}_cmpxchg_relaxed, based on
which _release variants can be built.

To avoid superfluous barriers in _acquire variants, we implement these
operations with assembly code rather use __atomic_op_acquire() to build
them automatically.

For the same reason, we keep the assembly implementation of fully
ordered cmpxchg operations.

However, we don't do the similar for _release, because that will require
putting barriers in the middle of ll/sc loops, which is probably a bad
idea.

Note cmpxchg{,64}_relaxed and atomic{,64}_cmpxchg_relaxed are not
compiler barriers.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/atomic.h
arch/powerpc/include/asm/cmpxchg.h

index a19fcdc318ee6b91382bb79616865d340bd8511f..ae0751ef8788fd55f7c4d6b37a8fbb73d82d317f 100644 (file)
@@ -176,6 +176,11 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
 #define atomic_dec_return_relaxed atomic_dec_return_relaxed
 
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_cmpxchg_relaxed(v, o, n) \
+       cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define atomic_cmpxchg_acquire(v, o, n) \
+       cmpxchg_acquire(&((v)->counter), (o), (n))
+
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
 
@@ -444,6 +449,11 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 }
 
 #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic64_cmpxchg_relaxed(v, o, n) \
+       cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define atomic64_cmpxchg_acquire(v, o, n) \
+       cmpxchg_acquire(&((v)->counter), (o), (n))
+
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 #define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
 
index 17c7e14b37ca6fae470822c16b35235ba0ed2eec..cae4fa85250c8c40c5e242186ea6232ec6656507 100644 (file)
@@ -181,6 +181,56 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
        return prev;
 }
 
+static __always_inline unsigned long
+__cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new)
+{
+       unsigned long prev;
+
+       __asm__ __volatile__ (
+"1:    lwarx   %0,0,%2         # __cmpxchg_u32_relaxed\n"
+"      cmpw    0,%0,%3\n"
+"      bne-    2f\n"
+       PPC405_ERR77(0, %2)
+"      stwcx.  %4,0,%2\n"
+"      bne-    1b\n"
+"2:"
+       : "=&r" (prev), "+m" (*p)
+       : "r" (p), "r" (old), "r" (new)
+       : "cc");
+
+       return prev;
+}
+
+/*
+ * cmpxchg family don't have order guarantee if cmp part fails, therefore we
+ * can avoid superfluous barriers if we use assembly code to implement
+ * cmpxchg() and cmpxchg_acquire(), however we don't do the similar for
+ * cmpxchg_release() because that will result in putting a barrier in the
+ * middle of a ll/sc loop, which is probably a bad idea. For example, this
+ * might cause the conditional store more likely to fail.
+ */
+static __always_inline unsigned long
+__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new)
+{
+       unsigned long prev;
+
+       __asm__ __volatile__ (
+"1:    lwarx   %0,0,%2         # __cmpxchg_u32_acquire\n"
+"      cmpw    0,%0,%3\n"
+"      bne-    2f\n"
+       PPC405_ERR77(0, %2)
+"      stwcx.  %4,0,%2\n"
+"      bne-    1b\n"
+       PPC_ACQUIRE_BARRIER
+       "\n"
+"2:"
+       : "=&r" (prev), "+m" (*p)
+       : "r" (p), "r" (old), "r" (new)
+       : "cc", "memory");
+
+       return prev;
+}
+
 #ifdef CONFIG_PPC64
 static __always_inline unsigned long
 __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
@@ -224,6 +274,46 @@ __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
 
        return prev;
 }
+
+static __always_inline unsigned long
+__cmpxchg_u64_relaxed(u64 *p, unsigned long old, unsigned long new)
+{
+       unsigned long prev;
+
+       __asm__ __volatile__ (
+"1:    ldarx   %0,0,%2         # __cmpxchg_u64_relaxed\n"
+"      cmpd    0,%0,%3\n"
+"      bne-    2f\n"
+"      stdcx.  %4,0,%2\n"
+"      bne-    1b\n"
+"2:"
+       : "=&r" (prev), "+m" (*p)
+       : "r" (p), "r" (old), "r" (new)
+       : "cc");
+
+       return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new)
+{
+       unsigned long prev;
+
+       __asm__ __volatile__ (
+"1:    ldarx   %0,0,%2         # __cmpxchg_u64_acquire\n"
+"      cmpd    0,%0,%3\n"
+"      bne-    2f\n"
+"      stdcx.  %4,0,%2\n"
+"      bne-    1b\n"
+       PPC_ACQUIRE_BARRIER
+       "\n"
+"2:"
+       : "=&r" (prev), "+m" (*p)
+       : "r" (p), "r" (old), "r" (new)
+       : "cc", "memory");
+
+       return prev;
+}
 #endif
 
 /* This function doesn't exist, so you'll get a linker error
@@ -262,6 +352,37 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
        return old;
 }
 
+static __always_inline unsigned long
+__cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
+                 unsigned int size)
+{
+       switch (size) {
+       case 4:
+               return __cmpxchg_u32_relaxed(ptr, old, new);
+#ifdef CONFIG_PPC64
+       case 8:
+               return __cmpxchg_u64_relaxed(ptr, old, new);
+#endif
+       }
+       __cmpxchg_called_with_bad_pointer();
+       return old;
+}
+
+static __always_inline unsigned long
+__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
+                 unsigned int size)
+{
+       switch (size) {
+       case 4:
+               return __cmpxchg_u32_acquire(ptr, old, new);
+#ifdef CONFIG_PPC64
+       case 8:
+               return __cmpxchg_u64_acquire(ptr, old, new);
+#endif
+       }
+       __cmpxchg_called_with_bad_pointer();
+       return old;
+}
 #define cmpxchg(ptr, o, n)                                              \
   ({                                                                    \
      __typeof__(*(ptr)) _o_ = (o);                                      \
@@ -279,6 +400,23 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
                                    (unsigned long)_n_, sizeof(*(ptr))); \
   })
 
+#define cmpxchg_relaxed(ptr, o, n)                                     \
+({                                                                     \
+       __typeof__(*(ptr)) _o_ = (o);                                   \
+       __typeof__(*(ptr)) _n_ = (n);                                   \
+       (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr),                   \
+                       (unsigned long)_o_, (unsigned long)_n_,         \
+                       sizeof(*(ptr)));                                \
+})
+
+#define cmpxchg_acquire(ptr, o, n)                                     \
+({                                                                     \
+       __typeof__(*(ptr)) _o_ = (o);                                   \
+       __typeof__(*(ptr)) _n_ = (n);                                   \
+       (__typeof__(*(ptr))) __cmpxchg_acquire((ptr),                   \
+                       (unsigned long)_o_, (unsigned long)_n_,         \
+                       sizeof(*(ptr)));                                \
+})
 #ifdef CONFIG_PPC64
 #define cmpxchg64(ptr, o, n)                                           \
   ({                                                                   \
@@ -290,7 +428,16 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
        BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
        cmpxchg_local((ptr), (o), (n));                                 \
   })
-#define cmpxchg64_relaxed      cmpxchg64_local
+#define cmpxchg64_relaxed(ptr, o, n)                                   \
+({                                                                     \
+       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
+       cmpxchg_relaxed((ptr), (o), (n));                               \
+})
+#define cmpxchg64_acquire(ptr, o, n)                                   \
+({                                                                     \
+       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
+       cmpxchg_acquire((ptr), (o), (n));                               \
+})
 #else
 #include <asm-generic/cmpxchg-local.h>
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))