arm64: atomics: patch in lse instructions when supported by the CPU
author Will Deacon <will.deacon@arm.com>
Tue, 3 Feb 2015 16:14:13 +0000 (16:14 +0000)
committer Will Deacon <will.deacon@arm.com>
Mon, 27 Jul 2015 14:28:50 +0000 (15:28 +0100)
On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of atomic_t and atomic64_t
routines so that the call-site for the out-of-line ll/sc sequences is
patched with an LSE atomic instruction when we detect that
the CPU supports it.

If binutils is not recent enough to assemble the LSE instructions, then
the ll/sc sequences are inlined as though CONFIG_ARM64_LSE_ATOMICS=n.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/Makefile
arch/arm64/include/asm/atomic.h
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/atomic_lse.h
arch/arm64/include/asm/lse.h [new file with mode: 0644]
arch/arm64/kernel/setup.c

index 0953a97b51196fa081913f324a08f3cb3de71ba2..15ff5b4156fd74a041f3ad926efbeae05ba382dc 100644 (file)
@@ -17,7 +17,18 @@ GZFLAGS              :=-9
 
 KBUILD_DEFCONFIG := defconfig
 
-KBUILD_CFLAGS  += -mgeneral-regs-only
+# Check for binutils support for specific extensions
+lseinstr := $(call as-instr,.arch_extension lse,-DCONFIG_AS_LSE=1)
+
+ifeq ($(CONFIG_ARM64_LSE_ATOMICS), y)
+  ifeq ($(lseinstr),)
+$(warning LSE atomics not supported by binutils)
+  endif
+endif
+
+KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr)
+KBUILD_AFLAGS  += $(lseinstr)
+
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS        += -mbig-endian
 AS             += -EB
index 84635f2d3d0a8a4e293b47f3873b1f76a543a792..836226d5e12cd6d56a6192200747e7f9ae1a6798 100644 (file)
 #define __ASM_ATOMIC_H
 
 #include <linux/compiler.h>
-#include <linux/stringify.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
 #include <asm/cmpxchg.h>
+#include <asm/lse.h>
 
 #define ATOMIC_INIT(i) { (i) }
 
@@ -33,7 +33,7 @@
 
 #define __ARM64_IN_ATOMIC_IMPL
 
-#ifdef CONFIG_ARM64_LSE_ATOMICS
+#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
 #include <asm/atomic_lse.h>
 #else
 #include <asm/atomic_ll_sc.h>
index c33fa2cd399e396d615d759f6910811c34aee674..4b981ba57e788bfe4d7c8494ac938d089c3512a4 100644 (file)
  * (the optimize attribute silently ignores these options).
  */
 
-#ifndef __LL_SC_INLINE
-#define __LL_SC_INLINE         static inline
-#endif
-
-#ifndef __LL_SC_PREFIX
-#define __LL_SC_PREFIX(x)      x
-#endif
-
-#ifndef __LL_SC_EXPORT
-#define __LL_SC_EXPORT(x)
-#endif
-
 #define ATOMIC_OP(op, asm_op)                                          \
 __LL_SC_INLINE void                                                    \
 __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))                                \
index dce6ede740e8814b68acae97c81457e7f92cecc2..6e21b5e0c9d62b210cf57231d7ff0e05a8c77d79 100644 (file)
 #error "please don't include this file directly"
 #endif
 
-/* Move the ll/sc atomics out-of-line */
-#define __LL_SC_INLINE
-#define __LL_SC_PREFIX(x)      __ll_sc_##x
-#define __LL_SC_EXPORT(x)      EXPORT_SYMBOL(__LL_SC_PREFIX(x))
-
-/* Macros for constructing calls to out-of-line ll/sc atomics */
-#define __LL_SC_CALL(op)                                               \
-       "bl\t" __stringify(__LL_SC_PREFIX(atomic_##op)) "\n"
-#define __LL_SC_CALL64(op)                                             \
-       "bl\t" __stringify(__LL_SC_PREFIX(atomic64_##op)) "\n"
-
-#define ATOMIC_OP(op, asm_op)                                          \
-static inline void atomic_##op(int i, atomic_t *v)                     \
-{                                                                      \
-       register int w0 asm ("w0") = i;                                 \
-       register atomic_t *x1 asm ("x1") = v;                           \
-                                                                       \
-       asm volatile(                                                   \
-       __LL_SC_CALL(op)                                                \
-       : "+r" (w0), "+Q" (v->counter)                                  \
-       : "r" (x1)                                                      \
-       : "x30");                                                       \
-}                                                                      \
-
-#define ATOMIC_OP_RETURN(op, asm_op)                                   \
-static inline int atomic_##op##_return(int i, atomic_t *v)             \
-{                                                                      \
-       register int w0 asm ("w0") = i;                                 \
-       register atomic_t *x1 asm ("x1") = v;                           \
-                                                                       \
-       asm volatile(                                                   \
-       __LL_SC_CALL(op##_return)                                       \
-       : "+r" (w0)                                                     \
-       : "r" (x1)                                                      \
-       : "x30", "memory");                                             \
-                                                                       \
-       return w0;                                                      \
-}
-
-#define ATOMIC_OPS(op, asm_op)                                         \
-       ATOMIC_OP(op, asm_op)                                           \
-       ATOMIC_OP_RETURN(op, asm_op)
-
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, sub)
-
-ATOMIC_OP(and, and)
-ATOMIC_OP(andnot, bic)
-ATOMIC_OP(or, orr)
-ATOMIC_OP(xor, eor)
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
+#define __LL_SC_ATOMIC(op)     __LL_SC_CALL(atomic_##op)
+
+static inline void atomic_andnot(int i, atomic_t *v)
+{
+       register int w0 asm ("w0") = i;
+       register atomic_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
+       "       stclr   %w[i], %[v]\n")
+       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline void atomic_or(int i, atomic_t *v)
+{
+       register int w0 asm ("w0") = i;
+       register atomic_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
+       "       stset   %w[i], %[v]\n")
+       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline void atomic_xor(int i, atomic_t *v)
+{
+       register int w0 asm ("w0") = i;
+       register atomic_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor),
+       "       steor   %w[i], %[v]\n")
+       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+       register int w0 asm ("w0") = i;
+       register atomic_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
+       "       stadd   %w[i], %[v]\n")
+       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+       register int w0 asm ("w0") = i;
+       register atomic_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC(add_return),
+       /* LSE atomics */
+       "       ldaddal %w[i], w30, %[v]\n"
+       "       add     %w[i], %w[i], w30")
+       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30", "memory");
+
+       return w0;
+}
+
+static inline void atomic_and(int i, atomic_t *v)
+{
+       register int w0 asm ("w0") = i;
+       register atomic_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC(and),
+       /* LSE atomics */
+       "       mvn     %w[i], %w[i]\n"
+       "       stclr   %w[i], %[v]")
+       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+       register int w0 asm ("w0") = i;
+       register atomic_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC(sub),
+       /* LSE atomics */
+       "       neg     %w[i], %w[i]\n"
+       "       stadd   %w[i], %[v]")
+       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+       register int w0 asm ("w0") = i;
+       register atomic_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC(sub_return)
+       "       nop",
+       /* LSE atomics */
+       "       neg     %w[i], %w[i]\n"
+       "       ldaddal %w[i], w30, %[v]\n"
+       "       add     %w[i], %w[i], w30")
+       : [i] "+r" (w0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30", "memory");
+
+       return w0;
+}
 
 static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 {
@@ -86,69 +155,164 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
        register int w1 asm ("w1") = old;
        register int w2 asm ("w2") = new;
 
-       asm volatile(
-       __LL_SC_CALL(cmpxchg)
-       : "+r" (x0)
-       : "r" (w1), "r" (w2)
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC(cmpxchg)
+       "       nop",
+       /* LSE atomics */
+       "       mov     w30, %w[old]\n"
+       "       casal   w30, %w[new], %[v]\n"
+       "       mov     %w[ret], w30")
+       : [ret] "+r" (x0), [v] "+Q" (ptr->counter)
+       : [old] "r" (w1), [new] "r" (w2)
        : "x30", "cc", "memory");
 
        return x0;
 }
 
-#define ATOMIC64_OP(op, asm_op)                                                \
-static inline void atomic64_##op(long i, atomic64_t *v)                        \
-{                                                                      \
-       register long x0 asm ("x0") = i;                                \
-       register atomic64_t *x1 asm ("x1") = v;                         \
-                                                                       \
-       asm volatile(                                                   \
-       __LL_SC_CALL64(op)                                              \
-       : "+r" (x0), "+Q" (v->counter)                                  \
-       : "r" (x1)                                                      \
-       : "x30");                                                       \
-}                                                                      \
-
-#define ATOMIC64_OP_RETURN(op, asm_op)                                 \
-static inline long atomic64_##op##_return(long i, atomic64_t *v)       \
-{                                                                      \
-       register long x0 asm ("x0") = i;                                \
-       register atomic64_t *x1 asm ("x1") = v;                         \
-                                                                       \
-       asm volatile(                                                   \
-       __LL_SC_CALL64(op##_return)                                     \
-       : "+r" (x0)                                                     \
-       : "r" (x1)                                                      \
-       : "x30", "memory");                                             \
-                                                                       \
-       return x0;                                                      \
-}
-
-#define ATOMIC64_OPS(op, asm_op)                                       \
-       ATOMIC64_OP(op, asm_op)                                         \
-       ATOMIC64_OP_RETURN(op, asm_op)
-
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, sub)
-
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(andnot, bic)
-ATOMIC64_OP(or, orr)
-ATOMIC64_OP(xor, eor)
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
+#undef __LL_SC_ATOMIC
 
+#define __LL_SC_ATOMIC64(op)   __LL_SC_CALL(atomic64_##op)
+
+static inline void atomic64_andnot(long i, atomic64_t *v)
+{
+       register long x0 asm ("x0") = i;
+       register atomic64_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot),
+       "       stclr   %[i], %[v]\n")
+       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline void atomic64_or(long i, atomic64_t *v)
+{
+       register long x0 asm ("x0") = i;
+       register atomic64_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or),
+       "       stset   %[i], %[v]\n")
+       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline void atomic64_xor(long i, atomic64_t *v)
+{
+       register long x0 asm ("x0") = i;
+       register atomic64_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor),
+       "       steor   %[i], %[v]\n")
+       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline void atomic64_add(long i, atomic64_t *v)
+{
+       register long x0 asm ("x0") = i;
+       register atomic64_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add),
+       "       stadd   %[i], %[v]\n")
+       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline long atomic64_add_return(long i, atomic64_t *v)
+{
+       register long x0 asm ("x0") = i;
+       register atomic64_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC64(add_return),
+       /* LSE atomics */
+       "       ldaddal %[i], x30, %[v]\n"
+       "       add     %[i], %[i], x30")
+       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30", "memory");
+
+       return x0;
+}
+
+static inline void atomic64_and(long i, atomic64_t *v)
+{
+       register long x0 asm ("x0") = i;
+       register atomic64_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC64(and),
+       /* LSE atomics */
+       "       mvn     %[i], %[i]\n"
+       "       stclr   %[i], %[v]")
+       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline void atomic64_sub(long i, atomic64_t *v)
+{
+       register long x0 asm ("x0") = i;
+       register atomic64_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC64(sub),
+       /* LSE atomics */
+       "       neg     %[i], %[i]\n"
+       "       stadd   %[i], %[v]")
+       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30");
+}
+
+static inline long atomic64_sub_return(long i, atomic64_t *v)
+{
+       register long x0 asm ("x0") = i;
+       register atomic64_t *x1 asm ("x1") = v;
+
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC64(sub_return)
+       "       nop",
+       /* LSE atomics */
+       "       neg     %[i], %[i]\n"
+       "       ldaddal %[i], x30, %[v]\n"
+       "       add     %[i], %[i], x30")
+       : [i] "+r" (x0), [v] "+Q" (v->counter)
+       : "r" (x1)
+       : "x30", "memory");
+
+       return x0;
+}
 static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
 {
        register unsigned long x0 asm ("x0") = (unsigned long)ptr;
        register long x1 asm ("x1") = old;
        register long x2 asm ("x2") = new;
 
-       asm volatile(
-       __LL_SC_CALL64(cmpxchg)
-       : "+r" (x0)
-       : "r" (x1), "r" (x2)
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC64(cmpxchg)
+       "       nop",
+       /* LSE atomics */
+       "       mov     x30, %[old]\n"
+       "       casal   x30, %[new], %[v]\n"
+       "       mov     %[ret], x30")
+       : [ret] "+r" (x0), [v] "+Q" (ptr->counter)
+       : [old] "r" (x1), [new] "r" (x2)
        : "x30", "cc", "memory");
 
        return x0;
@@ -156,15 +320,33 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
 
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
-       register unsigned long x0 asm ("x0") = (unsigned long)v;
+       register long x0 asm ("x0") = (long)v;
 
-       asm volatile(
-       __LL_SC_CALL64(dec_if_positive)
-       : "+r" (x0)
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+       /* LL/SC */
+       "       nop\n"
+       __LL_SC_ATOMIC64(dec_if_positive)
+       "       nop\n"
+       "       nop\n"
+       "       nop\n"
+       "       nop\n"
+       "       nop",
+       /* LSE atomics */
+       "1:     ldr     x30, %[v]\n"
+       "       subs    %[ret], x30, #1\n"
+       "       b.mi    2f\n"
+       "       casal   x30, %[ret], %[v]\n"
+       "       sub     x30, x30, #1\n"
+       "       sub     x30, x30, %[ret]\n"
+       "       cbnz    x30, 1b\n"
+       "2:")
+       : [ret] "+&r" (x0), [v] "+Q" (v->counter)
        :
        : "x30", "cc", "memory");
 
        return x0;
 }
 
+#undef __LL_SC_ATOMIC64
+
 #endif /* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
new file mode 100644 (file)
index 0000000..d516624
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef __ASM_LSE_H
+#define __ASM_LSE_H
+
+#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+
+#include <linux/stringify.h>
+
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+
+__asm__(".arch_extension       lse");
+
+/* Move the ll/sc atomics out-of-line */
+#define __LL_SC_INLINE
+#define __LL_SC_PREFIX(x)      __ll_sc_##x
+#define __LL_SC_EXPORT(x)      EXPORT_SYMBOL(__LL_SC_PREFIX(x))
+
+/* Macro for constructing calls to out-of-line ll/sc atomics */
+#define __LL_SC_CALL(op)       "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+
+/* In-line patching at runtime */
+#define ARM64_LSE_ATOMIC_INSN(llsc, lse)                               \
+       ALTERNATIVE(llsc, lse, ARM64_CPU_FEAT_LSE_ATOMICS)
+
+#else
+
+#define __LL_SC_INLINE         static inline
+#define __LL_SC_PREFIX(x)      x
+#define __LL_SC_EXPORT(x)
+
+#define ARM64_LSE_ATOMIC_INSN(llsc, lse)       llsc
+
+#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+#endif /* __ASM_LSE_H */
index f4fbbc884893f30f258df60858d933b95708c4d1..97785c01acbfcc88dc7b968758a668b69f445094 100644 (file)
@@ -285,6 +285,9 @@ static void __init setup_processor(void)
                case 2:
                        elf_hwcap |= HWCAP_ATOMICS;
                        cpus_set_cap(ARM64_CPU_FEAT_LSE_ATOMICS);
+                       if (IS_ENABLED(CONFIG_AS_LSE) &&
+                           IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS))
+                               pr_info("LSE atomics supported\n");
                case 1:
                        /* RESERVED */
                case 0: