arm64: introduce CONFIG_ARM64_LSE_ATOMICS as fallback to ll/sc atomics
author Will Deacon <will.deacon@arm.com>
Tue, 3 Feb 2015 12:39:03 +0000 (12:39 +0000)
committer Will Deacon <will.deacon@arm.com>
Mon, 27 Jul 2015 14:28:50 +0000 (15:28 +0100)
In order to patch in the new atomic instructions at runtime, we need to
generate wrappers around the out-of-line exclusive load/store atomics.

This patch adds a new Kconfig option, CONFIG_ARM64_LSE_ATOMICS, which
causes our atomic functions to branch to the out-of-line ll/sc
implementations. To avoid the register spill overhead of the PCS, the
out-of-line functions are compiled with specific compiler flags to
force out-of-line save/restore of any registers that are usually
caller-saved.
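
For illustration, with CONFIG_ARM64_LSE_ATOMICS=y the ATOMIC_OP() wrapper
in atomic_lse.h (added below) expands roughly as follows for atomic_add;
this is a sketch of the preprocessor output, not literal kernel source:

static inline void atomic_add(int i, atomic_t *v)
{
        register int w0 asm ("w0") = i;         /* first argument lives in w0 */
        register atomic_t *x1 asm ("x1") = v;   /* pointer argument lives in x1 */

        asm volatile(
        "bl\t__ll_sc_atomic_add\n"              /* patchable branch to the ll/sc body */
        : "+r" (w0), "+Q" (v->counter)
        : "r" (x1)
        : "x30");                               /* only the link register is clobbered */
}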

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/Kconfig
arch/arm64/include/asm/atomic.h
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/atomic_lse.h [new file with mode: 0644]
arch/arm64/lib/Makefile
arch/arm64/lib/atomic_ll_sc.c [new file with mode: 0644]

index 5372e1e0c11c4fb63d438c7c8f60ade54752953e..8dabffa82ef810842e8961b60e9812b3384c326a 100644 (file)
@@ -618,6 +618,18 @@ config ARM64_PAN
         The feature is detected at runtime, and will remain as a 'nop'
         instruction if the cpu does not implement the feature.
 
+config ARM64_LSE_ATOMICS
+       bool "ARMv8.1 atomic instructions"
+       help
+         As part of the Large System Extensions, ARMv8.1 introduces new
+         atomic instructions that are designed specifically to scale in
+         very large systems.
+
+         Say Y here to make use of these instructions for the in-kernel
+         atomic routines. This incurs a small overhead on CPUs that do
+         not support these instructions and requires the kernel to be
+         built with binutils >= 2.25.
+
 menuconfig ARMV8_DEPRECATED
        bool "Emulate deprecated/obsolete ARMv8 instructions"
        depends on COMPAT
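
(Usage note, not part of the patch: since the new entry carries no "default"
line it stays off unless selected explicitly, e.g. with
CONFIG_ARM64_LSE_ATOMICS=y in the kernel .config or a defconfig fragment.)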
index 632c47064722dc84fc96da0e32bff9c7f5c07adf..84635f2d3d0a8a4e293b47f3873b1f76a543a792 100644 (file)
@@ -21,6 +21,7 @@
 #define __ASM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/stringify.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
 
 #ifdef __KERNEL__
 
+#define __ARM64_IN_ATOMIC_IMPL
+
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+#include <asm/atomic_lse.h>
+#else
 #include <asm/atomic_ll_sc.h>
+#endif
+
+#undef __ARM64_IN_ATOMIC_IMPL
 
 /*
  * On ARM, ordinary assignment (str instruction) doesn't clear the local
index 66e992a58f6b91df385f2019cf75b53abbf7a372..c33fa2cd399e396d615d759f6910811c34aee674 100644 (file)
 #ifndef __ASM_ATOMIC_LL_SC_H
 #define __ASM_ATOMIC_LL_SC_H
 
+#ifndef __ARM64_IN_ATOMIC_IMPL
+#error "please don't include this file directly"
+#endif
+
 /*
  * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
  * store exclusive to ensure that these are atomic.  We may loop
 #define __LL_SC_PREFIX(x)      x
 #endif
 
+#ifndef __LL_SC_EXPORT
+#define __LL_SC_EXPORT(x)
+#endif
+
 #define ATOMIC_OP(op, asm_op)                                          \
 __LL_SC_INLINE void                                                    \
 __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))                                \
@@ -56,6 +64,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))                               \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
        : "Ir" (i));                                                    \
 }                                                                      \
+__LL_SC_EXPORT(atomic_##op);
 
 #define ATOMIC_OP_RETURN(op, asm_op)                                   \
 __LL_SC_INLINE int                                                     \
@@ -75,7 +84,8 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v))              \
                                                                        \
        smp_mb();                                                       \
        return result;                                                  \
-}
+}                                                                      \
+__LL_SC_EXPORT(atomic_##op##_return);
 
 #define ATOMIC_OPS(op, asm_op)                                         \
        ATOMIC_OP(op, asm_op)                                           \
@@ -115,6 +125,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new))
        smp_mb();
        return oldval;
 }
+__LL_SC_EXPORT(atomic_cmpxchg);
 
 #define ATOMIC64_OP(op, asm_op)                                                \
 __LL_SC_INLINE void                                                    \
@@ -131,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v))                        \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
        : "Ir" (i));                                                    \
 }                                                                      \
+__LL_SC_EXPORT(atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(op, asm_op)                                 \
 __LL_SC_INLINE long                                                    \
@@ -150,7 +162,8 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v))               \
                                                                        \
        smp_mb();                                                       \
        return result;                                                  \
-}
+}                                                                      \
+__LL_SC_EXPORT(atomic64_##op##_return);
 
 #define ATOMIC64_OPS(op, asm_op)                                       \
        ATOMIC64_OP(op, asm_op)                                         \
@@ -190,6 +203,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new))
        smp_mb();
        return oldval;
 }
+__LL_SC_EXPORT(atomic64_cmpxchg);
 
 __LL_SC_INLINE long
 __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
@@ -211,5 +225,6 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 
        return result;
 }
+__LL_SC_EXPORT(atomic64_dec_if_positive);
 
 #endif /* __ASM_ATOMIC_LL_SC_H */
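
To make the effect of __LL_SC_EXPORT() concrete: when this header is built
out of line via atomic_ll_sc.c (with the macro definitions from atomic_lse.h
below in effect, where __LL_SC_INLINE is empty, __LL_SC_PREFIX() prepends
__ll_sc_ and __LL_SC_EXPORT() wraps EXPORT_SYMBOL()), ATOMIC_OP(add, add)
emits a real, exported function along these lines; a sketch based on the
existing ll/sc implementation:

void __ll_sc_atomic_add(int i, atomic_t *v)
{
        unsigned long tmp;
        int result;

        asm volatile("// atomic_add\n"
"1:     ldxr    %w0, %2\n"
"       add     %w0, %w0, %w3\n"
"       stxr    %w1, %w0, %2\n"
"       cbnz    %w1, 1b"
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
        : "Ir" (i));
}
EXPORT_SYMBOL(__ll_sc_atomic_add);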
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
new file mode 100644 (file)
index 0000000..dce6ede
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+ * Based on arch/arm/include/asm/atomic.h
+ *
+ * Copyright (C) 1996 Russell King.
+ * Copyright (C) 2002 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_ATOMIC_LSE_H
+#define __ASM_ATOMIC_LSE_H
+
+#ifndef __ARM64_IN_ATOMIC_IMPL
+#error "please don't include this file directly"
+#endif
+
+/* Move the ll/sc atomics out-of-line */
+#define __LL_SC_INLINE
+#define __LL_SC_PREFIX(x)      __ll_sc_##x
+#define __LL_SC_EXPORT(x)      EXPORT_SYMBOL(__LL_SC_PREFIX(x))
+
+/* Macros for constructing calls to out-of-line ll/sc atomics */
+#define __LL_SC_CALL(op)                                               \
+       "bl\t" __stringify(__LL_SC_PREFIX(atomic_##op)) "\n"
+#define __LL_SC_CALL64(op)                                             \
+       "bl\t" __stringify(__LL_SC_PREFIX(atomic64_##op)) "\n"
+
+#define ATOMIC_OP(op, asm_op)                                          \
+static inline void atomic_##op(int i, atomic_t *v)                     \
+{                                                                      \
+       register int w0 asm ("w0") = i;                                 \
+       register atomic_t *x1 asm ("x1") = v;                           \
+                                                                       \
+       asm volatile(                                                   \
+       __LL_SC_CALL(op)                                                \
+       : "+r" (w0), "+Q" (v->counter)                                  \
+       : "r" (x1)                                                      \
+       : "x30");                                                       \
+}                                                                      \
+
+#define ATOMIC_OP_RETURN(op, asm_op)                                   \
+static inline int atomic_##op##_return(int i, atomic_t *v)             \
+{                                                                      \
+       register int w0 asm ("w0") = i;                                 \
+       register atomic_t *x1 asm ("x1") = v;                           \
+                                                                       \
+       asm volatile(                                                   \
+       __LL_SC_CALL(op##_return)                                       \
+       : "+r" (w0)                                                     \
+       : "r" (x1)                                                      \
+       : "x30", "memory");                                             \
+                                                                       \
+       return w0;                                                      \
+}
+
+#define ATOMIC_OPS(op, asm_op)                                         \
+       ATOMIC_OP(op, asm_op)                                           \
+       ATOMIC_OP_RETURN(op, asm_op)
+
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(andnot, bic)
+ATOMIC_OP(or, orr)
+ATOMIC_OP(xor, eor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+{
+       register unsigned long x0 asm ("x0") = (unsigned long)ptr;
+       register int w1 asm ("w1") = old;
+       register int w2 asm ("w2") = new;
+
+       asm volatile(
+       __LL_SC_CALL(cmpxchg)
+       : "+r" (x0)
+       : "r" (w1), "r" (w2)
+       : "x30", "cc", "memory");
+
+       return x0;
+}
+
+#define ATOMIC64_OP(op, asm_op)                                                \
+static inline void atomic64_##op(long i, atomic64_t *v)                        \
+{                                                                      \
+       register long x0 asm ("x0") = i;                                \
+       register atomic64_t *x1 asm ("x1") = v;                         \
+                                                                       \
+       asm volatile(                                                   \
+       __LL_SC_CALL64(op)                                              \
+       : "+r" (x0), "+Q" (v->counter)                                  \
+       : "r" (x1)                                                      \
+       : "x30");                                                       \
+}                                                                      \
+
+#define ATOMIC64_OP_RETURN(op, asm_op)                                 \
+static inline long atomic64_##op##_return(long i, atomic64_t *v)       \
+{                                                                      \
+       register long x0 asm ("x0") = i;                                \
+       register atomic64_t *x1 asm ("x1") = v;                         \
+                                                                       \
+       asm volatile(                                                   \
+       __LL_SC_CALL64(op##_return)                                     \
+       : "+r" (x0)                                                     \
+       : "r" (x1)                                                      \
+       : "x30", "memory");                                             \
+                                                                       \
+       return x0;                                                      \
+}
+
+#define ATOMIC64_OPS(op, asm_op)                                       \
+       ATOMIC64_OP(op, asm_op)                                         \
+       ATOMIC64_OP_RETURN(op, asm_op)
+
+ATOMIC64_OPS(add, add)
+ATOMIC64_OPS(sub, sub)
+
+ATOMIC64_OP(and, and)
+ATOMIC64_OP(andnot, bic)
+ATOMIC64_OP(or, orr)
+ATOMIC64_OP(xor, eor)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
+{
+       register unsigned long x0 asm ("x0") = (unsigned long)ptr;
+       register long x1 asm ("x1") = old;
+       register long x2 asm ("x2") = new;
+
+       asm volatile(
+       __LL_SC_CALL64(cmpxchg)
+       : "+r" (x0)
+       : "r" (x1), "r" (x2)
+       : "x30", "cc", "memory");
+
+       return x0;
+}
+
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+       register unsigned long x0 asm ("x0") = (unsigned long)v;
+
+       asm volatile(
+       __LL_SC_CALL64(dec_if_positive)
+       : "+r" (x0)
+       :
+       : "x30", "cc", "memory");
+
+       return x0;
+}
+
+#endif /* __ASM_ATOMIC_LSE_H */
index d98d3e39879eb91789ad174305492fc865a38f93..1a811ecf71da8a8032a1e8cda8cf686f8fc71189 100644 (file)
@@ -3,3 +3,16 @@ lib-y          := bitops.o clear_user.o delay.o copy_from_user.o       \
                   clear_page.o memchr.o memcpy.o memmove.o memset.o    \
                   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o       \
                   strchr.o strrchr.o
+
+# Tell the compiler to treat all general purpose registers as
+# callee-saved, which allows for efficient runtime patching of the bl
+# instruction in the caller with an atomic instruction when supported by
+# the CPU. Result and argument registers are handled correctly, based on
+# the function prototype.
+lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
+CFLAGS_atomic_ll_sc.o  := -fcall-used-x0 -ffixed-x1 -ffixed-x2         \
+                  -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6          \
+                  -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9           \
+                  -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12   \
+                  -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15   \
+                  -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18
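
These flags are what make the minimal clobber lists in atomic_lse.h legal:
because atomic_ll_sc.o leaves every register except the argument/result
registers untouched from the caller's point of view, a caller only has to
account for those registers plus the link register. The same pattern in
isolation looks like this (hypothetical callee name, illustrative only):

/* callee is assumed to be built with the -ffixed / -fcall-saved set above */
static inline long call_special(long arg)
{
        register long x0 asm ("x0") = arg;      /* argument and return value */

        asm volatile(
        "bl\tcallee\n"                          /* callee preserves x1-x18 for us */
        : "+r" (x0)
        :
        : "x30", "memory");                     /* only lr (and memory) clobbered */

        return x0;
}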
diff --git a/arch/arm64/lib/atomic_ll_sc.c b/arch/arm64/lib/atomic_ll_sc.c
new file mode 100644 (file)
index 0000000..b0c538b
--- /dev/null
@@ -0,0 +1,3 @@
+#include <asm/atomic.h>
+#define __ARM64_IN_ATOMIC_IMPL
+#include <asm/atomic_ll_sc.h>