arm64: xchg: patch in lse instructions when supported by the CPU
author	Will Deacon <will.deacon@arm.com>
Tue, 31 Mar 2015 13:11:24 +0000 (14:11 +0100)
committer	Will Deacon <will.deacon@arm.com>
Mon, 27 Jul 2015 14:28:51 +0000 (15:28 +0100)
On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of our xchg primitives so that
the LSE swp instruction (yes, you read right!) is used instead.

Reviewed-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
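
For context, the runtime patching mentioned above is driven by the arm64
instruction-alternatives framework: the LL/SC sequence is what gets linked
into the kernel image, and at boot the alternatives code overwrites it with
the LSE sequence on CPUs that advertise the ARMv8.1 atomics. The nops on each
LSE side are only padding, since both alternatives must occupy the same number
of instructions to be patched in place. A minimal sketch of the shape of
ARM64_LSE_ATOMIC_INSN, assuming the ARM64_HAS_LSE_ATOMICS capability used by
the companion asm/lse.h changes (see that header for the real definition):

/*
 * Sketch only: select the LSE string on CPUs with LSE atomics,
 * otherwise keep the LL/SC string; both must be the same length.
 */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)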
arch/arm64/include/asm/cmpxchg.h

index d8c25b7b18fbf42ddc66ab888fc22c530d752d15..d0cce80689026c908542bb9382188a5d8a18bc3d 100644
@@ -22,6 +22,7 @@
 #include <linux/mmdebug.h>
 
 #include <asm/barrier.h>
+#include <asm/lse.h>
 
 static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
 {
@@ -29,37 +30,65 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 
        switch (size) {
        case 1:
-               asm volatile("//        __xchg1\n"
+               asm volatile(ARM64_LSE_ATOMIC_INSN(
+               /* LL/SC */
                "1:     ldxrb   %w0, %2\n"
                "       stlxrb  %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
+               "       dmb     ish",
+               /* LSE atomics */
+               "       nop\n"
+               "       swpalb  %w3, %w0, %2\n"
+               "       nop\n"
+               "       nop")
                        : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
                        : "r" (x)
                        : "memory");
                break;
        case 2:
-               asm volatile("//        __xchg2\n"
+               asm volatile(ARM64_LSE_ATOMIC_INSN(
+               /* LL/SC */
                "1:     ldxrh   %w0, %2\n"
                "       stlxrh  %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
+               "       dmb     ish",
+               /* LSE atomics */
+               "       nop\n"
+               "       swpalh  %w3, %w0, %2\n"
+               "       nop\n"
+               "       nop")
                        : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
                        : "r" (x)
                        : "memory");
                break;
        case 4:
-               asm volatile("//        __xchg4\n"
+               asm volatile(ARM64_LSE_ATOMIC_INSN(
+               /* LL/SC */
                "1:     ldxr    %w0, %2\n"
                "       stlxr   %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
+               "       dmb     ish",
+               /* LSE atomics */
+               "       nop\n"
+               "       swpal   %w3, %w0, %2\n"
+               "       nop\n"
+               "       nop")
                        : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
                        : "r" (x)
                        : "memory");
                break;
        case 8:
-               asm volatile("//        __xchg8\n"
+               asm volatile(ARM64_LSE_ATOMIC_INSN(
+               /* LL/SC */
                "1:     ldxr    %0, %2\n"
                "       stlxr   %w1, %3, %2\n"
                "       cbnz    %w1, 1b\n"
+               "       dmb     ish",
+               /* LSE atomics */
+               "       nop\n"
+               "       swpal   %3, %0, %2\n"
+               "       nop\n"
+               "       nop")
                        : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
                        : "r" (x)
                        : "memory");
@@ -68,7 +97,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
                BUILD_BUG();
        }
 
-       smp_mb();
        return ret;
 }
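
Note how dropping the trailing smp_mb() keeps the two alternatives equivalent:
the LL/SC branch now ends with an explicit "dmb ish" inside the asm block,
while the swpal forms already carry acquire/release semantics and need no
separate barrier. For reference, callers reach __xchg() through the xchg()
wrapper defined later in cmpxchg.h, which supplies the operand size so the
right case is selected. A rough sketch of that wrapper (not part of this
patch):

#define xchg(ptr, x)							\
({									\
	__typeof__(*(ptr)) __ret;					\
	__ret = (__typeof__(*(ptr)))					\
		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)));	\
	__ret;								\
})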