powerpc: Add support for the optimised lockref implementation
author Michael Ellerman <mpe@ellerman.id.au>
Wed, 15 Jan 2014 07:14:28 +0000 (18:14 +1100)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tue, 28 Jan 2014 03:45:43 +0000 (14:45 +1100)
This commit adds the architecture support required to enable the
optimised implementation of lockrefs.

That's as simple as defining arch_spin_value_unlocked() and selecting
the Kconfig option.

We also define cmpxchg64_relaxed(), because the lockref code does not
need the cmpxchg to have barrier semantics.

Using Linus' test case[1] on one system I see a 4x improvement for the
basic enablement, and a further 1.3x for cmpxchg64_relaxed(), for a
total of 5.3x vs the baseline.

On another system I see more like 2x improvement.

[1]: http://marc.info/?l=linux-fsdevel&m=137782380714721&w=4

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/Kconfig
arch/powerpc/include/asm/cmpxchg.h
arch/powerpc/include/asm/spinlock.h

index fa395179ddd6022612a005a4526d62e8046e76fe..6ca5d5cabeb1998779ec6fc6195b3756b14d1304 100644 (file)
@@ -139,6 +139,7 @@ config PPC
        select OLD_SIGACTION if PPC32
        select HAVE_DEBUG_STACKOVERFLOW
        select HAVE_IRQ_EXIT_ON_IRQ_STACK
+       select ARCH_USE_CMPXCHG_LOCKREF if PPC64
 
 config GENERIC_CSUM
        def_bool CPU_LITTLE_ENDIAN
index e245aab7f191cc98bea0e6f8868fb7fb67d3cffe..d463c68fe7f05fa798a151ea179a5f88a842ddee 100644 (file)
@@ -300,6 +300,7 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
        BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
        cmpxchg_local((ptr), (o), (n));                                 \
   })
+#define cmpxchg64_relaxed      cmpxchg64_local
 #else
 #include <asm-generic/cmpxchg-local.h>
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
index 5f54a744dcc5e26921ddafe1d267985f71dd8540..5162f8cd18c090d0b50cafa295c24035fc60c6e7 100644 (file)
 #define SYNC_IO
 #endif
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+       return lock.slock == 0;
+}
+
 /*
  * This returns the old value in the lock, so we succeeded
  * in getting the lock if the return value is 0.