From 442302820356977237e32a76a211e7942255003a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Sep 2014 08:20:43 +0200 Subject: [PATCH] s390/spinlock: optimize spin_unlock code Use a memory barrier + store sequence instead of a load + compare and swap sequence to unlock a spinlock and an rw lock. For the spinlock case this saves us two memory reads and a not needed cpu serialization after the compare and swap instruction stored the new value. The kernel size (performance_defconfig) gets reduced by ~14k. Average execution time of a tight inlined spin_unlock loop drops from 5.8ns to 0.7ns on a zEC12 machine. An artificial stress test case where several counters are protected with a single spinlock and which are only incremented while holding the spinlock shows ~30% improvement on a 4 cpu machine. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/barrier.h | 6 ++++-- arch/s390/include/asm/spinlock.h | 21 ++++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 19ff956b752b..b5dce6544d76 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -15,11 +15,13 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES /* Fast-BCR without checkpoint synchronization */ -#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 14,0\n" #else -#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 15,0\n" #endif +#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0) + #define rmb() mb() #define wmb() mb() #define read_barrier_depends() do { } while(0) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 96879f7ad6da..d26ad2ac7cb2 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -64,11 +64,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); } -static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp) -{ - return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0); -} - static inline void arch_spin_lock(arch_spinlock_t *lp) { if (!arch_spin_trylock_once(lp)) @@ -91,7 +86,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { - arch_spin_tryrelease_once(lp); + typecheck(unsigned int, lp->lock); + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (lp->lock) + : "d" (0) + : "cc", "memory"); } static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) @@ -179,7 +180,13 @@ static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) static inline void arch_write_unlock(arch_rwlock_t *rw) { - _raw_compare_and_swap(&rw->lock, 0x80000000, 0); + typecheck(unsigned int, rw->lock); + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (rw->lock) + : "d" (0) + : "cc", "memory"); } static inline int arch_read_trylock(arch_rwlock_t *rw) -- 2.20.1