locking/mutex: Optimize mutex_trylock() fast-path
author Peter Zijlstra <peterz@infradead.org>
Wed, 1 Jun 2016 18:58:15 +0000 (20:58 +0200)
committer Ingo Molnar <mingo@kernel.org>
Wed, 8 Jun 2016 13:17:01 +0000 (15:17 +0200)
A while back Viro posted a number of 'interesting' mutex_is_locked()
users on IRC, one of those was RCU.

RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(): the
regular load-before-modify pattern.

While the use isn't wrong per se, it's curious that it's needed at all;
mutex_trylock() should be good enough on its own to avoid the pointless
cacheline bounces.

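So fix the mutex_trylock() fast-path implementations to do a plain load
of the count before attempting the atomic operation, and remove the
mutex_is_locked() (ab)use from RCU.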

Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paul McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <Waiman.Long@hpe.com>
Link: http://lkml.kernel.org/r/20160601185815.GW3190@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/ia64/include/asm/mutex.h
arch/powerpc/include/asm/mutex.h
arch/x86/include/asm/mutex_32.h
arch/x86/include/asm/mutex_64.h
include/asm-generic/mutex-dec.h
include/asm-generic/mutex-xchg.h
kernel/rcu/tree.c

index f41e66d65e31c4b6c1eb15e9bdb0ee71d2018378..28cb819e0ff93adadf6ffbb1974cdcd1a5a9d8fc 100644 (file)
@@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       if (cmpxchg_acq(count, 1, 0) == 1)
+       if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
                return 1;
        return 0;
 }
index 127ab23e1f6ccdbe038b6cfdd84c569742968e27..078155fa118992f1f456d9643786f7ccf7d19741 100644 (file)
@@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
+       if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
                return 1;
        return 0;
 }
index 85e6cda45a0297e20204fe926757e46b647faa2b..e9355a84fc675b936c624bcd00d5640ed43f64b8 100644 (file)
@@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
                                           int (*fail_fn)(atomic_t *))
 {
        /* cmpxchg because it never induces a false contention state. */
-       if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+       if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
                return 1;
 
        return 0;
index 07537a44216ec9b2eed302183af7c57d8949a5a0..d9850758464eee05d898eb7c10b5b99cf13c3669 100644 (file)
@@ -118,10 +118,10 @@ do {                                                              \
 static inline int __mutex_fastpath_trylock(atomic_t *count,
                                           int (*fail_fn)(atomic_t *))
 {
-       if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+       if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
                return 1;
-       else
-               return 0;
+
+       return 0;
 }
 
 #endif /* _ASM_X86_MUTEX_64_H */
index fd694cfd678af712b2c3ca1055331df24a8db8ba..c54829d3de3700ce6df65494b30f3dd1b32f63fe 100644 (file)
@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
+       if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
                return 1;
        return 0;
 }
index a6b4a7bd6ac9770356e066c51f295c6b9c33793f..3269ec4e195fbaba5b44fa61b74896ff285ac989 100644 (file)
@@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       int prev = atomic_xchg_acquire(count, 0);
+       int prev;
 
+       if (atomic_read(count) != 1)
+               return 0;
+
+       prev = atomic_xchg_acquire(count, 0);
        if (unlikely(prev < 0)) {
                /*
                 * The lock was marked contended so we must restore that
index c7f1bc4f817c4a34e19ebc160693a27f034dbac2..b7326893221ff7bfe46df362848341f99523d1ff 100644 (file)
@@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
        if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
            (rnp == rnp_root ||
             ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
-           !mutex_is_locked(&rsp->exp_mutex) &&
            mutex_trylock(&rsp->exp_mutex))
                goto fastpath;