locking/mutex: Fix mutex handoff
authorPeter Zijlstra <peterz@infradead.org>
Wed, 11 Jan 2017 13:17:48 +0000 (14:17 +0100)
committerIngo Molnar <mingo@kernel.org>
Sat, 14 Jan 2017 10:14:38 +0000 (11:14 +0100)
While reviewing the ww_mutex patches, I noticed that it was still
possible to (incorrectly) succeed for (incorrect) code like:

mutex_lock(&a);
mutex_lock(&a);

This was possible if the second mutex_lock() would block (as expected)
but then receive a spurious wakeup. At that point it would find itself
at the front of the queue, request a handoff and instantly claim
ownership and continue, since owner would point to itself.

Avoid this scenario and simplify the code by introducing a third low
bit to signal handoff pickup. So once we request handoff, unlock
clears the handoff bit and sets the pickup bit along with the new
owner.

This also removes the need for the .handoff argument to
__mutex_trylock(), since that becomes superfluous with PICKUP.

In order to guarantee enough low bits, ensure task_struct alignment is
at least L1_CACHE_BYTES (which seems a good ideal regardless).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9d659ae14b54 ("locking/mutex: Add lock handoff to avoid starvation")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/mutex.h
kernel/fork.c
kernel/locking/mutex.c

index b97870f2debd063ba358a4d2343955f39dd6b994..3e1fccb47f11010e9416eff8e49cd5ec87de251c 100644 (file)
@@ -65,7 +65,7 @@ struct mutex {
 
 static inline struct task_struct *__mutex_owner(struct mutex *lock)
 {
-       return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
+       return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
 }
 
 /*
index 11c5c8ab827c4be8ef8cb09072de2364d90aff6c..a90510d0bbf8929b6ab91801d9d66902579bde31 100644 (file)
@@ -432,11 +432,13 @@ void __init fork_init(void)
        int i;
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
-#define ARCH_MIN_TASKALIGN     L1_CACHE_BYTES
+#define ARCH_MIN_TASKALIGN     0
 #endif
+       int align = min_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
+
        /* create a slab on which task_structs can be allocated */
        task_struct_cachep = kmem_cache_create("task_struct",
-                       arch_task_struct_size, ARCH_MIN_TASKALIGN,
+                       arch_task_struct_size, align,
                        SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
 #endif
 
index 97d142486f939d42e8f300bb15e4f70dec0591af..24284497c42547ddcfbb1b8fe448a2968f334aa8 100644 (file)
@@ -50,16 +50,17 @@ EXPORT_SYMBOL(__mutex_init);
 /*
  * @owner: contains: 'struct task_struct *' to the current lock owner,
  * NULL means not owned. Since task_struct pointers are aligned at
- * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low
- * bits to store extra state.
+ * at least L1_CACHE_BYTES, we have low bits to store extra state.
  *
  * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
  * Bit1 indicates unlock needs to hand the lock to the top-waiter
+ * Bit2 indicates handoff has been done and we're waiting for pickup.
  */
 #define MUTEX_FLAG_WAITERS     0x01
 #define MUTEX_FLAG_HANDOFF     0x02
+#define MUTEX_FLAG_PICKUP      0x04
 
-#define MUTEX_FLAGS            0x03
+#define MUTEX_FLAGS            0x07
 
 static inline struct task_struct *__owner_task(unsigned long owner)
 {
@@ -72,38 +73,29 @@ static inline unsigned long __owner_flags(unsigned long owner)
 }
 
 /*
- * Actual trylock that will work on any unlocked state.
- *
- * When setting the owner field, we must preserve the low flag bits.
- *
- * Be careful with @handoff, only set that in a wait-loop (where you set
- * HANDOFF) to avoid recursive lock attempts.
+ * Trylock variant that retuns the owning task on failure.
  */
-static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
+static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
 {
        unsigned long owner, curr = (unsigned long)current;
 
        owner = atomic_long_read(&lock->owner);
        for (;;) { /* must loop, can race against a flag */
                unsigned long old, flags = __owner_flags(owner);
+               unsigned long task = owner & ~MUTEX_FLAGS;
+
+               if (task) {
+                       if (likely(task != curr))
+                               break;
+
+                       if (likely(!(flags & MUTEX_FLAG_PICKUP)))
+                               break;
 
-               if (__owner_task(owner)) {
-                       if (handoff && unlikely(__owner_task(owner) == current)) {
-                               /*
-                                * Provide ACQUIRE semantics for the lock-handoff.
-                                *
-                                * We cannot easily use load-acquire here, since
-                                * the actual load is a failed cmpxchg, which
-                                * doesn't imply any barriers.
-                                *
-                                * Also, this is a fairly unlikely scenario, and
-                                * this contains the cost.
-                                */
-                               smp_mb(); /* ACQUIRE */
-                               return true;
-                       }
-
-                       return false;
+                       flags &= ~MUTEX_FLAG_PICKUP;
+               } else {
+#ifdef CONFIG_DEBUG_MUTEXES
+                       DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP);
+#endif
                }
 
                /*
@@ -111,15 +103,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
                 * past the point where we acquire it. This would be possible
                 * if we (accidentally) set the bit on an unlocked mutex.
                 */
-               if (handoff)
-                       flags &= ~MUTEX_FLAG_HANDOFF;
+               flags &= ~MUTEX_FLAG_HANDOFF;
 
                old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
                if (old == owner)
-                       return true;
+                       return NULL;
 
                owner = old;
        }
+
+       return __owner_task(owner);
+}
+
+/*
+ * Actual trylock that will work on any unlocked state.
+ */
+static inline bool __mutex_trylock(struct mutex *lock)
+{
+       return !__mutex_trylock_or_owner(lock);
 }
 
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
@@ -171,9 +172,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
 
 /*
  * Give up ownership to a specific task, when @task = NULL, this is equivalent
- * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE
- * semantics like a regular unlock, the __mutex_trylock() provides matching
- * ACQUIRE semantics for the handoff.
+ * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves
+ * WAITERS. Provides RELEASE semantics like a regular unlock, the
+ * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff.
  */
 static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
 {
@@ -184,10 +185,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
 
 #ifdef CONFIG_DEBUG_MUTEXES
                DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+               DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
 #endif
 
                new = (owner & MUTEX_FLAG_WAITERS);
                new |= (unsigned long)task;
+               if (task)
+                       new |= MUTEX_FLAG_PICKUP;
 
                old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
                if (old == owner)
@@ -435,8 +439,6 @@ static bool mutex_optimistic_spin(struct mutex *lock,
                                  struct ww_acquire_ctx *ww_ctx,
                                  const bool use_ww_ctx, const bool waiter)
 {
-       struct task_struct *task = current;
-
        if (!waiter) {
                /*
                 * The purpose of the mutex_can_spin_on_owner() function is
@@ -476,24 +478,17 @@ static bool mutex_optimistic_spin(struct mutex *lock,
                                goto fail_unlock;
                }
 
+               /* Try to acquire the mutex... */
+               owner = __mutex_trylock_or_owner(lock);
+               if (!owner)
+                       break;
+
                /*
-                * If there's an owner, wait for it to either
+                * There's an owner, wait for it to either
                 * release the lock or go to sleep.
                 */
-               owner = __mutex_owner(lock);
-               if (owner) {
-                       if (waiter && owner == task) {
-                               smp_mb(); /* ACQUIRE */
-                               break;
-                       }
-
-                       if (!mutex_spin_on_owner(lock, owner))
-                               goto fail_unlock;
-               }
-
-               /* Try to acquire the mutex if it is unlocked. */
-               if (__mutex_trylock(lock, waiter))
-                       break;
+               if (!mutex_spin_on_owner(lock, owner))
+                       goto fail_unlock;
 
                /*
                 * The cpu_relax() call is a compiler barrier which forces
@@ -637,7 +632,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        preempt_disable();
        mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
-       if (__mutex_trylock(lock, false) ||
+       if (__mutex_trylock(lock) ||
            mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) {
                /* got the lock, yay! */
                lock_acquired(&lock->dep_map, ip);
@@ -651,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        /*
         * After waiting to acquire the wait_lock, try again.
         */
-       if (__mutex_trylock(lock, false))
+       if (__mutex_trylock(lock))
                goto skip_wait;
 
        debug_mutex_lock_common(lock, &waiter);
@@ -674,7 +669,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * before testing the error conditions to make sure we pick up
                 * the handoff.
                 */
-               if (__mutex_trylock(lock, first))
+               if (__mutex_trylock(lock))
                        goto acquired;
 
                /*
@@ -707,8 +702,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * state back to RUNNING and fall through the next schedule(),
                 * or we must see its unlock and acquire.
                 */
-               if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
-                    __mutex_trylock(lock, first))
+               if (__mutex_trylock(lock) ||
+                   (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)))
                        break;
 
                spin_lock_mutex(&lock->wait_lock, flags);
@@ -865,6 +860,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 
 #ifdef CONFIG_DEBUG_MUTEXES
                DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+               DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
 #endif
 
                if (owner & MUTEX_FLAG_HANDOFF)
@@ -1003,7 +999,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
  */
 int __sched mutex_trylock(struct mutex *lock)
 {
-       bool locked = __mutex_trylock(lock, false);
+       bool locked = __mutex_trylock(lock);
 
        if (locked)
                mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);