locking/ww_mutex: Add waiters in stamp order
author: Nicolai Hähnle <Nicolai.Haehnle@amd.com>
Wed, 21 Dec 2016 18:46:34 +0000 (19:46 +0100)
committer: Ingo Molnar <mingo@kernel.org>
Sat, 14 Jan 2017 10:14:42 +0000 (11:14 +0100)
Add regular waiters in stamp order. Keep adding waiters that have no
context in FIFO order and take care not to starve them.

While adding our task as a waiter, back off if we detect that there is
a waiter with a lower stamp in front of us.

Make sure to call lock_contended even when we back off early.

For w/w mutexes, being first in the wait list is only stable when
taking the lock without a context. Therefore, the purpose of the first
flag is split into two: 'first' remains to indicate whether we want to
spin optimistically, while 'handoff' indicates that we should be
prepared to accept a handoff.

For w/w locking with a context, we always accept handoffs after the
first schedule(), to handle the following sequence of events:

 1. Task #0 unlocks and hands off to Task #2 which is first in line

 2. Task #1 adds itself in front of Task #2

 3. Task #2 wakes up and must accept the handoff even though it is no
    longer first in line

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <Nicolai.Haehnle@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1482346000-9927-7-git-send-email-nhaehnle@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/mutex.h
kernel/locking/mutex.c

index 3e1fccb47f11010e9416eff8e49cd5ec87de251c..e17942ffb3fc6fc727d8eb80df247a5f92f7fd15 100644 (file)
@@ -20,6 +20,8 @@
 #include <linux/osq_lock.h>
 #include <linux/debug_locks.h>
 
+struct ww_acquire_ctx;
+
 /*
  * Simple, straightforward mutexes with strict semantics:
  *
@@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
 struct mutex_waiter {
        struct list_head        list;
        struct task_struct      *task;
+       struct ww_acquire_ctx   *ww_ctx;
 #ifdef CONFIG_DEBUG_MUTEXES
        void                    *magic;
 #endif
index c696614a6b8b2e79122a2a8bd13306924aafda4d..d0f7628b5a3ded6155ae06ab15a2d8eb34971b45 100644 (file)
@@ -615,6 +615,52 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
        return 0;
 }
 
+static inline int __sched
+__ww_mutex_add_waiter(struct mutex_waiter *waiter,
+                     struct mutex *lock,
+                     struct ww_acquire_ctx *ww_ctx)
+{
+       struct mutex_waiter *cur;
+       struct list_head *pos;
+
+       if (!ww_ctx) {
+               list_add_tail(&waiter->list, &lock->wait_list);
+               return 0;
+       }
+
+       /*
+        * Add the waiter before the first waiter with a higher stamp.
+        * Waiters without a context are skipped to avoid starving
+        * them.
+        *
+        * The wait list is walked from its tail towards its head.
+        * 'pos' starts out as the list head itself, so that if no
+        * waiter with a context is found the final list_add_tail()
+        * simply appends @waiter at the end of the list. Each time a
+        * waiter with a context is passed without stopping, 'pos' is
+        * moved to that waiter, so the final list_add_tail() inserts
+        * @waiter directly in front of the last such waiter visited.
+        * Context-less waiters never update 'pos', hence any of them
+        * sitting between the stopping point and 'pos' stay ahead of
+        * @waiter in the list.
+        *
+        * The walk stops at the first waiter (seen from the tail) for
+        * which __ww_ctx_stamp_after(ww_ctx, cur->ww_ctx) is true,
+        * presumably meaning that our stamp is later than that
+        * waiter's, i.e. a waiter with a lower stamp is in front of
+        * us -- NOTE(review): confirm against __ww_ctx_stamp_after().
+        *
+        * Returns 0 once @waiter has been linked into the list, or
+        * -EDEADLK (with @waiter NOT linked) when the caller has to
+        * back off instead of waiting.
+        */
+       pos = &lock->wait_list;
+       list_for_each_entry_reverse(cur, &lock->wait_list, list) {
+               if (!cur->ww_ctx)
+                       continue;
+
+               if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
+                       /*
+                        * Back off immediately if necessary.
+                        *
+                        * This is only done when ww_ctx->acquired > 0,
+                        * presumably meaning that this context already
+                        * holds other locks (TODO confirm the exact
+                        * meaning of 'acquired'). The waiter is not
+                        * added to the list in this case. With
+                        * CONFIG_DEBUG_MUTEXES the ww_mutex containing
+                        * @lock is recorded in ww_ctx->contending_lock,
+                        * after warning if one was already recorded.
+                        *
+                        * Otherwise we stop the walk here and queue
+                        * up in front of 'pos'.
+                        */
+                       if (ww_ctx->acquired > 0) {
+#ifdef CONFIG_DEBUG_MUTEXES
+                               struct ww_mutex *ww;
+
+                               ww = container_of(lock, struct ww_mutex, base);
+                               DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
+                               ww_ctx->contending_lock = ww;
+#endif
+                               return -EDEADLK;
+                       }
+
+                       break;
+               }
+
+               pos = &cur->list;
+       }
+
+       list_add_tail(&waiter->list, pos);
+       return 0;
+}
+
 /*
  * Lock a mutex (possibly interruptible), slowpath:
  */
@@ -659,15 +705,25 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        debug_mutex_lock_common(lock, &waiter);
        debug_mutex_add_waiter(lock, &waiter, current);
 
-       /* add waiting tasks to the end of the waitqueue (FIFO): */
-       list_add_tail(&waiter.list, &lock->wait_list);
+       lock_contended(&lock->dep_map, ip);
+
+       if (!use_ww_ctx) {
+               /* add waiting tasks to the end of the waitqueue (FIFO): */
+               list_add_tail(&waiter.list, &lock->wait_list);
+       } else {
+               /* Add in stamp order, waking up waiters that must back off. */
+               ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
+               if (ret)
+                       goto err_early_backoff;
+
+               waiter.ww_ctx = ww_ctx;
+       }
+
        waiter.task = current;
 
        if (__mutex_waiter_is_first(lock, &waiter))
                __mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
 
-       lock_contended(&lock->dep_map, ip);
-
        set_current_state(state);
        for (;;) {
                /*
@@ -698,9 +754,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                spin_unlock_mutex(&lock->wait_lock, flags);
                schedule_preempt_disabled();
 
-               if (!first && __mutex_waiter_is_first(lock, &waiter)) {
-                       first = true;
-                       __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
+               /*
+                * ww_mutex needs to always recheck its position since its waiter
+                * list is not FIFO ordered.
+                */
+               if ((use_ww_ctx && ww_ctx) || !first) {
+                       first = __mutex_waiter_is_first(lock, &waiter);
+                       if (first)
+                               __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
                }
 
                set_current_state(state);
@@ -739,6 +800,7 @@ skip_wait:
 err:
        __set_current_state(TASK_RUNNING);
        mutex_remove_waiter(lock, &waiter, current);
+err_early_backoff:
        spin_unlock_mutex(&lock->wait_lock, flags);
        debug_mutex_free_waiter(&waiter);
        mutex_release(&lock->dep_map, 1, ip);