locking/mcs: Better differentiate between MCS variants
author Davidlohr Bueso <dave@stgolabs.net>
Tue, 6 Jan 2015 19:45:07 +0000 (11:45 -0800)
committer Ingo Molnar <mingo@kernel.org>
Wed, 14 Jan 2015 14:07:32 +0000 (15:07 +0100)
We have two flavors of the MCS spinlock: standard and cancelable (OSQ).
While each one is independent of the other, we currently mix and match
them. This patch:

  - Moves the OSQ code out of mcs_spinlock.h (which only deals with the traditional
    version) into include/linux/osq_lock.h. No unnecessary code is added to the
    more global header file; any locks that make use of OSQ must include
    it anyway.

  - Renames mcs_spinlock.c to osq_lock.c. This file only contains osq code.

  - Introduces a CONFIG_LOCK_SPIN_ON_OWNER in order to only build osq_lock
    if there is support for it.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1420573509-24774-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/osq_lock.h
kernel/Kconfig.locks
kernel/locking/Makefile
kernel/locking/mcs_spinlock.c [deleted file]
kernel/locking/mcs_spinlock.h
kernel/locking/osq_lock.c [new file with mode: 0644]

index 90230d5811c5aab49b8755fceda1e1d5e6d326aa..3a6490e81b2856821ca190f438c039136a7fa207 100644 (file)
@@ -5,8 +5,11 @@
  * An MCS like lock especially tailored for optimistic spinning for sleeping
  * lock implementations (mutex, rwsem, etc).
  */
-
-#define OSQ_UNLOCKED_VAL (0)
+struct optimistic_spin_node {
+       struct optimistic_spin_node *next, *prev;
+       int locked; /* 1 if lock acquired */
+       int cpu; /* encoded CPU # + 1 value */
+};
 
 struct optimistic_spin_queue {
        /*
@@ -16,6 +19,8 @@ struct optimistic_spin_queue {
        atomic_t tail;
 };
 
+#define OSQ_UNLOCKED_VAL (0)
+
 /* Init macro and function. */
 #define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) }
 
@@ -24,4 +29,7 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
        atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
 }
 
+extern bool osq_lock(struct optimistic_spin_queue *lock);
+extern void osq_unlock(struct optimistic_spin_queue *lock);
+
 #endif
index 76768ee812b27b7a48e13710ec23326af9b828af..08561f1acd130bd68314e03278402e629b028ba4 100644 (file)
@@ -231,6 +231,10 @@ config RWSEM_SPIN_ON_OWNER
        def_bool y
        depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
 
+config LOCK_SPIN_ON_OWNER
+       def_bool y
+       depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
+
 config ARCH_USE_QUEUE_RWLOCK
        bool
 
index 8541bfdfd232bb4213629f265cbb68a6bfb50c72..4ca8eb1519755ac17e314259fa595d18f76db058 100644 (file)
@@ -1,5 +1,5 @@
 
-obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
+obj-y += mutex.o semaphore.o rwsem.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_lockdep.o = -pg
@@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y)
 obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
 endif
 obj-$(CONFIG_SMP) += spinlock.o
+obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
 obj-$(CONFIG_SMP) += lglock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
deleted file mode 100644 (file)
index 9887a90..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-#include <linux/percpu.h>
-#include <linux/sched.h>
-#include "mcs_spinlock.h"
-
-#ifdef CONFIG_SMP
-
-/*
- * An MCS like lock especially tailored for optimistic spinning for sleeping
- * lock implementations (mutex, rwsem, etc).
- *
- * Using a single mcs node per CPU is safe because sleeping locks should not be
- * called from interrupt context and we have preemption disabled while
- * spinning.
- */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
-
-/*
- * We use the value 0 to represent "no CPU", thus the encoded value
- * will be the CPU number incremented by 1.
- */
-static inline int encode_cpu(int cpu_nr)
-{
-       return cpu_nr + 1;
-}
-
-static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
-{
-       int cpu_nr = encoded_cpu_val - 1;
-
-       return per_cpu_ptr(&osq_node, cpu_nr);
-}
-
-/*
- * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
- * Can return NULL in case we were the last queued and we updated @lock instead.
- */
-static inline struct optimistic_spin_node *
-osq_wait_next(struct optimistic_spin_queue *lock,
-             struct optimistic_spin_node *node,
-             struct optimistic_spin_node *prev)
-{
-       struct optimistic_spin_node *next = NULL;
-       int curr = encode_cpu(smp_processor_id());
-       int old;
-
-       /*
-        * If there is a prev node in queue, then the 'old' value will be
-        * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
-        * we're currently last in queue, then the queue will then become empty.
-        */
-       old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
-
-       for (;;) {
-               if (atomic_read(&lock->tail) == curr &&
-                   atomic_cmpxchg(&lock->tail, curr, old) == curr) {
-                       /*
-                        * We were the last queued, we moved @lock back. @prev
-                        * will now observe @lock and will complete its
-                        * unlock()/unqueue().
-                        */
-                       break;
-               }
-
-               /*
-                * We must xchg() the @node->next value, because if we were to
-                * leave it in, a concurrent unlock()/unqueue() from
-                * @node->next might complete Step-A and think its @prev is
-                * still valid.
-                *
-                * If the concurrent unlock()/unqueue() wins the race, we'll
-                * wait for either @lock to point to us, through its Step-B, or
-                * wait for a new @node->next from its Step-C.
-                */
-               if (node->next) {
-                       next = xchg(&node->next, NULL);
-                       if (next)
-                               break;
-               }
-
-               cpu_relax_lowlatency();
-       }
-
-       return next;
-}
-
-bool osq_lock(struct optimistic_spin_queue *lock)
-{
-       struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
-       struct optimistic_spin_node *prev, *next;
-       int curr = encode_cpu(smp_processor_id());
-       int old;
-
-       node->locked = 0;
-       node->next = NULL;
-       node->cpu = curr;
-
-       old = atomic_xchg(&lock->tail, curr);
-       if (old == OSQ_UNLOCKED_VAL)
-               return true;
-
-       prev = decode_cpu(old);
-       node->prev = prev;
-       ACCESS_ONCE(prev->next) = node;
-
-       /*
-        * Normally @prev is untouchable after the above store; because at that
-        * moment unlock can proceed and wipe the node element from stack.
-        *
-        * However, since our nodes are static per-cpu storage, we're
-        * guaranteed their existence -- this allows us to apply
-        * cmpxchg in an attempt to undo our queueing.
-        */
-
-       while (!smp_load_acquire(&node->locked)) {
-               /*
-                * If we need to reschedule bail... so we can block.
-                */
-               if (need_resched())
-                       goto unqueue;
-
-               cpu_relax_lowlatency();
-       }
-       return true;
-
-unqueue:
-       /*
-        * Step - A  -- stabilize @prev
-        *
-        * Undo our @prev->next assignment; this will make @prev's
-        * unlock()/unqueue() wait for a next pointer since @lock points to us
-        * (or later).
-        */
-
-       for (;;) {
-               if (prev->next == node &&
-                   cmpxchg(&prev->next, node, NULL) == node)
-                       break;
-
-               /*
-                * We can only fail the cmpxchg() racing against an unlock(),
-                * in which case we should observe @node->locked becomming
-                * true.
-                */
-               if (smp_load_acquire(&node->locked))
-                       return true;
-
-               cpu_relax_lowlatency();
-
-               /*
-                * Or we race against a concurrent unqueue()'s step-B, in which
-                * case its step-C will write us a new @node->prev pointer.
-                */
-               prev = ACCESS_ONCE(node->prev);
-       }
-
-       /*
-        * Step - B -- stabilize @next
-        *
-        * Similar to unlock(), wait for @node->next or move @lock from @node
-        * back to @prev.
-        */
-
-       next = osq_wait_next(lock, node, prev);
-       if (!next)
-               return false;
-
-       /*
-        * Step - C -- unlink
-        *
-        * @prev is stable because its still waiting for a new @prev->next
-        * pointer, @next is stable because our @node->next pointer is NULL and
-        * it will wait in Step-A.
-        */
-
-       ACCESS_ONCE(next->prev) = prev;
-       ACCESS_ONCE(prev->next) = next;
-
-       return false;
-}
-
-void osq_unlock(struct optimistic_spin_queue *lock)
-{
-       struct optimistic_spin_node *node, *next;
-       int curr = encode_cpu(smp_processor_id());
-
-       /*
-        * Fast path for the uncontended case.
-        */
-       if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
-               return;
-
-       /*
-        * Second most likely case.
-        */
-       node = this_cpu_ptr(&osq_node);
-       next = xchg(&node->next, NULL);
-       if (next) {
-               ACCESS_ONCE(next->locked) = 1;
-               return;
-       }
-
-       next = osq_wait_next(lock, node, NULL);
-       if (next)
-               ACCESS_ONCE(next->locked) = 1;
-}
-
-#endif
-
index 4d60986fcbee74a4fde3906e0d87fc113c5e8172..d1fe2ba5bac958bc85da8e8868408d8c6c809dc3 100644 (file)
@@ -108,20 +108,4 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
        arch_mcs_spin_unlock_contended(&next->locked);
 }
 
-/*
- * Cancellable version of the MCS lock above.
- *
- * Intended for adaptive spinning of sleeping locks:
- * mutex_lock()/rwsem_down_{read,write}() etc.
- */
-
-struct optimistic_spin_node {
-       struct optimistic_spin_node *next, *prev;
-       int locked; /* 1 if lock acquired */
-       int cpu; /* encoded CPU # value */
-};
-
-extern bool osq_lock(struct optimistic_spin_queue *lock);
-extern void osq_unlock(struct optimistic_spin_queue *lock);
-
 #endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
new file mode 100644 (file)
index 0000000..ec83d4d
--- /dev/null
@@ -0,0 +1,203 @@
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/osq_lock.h>
+
+/*
+ * An MCS like lock especially tailored for optimistic spinning for sleeping
+ * lock implementations (mutex, rwsem, etc).
+ *
+ * Using a single mcs node per CPU is safe because sleeping locks should not be
+ * called from interrupt context and we have preemption disabled while
+ * spinning.
+ */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
+
+/*
+ * We use the value 0 to represent "no CPU", thus the encoded value
+ * will be the CPU number incremented by 1.
+ */
+static inline int encode_cpu(int cpu_nr)
+{
+       return cpu_nr + 1;
+}
+
+static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
+{
+       int cpu_nr = encoded_cpu_val - 1;
+
+       return per_cpu_ptr(&osq_node, cpu_nr);
+}
+
+/*
+ * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
+ * Can return NULL in case we were the last queued and we updated @lock instead.
+ */
+static inline struct optimistic_spin_node *
+osq_wait_next(struct optimistic_spin_queue *lock,
+             struct optimistic_spin_node *node,
+             struct optimistic_spin_node *prev)
+{
+       struct optimistic_spin_node *next = NULL;
+       int curr = encode_cpu(smp_processor_id());
+       int old;
+
+       /*
+        * If there is a prev node in queue, then the 'old' value will be
+        * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
+        * we're currently last in queue, then the queue will then become empty.
+        */
+       old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
+
+       for (;;) {
+               if (atomic_read(&lock->tail) == curr &&
+                   atomic_cmpxchg(&lock->tail, curr, old) == curr) {
+                       /*
+                        * We were the last queued, we moved @lock back. @prev
+                        * will now observe @lock and will complete its
+                        * unlock()/unqueue().
+                        */
+                       break;
+               }
+
+               /*
+                * We must xchg() the @node->next value, because if we were to
+                * leave it in, a concurrent unlock()/unqueue() from
+                * @node->next might complete Step-A and think its @prev is
+                * still valid.
+                *
+                * If the concurrent unlock()/unqueue() wins the race, we'll
+                * wait for either @lock to point to us, through its Step-B, or
+                * wait for a new @node->next from its Step-C.
+                */
+               if (node->next) {
+                       next = xchg(&node->next, NULL);
+                       if (next)
+                               break;
+               }
+
+               cpu_relax_lowlatency();
+       }
+
+       return next;
+}
+
+bool osq_lock(struct optimistic_spin_queue *lock)
+{
+       struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
+       struct optimistic_spin_node *prev, *next;
+       int curr = encode_cpu(smp_processor_id());
+       int old;
+
+       node->locked = 0;
+       node->next = NULL;
+       node->cpu = curr;
+
+       old = atomic_xchg(&lock->tail, curr);
+       if (old == OSQ_UNLOCKED_VAL)
+               return true;
+
+       prev = decode_cpu(old);
+       node->prev = prev;
+       ACCESS_ONCE(prev->next) = node;
+
+       /*
+        * Normally @prev is untouchable after the above store; because at that
+        * moment unlock can proceed and wipe the node element from stack.
+        *
+        * However, since our nodes are static per-cpu storage, we're
+        * guaranteed their existence -- this allows us to apply
+        * cmpxchg in an attempt to undo our queueing.
+        */
+
+       while (!smp_load_acquire(&node->locked)) {
+               /*
+                * If we need to reschedule bail... so we can block.
+                */
+               if (need_resched())
+                       goto unqueue;
+
+               cpu_relax_lowlatency();
+       }
+       return true;
+
+unqueue:
+       /*
+        * Step - A  -- stabilize @prev
+        *
+        * Undo our @prev->next assignment; this will make @prev's
+        * unlock()/unqueue() wait for a next pointer since @lock points to us
+        * (or later).
+        */
+
+       for (;;) {
+               if (prev->next == node &&
+                   cmpxchg(&prev->next, node, NULL) == node)
+                       break;
+
+               /*
+                * We can only fail the cmpxchg() racing against an unlock(),
+                * in which case we should observe @node->locked becoming
+                * true.
+                */
+               if (smp_load_acquire(&node->locked))
+                       return true;
+
+               cpu_relax_lowlatency();
+
+               /*
+                * Or we race against a concurrent unqueue()'s step-B, in which
+                * case its step-C will write us a new @node->prev pointer.
+                */
+               prev = ACCESS_ONCE(node->prev);
+       }
+
+       /*
+        * Step - B -- stabilize @next
+        *
+        * Similar to unlock(), wait for @node->next or move @lock from @node
+        * back to @prev.
+        */
+
+       next = osq_wait_next(lock, node, prev);
+       if (!next)
+               return false;
+
+       /*
+        * Step - C -- unlink
+        *
+        * @prev is stable because it's still waiting for a new @prev->next
+        * pointer, @next is stable because our @node->next pointer is NULL and
+        * it will wait in Step-A.
+        */
+
+       ACCESS_ONCE(next->prev) = prev;
+       ACCESS_ONCE(prev->next) = next;
+
+       return false;
+}
+
+void osq_unlock(struct optimistic_spin_queue *lock)
+{
+       struct optimistic_spin_node *node, *next;
+       int curr = encode_cpu(smp_processor_id());
+
+       /*
+        * Fast path for the uncontended case.
+        */
+       if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
+               return;
+
+       /*
+        * Second most likely case.
+        */
+       node = this_cpu_ptr(&osq_node);
+       next = xchg(&node->next, NULL);
+       if (next) {
+               ACCESS_ONCE(next->locked) = 1;
+               return;
+       }
+
+       next = osq_wait_next(lock, node, NULL);
+       if (next)
+               ACCESS_ONCE(next->locked) = 1;
+}