locking/barriers: Replace smp_cond_acquire() with smp_cond_load_acquire()
author	Peter Zijlstra <peterz@infradead.org>
	Mon, 4 Apr 2016 08:57:12 +0000 (10:57 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Tue, 14 Jun 2016 09:54:27 +0000 (11:54 +0200)
This new form allows using hardware-assisted waiting.

Some hardware (ARM64 and x86) allows monitoring an address for changes,
so by providing a pointer we can replace the cpu_relax() busy-wait with
hardware-optimized methods in the future.
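
For example, the qspinlock conversion below turns:

	smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));

into:

	smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));

where VAL names the value loaded from the pointer on each iteration of
the wait loop.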

Requested-by: Will Deacon <will.deacon@arm.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/compiler.h
kernel/locking/qspinlock.c
kernel/sched/core.c
kernel/sched/sched.h
kernel/smp.c

index 06f27fd9d760eed100c0e586afdc2501b63468ab..2bcaedc0f0321b4d05a145feb8b2ef562a9f7d04 100644 (file)
@@ -305,21 +305,34 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 })
 
 /**
- * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
+ * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * @ptr: pointer to the variable to wait on
  * @cond: boolean expression to wait for
  *
  * Equivalent to using smp_load_acquire() on the condition variable but employs
  * the control dependency of the wait to reduce the barrier on many platforms.
  *
+ * Due to C lacking lambda expressions, we load the value of *ptr into a
+ * pre-named variable @VAL to be used in @cond.
+ *
  * The control dependency provides a LOAD->STORE order, the additional RMB
  * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
  * aka. ACQUIRE.
  */
-#define smp_cond_acquire(cond) do {            \
-       while (!(cond))                         \
-               cpu_relax();                    \
-       smp_rmb(); /* ctrl + rmb := acquire */  \
-} while (0)
+#ifndef smp_cond_load_acquire
+#define smp_cond_load_acquire(ptr, cond_expr) ({               \
+       typeof(ptr) __PTR = (ptr);                              \
+       typeof(*ptr) VAL;                                       \
+       for (;;) {                                              \
+               VAL = READ_ONCE(*__PTR);                        \
+               if (cond_expr)                                  \
+                       break;                                  \
+               cpu_relax();                                    \
+       }                                                       \
+       smp_rmb(); /* ctrl + rmb := acquire */                  \
+       VAL;                                                    \
+})
+#endif
 
 #endif /* __KERNEL__ */
 
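As a usage illustration (a minimal sketch, not part of this patch: struct foo,
FOO_LOCKED and foo_wait_unlocked() are hypothetical), a caller waiting for a
lock bit to clear could write:

	struct foo {
		unsigned int flags;
	};

	#define FOO_LOCKED	0x01

	/* Spin until FOO_LOCKED clears; return the flags value observed. */
	static unsigned int foo_wait_unlocked(struct foo *f)
	{
		return smp_cond_load_acquire(&f->flags, !(VAL & FOO_LOCKED));
	}

The load is ordered like smp_load_acquire(): anything the unlocking CPU
published before its smp_store_release() is visible once the call returns.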
index 2f9153b183c9c3b30b454793f7ceebc0061a65c2..1b8dda90ebfa4d9fa999b0631429340673bdf8bd 100644 (file)
@@ -475,7 +475,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
         * sequentiality; this is because not all clear_pending_set_locked()
         * implementations imply full barriers.
         */
-       smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
+       smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
 
        /*
         * take ownership and clear the pending bit.
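Note the new first argument: the macro dereferences its pointer with
READ_ONCE(), so callers pass the plain scalar inside the atomic_t rather
than the atomic_t itself (condensed from the kernel's atomic type
definition):

	typedef struct {
		int counter;
	} atomic_t;

	/* hence &lock->val.counter (an int *), not &lock->val (an atomic_t *) */
	smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));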
@@ -562,7 +562,7 @@ queue:
         *
         * The PV pv_wait_head_or_lock function, if active, will acquire
         * the lock and return a non-zero value. So we have to skip the
-        * smp_cond_acquire() call. As the next PV queue head hasn't been
+        * smp_cond_load_acquire() call. As the next PV queue head hasn't been
         * designated yet, there is no way for the locked value to become
         * _Q_SLOW_VAL. So both the set_locked() and the
         * atomic_cmpxchg_relaxed() calls will be safe.
@@ -573,7 +573,7 @@ queue:
        if ((val = pv_wait_head_or_lock(lock, node)))
                goto locked;
 
-       smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
+       val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
 
 locked:
        /*
@@ -593,9 +593,9 @@ locked:
                        break;
                }
                /*
-                * The smp_cond_acquire() call above has provided the necessary
-                * acquire semantics required for locking. At most two
-                * iterations of this loop may be ran.
+                * The smp_cond_load_acquire() call above has provided the
+                * acquire semantics required for locking. At most two
+                * iterations of this loop may run.
                 */
                old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
                if (old == val)
index 017d5394f5dc9206c21259d61f77936396796448..5cd6931cb2cb4827388e4c372ba52f7749af127f 100644 (file)
@@ -1935,7 +1935,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * chain to provide order. Instead we do:
  *
  *   1) smp_store_release(X->on_cpu, 0)
- *   2) smp_cond_acquire(!X->on_cpu)
+ *   2) smp_cond_load_acquire(&X->on_cpu, !VAL)
  *
  * Example:
  *
@@ -1946,7 +1946,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  *   sched-out X
  *   smp_store_release(X->on_cpu, 0);
  *
- *                    smp_cond_acquire(!X->on_cpu);
+ *                    smp_cond_load_acquire(&X->on_cpu, !VAL);
  *                    X->state = WAKING
  *                    set_task_cpu(X,2)
  *
@@ -1972,7 +1972,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * This means that any means of doing remote wakeups must order the CPU doing
  * the wakeup against the CPU the task is going to end up running on. This,
  * however, is already required for the regular Program-Order guarantee above,
- * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire).
+ * since the waking CPU is the one issuing the ACQUIRE (smp_cond_load_acquire).
  *
  */
 
@@ -2045,7 +2045,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         * This ensures that tasks getting woken will be fully ordered against
         * their previous state and preserve Program Order.
         */
-       smp_cond_acquire(!p->on_cpu);
+       smp_cond_load_acquire(&p->on_cpu, !VAL);
 
        p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
index 72f1f3087b04a72fc0afc9ff81d768018eb05391..425bf5ddaa5a06159833be834a5b53713be095ed 100644 (file)
@@ -1113,7 +1113,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
         * In particular, the load of prev->state in finish_task_switch() must
         * happen before this.
         *
-        * Pairs with the smp_cond_acquire() in try_to_wake_up().
+        * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
         */
        smp_store_release(&prev->on_cpu, 0);
 #endif
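Putting the two sides of that pairing next to each other (condensed from the
scheduler hunks above, no new code):

	/* CPU scheduling X out, in finish_lock_switch(): */
	smp_store_release(&prev->on_cpu, 0);

	/* CPU waking X up, in try_to_wake_up(): */
	smp_cond_load_acquire(&p->on_cpu, !VAL);
	/* everything written before the release above is now visible */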
index 74165443c240147cd701489298d2b552c8f46adf..36552beed39713526aa384a9cf9f1878630834fb 100644 (file)
@@ -107,7 +107,7 @@ void __init call_function_init(void)
  */
 static __always_inline void csd_lock_wait(struct call_single_data *csd)
 {
-       smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
+       smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 }
 
 static __always_inline void csd_lock(struct call_single_data *csd)
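Finally, the #ifndef guard around the generic definition lets an architecture
with address-monitoring instructions provide its own version, which is the
point of passing a pointer. A hedged sketch of the shape such an override
could take (__wait_for_update() is a hypothetical primitive, not an existing
kernel API):

	#define smp_cond_load_acquire(ptr, cond_expr) ({		\
		typeof(ptr) __PTR = (ptr);				\
		typeof(*ptr) VAL;					\
		for (;;) {						\
			VAL = smp_load_acquire(__PTR);			\
			if (cond_expr)					\
				break;					\
			/* hypothetical: stall until *__PTR changes */	\
			__wait_for_update(__PTR, VAL);			\
		}							\
		VAL;							\
	})

Using smp_load_acquire() for the load makes the trailing smp_rmb() of the
generic version unnecessary.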