futex: Drop hb->lock before enqueueing on the rtmutex

author Peter Zijlstra <peterz@infradead.org>

Wed, 22 Mar 2017 10:36:00 +0000 (11:36 +0100)

committer Thomas Gleixner <tglx@linutronix.de>

Thu, 23 Mar 2017 18:14:59 +0000 (19:14 +0100)
author Peter Zijlstra <peterz@infradead.org>
Wed, 22 Mar 2017 10:36:00 +0000 (11:36 +0100)
committer Thomas Gleixner <tglx@linutronix.de>
Thu, 23 Mar 2017 18:14:59 +0000 (19:14 +0100)
diff --git a/kernel/futex.c b/kernel/futex.c

index 4cdc603b00c31a324fbd03be734c243edc617a74..628be42296ebf31aafbf839633c4e8e2c01715d0 100644 (file)
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2654,20 +2654,33 @@ retry_private:
                 goto no_block;
         }
  
+       rt_mutex_init_waiter(&rt_waiter);
+
         /*
-        * We must add ourselves to the rt_mutex waitlist while holding hb->lock
-        * such that the hb and rt_mutex wait lists match.
+        * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
+        * hold it while doing rt_mutex_start_proxy_lock(), because then it will
+        * include hb->lock in the blocking chain, even though we'll not in
+        * fact hold it while blocking. This will lead it to report -EDEADLK
+        * and BUG when futex_unlock_pi() interleaves with this.
+        *
+        * Therefore acquire wait_lock while holding hb->lock, but drop the
+        * latter before calling rt_mutex_start_proxy_lock(). This still fully
+        * serializes against futex_unlock_pi() as that does the exact same
+        * lock handoff sequence.
          */
-       rt_mutex_init_waiter(&rt_waiter);
-       ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+       raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
+       spin_unlock(q.lock_ptr);
+       ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+       raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
+
         if (ret) {
                 if (ret == 1)
                         ret = 0;
  
+               spin_lock(q.lock_ptr);
                 goto no_block;
         }
  
-       spin_unlock(q.lock_ptr);
  
         if (unlikely(to))
                 hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
@@ -2680,6 +2693,9 @@ retry_private:
          * first acquire the hb->lock before removing the lock from the
          * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
          * wait lists consistent.
+        *
+        * In particular, it is important that futex_unlock_pi() cannot
+        * observe this inconsistency.
          */
         if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
                 ret = 0;
@@ -2791,10 +2807,6 @@ retry:
  
                 get_pi_state(pi_state);
                 /*
-                * Since modifying the wait_list is done while holding both
-                * hb->lock and wait_lock, holding either is sufficient to
-                * observe it.
-                *
                  * By taking wait_lock while still holding hb->lock, we ensure
                  * there is no point where we hold neither; and therefore
                  * wake_futex_pi() must observe a state consistent with what we
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c

index 48418a1733b8538faa8145d000c532b032f23150..dd103124166be2562e26b60fdaab1e2d773eff81 100644 (file)
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1669,31 +1669,14 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
         rt_mutex_set_owner(lock, NULL);
  }
  
-/**
- * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
- * @lock:              the rt_mutex to take
- * @waiter:            the pre-initialized rt_mutex_waiter
- * @task:              the task to prepare
- *
- * Returns:
- *  0 - task blocked on lock
- *  1 - acquired the lock for task, caller should wake it up
- * <0 - error
- *
- * Special API call for FUTEX_REQUEUE_PI support.
- */
-int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
                               struct rt_mutex_waiter *waiter,
                               struct task_struct *task)
  {
         int ret;
  
-       raw_spin_lock_irq(&lock->wait_lock);
-
-       if (try_to_take_rt_mutex(lock, task, NULL)) {
-               raw_spin_unlock_irq(&lock->wait_lock);
+       if (try_to_take_rt_mutex(lock, task, NULL))
                 return 1;
-       }
  
         /* We enforce deadlock detection for futexes */
         ret = task_blocks_on_rt_mutex(lock, waiter, task,
@@ -1712,13 +1695,37 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
         if (unlikely(ret))
                 remove_waiter(lock, waiter);
  
-       raw_spin_unlock_irq(&lock->wait_lock);
-
         debug_rt_mutex_print_deadlock(waiter);
  
         return ret;
  }
  
+/**
+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock:              the rt_mutex to take
+ * @waiter:            the pre-initialized rt_mutex_waiter
+ * @task:              the task to prepare
+ *
+ * Returns:
+ *  0 - task blocked on lock
+ *  1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for FUTEX_REQUEUE_PI support.
+ */
+int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+                             struct rt_mutex_waiter *waiter,
+                             struct task_struct *task)
+{
+       int ret;
+
+       raw_spin_lock_irq(&lock->wait_lock);
+       ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
+       raw_spin_unlock_irq(&lock->wait_lock);
+
+       return ret;
+}
+
  /**
   * rt_mutex_next_owner - return the next owner of the lock
   *
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h

index 1e93e15a0e452c27e4a84a01b67c86765001ecc4..b1ccfea2effe8683f8be147710ac0c84a0f00fe9 100644 (file)
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -104,6 +104,9 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
                                   struct task_struct *proxy_owner);
  extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
+extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+                                    struct rt_mutex_waiter *waiter,
+                                    struct task_struct *task);
  extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
                                      struct rt_mutex_waiter *waiter,
                                      struct task_struct *task);
author	Peter Zijlstra <peterz@infradead.org>
	Wed, 22 Mar 2017 10:36:00 +0000 (11:36 +0100)
committer	Thomas Gleixner <tglx@linutronix.de>
	Thu, 23 Mar 2017 18:14:59 +0000 (19:14 +0100)
kernel/futex.c		patch \| blob \| blame \| history
kernel/locking/rtmutex.c		patch \| blob \| blame \| history
kernel/locking/rtmutex_common.h		patch \| blob \| blame \| history