futex: Replace PF_EXITPIDONE with a state
authorThomas Gleixner <tglx@linutronix.de>
Wed, 6 Nov 2019 21:55:37 +0000 (22:55 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 5 Dec 2019 14:38:23 +0000 (15:38 +0100)
commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream.

The futex exit handling relies on PF_ flags. That's suboptimal as it
requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in
the middle of do_exit() to enforce the observability of PF_EXITING in the
futex code.

Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic
over to the new state. The PF_EXITING dependency will be cleaned up in a
later step.

This prepares for handling various futex exit issues later.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/linux/futex.h
include/linux/sched.h
kernel/exit.c
kernel/futex.c

index bc4138b87035530021c86339fff9554b2935b617..35703ec3a2556e7e4e4e942e435749a8ead758ab 100644 (file)
@@ -53,6 +53,10 @@ union futex_key {
 #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
 
 #ifdef CONFIG_FUTEX
+enum {
+       FUTEX_STATE_OK,
+       FUTEX_STATE_DEAD,
+};
 
 static inline void futex_init_task(struct task_struct *tsk)
 {
@@ -62,6 +66,34 @@ static inline void futex_init_task(struct task_struct *tsk)
 #endif
        INIT_LIST_HEAD(&tsk->pi_state_list);
        tsk->pi_state_cache = NULL;
+       tsk->futex_state = FUTEX_STATE_OK;
+}
+
+/**
+ * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk:       task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This has two callers:
+ *
+ * - futex_mm_release() after the futex exit cleanup has been done
+ *
+ * - do_exit() from the recursive fault handling path.
+ *
+ * In case of a recursive fault this is best effort. Either the futex exit
+ * code has run already or not. If the OWNER_DIED bit has been set on the
+ * futex then the waiter can take it over. If not, the problem is pushed
+ * back to user space. If the futex exit code did not run yet, then an
+ * already queued waiter might block forever, but there is nothing which
+ * can be done about that.
+ */
+static inline void futex_exit_done(struct task_struct *tsk)
+{
+       tsk->futex_state = FUTEX_STATE_DEAD;
 }
 
 void futex_mm_release(struct task_struct *tsk);
@@ -71,6 +103,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 #else
 static inline void futex_init_task(struct task_struct *tsk) { }
 static inline void futex_mm_release(struct task_struct *tsk) { }
+static inline void futex_exit_done(struct task_struct *tsk) { }
 #endif
 
 #endif
index 866439c361a95e4a2bed7af6eee32125e00f02e9..6a3e0053791a4d62f386eaaf82e410d849f4c8b1 100644 (file)
@@ -959,6 +959,7 @@ struct task_struct {
 #endif
        struct list_head                pi_state_list;
        struct futex_pi_state           *pi_state_cache;
+       unsigned int                    futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
        struct perf_event_context       *perf_event_ctxp[perf_nr_task_contexts];
@@ -1334,7 +1335,6 @@ extern struct pid *cad_pid;
  */
 #define PF_IDLE                        0x00000002      /* I am an IDLE thread */
 #define PF_EXITING             0x00000004      /* Getting shut down */
-#define PF_EXITPIDONE          0x00000008      /* PI exit done on shut down */
 #define PF_VCPU                        0x00000010      /* I'm a virtual CPU */
 #define PF_WQ_WORKER           0x00000020      /* I'm a workqueue worker */
 #define PF_FORKNOEXEC          0x00000040      /* Forked but didn't exec */
index 15437cfdcd70d7314494840e3a1c74c6f6bcfdc9..c5435a2d37c3325d3abd864fa430f8584b2fa86d 100644 (file)
@@ -803,16 +803,7 @@ void __noreturn do_exit(long code)
         */
        if (unlikely(tsk->flags & PF_EXITING)) {
                pr_alert("Fixing recursive fault but reboot is needed!\n");
-               /*
-                * We can do this unlocked here. The futex code uses
-                * this flag just to verify whether the pi state
-                * cleanup has been done or not. In the worst case it
-                * loops once more. We pretend that the cleanup was
-                * done as there is no way to return. Either the
-                * OWNER_DIED bit is set by now or we push the blocked
-                * task into the wait for ever nirwana as well.
-                */
-               tsk->flags |= PF_EXITPIDONE;
+               futex_exit_done(tsk);
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule();
        }
@@ -902,12 +893,7 @@ void __noreturn do_exit(long code)
         * Make sure we are holding no locks:
         */
        debug_check_no_locks_held();
-       /*
-        * We can do this unlocked here. The futex code uses this flag
-        * just to verify whether the pi state cleanup has been done
-        * or not. In the worst case it loops once more.
-        */
-       tsk->flags |= PF_EXITPIDONE;
+       futex_exit_done(tsk);
 
        if (tsk->io_context)
                exit_io_context(tsk);
index 75edb19fa3a2330acfe7896e8601a268d76fa5a0..6d576c421abbc080f72c933bd99a95223b8fe999 100644 (file)
@@ -1182,9 +1182,10 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
        u32 uval2;
 
        /*
-        * If PF_EXITPIDONE is not yet set, then try again.
+        * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
+        * for it to finish.
         */
-       if (tsk && !(tsk->flags & PF_EXITPIDONE))
+       if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
                return -EAGAIN;
 
        /*
@@ -1203,8 +1204,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
         *    *uaddr = 0xC0000000;           tsk = get_task(PID);
         *   }                               if (!tsk->flags & PF_EXITING) {
         *  ...                                attach();
-        *  tsk->flags |= PF_EXITPIDONE;     } else {
-        *                                     if (!(tsk->flags & PF_EXITPIDONE))
+        *  tsk->futex_state =               } else {
+        *      FUTEX_STATE_DEAD;              if (tsk->futex_state !=
+        *                                        FUTEX_STATE_DEAD)
         *                                       return -EAGAIN;
         *                                     return -ESRCH; <--- FAIL
         *                                   }
@@ -1260,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
        }
 
        /*
-        * We need to look at the task state flags to figure out,
-        * whether the task is exiting. To protect against the do_exit
-        * change of the task flags, we do this protected by
-        * p->pi_lock:
+        * We need to look at the task state to figure out, whether the
+        * task is exiting. To protect against the change of the task state
+        * in futex_exit_release(), we do this protected by p->pi_lock:
         */
        raw_spin_lock_irq(&p->pi_lock);
-       if (unlikely(p->flags & PF_EXITING)) {
+       if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
                /*
-                * The task is on the way out. When PF_EXITPIDONE is
-                * set, we know that the task has finished the
-                * cleanup:
+                * The task is on the way out. When the futex state is
+                * FUTEX_STATE_DEAD, we know that the task has finished
+                * the cleanup:
                 */
                int ret = handle_exit_race(uaddr, uval, p);