sched/core: Create preempt_count invariant
author Peter Zijlstra <peterz@infradead.org>
Mon, 28 Sep 2015 15:52:18 +0000 (17:52 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 6 Oct 2015 15:08:14 +0000 (17:08 +0200)
In what follows, preempt_count() values are given in units of
PREEMPT_DISABLE_OFFSET.

Now that TASK_DEAD no longer results in preempt_count() == 3 during
scheduling, we will always call context_switch() with preempt_count()
== 2.

However, we don't always end up with preempt_count() == 2 in
finish_task_switch() because new tasks get created with
preempt_count() == 1.

Create FORK_PREEMPT_COUNT and set it to 2 and use that in the right
places. Note that we cannot use INIT_PREEMPT_COUNT as that serves
another purpose (boot).

After this, preempt_count() is invariant across the context switch,
with the exception of PREEMPT_ACTIVE.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/preempt.h
include/asm-generic/preempt.h
include/linux/sched.h
kernel/sched/core.c

index b12f81022a6b2c574f9c037e616f90373502c24e..01e700d392cbae9119a265c0def43fca01d4eea5 100644 (file)
@@ -31,7 +31,7 @@ static __always_inline void preempt_count_set(int pc)
  * must be macros to avoid header recursion hell
  */
 #define init_task_preempt_count(p) do { \
-       task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
+       task_thread_info(p)->saved_preempt_count = FORK_PREEMPT_COUNT; \
 } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
index 0bec580a48854f0a49b012b203663ebeddf98257..5d8ffa3e6f8c8a4e3715f3da4294abc860390386 100644 (file)
@@ -24,7 +24,7 @@ static __always_inline void preempt_count_set(int pc)
  * must be macros to avoid header recursion hell
  */
 #define init_task_preempt_count(p) do { \
-       task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
+       task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
 } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
index e5b8cbc4b8d66f131ffefaca3bbed7c2f82e2cd9..23ca455d9582693173bd85b9468f9f487164a86c 100644 (file)
@@ -599,11 +599,7 @@ struct task_cputime_atomic {
                .sum_exec_runtime = ATOMIC64_INIT(0),           \
        }
 
-#ifdef CONFIG_PREEMPT_COUNT
-#define PREEMPT_DISABLED       (1 + PREEMPT_ENABLED)
-#else
-#define PREEMPT_DISABLED       PREEMPT_ENABLED
-#endif
+#define PREEMPT_DISABLED       (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
 
 /*
  * Disable preemption until the scheduler is running -- use an unconditional
@@ -613,6 +609,17 @@ struct task_cputime_atomic {
  */
 #define INIT_PREEMPT_COUNT     PREEMPT_OFFSET
 
+/*
+ * Initial preempt_count value; reflects the preempt_count schedule invariant
+ * which states that during context switches:
+ *
+ *    preempt_count() == 2*PREEMPT_DISABLE_OFFSET
+ *
+ * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
+ * Note: See finish_task_switch().
+ */
+#define FORK_PREEMPT_COUNT     (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
+
 /**
  * struct thread_group_cputimer - thread group interval timer counts
  * @cputime_atomic:    atomic thread group interval timers.
index 530fe8baa6450b7f69ae538e231502c1facd44cd..8d8722b84dee752b1f2067e1932708c5c4704dd7 100644 (file)
@@ -2504,6 +2504,18 @@ static struct rq *finish_task_switch(struct task_struct *prev)
        struct mm_struct *mm = rq->prev_mm;
        long prev_state;
 
+       /*
+        * The previous task will have left us with a preempt_count of 2
+        * because it left us after:
+        *
+        *      schedule()
+        *        preempt_disable();                    // 1
+        *        __schedule()
+        *          raw_spin_lock_irq(&rq->lock)        // 2
+        *
+        * Also, see FORK_PREEMPT_COUNT.
+        */
+
        rq->prev_mm = NULL;
 
        /*
@@ -2588,8 +2600,15 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 {
        struct rq *rq;
 
-       /* finish_task_switch() drops rq->lock and enables preemtion */
-       preempt_disable();
+       /*
+        * New tasks start with FORK_PREEMPT_COUNT, see there and
+        * finish_task_switch() for details.
+        *
+        * finish_task_switch() will drop rq->lock() and lower preempt_count
+        * and the preempt_enable() will end up enabling preemption (on
+        * PREEMPT_COUNT kernels).
+        */
+
        rq = finish_task_switch(prev);
        balance_callback(rq);
        preempt_enable();