sched/deadline: Implement cancel_dl_timer() to use in switched_from_dl()
author Kirill Tkhai <ktkhai@parallels.com>
Mon, 27 Oct 2014 14:40:52 +0000 (17:40 +0300)
committer Ingo Molnar <mingo@kernel.org>
Tue, 4 Nov 2014 06:17:50 +0000 (07:17 +0100)
The currently used hrtimer_try_to_cancel() is racy:

raw_spin_lock(&rq->lock)
...                            dl_task_timer                 raw_spin_lock(&rq->lock)
...                               raw_spin_lock(&rq->lock)   ...
   switched_from_dl()             ...                        ...
      hrtimer_try_to_cancel()     ...                        ...
   switched_to_fair()             ...                        ...
...                               ...                        ...
...                               ...                        ...
raw_spin_unlock(&rq->lock)        ...                        (acquired)
...                               ...                        ...
...                               ...                        ...
do_exit()                         ...                        ...
   schedule()                     ...                        ...
      raw_spin_lock(&rq->lock)    ...                        raw_spin_unlock(&rq->lock)
      ...                         ...                        ...
      raw_spin_unlock(&rq->lock)  ...                        raw_spin_lock(&rq->lock)
      ...                         ...                        (acquired)
      put_task_struct()           ...                        ...
          free_task_struct()      ...                        ...
      ...                         ...                        raw_spin_unlock(&rq->lock)
...                               (acquired)                 ...
...                               ...                        ...
...                               (use after free)           ...

So, let's implement a 100% guaranteed way to cancel the timer and be sure
we are safe even in very unlikely situations.

Unlocking the rq does not restrict where switched_from_dl() may be used,
because dropping rq->lock has already been possible in pull_dl_task()
below.
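
For context, pull_dl_task() can already end up releasing this rq's lock
through double_lock_balance(); roughly (paraphrased from
kernel/sched/deadline.c, not verbatim):

    static int pull_dl_task(struct rq *this_rq)
    {
            ...
            /*
             * double_lock_balance() may drop this_rq->lock while taking
             * both rq locks in the proper order, so callers already have
             * to cope with an rq->lock 'hole' here.
             */
            double_lock_balance(this_rq, src_rq);
            ...
            double_unlock_balance(this_rq, src_rq);
            ...
    }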

Let's consider the safety of this unlocking. The new code in the patch
only acts when hrtimer_try_to_cancel() fails, which means the callback is
running. In that case hrtimer_cancel() simply waits until the callback has
finished. Two situations are possible:

1) Since we are in switched_from_dl(), the new class is not dl_sched_class
and the new prio is not less than MAX_DL_PRIO. So, the callback returns
early, right after the !dl_task() check (see the sketch after case 2), and
hrtimer_cancel() then returns as well.

The above is:

raw_spin_lock(rq->lock);                  ...
...                                       dl_task_timer()
...                                          raw_spin_lock(rq->lock);
   switched_from_dl()                        ...
       hrtimer_try_to_cancel()               ...
          raw_spin_unlock(rq->lock);         ...
          hrtimer_cancel()                   ...
          ...                                raw_spin_unlock(rq->lock);
          ...                                return HRTIMER_NORESTART;
          ...                             ...
          raw_spin_lock(rq->lock);        ...

2) But the following is also possible:
                                   dl_task_timer()
                                      raw_spin_lock(rq->lock);
                                      ...
                                      raw_spin_unlock(rq->lock);
raw_spin_lock(rq->lock);              ...
   switched_from_dl()                 ...
       hrtimer_try_to_cancel()        ...
       ...                            return HRTIMER_NORESTART;
       raw_spin_unlock(rq->lock);     ...
       hrtimer_cancel();              ...
       raw_spin_lock(rq->lock);       ...

In this case hrtimer_cancel() returns immediately. This is a very unlikely
case, mentioned just for completeness.
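
Both cases rely on the bail-out at the top of dl_task_timer(); in sketch
form (paraphrased, not the verbatim kernel source):

    static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
    {
            ...
            raw_spin_lock(&rq->lock);

            /*
             * The task left SCHED_DEADLINE while the timer was pending
             * (our switched_from_dl() case): do nothing.
             */
            if (!dl_task(p))
                    goto unlock;
            ...
    unlock:
            raw_spin_unlock(&rq->lock);
            return HRTIMER_NORESTART;
    }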

Nobody can manipulate the task, because check_class_changed() is always
called with pi_lock held. Nobody can force the task to participate in
(concurrent) priority inheritance schemes (for the same reason).

All concurrent task operations require pi_lock, which is held by us.
No deadlocks with dl_task_timer() are possible, because it returns
right after !dl_task() check (it does nothing).

If a new dl task appears on the rq while it is unlocked, we simply no
longer need to do pull_dl_task() later in switched_from_dl().
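
The pi_lock guarantee comes from the callers of check_class_changed();
e.g. __sched_setscheduler() takes both locks via task_rq_lock(), so the
calling context is roughly (sketch, not verbatim):

    raw_spin_lock_irqsave(&p->pi_lock, flags);  /* what task_rq_lock() does */
    raw_spin_lock(&rq->lock);
    ...
    check_class_changed(rq, p, prev_class, oldprio);
            /*
             * switched_from_dl() may drop and re-take rq->lock in here,
             * but p->pi_lock stays held the whole time.
             */
    ...
    raw_spin_unlock(&rq->lock);
    raw_spin_unlock_irqrestore(&p->pi_lock, flags);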

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
[ Added comments]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1414420852.19914.186.camel@tkhai
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/sched.h

index 0cd34e68680cab320080babdf1d5723f8fef4cdf..379cb87da69df36798a38bb66551a97acf091dbb 100644 (file)
@@ -1008,6 +1008,9 @@ inline int task_curr(const struct task_struct *p)
        return cpu_curr(task_cpu(p)) == p;
 }
 
+/*
+ * Can drop rq->lock because sched_class::switched_from() methods may drop it.
+ */
 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
                                       const struct sched_class *prev_class,
                                       int oldprio)
@@ -1015,6 +1018,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
        if (prev_class != p->sched_class) {
                if (prev_class->switched_from)
                        prev_class->switched_from(rq, p);
+               /* Possible rq->lock 'hole'.  */
                p->sched_class->switched_to(rq, p);
        } else if (oldprio != p->prio || dl_task(p))
                p->sched_class->prio_changed(rq, p, oldprio);
index 2e31a30e623c220401ddd9bd2edf48130610235c..9d483e862e58d9954cde5d83293e413812d0a35e 100644 (file)
@@ -563,11 +563,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
        struct hrtimer *timer = &dl_se->dl_timer;
 
-       if (hrtimer_active(timer)) {
-               hrtimer_try_to_cancel(timer);
-               return;
-       }
-
        hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        timer->function = dl_task_timer;
 }
@@ -1610,10 +1605,35 @@ void init_sched_dl_class(void)
 
 #endif /* CONFIG_SMP */
 
+/*
+ *  Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
+ */
+static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
+{
+       struct hrtimer *dl_timer = &p->dl.dl_timer;
+
+       /* Nobody will change task's class if pi_lock is held */
+       lockdep_assert_held(&p->pi_lock);
+
+       if (hrtimer_active(dl_timer)) {
+               int ret = hrtimer_try_to_cancel(dl_timer);
+
+               if (unlikely(ret == -1)) {
+                       /*
+                        * Note, p may migrate OR new deadline tasks
+                        * may appear in rq when we are unlocking it.
+                        * A caller of us must be fine with that.
+                        */
+                       raw_spin_unlock(&rq->lock);
+                       hrtimer_cancel(dl_timer);
+                       raw_spin_lock(&rq->lock);
+               }
+       }
+}
+
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
-       if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
-               hrtimer_try_to_cancel(&p->dl.dl_timer);
+       cancel_dl_timer(rq, p);
 
        __dl_clear_params(p);
 
index ec3917c5f898fc41513126fae54e9f3f51771bd7..49b941fe2cc29c74e24a31181b115780ed1c3344 100644 (file)
@@ -1157,6 +1157,11 @@ struct sched_class {
        void (*task_fork) (struct task_struct *p);
        void (*task_dead) (struct task_struct *p);
 
+       /*
+        * The switched_from() call is allowed to drop rq->lock, therefore we
+        * cannot assume the switched_from/switched_to pair is serialized by
+        * rq->lock. They are however serialized by p->pi_lock.
+        */
        void (*switched_from) (struct rq *this_rq, struct task_struct *task);
        void (*switched_to) (struct rq *this_rq, struct task_struct *task);
        void (*prio_changed) (struct rq *this_rq, struct task_struct *task,