sched/deadline: Track the "total rq utilization" too
Author:     Luca Abeni <luca.abeni@santannapisa.it>
AuthorDate: Thu, 18 May 2017 20:13:34 +0000 (22:13 +0200)
Commit:     Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 8 Jun 2017 08:31:53 +0000 (10:31 +0200)
The total rq utilization is defined as the sum of the utilizations of
the tasks that are "assigned" to a runqueue, independently of their
state (TASK_RUNNING or blocked). It is tracked in a new dl_rq field,
this_bw, which is increased when a task is assigned to the runqueue
and decreased when it migrates away, changes scheduling policy, or
dies. Since running_bw accounts only for the contending tasks, the
invariant running_bw <= this_bw must always hold, and the difference
this_bw - running_bw gives the runqueue's "inactive utilization".
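
For illustration only (not part of the patch): a minimal userspace
model of the two counters. The helper names mirror the patch's, but
the locking, the fixed-point bandwidth encoding and the
SCHED_WARN_ON() machinery are replaced by plain asserts, and the
scenario in main() is made up.

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  struct dl_rq {
          uint64_t running_bw;    /* utilization of the contending tasks */
          uint64_t this_bw;       /* utilization of all assigned tasks */
  };

  /* Task gets assigned to this rq (migrates in, becomes -deadline). */
  static void add_rq_bw(uint64_t dl_bw, struct dl_rq *dl_rq)
  {
          dl_rq->this_bw += dl_bw;
  }

  /* Task leaves this rq (migrates away, changes policy, dies). */
  static void sub_rq_bw(uint64_t dl_bw, struct dl_rq *dl_rq)
  {
          dl_rq->this_bw -= dl_bw;
          assert(dl_rq->running_bw <= dl_rq->this_bw);
  }

  /* Task starts contending (wakes up). */
  static void add_running_bw(uint64_t dl_bw, struct dl_rq *dl_rq)
  {
          dl_rq->running_bw += dl_bw;
          assert(dl_rq->running_bw <= dl_rq->this_bw);
  }

  /* Task stops contending (blocks and its 0-lag time passes). */
  static void sub_running_bw(uint64_t dl_bw, struct dl_rq *dl_rq)
  {
          dl_rq->running_bw -= dl_bw;
  }

  int main(void)
  {
          struct dl_rq rq = { 0, 0 };
          uint64_t bw = 100;      /* one task's bandwidth, arbitrary units */

          add_rq_bw(bw, &rq);             /* task assigned to this rq */
          add_running_bw(bw, &rq);        /* task wakes up */
          sub_running_bw(bw, &rq);        /* task blocks, 0-lag time passes */
          printf("inactive utilization = %llu\n",
                 (unsigned long long)(rq.this_bw - rq.running_bw));
          sub_rq_bw(bw, &rq);             /* task migrates away or dies */
          return 0;
  }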

Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Luca Abeni <luca.abeni@santannapisa.it>
Signed-off-by: Claudio Scordino <claudio@evidence.eu.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Link: http://lkml.kernel.org/r/1495138417-6203-8-git-send-email-luca.abeni@santannapisa.it
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/deadline.c
kernel/sched/sched.h

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 61ea3039cdc17cd8dd1907a91696f199772b5515..6c6a1f099d61edccf6b4601a4be2e2ff267f3188 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -51,6 +51,7 @@ void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
        lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
        dl_rq->running_bw += dl_bw;
        SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
+       SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
 }
 
 static inline
@@ -65,25 +66,52 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
                dl_rq->running_bw = 0;
 }
 
+static inline
+void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+       u64 old = dl_rq->this_bw;
+
+       lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+       dl_rq->this_bw += dl_bw;
+       SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
+}
+
+static inline
+void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+       u64 old = dl_rq->this_bw;
+
+       lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+       dl_rq->this_bw -= dl_bw;
+       SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
+       if (dl_rq->this_bw > old)
+               dl_rq->this_bw = 0;
+       SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+}
+
 void dl_change_utilization(struct task_struct *p, u64 new_bw)
 {
-       if (task_on_rq_queued(p))
-               return;
+       struct rq *rq;
 
-       if (!p->dl.dl_non_contending)
+       if (task_on_rq_queued(p))
                return;
 
-       sub_running_bw(p->dl.dl_bw, &task_rq(p)->dl);
-       p->dl.dl_non_contending = 0;
-       /*
-        * If the timer handler is currently running and the
-        * timer cannot be cancelled, inactive_task_timer()
-        * will see that dl_not_contending is not set, and
-        * will not touch the rq's active utilization,
-        * so we are still safe.
-        */
-       if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
-               put_task_struct(p);
+       rq = task_rq(p);
+       if (p->dl.dl_non_contending) {
+               sub_running_bw(p->dl.dl_bw, &rq->dl);
+               p->dl.dl_non_contending = 0;
+               /*
+                * If the timer handler is currently running and the
+                * timer cannot be cancelled, inactive_task_timer()
+                * will see that dl_non_contending is not set, and
+                * will not touch the rq's active utilization,
+                * so we are still safe.
+                */
+               if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+                       put_task_struct(p);
+       }
+       sub_rq_bw(p->dl.dl_bw, &rq->dl);
+       add_rq_bw(new_bw, &rq->dl);
 }
 
 /*
@@ -178,6 +206,8 @@ static void task_non_contending(struct task_struct *p)
                if (!dl_task(p) || p->state == TASK_DEAD) {
                        struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
+                       if (p->state == TASK_DEAD)
+                               sub_rq_bw(p->dl.dl_bw, &rq->dl);
                        raw_spin_lock(&dl_b->lock);
                        __dl_clear(dl_b, p->dl.dl_bw);
                        __dl_clear_params(p);
@@ -192,7 +222,7 @@ static void task_non_contending(struct task_struct *p)
        hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
 }
 
-static void task_contending(struct sched_dl_entity *dl_se)
+static void task_contending(struct sched_dl_entity *dl_se, int flags)
 {
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 
@@ -203,6 +233,9 @@ static void task_contending(struct sched_dl_entity *dl_se)
        if (dl_se->dl_runtime == 0)
                return;
 
+       if (flags & ENQUEUE_MIGRATED)
+               add_rq_bw(dl_se->dl_bw, dl_rq);
+
        if (dl_se->dl_non_contending) {
                dl_se->dl_non_contending = 0;
                /*
@@ -268,6 +301,7 @@ void init_dl_rq(struct dl_rq *dl_rq)
 #endif
 
        dl_rq->running_bw = 0;
+       dl_rq->this_bw = 0;
        init_dl_rq_bw_ratio(dl_rq);
 }
 
@@ -1042,6 +1076,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
 
                if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
                        sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+                       sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
                        dl_se->dl_non_contending = 0;
                }
 
@@ -1207,7 +1242,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
         * we want a replenishment of its runtime.
         */
        if (flags & ENQUEUE_WAKEUP) {
-               task_contending(dl_se);
+               task_contending(dl_se, flags);
                update_dl_entity(dl_se, pi_se);
        } else if (flags & ENQUEUE_REPLENISH) {
                replenish_dl_entity(dl_se, pi_se);
@@ -1260,8 +1295,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
        if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
                dl_check_constrained_dl(&p->dl);
 
-       if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE)
+       if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
+               add_rq_bw(p->dl.dl_bw, &rq->dl);
                add_running_bw(p->dl.dl_bw, &rq->dl);
+       }
 
        /*
         * If p is throttled, we do not enqueue it. In fact, if it exhausted
@@ -1277,7 +1314,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
         */
        if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
                if (flags & ENQUEUE_WAKEUP)
-                       task_contending(&p->dl);
+                       task_contending(&p->dl, flags);
 
                return;
        }
@@ -1299,8 +1336,10 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
        update_curr_dl(rq);
        __dequeue_task_dl(rq, p, flags);
 
-       if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE)
+       if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
                sub_running_bw(p->dl.dl_bw, &rq->dl);
+               sub_rq_bw(p->dl.dl_bw, &rq->dl);
+       }
 
        /*
         * This check allows to start the inactive timer (or to immediately
@@ -1394,7 +1433,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
 {
        struct rq *rq;
 
-       if (!(p->state == TASK_WAKING) || !(p->dl.dl_non_contending))
+       if (p->state != TASK_WAKING)
                return;
 
        rq = task_rq(p);
@@ -1404,18 +1443,20 @@ static void migrate_task_rq_dl(struct task_struct *p)
         * rq->lock is not... So, lock it
         */
        raw_spin_lock(&rq->lock);
-       sub_running_bw(p->dl.dl_bw, &rq->dl);
-       p->dl.dl_non_contending = 0;
-       /*
-        * If the timer handler is currently running and the
-        * timer cannot be cancelled, inactive_task_timer()
-        * will see that dl_not_contending is not set, and
-        * will not touch the rq's active utilization,
-        * so we are still safe.
-        */
-       if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
-               put_task_struct(p);
-
+       if (p->dl.dl_non_contending) {
+               sub_running_bw(p->dl.dl_bw, &rq->dl);
+               p->dl.dl_non_contending = 0;
+               /*
+                * If the timer handler is currently running and the
+                * timer cannot be cancelled, inactive_task_timer()
+                * will see that dl_non_contending is not set, and
+                * will not touch the rq's active utilization,
+                * so we are still safe.
+                */
+               if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+                       put_task_struct(p);
+       }
+       sub_rq_bw(p->dl.dl_bw, &rq->dl);
        raw_spin_unlock(&rq->lock);
 }
 
@@ -1858,7 +1899,9 @@ retry:
 
        deactivate_task(rq, next_task, 0);
        sub_running_bw(next_task->dl.dl_bw, &rq->dl);
+       sub_rq_bw(next_task->dl.dl_bw, &rq->dl);
        set_task_cpu(next_task, later_rq->cpu);
+       add_rq_bw(next_task->dl.dl_bw, &later_rq->dl);
        add_running_bw(next_task->dl.dl_bw, &later_rq->dl);
        activate_task(later_rq, next_task, 0);
        ret = 1;
@@ -1948,7 +1991,9 @@ static void pull_dl_task(struct rq *this_rq)
 
                        deactivate_task(src_rq, p, 0);
                        sub_running_bw(p->dl.dl_bw, &src_rq->dl);
+                       sub_rq_bw(p->dl.dl_bw, &src_rq->dl);
                        set_task_cpu(p, this_cpu);
+                       add_rq_bw(p->dl.dl_bw, &this_rq->dl);
                        add_running_bw(p->dl.dl_bw, &this_rq->dl);
                        activate_task(this_rq, p, 0);
                        dmin = p->dl.deadline;
@@ -2057,6 +2102,9 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
        if (task_on_rq_queued(p) && p->dl.dl_runtime)
                task_non_contending(p);
 
+       if (!task_on_rq_queued(p))
+               sub_rq_bw(p->dl.dl_bw, &rq->dl);
+
        /*
         * We cannot use inactive_task_timer() to invoke sub_running_bw()
         * at the 0-lag time, because the task could have been migrated
@@ -2086,9 +2134,11 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
                put_task_struct(p);
 
        /* If p is not queued we will update its parameters at next wakeup. */
-       if (!task_on_rq_queued(p))
-               return;
+       if (!task_on_rq_queued(p)) {
+               add_rq_bw(p->dl.dl_bw, &rq->dl);
 
+               return;
+       }
        /*
         * If p is boosted we already updated its params in
         * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH),
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 878fe757d6ad0d3148c15075455768a1c8e2c690..b7321dac03c11cf5c3e7fbdda553fd99c5a21d85 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -566,6 +566,17 @@ struct dl_rq {
         */
        u64 running_bw;
 
+       /*
+        * Utilization of the tasks "assigned" to this runqueue (including
+        * the tasks that are in runqueue and the tasks that executed on this
+        * CPU and blocked). Increased when a task moves to this runqueue, and
+        * decreased when the task moves away (migrates, changes scheduling
+        * policy, or terminates).
+        * This is needed to compute the "inactive utilization" for the
+        * runqueue (inactive utilization = this_bw - running_bw).
+        */
+       u64 this_bw;
+
        /*
         * Inverse of the fraction of CPU utilization that can be reclaimed
         * by the GRUB algorithm.
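
Note, for illustration only (not part of the patch): running_bw and
this_bw use the kernel's fixed-point bandwidth encoding, in which
to_ratio() in kernel/sched/core.c computes runtime << 20 / period
(cf. BW_SHIFT/BW_UNIT in kernel/sched/sched.h), so 1 << 20 represents
one full CPU. A standalone sketch of the resulting arithmetic, with
two made-up tasks:

  #include <stdint.h>
  #include <stdio.h>

  #define BW_SHIFT 20
  #define BW_UNIT  (1ULL << BW_SHIFT)     /* 1.0 CPU in fixed point */

  static uint64_t to_ratio(uint64_t period, uint64_t runtime)
  {
          return (runtime << BW_SHIFT) / period;
  }

  int main(void)
  {
          /* Task A: 10ms runtime every 100ms -> utilization 0.1 */
          uint64_t bw_a = to_ratio(100000000ULL, 10000000ULL);
          /* Task B: 30ms runtime every 150ms -> utilization 0.2 */
          uint64_t bw_b = to_ratio(150000000ULL, 30000000ULL);

          /* Both assigned to the rq, only A currently contending: */
          uint64_t this_bw = bw_a + bw_b;
          uint64_t running_bw = bw_a;

          printf("this_bw    = %llu (%.3f CPUs)\n",
                 (unsigned long long)this_bw, (double)this_bw / BW_UNIT);
          printf("running_bw = %llu (%.3f CPUs)\n",
                 (unsigned long long)running_bw, (double)running_bw / BW_UNIT);
          printf("inactive   = %.3f CPUs\n",
                 (double)(this_bw - running_bw) / BW_UNIT);
          return 0;
  }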