return &cpu_rq(i)->rd->dl_bw;
}
-static inline int dl_bw_cpus(int i)
+inline int dl_bw_cpus(int i)
{
struct root_domain *rd = cpu_rq(i)->rd;
int cpus = 0;
return &cpu_rq(i)->dl.dl_bw;
}
-static inline int dl_bw_cpus(int i)
+inline int dl_bw_cpus(int i)
{
return 1;
}
if (dl_policy(policy) && !task_has_dl_policy(p) &&
!__dl_overflow(dl_b, cpus, 0, new_bw)) {
if (hrtimer_active(&p->dl.inactive_timer))
- __dl_clear(dl_b, p->dl.dl_bw);
- __dl_add(dl_b, new_bw);
+ __dl_clear(dl_b, p->dl.dl_bw, cpus);
+ __dl_add(dl_b, new_bw, cpus);
err = 0;
} else if (dl_policy(policy) && task_has_dl_policy(p) &&
!__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
* But this would require to set the task's "inactive
* timer" when the task is not inactive.
*/
- __dl_clear(dl_b, p->dl.dl_bw);
- __dl_add(dl_b, new_bw);
+ __dl_clear(dl_b, p->dl.dl_bw, cpus);
+ __dl_add(dl_b, new_bw, cpus);
dl_change_utilization(p, new_bw);
err = 0;
} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
* We will free resources in the source root_domain
* later on (see set_cpus_allowed_dl()).
*/
- __dl_add(dl_b, p->dl.dl_bw);
+ __dl_add(dl_b, p->dl.dl_bw, cpus);
}
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
rcu_read_unlock_sched();
{
if (global_rt_runtime() == RUNTIME_INF) {
dl_rq->bw_ratio = 1 << RATIO_SHIFT;
+ dl_rq->extra_bw = 1 << BW_SHIFT;
} else {
dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
+ dl_rq->extra_bw = to_ratio(global_rt_period(),
+ global_rt_runtime());
}
}
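For the default RT bandwidth settings (sched_rt_runtime_us = 950000, sched_rt_period_us = 1000000), the following minimal user-space sketch works through the two computations above; to_ratio() here is assumed to mirror the kernel helper of the same name, i.e. runtime scaled by 2^BW_SHIFT and divided by period:

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT	20
#define RATIO_SHIFT	8

/* Assumed behaviour of the kernel's to_ratio(): runtime/period in BW_SHIFT fixed point. */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	uint64_t rt_runtime = 950000;	/* default sched_rt_runtime_us */
	uint64_t rt_period  = 1000000;	/* default sched_rt_period_us  */

	/* 1 / Umax in RATIO_SHIFT fixed point: ~1.0526 * 2^8 = 269 */
	uint64_t bw_ratio = to_ratio(rt_runtime, rt_period) >> (BW_SHIFT - RATIO_SHIFT);

	/* Umax in BW_SHIFT fixed point: ~0.95 * 2^20 = 996147 */
	uint64_t extra_bw = to_ratio(rt_period, rt_runtime);

	printf("bw_ratio = %llu, extra_bw = %llu\n",
	       (unsigned long long)bw_ratio, (unsigned long long)extra_bw);
	return 0;
}

So with the default 95% limit, bw_ratio holds 1/Umax in RATIO_SHIFT fixed point and extra_bw starts out at Umax in BW_SHIFT fixed point, before any deadline task is admitted.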
if (p->state == TASK_DEAD)
sub_rq_bw(p->dl.dl_bw, &rq->dl);
raw_spin_lock(&dl_b->lock);
- __dl_clear(dl_b, p->dl.dl_bw);
+ __dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
__dl_clear_params(p);
raw_spin_unlock(&dl_b->lock);
}
/*
* This function implements the GRUB accounting rule:
* according to the GRUB reclaiming algorithm, the runtime is
- * not decreased as "dq = -dt", but as "dq = -max{u, (1 - Uinact)} dt",
- * where u is the utilization of the task and Uinact is the
- * (per-runqueue) inactive utilization, computed as the difference
- * between the "total runqueue utilization" and the runqueue
- * active utilization.
+ * not decreased as "dq = -dt", but as
+ * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
+ * where u is the utilization of the task, Umax is the maximum reclaimable
+ * utilization, Uinact is the (per-runqueue) inactive utilization, computed
+ * as the difference between the "total runqueue utilization" and the
+ * runqueue active utilization, and Uextra is the (per runqueue) extra
+ * reclaimable utilization.
* Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
- * multiplied by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
+ * multiplied by 2^BW_SHIFT, the result has to be shifted right by
+ * BW_SHIFT.
+ * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
+ * dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
+ * Since delta is a 64-bit variable, for the multiplication to overflow
+ * its value should be larger than 2^(64 - 20 - 8) ns, which is more than
+ * 64 seconds. So, overflow is not an issue here.
*/
u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
{
u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
u64 u_act;
+ u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
/*
- * Instead of computing max{u, (1 - u_inact)}, we compare
- * u_inact with 1 - u, because u_inact can be larger than 1
- * (so, 1 - u_inact would be negative leading to wrong results)
+ * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
+ * we compare u_inact + rq->dl.extra_bw with
+ * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
+ * u_inact + rq->dl.extra_bw can be larger than 1
+ * (so, 1 - u_inact - rq->dl.extra_bw would be negative,
+ * leading to wrong results)
*/
- if (u_inact > BW_UNIT - dl_se->dl_bw)
- u_act = dl_se->dl_bw;
+ if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
+ u_act = u_act_min;
else
- u_act = BW_UNIT - u_inact;
+ u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
return (delta * u_act) >> BW_SHIFT;
}
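To see the rule in action, here is a self-contained user-space sketch of the same fixed-point computation (assuming BW_SHIFT = 20 and RATIO_SHIFT = 8): a single 25% task admitted alone on a one-CPU root domain with the default Umax = 0.95, so that __dl_add() has already lowered extra_bw from Umax by the task's bandwidth. In that scenario u_act comes out at roughly 0.3, i.e. one millisecond of execution consumes about 0.3 ms of runtime:

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT	20
#define BW_UNIT		(1 << BW_SHIFT)
#define RATIO_SHIFT	8

/* Stand-alone copy of the reclaiming rule above, for illustration only. */
static uint64_t grub_reclaim_demo(uint64_t delta, uint64_t this_bw,
				  uint64_t running_bw, uint64_t extra_bw,
				  uint64_t bw_ratio, uint64_t dl_bw)
{
	uint64_t u_inact = this_bw - running_bw;		/* Utot - Uact */
	uint64_t u_act_min = (dl_bw * bw_ratio) >> RATIO_SHIFT;	/* u / Umax */
	uint64_t u_act;

	if (u_inact + extra_bw > BW_UNIT - u_act_min)
		u_act = u_act_min;
	else
		u_act = BW_UNIT - u_inact - extra_bw;

	return (delta * u_act) >> BW_SHIFT;
}

int main(void)
{
	uint64_t dl_bw = BW_UNIT / 4;			/* u = 0.25 */
	uint64_t this_bw = dl_bw, running_bw = dl_bw;	/* task active, alone on the rq */
	uint64_t bw_ratio = 269;			/* ~(1 / 0.95) << RATIO_SHIFT */
	uint64_t umax_bw = (95 * BW_UNIT) / 100;	/* ~0.95 << BW_SHIFT */
	uint64_t extra_bw = umax_bw - dl_bw;		/* after __dl_add() on 1 CPU */

	/* Prints ~300000: 1 ms of execution consumes ~0.3 ms of runtime here. */
	printf("dq for dt = 1 ms: %llu ns\n",
	       (unsigned long long)grub_reclaim_demo(1000000, this_bw, running_bw,
						     extra_bw, bw_ratio, dl_bw));
	return 0;
}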
}
raw_spin_lock(&dl_b->lock);
- __dl_clear(dl_b, p->dl.dl_bw);
+ __dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
raw_spin_unlock(&dl_b->lock);
__dl_clear_params(p);
* until we complete the update.
*/
raw_spin_lock(&src_dl_b->lock);
- __dl_clear(src_dl_b, p->dl.dl_bw);
+ __dl_clear(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
raw_spin_unlock(&src_dl_b->lock);
}
}
extern struct dl_bw *dl_bw_of(int i);
+extern int dl_bw_cpus(int i);
struct dl_bw {
raw_spinlock_t lock;
u64 bw, total_bw;
};
+static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
+
static inline
-void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw)
+void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
dl_b->total_bw -= tsk_bw;
+ __dl_update(dl_b, (s32)tsk_bw / cpus);
}
static inline
-void __dl_add(struct dl_bw *dl_b, u64 tsk_bw)
+void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
dl_b->total_bw += tsk_bw;
+ __dl_update(dl_b, -((s32)tsk_bw / cpus));
}
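The new cpus argument spreads the task's bandwidth evenly over the root domain's runqueues. A toy user-space model (hypothetical 4-CPU root domain, not the kernel data structures) of the resulting extra_bw bookkeeping: admitting a 10 ms / 100 ms task (dl_bw ~= 0.1 << BW_SHIFT) removes dl_bw / 4 from each CPU's reclaimable bandwidth, and clearing it gives the same amount back.

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT	20
#define NR_DEMO_CPUS	4

static int64_t extra_bw[NR_DEMO_CPUS];	/* starts at Umax on every CPU */
static uint64_t total_bw;		/* mirrors dl_b->total_bw */

static void dl_add_demo(uint64_t tsk_bw, int cpus)
{
	total_bw += tsk_bw;
	for (int i = 0; i < NR_DEMO_CPUS; i++)
		extra_bw[i] -= (int32_t)tsk_bw / cpus;	/* less left to reclaim */
}

static void dl_clear_demo(uint64_t tsk_bw, int cpus)
{
	total_bw -= tsk_bw;
	for (int i = 0; i < NR_DEMO_CPUS; i++)
		extra_bw[i] += (int32_t)tsk_bw / cpus;	/* bandwidth given back */
}

int main(void)
{
	uint64_t umax = (95 * (1 << BW_SHIFT)) / 100;	/* ~0.95 */
	uint64_t tsk_bw = (1 << BW_SHIFT) / 10;		/* 10 ms every 100 ms */

	for (int i = 0; i < NR_DEMO_CPUS; i++)
		extra_bw[i] = umax;

	dl_add_demo(tsk_bw, NR_DEMO_CPUS);
	printf("after __dl_add:   extra_bw[0] = %lld\n", (long long)extra_bw[0]);
	dl_clear_demo(tsk_bw, NR_DEMO_CPUS);
	printf("after __dl_clear: extra_bw[0] = %lld\n", (long long)extra_bw[0]);
	return 0;
}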
static inline
* runqueue (inactive utilization = this_bw - running_bw).
*/
u64 this_bw;
+ u64 extra_bw;
/*
* Inverse of the fraction of CPU utilization that can be reclaimed
static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#endif
+
+#ifdef CONFIG_SMP
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+ struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
+ int i;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ "sched RCU must be held");
+ for_each_cpu_and(i, rd->span, cpu_active_mask) {
+ struct rq *rq = cpu_rq(i);
+
+ rq->dl.extra_bw += bw;
+ }
+}
+#else
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+ struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
+
+ dl->extra_bw += bw;
+}
+#endif
+
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
u64 total;