struct mm_struct *prev_mm;
u64 clock;
+ u64 clock_task;
atomic_t nr_iowait;
#endif /* CONFIG_CGROUP_SCHED */
+static u64 irq_time_cpu(int cpu);
+
inline void update_rq_clock(struct rq *rq)
{
- if (!rq->skip_clock_update)
- rq->clock = sched_clock_cpu(cpu_of(rq));
+ if (!rq->skip_clock_update) {
+ int cpu = cpu_of(rq);
+ u64 irq_time;
+
+ rq->clock = sched_clock_cpu(cpu);
+ irq_time = irq_time_cpu(cpu);
+ if (rq->clock - irq_time > rq->clock_task)
+ rq->clock_task = rq->clock - irq_time;
+ }
}
/*
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in other CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_time.
+ */
static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);
sched_clock_irqtime = 0;
}
+static u64 irq_time_cpu(int cpu)
+{
+ if (!sched_clock_irqtime)
+ return 0;
+
+ return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+
void account_system_vtime(struct task_struct *curr)
{
unsigned long flags;
local_irq_restore(flags);
}
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+ return 0;
+}
+
#endif
#include "sched_idletask.c"
if (task_current(rq, p)) {
update_rq_clock(rq);
- ns = rq->clock - p->se.exec_start;
+ ns = rq->clock_task - p->se.exec_start;
if ((s64)ns < 0)
ns = 0;
}
static void update_curr(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
- u64 now = rq_of(cfs_rq)->clock;
+ u64 now = rq_of(cfs_rq)->clock_task;
unsigned long delta_exec;
if (unlikely(!curr))
/*
* We are starting a new run period:
*/
- se->exec_start = rq_of(cfs_rq)->clock;
+ se->exec_start = rq_of(cfs_rq)->clock_task;
}
/**************************************************
* 2) too many balance attempts have failed.
*/
- tsk_cache_hot = task_hot(p, rq->clock, sd);
+ tsk_cache_hot = task_hot(p, rq->clock_task, sd);
if (!tsk_cache_hot ||
sd->nr_balance_failed > sd->cache_nice_tries) {
#ifdef CONFIG_SCHEDSTATS
if (!task_has_rt_policy(curr))
return;
- delta_exec = rq->clock - curr->se.exec_start;
+ delta_exec = rq->clock_task - curr->se.exec_start;
if (unlikely((s64)delta_exec < 0))
delta_exec = 0;
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
- curr->se.exec_start = rq->clock;
+ curr->se.exec_start = rq->clock_task;
cpuacct_charge(curr, delta_exec);
sched_rt_avg_update(rq, delta_exec);
} while (rt_rq);
p = rt_task_of(rt_se);
- p->se.exec_start = rq->clock;
+ p->se.exec_start = rq->clock_task;
return p;
}
{
struct task_struct *p = rq->curr;
- p->se.exec_start = rq->clock;
+ p->se.exec_start = rq->clock_task;
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);