sched: Do not account irq time to current task

author Venkatesh Pallipadi <venki@google.com>

Tue, 5 Oct 2010 00:03:21 +0000 (17:03 -0700)

committer Ingo Molnar <mingo@elte.hu>

Mon, 18 Oct 2010 18:52:26 +0000 (20:52 +0200)
author Venkatesh Pallipadi <venki@google.com>
Tue, 5 Oct 2010 00:03:21 +0000 (17:03 -0700)
committer Ingo Molnar <mingo@elte.hu>
Mon, 18 Oct 2010 18:52:26 +0000 (20:52 +0200)
diff --git a/kernel/sched.c b/kernel/sched.c

index 9b302e3557912c595d739f0bc4044b743cc15929..9e01b7100ef620face9da6a0a7c7730cbc229c3b 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -491,6 +491,7 @@ struct rq {
         struct mm_struct *prev_mm;
  
         u64 clock;
+       u64 clock_task;
  
         atomic_t nr_iowait;
  
@@ -641,10 +642,19 @@ static inline struct task_group *task_group(struct task_struct *p)
  
  #endif /* CONFIG_CGROUP_SCHED */
  
+static u64 irq_time_cpu(int cpu);
+
  inline void update_rq_clock(struct rq *rq)
  {
-       if (!rq->skip_clock_update)
-               rq->clock = sched_clock_cpu(cpu_of(rq));
+       if (!rq->skip_clock_update) {
+               int cpu = cpu_of(rq);
+               u64 irq_time;
+
+               rq->clock = sched_clock_cpu(cpu);
+               irq_time = irq_time_cpu(cpu);
+               if (rq->clock - irq_time > rq->clock_task)
+                       rq->clock_task = rq->clock - irq_time;
+       }
  }
  
  /*
@@ -1910,6 +1920,18 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
  
  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
  
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in another CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_vtime.
+ */
  static DEFINE_PER_CPU(u64, cpu_hardirq_time);
  static DEFINE_PER_CPU(u64, cpu_softirq_time);
  
@@ -1926,6 +1948,14 @@ void disable_sched_clock_irqtime(void)
         sched_clock_irqtime = 0;
  }
  
+static u64 irq_time_cpu(int cpu)
+{
+       if (!sched_clock_irqtime)
+               return 0;
+
+       return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+
  void account_system_vtime(struct task_struct *curr)
  {
         unsigned long flags;
@@ -1955,6 +1985,13 @@ void account_system_vtime(struct task_struct *curr)
         local_irq_restore(flags);
  }
  
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+       return 0;
+}
+
  #endif
  
  #include "sched_idletask.c"
@@ -3322,7 +3359,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
  
         if (task_current(rq, p)) {
                 update_rq_clock(rq);
-               ns = rq->clock - p->se.exec_start;
+               ns = rq->clock_task - p->se.exec_start;
                 if ((s64)ns < 0)
                         ns = 0;
         }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index f1c615ff39d63dbab5c8327cb99002f1868bdcf8..c358d4081b81854dedd3d6bb8488af9dfbc18497 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -519,7 +519,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
  static void update_curr(struct cfs_rq *cfs_rq)
  {
         struct sched_entity *curr = cfs_rq->curr;
-       u64 now = rq_of(cfs_rq)->clock;
+       u64 now = rq_of(cfs_rq)->clock_task;
         unsigned long delta_exec;
  
         if (unlikely(!curr))
@@ -602,7 +602,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
         /*
          * We are starting a new run period:
          */
-       se->exec_start = rq_of(cfs_rq)->clock;
+       se->exec_start = rq_of(cfs_rq)->clock_task;
  }
  
  /**************************************************
@@ -1802,7 +1802,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
          * 2) too many balance attempts have failed.
          */
  
-       tsk_cache_hot = task_hot(p, rq->clock, sd);
+       tsk_cache_hot = task_hot(p, rq->clock_task, sd);
         if (!tsk_cache_hot ||
                 sd->nr_balance_failed > sd->cache_nice_tries) {
  #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index ab77aa00b7b17158bc4089469c140369b5b8d6ca..bea7d79f7e9ca958bba514cbd8eb48ceab47bab3 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -609,7 +609,7 @@ static void update_curr_rt(struct rq *rq)
         if (!task_has_rt_policy(curr))
                 return;
  
-       delta_exec = rq->clock - curr->se.exec_start;
+       delta_exec = rq->clock_task - curr->se.exec_start;
         if (unlikely((s64)delta_exec < 0))
                 delta_exec = 0;
  
@@ -618,7 +618,7 @@ static void update_curr_rt(struct rq *rq)
         curr->se.sum_exec_runtime += delta_exec;
         account_group_exec_runtime(curr, delta_exec);
  
-       curr->se.exec_start = rq->clock;
+       curr->se.exec_start = rq->clock_task;
         cpuacct_charge(curr, delta_exec);
  
         sched_rt_avg_update(rq, delta_exec);
@@ -1075,7 +1075,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
         } while (rt_rq);
  
         p = rt_task_of(rt_se);
-       p->se.exec_start = rq->clock;
+       p->se.exec_start = rq->clock_task;
  
         return p;
  }
@@ -1713,7 +1713,7 @@ static void set_curr_task_rt(struct rq *rq)
  {
         struct task_struct *p = rq->curr;
  
-       p->se.exec_start = rq->clock;
+       p->se.exec_start = rq->clock_task;
  
         /* The running task is never eligible for pushing */
         dequeue_pushable_task(rq, p);
author	Venkatesh Pallipadi <venki@google.com>
	Tue, 5 Oct 2010 00:03:21 +0000 (17:03 -0700)
committer	Ingo Molnar <mingo@elte.hu>
	Mon, 18 Oct 2010 18:52:26 +0000 (20:52 +0200)
kernel/sched.c		patch \| blob \| blame \| history
kernel/sched_fair.c		patch \| blob \| blame \| history
kernel/sched_rt.c		patch \| blob \| blame \| history