sched/balancing: Consider max cost of idle balance per sched domain
author Jason Low <jason.low2@hp.com>
Fri, 13 Sep 2013 18:26:52 +0000 (11:26 -0700)
committer Ingo Molnar <mingo@kernel.org>
Fri, 20 Sep 2013 10:03:44 +0000 (12:03 +0200)
In this patch, we keep track of the max cost we spend doing idle load balancing
for each sched domain. If the avg time the CPU remains idle is less than the
time we have already spent on idle balancing + the max cost of idle balancing
in the sched domain, then we don't continue to attempt the balance. We also
keep a per rq variable, max_idle_balance_cost, which keeps track of the max
time spent on newidle load balances throughout all its domains so that we can
determine the avg_idle's max value.

By using the max, we avoid overrunning the average. This further reduces the
chance we attempt balancing when the CPU is not idle for longer than the cost
to balance.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-3-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/metag/include/asm/topology.h
include/linux/sched.h
include/linux/topology.h
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/sched.h

index 23f5118f58db97bde7fd6d345ac273e81af50496..db192924f4b0cadb09cbecdf77f4d782aab526a0 100644 (file)
@@ -26,6 +26,7 @@
        .last_balance           = jiffies,              \
        .balance_interval       = 1,                    \
        .nr_balance_failed      = 0,                    \
+       .max_newidle_lb_cost    = 0,                    \
 }
 
 #define cpu_to_node(cpu)       ((void)(cpu), 0)
index 6682da36b293cfad598a0c5803e9e32038f72aa3..be078ff9157f22de1393ad5d05affd0ab086f0cd 100644 (file)
@@ -810,6 +810,7 @@ struct sched_domain {
        unsigned int nr_balance_failed; /* initialise to 0 */
 
        u64 last_update;
+       u64 max_newidle_lb_cost;
 
 #ifdef CONFIG_SCHEDSTATS
        /* load_balance() stats */
index d3cf0d6e7712c115fb529559b59c2ae4f6f8cd9c..e2a2c3da29292127b634dcb0de89388da08d4094 100644 (file)
@@ -106,6 +106,7 @@ int arch_update_cpu_topology(void);
        .last_balance           = jiffies,                              \
        .balance_interval       = 1,                                    \
        .smt_gain               = 1178, /* 15% */                       \
+       .max_newidle_lb_cost    = 0,                                    \
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -135,6 +136,7 @@ int arch_update_cpu_topology(void);
                                ,                                       \
        .last_balance           = jiffies,                              \
        .balance_interval       = 1,                                    \
+       .max_newidle_lb_cost    = 0,                                    \
 }
 #endif
 #endif /* CONFIG_SCHED_MC */
@@ -166,6 +168,7 @@ int arch_update_cpu_topology(void);
                                ,                                       \
        .last_balance           = jiffies,                              \
        .balance_interval       = 1,                                    \
+       .max_newidle_lb_cost    = 0,                                    \
 }
 #endif
 
index 048f39e457611dd687a6e6c7b2a06ce8d2fcb59b..c2283c54aed07a0f44b8d35cae0049e1c6ef5096 100644 (file)
@@ -1330,7 +1330,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 
        if (rq->idle_stamp) {
                u64 delta = rq_clock(rq) - rq->idle_stamp;
-               u64 max = 2*sysctl_sched_migration_cost;
+               u64 max = 2*rq->max_idle_balance_cost;
 
                update_avg(&rq->avg_idle, delta);
 
@@ -6506,6 +6506,7 @@ void __init sched_init(void)
                rq->online = 0;
                rq->idle_stamp = 0;
                rq->avg_idle = 2*sysctl_sched_migration_cost;
+               rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 
                INIT_LIST_HEAD(&rq->cfs_tasks);
 
index 0784ab6fcc59d011a621c57f86c99010ec66c59e..ffc99d8f0a95119f1d6ac8e910b32bc25adf058d 100644 (file)
@@ -5396,6 +5396,7 @@ void idle_balance(int this_cpu, struct rq *this_rq)
        struct sched_domain *sd;
        int pulled_task = 0;
        unsigned long next_balance = jiffies + HZ;
+       u64 curr_cost = 0;
 
        this_rq->idle_stamp = rq_clock(this_rq);
 
@@ -5412,15 +5413,27 @@ void idle_balance(int this_cpu, struct rq *this_rq)
        for_each_domain(this_cpu, sd) {
                unsigned long interval;
                int continue_balancing = 1;
+               u64 t0, domain_cost;
 
                if (!(sd->flags & SD_LOAD_BALANCE))
                        continue;
 
+               if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+                       break;
+
                if (sd->flags & SD_BALANCE_NEWIDLE) {
+                       t0 = sched_clock_cpu(this_cpu);
+
                        /* If we've pulled tasks over stop searching: */
                        pulled_task = load_balance(this_cpu, this_rq,
                                                   sd, CPU_NEWLY_IDLE,
                                                   &continue_balancing);
+
+                       domain_cost = sched_clock_cpu(this_cpu) - t0;
+                       if (domain_cost > sd->max_newidle_lb_cost)
+                               sd->max_newidle_lb_cost = domain_cost;
+
+                       curr_cost += domain_cost;
                }
 
                interval = msecs_to_jiffies(sd->balance_interval);
@@ -5442,6 +5455,9 @@ void idle_balance(int this_cpu, struct rq *this_rq)
                 */
                this_rq->next_balance = next_balance;
        }
+
+       if (curr_cost > this_rq->max_idle_balance_cost)
+               this_rq->max_idle_balance_cost = curr_cost;
 }
 
 /*
index 0d7544c3dba7d539a6032a9d48f11728d522f0ce..e82484db7699b5f7e2e5a112d1fc3397a95c0c1a 100644 (file)
@@ -476,6 +476,9 @@ struct rq {
        u64 age_stamp;
        u64 idle_stamp;
        u64 avg_idle;
+
+       /* This is used to determine avg_idle's max value */
+       u64 max_idle_balance_cost;
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING