sched: ems: support finding an energy-efficient cpu
author Park Bumgyu <bumgyu.park@samsung.com>
Tue, 27 Mar 2018 03:50:03 +0000 (12:50 +0900)
committer Chungwoo Park <cww.park@samsung.com>
Mon, 21 May 2018 08:35:30 +0000 (17:35 +0900)
Add a function that finds, based on the task utilization, the cpu that
consumes the least energy, taking cpu capacity and power into account.
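
For reference, the energy model implemented by calculate_energy() can be
summarized per coregroup as follows (the capacity/power numbers below are
illustrative only; real values come from the per-cpu energy table):

    energy = sum_cpu((util << SCHED_CAPACITY_SHIFT) / capacity) * power

e.g. two cpus each at util 256, running at a capacity state of cap=512 and
power=100, contribute ((256 * 1024 / 512) * 2) * 100 = 102400 units.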

Change-Id: I28c333f8f6881ea4aaaf57361cc612975dc35c26
Signed-off-by: Park Bumgyu <bumgyu.park@samsung.com>
include/trace/events/ems.h
kernel/sched/ems/core.c
kernel/sched/ems/ems.h
kernel/sched/fair.c

index af380165f4501c2659e9762eac82120e24e4ce15..866b1843e236754f240835171930372c5749fe79 100644 (file)
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
 
+/*
+ * Tracepoint for selecting eco cpu
+ */
+TRACE_EVENT(ems_select_eco_cpu,
+
+       TP_PROTO(struct task_struct *p, int eco_cpu, int prev_cpu, int best_cpu, int backup_cpu,
+               unsigned int prev_energy, unsigned int best_energy, unsigned int backup_energy),
+
+       TP_ARGS(p, eco_cpu, prev_cpu, best_cpu, backup_cpu,
+                       prev_energy, best_energy, backup_energy),
+
+       TP_STRUCT__entry(
+               __array(        char,           comm,   TASK_COMM_LEN   )
+               __field(        pid_t,          pid                     )
+               __field(        int,            eco_cpu                 )
+               __field(        int,            prev_cpu                )
+               __field(        int,            best_cpu                )
+               __field(        int,            backup_cpu              )
+               __field(        unsigned int,   prev_energy             )
+               __field(        unsigned int,   best_energy             )
+               __field(        unsigned int,   backup_energy           )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->eco_cpu        = eco_cpu;
+               __entry->prev_cpu       = prev_cpu;
+               __entry->best_cpu       = best_cpu;
+               __entry->backup_cpu     = backup_cpu;
+               __entry->prev_energy    = prev_energy;
+               __entry->best_energy    = best_energy;
+               __entry->backup_energy  = backup_energy;
+       ),
+
+       TP_printk("comm=%s pid=%d eco_cpu=%d prev_cpu=%d best_cpu=%d backup_cpu=%d "
+                 "prev_energy=%u best_energy=%u backup_energy=%u",
+               __entry->comm, __entry->pid,
+               __entry->eco_cpu, __entry->prev_cpu, __entry->best_cpu, __entry->backup_cpu,
+               __entry->prev_energy, __entry->best_energy, __entry->backup_energy)
+);
+
 /*
  * Tracepoint for wakeup balance
  */
index 8ab7905f1c9a2ac1886528c9bffafba225a156dc..26f2e65f800bdec44e514ad35607fe767f3bd3cf 100644 (file)
 #include "ems.h"
 #include "../sched.h"
 
-static int select_energy_cpu(struct task_struct *p)
+#define cpu_selected(cpu)      ((cpu) >= 0)
+
+static int task_util(struct task_struct *p)
 {
-       return -1;
+       return p->se.avg.util_avg;
+}
+
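+/*
+ * Estimate the utilization of @cpu without the contribution of the
+ * waking task @p, so candidate cpus are evaluated from a baseline
+ * that excludes the task being placed.
+ */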
+static int cpu_util_wake(int cpu, struct task_struct *p)
+{
+       unsigned long util, capacity;
+
+       /* Task has no contribution or is new */
+       if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
+               return cpu_util(cpu);
+
+       capacity = capacity_orig_of(cpu);
+       util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - task_util(p), 0);
+
+       return (util >= capacity) ? capacity : util;
+}
+
+struct eco_env {
+       struct task_struct *p;
+
+       int prev_cpu;
+       int best_cpu;
+       int backup_cpu;
+};
+
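+/*
+ * Scan the cpus allowed for the task and pick two candidates among the
+ * lowest-capacity cpus that can take the task: a best cpu (running,
+ * biggest spare capacity) and a backup cpu (idle, shallowest idle state).
+ */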
+static void find_eco_target(struct eco_env *eenv)
+{
+       struct task_struct *p = eenv->p;
+       unsigned long best_min_cap_orig = ULONG_MAX;
+       unsigned long backup_min_cap_orig = ULONG_MAX;
+       unsigned long best_spare_cap = 0;
+       int backup_idle_cstate = INT_MAX;
+       int best_cpu = -1;
+       int backup_cpu = -1;
+       int cpu;
+
+       rcu_read_lock();
+
+       for_each_cpu_and(cpu, &p->cpus_allowed, cpu_active_mask) {
+               unsigned long capacity_orig = capacity_orig_of(cpu);
+               unsigned long wake_util, new_util;
+
+               wake_util = cpu_util_wake(cpu, p);
+               new_util = wake_util + task_util(p);
+
+               /* prev cpu is compared separately in select_eco_cpu(), skip it */
+               if (eenv->prev_cpu == cpu)
+                       continue;
+
+               /* skip over-capacity cpu */
+               if (new_util > capacity_orig)
+                       continue;
+
+               /*
+                * Skip the cpu if assigning the task to it would make it
+                * overutilized, according to the criteria determined by the
+                * LBT (Load Balance Trigger).
+                */
+               if (lbt_bring_overutilize(cpu, p))
+                       continue;
+
+               /*
+                * Backup target: shallowest-idle cpu among min-capacity cpus
+                *
+                * In general, assigning a task to an idle cpu is
+                * disadvantageous in energy. To minimize the energy increase
+                * of waking an idle cpu, choose the lowest-capacity cpu in
+                * the shallowest idle state.
+                */
+               if (idle_cpu(cpu)) {
+                       int idle_idx;
+
+                       if (backup_min_cap_orig < capacity_orig)
+                               continue;
+
+                       idle_idx = idle_get_state_idx(cpu_rq(cpu));
+                       if (backup_idle_cstate <= idle_idx)
+                               continue;
+
+                       backup_min_cap_orig = capacity_orig;
+                       backup_idle_cstate = idle_idx;
+                       backup_cpu = cpu;
+                       continue;
+               }
+
+               /*
+                * Best target: biggest-spare-capacity cpu among min-capacity cpus
+                *
+                * Select the cpu with the biggest spare capacity to keep the
+                * frequency as low as possible without waking up an idle cpu.
+                * Also, to maximize the use of energy-efficient cpus, prefer
+                * the lowest-capacity cpu.
+                */
+               if (best_min_cap_orig < capacity_orig)
+                       continue;
+
+               if (best_spare_cap > (capacity_orig - new_util))
+                       continue;
+
+               best_spare_cap = capacity_orig - new_util;
+               best_min_cap_orig = capacity_orig;
+               best_cpu = cpu;
+       }
+
+       rcu_read_unlock();
+
+       eenv->best_cpu = best_cpu;
+       eenv->backup_cpu = backup_cpu;
+}
+
+struct energy_table {
+       struct capacity_state *states;
+       unsigned int nr_states;
+};
+
+DEFINE_PER_CPU(struct energy_table, energy_table);
+
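+/*
+ * Estimate the total energy of the active cpus as if @p were placed on
+ * @target_cpu: per coregroup, pick the capacity state covering the
+ * biggest utilization, normalize each cpu's utilization by that
+ * capacity, and weight the normalized sum by the state's power.
+ */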
+static unsigned int calculate_energy(struct task_struct *p, int target_cpu)
+{
+       unsigned long util[NR_CPUS] = {0, };
+       unsigned int total_energy = 0;
+       int cpu;
+
+       /*
+        * 0. Calculate the utilization of every active cpu as if the task
+        *    were assigned to the target cpu.
+        */
+       for_each_cpu(cpu, cpu_active_mask) {
+               util[cpu] = cpu_util_wake(cpu, p);
+
+               if (unlikely(cpu == target_cpu))
+                       util[cpu] += task_util(p);
+       }
+
+       for_each_possible_cpu(cpu) {
+               struct energy_table *table;
+               unsigned long max_util = 0, util_sum = 0;
+               unsigned long capacity;
+               int i, cap_idx;
+
+               /* Compute each coregroup's energy only once, via its first cpu */
+               if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
+                       continue;
+
+               /*
+                * 1. All cpus in a coregroup share the same capacity, which
+                *    is determined by the cpu with the biggest utilization.
+                *    Find the biggest utilization in the coregroup to know
+                *    which capacity the cpus will run at.
+                */
+               for_each_cpu(i, cpu_coregroup_mask(cpu))
+                       if (util[i] > max_util)
+                               max_util = util[i];
+
+               /*
+                * 2. Find the capacity state covering the biggest
+                *    utilization in the coregroup.
+                */
+               table = &per_cpu(energy_table, cpu);
+               cap_idx = table->nr_states - 1;
+               /* fall back to the highest state if none covers max_util */
+               capacity = table->states[cap_idx].cap;
+               for (i = 0; i < table->nr_states; i++) {
+                       if (table->states[i].cap >= max_util) {
+                               capacity = table->states[i].cap;
+                               cap_idx = i;
+                               break;
+                       }
+               }
+
+               /*
+                * 3. Get the utilization sum of the coregroup. Since CFS cpu
+                *    utilization reflects cpu performance, normalize it by
+                *    the capacity to obtain the amount of cpu usage that is
+                *    independent of cpu performance.
+                */
+               for_each_cpu(i, cpu_coregroup_mask(cpu)) {
+                       /* utilization with the task exceeds the selected capacity */
+                       if (util[i] >= capacity) {
+                               util_sum += SCHED_CAPACITY_SCALE;
+                               continue;
+                       }
+
+                       /* normalize cpu utilization */
+                       util_sum += (util[i] << SCHED_CAPACITY_SHIFT) / capacity;
+               }
+
+               /*
+                * 4. Compute the active energy of the coregroup.
+                */
+               total_energy += util_sum * table->states[cap_idx].power;
+       }
+
+       return total_energy;
+}
+
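+/*
+ * Choose between prev cpu and the lower-energy one of the best/backup
+ * candidates, migrating only when the energy saving exceeds a small
+ * dead-zone margin.
+ */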
+static int select_eco_cpu(struct eco_env *eenv)
+{
+       unsigned int prev_energy, best_energy, backup_energy;
+       unsigned int temp_energy;
+       int temp_cpu;
+       int eco_cpu = eenv->prev_cpu;
+       int margin;
+
+       prev_energy = calculate_energy(eenv->p, eenv->prev_cpu);
+
+       /*
+        * find_eco_target() may not find a best or backup cpu. Ignore any
+        * unfound cpu, and if both are found, select the one that consumes
+        * less energy when the task is assigned.
+        */
+       best_energy = backup_energy = UINT_MAX;
+
+       if (cpu_selected(eenv->best_cpu))
+               best_energy = calculate_energy(eenv->p, eenv->best_cpu);
+
+       if (cpu_selected(eenv->backup_cpu))
+               backup_energy = calculate_energy(eenv->p, eenv->backup_cpu);
+
+       if (best_energy < backup_energy) {
+               temp_energy = best_energy;
+               temp_cpu = eenv->best_cpu;
+       } else {
+               temp_energy = backup_energy;
+               temp_cpu = eenv->backup_cpu;
+       }
+
+       /*
+        * Compare the prev cpu with the target cpu (the lower-energy one of
+        * the best and backup cpus) to determine whether migrating the task
+        * from the PREV CPU to the TARGET CPU is beneficial for energy.
+        */
+       if (temp_energy < prev_energy) {
+               /*
+                * Compute the dead-zone margin used to prevent too many task
+                * migrations with negligible energy savings.
+                * An energy saving is considered meaningful if it reduces the
+                * energy consumption of the PREV CPU candidate by at least
+                * ~1.56%.
+                */
+               margin = prev_energy >> 6;
+               if ((prev_energy - temp_energy) < margin)
+                       goto out;
+
+               eco_cpu = temp_cpu;
+       }
+
+out:
+       trace_ems_select_eco_cpu(eenv->p, eco_cpu,
+                       eenv->prev_cpu, eenv->best_cpu, eenv->backup_cpu,
+                       prev_energy, best_energy, backup_energy);
+       return eco_cpu;
+}
+
+static int
+select_energy_cpu(struct task_struct *p, int prev_cpu, int sd_flag, int sync)
+{
+       struct sched_domain *sd = NULL;
+       int cpu = smp_processor_id();
+       struct eco_env eenv = {
+               .p = p,
+               .prev_cpu = prev_cpu,
+       };
+
+       if (!sched_feat(ENERGY_AWARE))
+               return -1;
+
+       /*
+        * Energy-aware wakeup placement on an overutilized cpu is unlikely
+        * to yield an energy gain.
+        */
+       rcu_read_lock();
+       sd = rcu_dereference_sched(cpu_rq(prev_cpu)->sd);
+       if (!sd || sd->shared->overutilized) {
+               rcu_read_unlock();
+               return -1;
+       }
+       rcu_read_unlock();
+
+       /*
+        * We cannot do energy-aware wakeup placement sensibly for tasks
+        * with 0 utilization, so let them be placed according to the normal
+        * strategy.
+        */
+       if (!task_util(p))
+               return -1;
+
+       if (sysctl_sched_sync_hint_enable && sync)
+               if (cpumask_test_cpu(cpu, &p->cpus_allowed))
+                       return cpu;
+
+       /*
+        * Find an eco-friendly target.
+        * After selecting the best and backup cpus according to the strategy
+        * above, choose whichever cpu is more energy-efficient than the prev
+        * cpu.
+        */
+       find_eco_target(&eenv);
+       if (eenv.best_cpu < 0 && eenv.backup_cpu < 0)
+               return prev_cpu;
+
+       return select_eco_cpu(&eenv);
 }
 
 static int select_proper_cpu(struct task_struct *p)
@@ -21,11 +320,9 @@ static int select_proper_cpu(struct task_struct *p)
        return -1;
 }
 
-#define cpu_selected(cpu)      (cpu >= 0)
-
 extern void sync_entity_load_avg(struct sched_entity *se);
 
-int exynos_wakeup_balance(struct task_struct *p, int sd_flag, int sync)
+int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int sync)
 {
        int target_cpu = -1;
        char state[30] = "fail";
@@ -123,7 +420,7 @@ int exynos_wakeup_balance(struct task_struct *p, int sd_flag, int sync)
         * A scheduling scheme based on cpu energy, find the least power consumption
         * cpu referring energy table when assigning task.
         */
-       target_cpu = select_energy_cpu(p);
+       target_cpu = select_energy_cpu(p, prev_cpu, sd_flag, sync);
        if (cpu_selected(target_cpu)) {
                strcpy(state, "energy cpu");
                goto out;
index 000d9219776eac2eb3f94ccec2d87a4867acbe3a..8d2a02b5b4d4753d02c122bac24ee343848486df 100644 (file)
@@ -35,10 +35,10 @@ static inline int group_balancing(struct task_struct *p) { return -1; }
 
 #ifdef CONFIG_SCHED_EMS
 extern int
-exynos_wakeup_balance(struct task_struct *p, int sd_flag, int sync);
+exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int sync);
 #else
 static inline int
-exynos_wakeup_balance(struct task_struct *p, int sd_flag, int sync)
+exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int sync)
 {
        return -1;
 }
index 8f74a808eadfa0115b195448fd37bda69e54b4c6..5a157c85c774d1ecdaafbb9c4a3b829331bfbffc 100644 (file)
@@ -7448,7 +7448,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
        int target_cpu;
 
        if (sched_feat(EXYNOS_MS)) {
-               target_cpu = exynos_wakeup_balance(p, sd_flag, sync);
+               target_cpu = exynos_wakeup_balance(p, prev_cpu, sd_flag, sync);
                if (target_cpu >= 0)
                        return target_cpu;
        }
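
For reference, the new tracepoint can be observed through tracefs in the
usual way. A hypothetical session (the event path assumes the header's
TRACE_SYSTEM is "ems", and all field values shown are illustrative):

  # echo 1 > /sys/kernel/tracing/events/ems/ems_select_eco_cpu/enable
  # cat /sys/kernel/tracing/trace_pipe
  ... ems_select_eco_cpu: comm=app pid=1234 eco_cpu=1 prev_cpu=4 best_cpu=1 backup_cpu=0 prev_energy=52000 best_energy=48000 backup_energy=50000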