sched/tune: add support to compute normalized energy
author     Patrick Bellasi <patrick.bellasi@arm.com>
           Tue, 12 Jan 2016 18:12:13 +0000 (18:12 +0000)
committer  Leo Yan <leo.yan@linaro.org>
           Tue, 10 May 2016 08:53:25 +0000 (16:53 +0800)
The current EAS implementation considers only energy variations and completely
disregards the impact on performance when selecting a schedule candidate.
Moreover, it bases its decision on the "absolute" value of the expected
energy variation.

To properly define a trade-off strategy between increased energy consumption
and performance benefits, energy variations must be compared with performance
variations.

Thus, both performance and energy metrics must be expressed in comparable
units. While performance variations are expressed as capacity deltas, defined
in the range [0..SCHED_LOAD_SCALE], energy variations do not use the same
scale.

This patch introduces the function:
  schedtune_normalize_energy(energy_diff)
which returns a normalized value in the same range as capacity variations,
i.e. [0..SCHED_LOAD_SCALE].
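
Conceptually, the normalization maps an energy delta onto the same scale used
for capacity deltas, by dividing it by the maximum energy variation the
platform can generate. A minimal sketch of the idea (illustrative only, not
the in-kernel implementation; min_power and max_power stand for the
platform-specific constants introduced below, SCHED_LOAD_SCALE is the
existing scheduler constant):

  static int normalize_energy_sketch(int energy_diff,
                                     unsigned long min_power,
                                     unsigned long max_power)
  {
          /* Maximum energy variation the platform can generate */
          unsigned long range = max_power - min_power;
          /* Work on the magnitude, then restore the sign */
          unsigned int abs_nrg = energy_diff < 0 ? -energy_diff : energy_diff;
          unsigned int norm = (abs_nrg * SCHED_LOAD_SCALE) / range;

          return energy_diff < 0 ? -(int)norm : norm;
  }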

A proper set of energy normalization constants is required to allow a fast
division by a constant during the normalization of the energy_diff. The value
of these constants depends on the energy model and topology of the specific
target device. Thus, this patch also adds the support required to compute
this set of constants at boot time.
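
The fast division uses the kernel's reciprocal_div helpers: the reciprocal of
(max_power - min_power) is computed once at boot, so each normalization needs
only a multiply and a couple of shifts instead of a division. A sketch of the
idea, assuming <linux/reciprocal_div.h> and SCHED_LOAD_SHIFT (the helper and
variable names below are hypothetical):

  #include <linux/reciprocal_div.h>

  /* Precomputed once the platform's energy model data is known */
  static struct reciprocal_value nrg_rdiv;

  static void init_nrg_rdiv(unsigned long min_power, unsigned long max_power)
  {
          /* Reciprocal of the full energy range of the platform */
          nrg_rdiv = reciprocal_value(max_power - min_power);
  }

  static u32 normalize_fast(u32 abs_energy_diff)
  {
          /* Scale to SCHED_LOAD_SCALE, then "divide" via multiply/shift */
          return reciprocal_divide(abs_energy_diff << SCHED_LOAD_SHIFT, nrg_rdiv);
  }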

Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
kernel/sched/tune.c
kernel/sched/tune.h

diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index 87213861bde5ebb7302088da53465ebc745e2fc5..1a8ba5a6d99b1b1649f0898c09d7e48f2693a645 100644
@@ -1,7 +1,9 @@
 #include <linux/cgroup.h>
 #include <linux/err.h>
+#include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/printk.h>
+#include <linux/reciprocal_div.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 
@@ -9,6 +11,84 @@
 
 unsigned int sysctl_sched_cfs_boost __read_mostly;
 
+/*
+ * System energy normalization constants
+ */
+static struct target_nrg {
+       unsigned long min_power;
+       unsigned long max_power;
+       struct reciprocal_value rdiv;
+} schedtune_target_nrg;
+
+/* Performance Boost region (B) threshold params */
+static int perf_boost_idx;
+
+/* Performance Constraint region (C) threshold params */
+static int perf_constrain_idx;
+
+/*
+ * Performance-Energy (P-E) Space threshold constants
+ */
+struct threshold_params {
+       int nrg_gain;
+       int cap_gain;
+};
+
+/*
+ * System-specific P-E space threshold constants
+ */
+static struct threshold_params
+threshold_gains[] = {
+       { 0, 4 }, /* >=  0% */
+       { 0, 4 }, /* >= 10% */
+       { 1, 4 }, /* >= 20% */
+       { 2, 4 }, /* >= 30% */
+       { 3, 4 }, /* >= 40% */
+       { 4, 3 }, /* >= 50% */
+       { 4, 2 }, /* >= 60% */
+       { 4, 1 }, /* >= 70% */
+       { 4, 0 }, /* >= 80% */
+       { 4, 0 }  /* >= 90% */
+};
+
+static int
+__schedtune_accept_deltas(int nrg_delta, int cap_delta,
+                         int perf_boost_idx, int perf_constrain_idx)
+{
+       int payoff = -INT_MAX;
+
+       /* Performance Boost (B) region */
+       if (nrg_delta > 0 && cap_delta > 0) {
+               /*
+                * Evaluate "Performance Boost" vs "Energy Increase"
+                * payoff criteria:
+                *    cap_delta / nrg_delta > cap_gain / nrg_gain
+                * which, since all terms are positive, is:
+                *    cap_delta * nrg_gain > nrg_delta * cap_gain
+                */
+               payoff  = cap_delta * threshold_gains[perf_boost_idx].nrg_gain;
+               payoff -= nrg_delta * threshold_gains[perf_boost_idx].cap_gain;
+               return payoff;
+       }
+
+       /* Performance Constraint (C) region */
+       if (nrg_delta < 0 && cap_delta < 0) {
+               /*
+                * Evaluate "Performance Constraint" vs "Energy Savings"
+                * payoff criteria:
+                *    cap_delta / nrg_delta < cap_gain / nrg_gain
+                * which, since nrg_delta is negative, is:
+                *    cap_delta * nrg_gain > nrg_delta * cap_gain
+                */
+               payoff  = cap_delta * threshold_gains[perf_constrain_idx].nrg_gain;
+               payoff -= nrg_delta * threshold_gains[perf_constrain_idx].cap_gain;
+               return payoff;
+       }
+
+       /* Default: reject schedule candidate */
+       return payoff;
+}
+
 #ifdef CONFIG_CGROUP_SCHEDTUNE
 
 /*
@@ -26,6 +106,11 @@ struct schedtune {
        /* Boost value for tasks on that SchedTune CGroup */
        int boost;
 
+       /* Performance Boost (B) region threshold params */
+       int perf_boost_idx;
+
+       /* Performance Constraint (C) region threshold params */
+       int perf_constrain_idx;
 };
 
 static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
@@ -55,8 +140,37 @@ static inline struct schedtune *parent_st(struct schedtune *st)
 static struct schedtune
 root_schedtune = {
        .boost  = 0,
+       .perf_boost_idx = 0,
+       .perf_constrain_idx = 0,
 };
 
+int
+schedtune_accept_deltas(int nrg_delta, int cap_delta,
+                       struct task_struct *task)
+{
+       struct schedtune *ct;
+       int perf_boost_idx;
+       int perf_constrain_idx;
+
+       /* Optimal (O) region */
+       if (nrg_delta < 0 && cap_delta > 0)
+               return INT_MAX;
+
+       /* Suboptimal (S) region */
+       if (nrg_delta > 0 && cap_delta < 0)
+               return -INT_MAX;
+
+       /* Get task specific perf Boost/Constraints indexes */
+       rcu_read_lock();
+       ct = task_schedtune(task);
+       perf_boost_idx = ct->perf_boost_idx;
+       perf_constrain_idx = ct->perf_constrain_idx;
+       rcu_read_unlock();
+
+       return __schedtune_accept_deltas(nrg_delta, cap_delta,
+                       perf_boost_idx, perf_constrain_idx);
+}
+
 /*
  * Maximum number of boost groups to support
  * When per-task boosting is used we still allow only limited number of
@@ -396,6 +510,24 @@ struct cgroup_subsys schedtune_cgrp_subsys = {
        .early_init     = 1,
 };
 
+#else /* CONFIG_CGROUP_SCHEDTUNE */
+
+int
+schedtune_accept_deltas(int nrg_delta, int cap_delta,
+                       struct task_struct *task)
+{
+       /* Optimal (O) region */
+       if (nrg_delta < 0 && cap_delta > 0)
+               return INT_MAX;
+
+       /* Suboptimal (S) region */
+       if (nrg_delta > 0 && cap_delta < 0)
+               return -INT_MAX;
+
+       return __schedtune_accept_deltas(nrg_delta, cap_delta,
+                       perf_boost_idx, perf_constrain_idx);
+}
+
 #endif /* CONFIG_CGROUP_SCHEDTUNE */
 
 int
@@ -408,5 +540,194 @@ sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
        if (ret || !write)
                return ret;
 
+       /* Performance Boost (B) region threshold params */
+       perf_boost_idx  = sysctl_sched_cfs_boost;
+       perf_boost_idx /= 10;
+
+       /* Performance Constraint (C) region threshold params */
+       perf_constrain_idx  = 100 - sysctl_sched_cfs_boost;
+       perf_constrain_idx /= 10;
+
+       return 0;
+}
+
+/*
+ * System energy normalization
+ * Returns the normalized value, in the range [0..SCHED_LOAD_SCALE],
+ * corresponding to the specified energy variation.
+ */
+int
+schedtune_normalize_energy(int energy_diff)
+{
+       u32 normalized_nrg;
+
+#ifdef CONFIG_SCHED_DEBUG
+       int max_delta;
+       /* Check for boundaries */
+       max_delta  = schedtune_target_nrg.max_power;
+       max_delta -= schedtune_target_nrg.min_power;
+       WARN_ON(abs(energy_diff) >= max_delta);
+#endif
+
+       /* Do scaling using positive numbers to increase the range */
+       normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff;
+
+       /* Scale by energy magnitude */
+       normalized_nrg <<= SCHED_LOAD_SHIFT;
+
+       /* Normalize on max energy for target platform */
+       normalized_nrg = reciprocal_divide(
+                       normalized_nrg, schedtune_target_nrg.rdiv);
+
+       return (energy_diff < 0) ? -normalized_nrg : normalized_nrg;
+}
+
+#ifdef CONFIG_SCHED_DEBUG
+static void
+schedtune_test_nrg(unsigned long delta_pwr)
+{
+       unsigned long test_delta_pwr;
+       unsigned long test_norm_pwr;
+       int idx;
+
+       /*
+        * Check normalization constants using some constant system
+        * energy values
+        */
+       pr_info("schedtune: verify normalization constants...\n");
+       for (idx = 0; idx < 6; ++idx) {
+               test_delta_pwr = delta_pwr >> idx;
+
+               /* Normalize on max energy for target platform */
+               test_norm_pwr = reciprocal_divide(
+                                       test_delta_pwr << SCHED_LOAD_SHIFT,
+                                       schedtune_target_nrg.rdiv);
+
+               pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n",
+                       idx, test_delta_pwr, test_norm_pwr);
+       }
+}
+#else
+#define schedtune_test_nrg(delta_pwr)
+#endif
+
+/*
+ * Compute the min/max power consumption of a cluster and all its CPUs
+ */
+static void
+schedtune_add_cluster_nrg(
+               struct sched_domain *sd,
+               struct sched_group *sg,
+               struct target_nrg *ste)
+{
+       struct sched_domain *sd2;
+       struct sched_group *sg2;
+
+       struct cpumask *cluster_cpus;
+       char str[32];
+
+       unsigned long min_pwr;
+       unsigned long max_pwr;
+       int cpu;
+
+       /* Get Cluster energy using EM data for the first CPU */
+       cluster_cpus = sched_group_cpus(sg);
+       snprintf(str, 32, "CLUSTER[%*pbl]",
+                cpumask_pr_args(cluster_cpus));
+
+       min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power;
+       max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power;
+       pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+               str, min_pwr, max_pwr);
+
+       /*
+        * Keep track of this cluster's energy in the computation of the
+        * overall system energy
+        */
+       ste->min_power += min_pwr;
+       ste->max_power += max_pwr;
+
+       /* Get CPU energy using EM data for each CPU in the group */
+       for_each_cpu(cpu, cluster_cpus) {
+               /* Get a SD view for the specific CPU */
+               for_each_domain(cpu, sd2) {
+                       /* Get the CPU group */
+                       sg2 = sd2->groups;
+                       min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power;
+                       max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power;
+
+                       ste->min_power += min_pwr;
+                       ste->max_power += max_pwr;
+
+                       snprintf(str, 32, "CPU[%d]", cpu);
+                       pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+                               str, min_pwr, max_pwr);
+
+                       /*
+                        * Assume we have EM data only at the CPU and
+                        * the upper CLUSTER level
+                        */
+                       BUG_ON(!cpumask_equal(
+                               sched_group_cpus(sg),
+                               sched_group_cpus(sd2->parent->groups)
+                               ));
+                       break;
+               }
+       }
+}
+
+/*
+ * Initialize the constants required to compute normalized energy.
+ * The values of these constants depend on the EM data for the specific
+ * target system and topology.
+ * Thus, this function is expected to be called by the code
+ * that binds the EM to the topology information.
+ */
+static int
+schedtune_init_late(void)
+{
+       struct target_nrg *ste = &schedtune_target_nrg;
+       unsigned long delta_pwr = 0;
+       struct sched_domain *sd;
+       struct sched_group *sg;
+
+       pr_info("schedtune: init normalization constants...\n");
+       ste->max_power = 0;
+       ste->min_power = 0;
+
+       rcu_read_lock();
+
+       /*
+        * When EAS is in use, we always have a pointer to the highest SD
+        * which provides EM data.
+        */
+       sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask)));
+       if (!sd) {
+               pr_info("schedtune: no energy model data\n");
+               goto nodata;
+       }
+
+       sg = sd->groups;
+       do {
+               schedtune_add_cluster_nrg(sd, sg, ste);
+       } while (sg = sg->next, sg != sd->groups);
+
+       rcu_read_unlock();
+
+       pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+               "SYSTEM", ste->min_power, ste->max_power);
+
+       /* Compute normalization constants */
+       delta_pwr = ste->max_power - ste->min_power;
+       ste->rdiv = reciprocal_value(delta_pwr);
+       pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n",
+               ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2);
+
+       schedtune_test_nrg(delta_pwr);
        return 0;
+
+nodata:
+       rcu_read_unlock();
+       return -EINVAL;
 }
+late_initcall(schedtune_init_late);
diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h
index d756ce7b06e08fddd0d3eb11bc38a6825942e1f0..f7273a5d994aa4708c74ad5d815fe984a2ef1a20 100644
@@ -16,9 +16,16 @@ void schedtune_dequeue_task(struct task_struct *p, int cpu);
 
 #endif /* CONFIG_CGROUP_SCHEDTUNE */
 
+int schedtune_normalize_energy(int energy);
+int schedtune_accept_deltas(int nrg_delta, int cap_delta,
+                           struct task_struct *task);
+
 #else /* CONFIG_SCHED_TUNE */
 
 #define schedtune_enqueue_task(task, cpu) do { } while (0)
 #define schedtune_dequeue_task(task, cpu) do { } while (0)
 
+#define schedtune_normalize_energy(energy) energy
+#define schedtune_accept_deltas(nrg_delta, cap_delta, task) nrg_delta
+
 #endif /* CONFIG_SCHED_TUNE */