cpufreq: intel_pstate: One set of global limits in active mode
author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Fri, 17 Mar 2017 23:57:39 +0000 (00:57 +0100)
committer: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Fri, 17 Mar 2017 23:57:39 +0000 (00:57 +0100)
In the active mode intel_pstate currently uses two sets of global
limits, each associated with one of the possible scaling_governor
settings in that mode: "powersave" or "performance".

The driver switches over from one of those sets to the other
depending on the scaling_governor setting for the CPU whose
per-policy cpufreq interface in sysfs was most recently used to
change parameters exposed in there.  That obviously leads to no end
of issues when the scaling_governor settings differ between CPUs.

The most recent issue was introduced by commit a240c4aa5d0f (cpufreq:
intel_pstate: Do not reinit performance limits in ->setpolicy)
that eliminated the reinitialization of "performance" limits in
intel_pstate_set_policy() preventing the max limit from being set
to anything below 100, among other things.

Namely, an undesirable side effect of commit a240c4aa5d0f is that
now, after setting scaling_governor to "performance" in the active
mode, the per-policy limits for the CPU in question go to the highest
level and stay there even when it is switched back to "powersave"
later.

As it turns out, some distributions set scaling_governor to
"performance" temporarily for all CPUs to speed up system
initialization, so that change causes them to misbehave later.

To fix that, get rid of the performance/powersave global limits
split and use just one set of global limits for everything.

From the user's perspective, after this modification, when
scaling_governor is switched from "performance" to "powersave"
or the other way around on one CPU, the limits settings (i.e. the
global max/min_perf_pct and per-policy scaling_max/min_freq for
any CPUs) will not change.  Still, switching from "performance"
to "powersave" or the other way around changes the way in which
P-states are selected and in particular "performance" causes the
driver to always request the highest P-state it is allowed to
request for the given CPU.

Fixes: a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
drivers/cpufreq/intel_pstate.c

index 08e134ffba68e28656374fc55fab95345050cf8c..7b07803e7042567c2fc6144453d57dc34d7fb9df 100644 (file)
@@ -364,9 +364,7 @@ static bool driver_registered __read_mostly;
 static bool acpi_ppc;
 #endif
 
-static struct perf_limits performance_limits;
-static struct perf_limits powersave_limits;
-static struct perf_limits *limits;
+static struct perf_limits global;
 
 static void intel_pstate_init_limits(struct perf_limits *limits)
 {
@@ -377,14 +375,6 @@ static void intel_pstate_init_limits(struct perf_limits *limits)
        limits->max_sysfs_pct = 100;
 }
 
-static void intel_pstate_set_performance_limits(struct perf_limits *limits)
-{
-       intel_pstate_init_limits(limits);
-       limits->min_perf_pct = 100;
-       limits->min_perf = int_ext_tofp(1);
-       limits->min_sysfs_pct = 100;
-}
-
 static DEFINE_MUTEX(intel_pstate_driver_lock);
 static DEFINE_MUTEX(intel_pstate_limits_lock);
 
@@ -507,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
         * correct max turbo frequency based on the turbo state.
         * Also need to convert to MHz as _PSS freq is in MHz.
         */
-       if (!limits->turbo_disabled)
+       if (!global.turbo_disabled)
                cpu->acpi_perf_data.states[0].core_frequency =
                                        policy->cpuinfo.max_freq / 1000;
        cpu->valid_pss_table = true;
@@ -626,7 +616,7 @@ static inline void update_turbo_state(void)
 
        cpu = all_cpu_data[0];
        rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
-       limits->turbo_disabled =
+       global.turbo_disabled =
                (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
                 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
@@ -851,7 +841,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 {
        int min, hw_min, max, hw_max, cpu;
-       struct perf_limits *perf_limits = limits;
+       struct perf_limits *perf_limits = &global;
        u64 value, cap;
 
        for_each_cpu(cpu, policy->cpus) {
@@ -863,19 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 
                rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
                hw_min = HWP_LOWEST_PERF(cap);
-               if (limits->no_turbo)
+               if (global.no_turbo)
                        hw_max = HWP_GUARANTEED_PERF(cap);
                else
                        hw_max = HWP_HIGHEST_PERF(cap);
 
-               min = fp_ext_toint(hw_max * perf_limits->min_perf);
+               max = fp_ext_toint(hw_max * perf_limits->max_perf);
+               if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+                       min = max;
+               else
+                       min = fp_ext_toint(hw_max * perf_limits->min_perf);
 
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
                value &= ~HWP_MIN_PERF(~0L);
                value |= HWP_MIN_PERF(min);
 
-               max = fp_ext_toint(hw_max * perf_limits->max_perf);
                value &= ~HWP_MAX_PERF(~0L);
                value |= HWP_MAX_PERF(max);
 
@@ -968,20 +961,11 @@ static int intel_pstate_resume(struct cpufreq_policy *policy)
 }
 
 static void intel_pstate_update_policies(void)
-       __releases(&intel_pstate_limits_lock)
-       __acquires(&intel_pstate_limits_lock)
 {
-       struct perf_limits *saved_limits = limits;
        int cpu;
 
-       mutex_unlock(&intel_pstate_limits_lock);
-
        for_each_possible_cpu(cpu)
                cpufreq_update_policy(cpu);
-
-       mutex_lock(&intel_pstate_limits_lock);
-
-       limits = saved_limits;
 }
 
 /************************** debugfs begin ************************/
@@ -1060,7 +1044,7 @@ static void intel_pstate_debug_hide_params(void)
        static ssize_t show_##file_name                                 \
        (struct kobject *kobj, struct attribute *attr, char *buf)       \
        {                                                               \
-               return sprintf(buf, "%u\n", limits->object);            \
+               return sprintf(buf, "%u\n", global.object);             \
        }
 
 static ssize_t intel_pstate_show_status(char *buf);
@@ -1151,10 +1135,10 @@ static ssize_t show_no_turbo(struct kobject *kobj,
        }
 
        update_turbo_state();
-       if (limits->turbo_disabled)
-               ret = sprintf(buf, "%u\n", limits->turbo_disabled);
+       if (global.turbo_disabled)
+               ret = sprintf(buf, "%u\n", global.turbo_disabled);
        else
-               ret = sprintf(buf, "%u\n", limits->no_turbo);
+               ret = sprintf(buf, "%u\n", global.no_turbo);
 
        mutex_unlock(&intel_pstate_driver_lock);
 
@@ -1181,19 +1165,19 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
        mutex_lock(&intel_pstate_limits_lock);
 
        update_turbo_state();
-       if (limits->turbo_disabled) {
+       if (global.turbo_disabled) {
                pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
                mutex_unlock(&intel_pstate_limits_lock);
                mutex_unlock(&intel_pstate_driver_lock);
                return -EPERM;
        }
 
-       limits->no_turbo = clamp_t(int, input, 0, 1);
-
-       intel_pstate_update_policies();
+       global.no_turbo = clamp_t(int, input, 0, 1);
 
        mutex_unlock(&intel_pstate_limits_lock);
 
+       intel_pstate_update_policies();
+
        mutex_unlock(&intel_pstate_driver_lock);
 
        return count;
@@ -1218,19 +1202,16 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 
        mutex_lock(&intel_pstate_limits_lock);
 
-       limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
-       limits->max_perf_pct = min(limits->max_policy_pct,
-                                  limits->max_sysfs_pct);
-       limits->max_perf_pct = max(limits->min_policy_pct,
-                                  limits->max_perf_pct);
-       limits->max_perf_pct = max(limits->min_perf_pct,
-                                  limits->max_perf_pct);
-       limits->max_perf = percent_ext_fp(limits->max_perf_pct);
-
-       intel_pstate_update_policies();
+       global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
+       global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
+       global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
+       global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
+       global.max_perf = percent_ext_fp(global.max_perf_pct);
 
        mutex_unlock(&intel_pstate_limits_lock);
 
+       intel_pstate_update_policies();
+
        mutex_unlock(&intel_pstate_driver_lock);
 
        return count;
@@ -1255,19 +1236,16 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 
        mutex_lock(&intel_pstate_limits_lock);
 
-       limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
-       limits->min_perf_pct = max(limits->min_policy_pct,
-                                  limits->min_sysfs_pct);
-       limits->min_perf_pct = min(limits->max_policy_pct,
-                                  limits->min_perf_pct);
-       limits->min_perf_pct = min(limits->max_perf_pct,
-                                  limits->min_perf_pct);
-       limits->min_perf = percent_ext_fp(limits->min_perf_pct);
-
-       intel_pstate_update_policies();
+       global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
+       global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
+       global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
+       global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
+       global.min_perf = percent_ext_fp(global.min_perf_pct);
 
        mutex_unlock(&intel_pstate_limits_lock);
 
+       intel_pstate_update_policies();
+
        mutex_unlock(&intel_pstate_driver_lock);
 
        return count;
@@ -1387,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate)
        u32 vid;
 
        val = (u64)pstate << 8;
-       if (limits->no_turbo && !limits->turbo_disabled)
+       if (global.no_turbo && !global.turbo_disabled)
                val |= (u64)1 << 32;
 
        vid_fp = cpudata->vid.min + mul_fp(
@@ -1557,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate)
        u64 val;
 
        val = (u64)pstate << 8;
-       if (limits->no_turbo && !limits->turbo_disabled)
+       if (global.no_turbo && !global.turbo_disabled)
                val |= (u64)1 << 32;
 
        return val;
@@ -1683,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
        int max_perf = cpu->pstate.turbo_pstate;
        int max_perf_adj;
        int min_perf;
-       struct perf_limits *perf_limits = limits;
+       struct perf_limits *perf_limits = &global;
 
-       if (limits->no_turbo || limits->turbo_disabled)
+       if (global.no_turbo || global.turbo_disabled)
                max_perf = cpu->pstate.max_pstate;
 
        if (per_cpu_limits)
@@ -1820,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 
        sample->busy_scaled = busy_frac * 100;
 
-       target = limits->no_turbo || limits->turbo_disabled ?
+       target = global.no_turbo || global.turbo_disabled ?
                        cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
        target += target >> 2;
        target = mul_fp(target, busy_frac);
@@ -2116,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
        struct cpudata *cpu;
-       struct perf_limits *perf_limits = NULL;
+       struct perf_limits *perf_limits = &global;
 
        if (!policy->cpuinfo.max_freq)
                return -ENODEV;
@@ -2139,21 +2117,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
        mutex_lock(&intel_pstate_limits_lock);
 
-       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
-               pr_debug("set performance\n");
-               if (!perf_limits) {
-                       limits = &performance_limits;
-                       perf_limits = limits;
-               }
-       } else {
-               pr_debug("set powersave\n");
-               if (!perf_limits) {
-                       limits = &powersave_limits;
-                       perf_limits = limits;
-               }
-
-       }
-
        intel_pstate_update_perf_limits(policy, perf_limits);
 
        if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
@@ -2177,16 +2140,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 {
        struct cpudata *cpu = all_cpu_data[policy->cpu];
-       struct perf_limits *perf_limits;
-
-       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
-               perf_limits = &performance_limits;
-       else
-               perf_limits = &powersave_limits;
 
        update_turbo_state();
-       policy->cpuinfo.max_freq = perf_limits->turbo_disabled ||
-                                       perf_limits->no_turbo ?
+       policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
                                        cpu->pstate.max_freq :
                                        cpu->pstate.turbo_freq;
 
@@ -2201,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
                unsigned int max_freq, min_freq;
 
                max_freq = policy->cpuinfo.max_freq *
-                                       perf_limits->max_sysfs_pct / 100;
+                                       global.max_sysfs_pct / 100;
                min_freq = policy->cpuinfo.max_freq *
-                                       perf_limits->min_sysfs_pct / 100;
+                                       global.min_sysfs_pct / 100;
                cpufreq_verify_within_limits(policy, min_freq, max_freq);
        }
 
@@ -2255,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
        /* cpuinfo and default policy values */
        policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
        update_turbo_state();
-       policy->cpuinfo.max_freq = limits->turbo_disabled ?
+       policy->cpuinfo.max_freq = global.turbo_disabled ?
                        cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
        policy->cpuinfo.max_freq *= cpu->pstate.scaling;
 
@@ -2275,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
                return ret;
 
        policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-       if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
+       if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
                policy->policy = CPUFREQ_POLICY_PERFORMANCE;
        else
                policy->policy = CPUFREQ_POLICY_POWERSAVE;
@@ -2301,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
        struct cpudata *cpu = all_cpu_data[policy->cpu];
 
        update_turbo_state();
-       policy->cpuinfo.max_freq = limits->turbo_disabled ?
+       policy->cpuinfo.max_freq = global.turbo_disabled ?
                        cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 
        cpufreq_verify_within_cpu_limits(policy);
@@ -2317,7 +2273,7 @@ static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
 
        update_turbo_state();
 
-       max_freq = limits->no_turbo || limits->turbo_disabled ?
+       max_freq = global.no_turbo || global.turbo_disabled ?
                        cpu->pstate.max_freq : cpu->pstate.turbo_freq;
        policy->cpuinfo.max_freq = max_freq;
        if (policy->max > max_freq)
@@ -2425,13 +2381,7 @@ static int intel_pstate_register_driver(void)
 {
        int ret;
 
-       intel_pstate_init_limits(&powersave_limits);
-       intel_pstate_set_performance_limits(&performance_limits);
-       if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) &&
-           intel_pstate_driver == &intel_pstate)
-               limits = &performance_limits;
-       else
-               limits = &powersave_limits;
+       intel_pstate_init_limits(&global);
 
        ret = cpufreq_register_driver(intel_pstate_driver);
        if (ret) {