The initialization of intel_pstate for a given CPU involves populating
the fields of its struct cpudata that represent the previous sample,
but currently that is done in a problematic way.
Namely, intel_pstate_init_cpu() makes an extra call to
intel_pstate_sample() so it reads the current register values that
will be used to populate the "previous sample" record during the
next invocation of intel_pstate_sample(). However, after commit
a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization
update callbacks) that doesn't work for last_sample_time, because
the time value is passed to intel_pstate_sample() as an argument now.
Passing 0 to it from intel_pstate_init_cpu() is problematic, because
that causes cpu->last_sample_time == 0 to be visible in
get_target_pstate_use_performance() (and hence the extra
cpu->last_sample_time > 0 check in there) and effectively allows
the first invocation of intel_pstate_sample() from
intel_pstate_update_util() to happen immediately after the
initialization which may lead to a significant "turn on"
effect in the governor algorithm.
To mitigate that issue, rework the initialization to avoid the
extra intel_pstate_sample() call from intel_pstate_init_cpu().
Instead, make intel_pstate_sample() return false if it has been
called with cpu->sample.time equal to zero, which will make
intel_pstate_update_util() skip the sample in that case, and
reset cpu->sample.time from intel_pstate_set_update_util_hook()
to make the algorithm start properly every time the hook is set.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
cpu->prev_aperf = aperf;
cpu->prev_mperf = mperf;
cpu->prev_tsc = tsc;
- return true;
+ /*
+ * First time this function is invoked in a given cycle, all of the
+ * previous sample data fields are equal to zero or stale and they must
+ * be populated with meaningful numbers for things to work, so assume
+ * that sample.time will always be reset before setting the utilization
+ * update hook and make the caller skip the sample then.
+ */
+ return !!cpu->last_sample_time;
}
static inline int32_t get_avg_frequency(struct cpudata *cpu)
* enough period of time to adjust our busyness.
*/
duration_ns = cpu->sample.time - cpu->last_sample_time;
- if ((s64)duration_ns > pid_params.sample_rate_ns * 3
- && cpu->last_sample_time > 0) {
+ if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
int_tofp(duration_ns));
core_busy = mul_fp(core_busy, sample_ratio);
intel_pstate_get_cpu_pstates(cpu);
intel_pstate_busy_pid_reset(cpu);
- intel_pstate_sample(cpu, 0);
cpu->update_util.func = intel_pstate_update_util;
return get_avg_frequency(cpu);
}
-static void intel_pstate_set_update_util_hook(unsigned int cpu)
+static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
{
- cpufreq_set_update_util_data(cpu, &all_cpu_data[cpu]->update_util);
+ struct cpudata *cpu = all_cpu_data[cpu_num];
+
+ /* Prevent intel_pstate_update_util() from using stale data. */
+ cpu->sample.time = 0;
+ cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
}
static void intel_pstate_clear_update_util_hook(unsigned int cpu)