x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID
author		Len Brown <len.brown@intel.com>	Fri, 17 Jun 2016 05:22:51 +0000 (01:22 -0400)
committer	Ingo Molnar <mingo@kernel.org>	Mon, 11 Jul 2016 19:30:13 +0000 (21:30 +0200)
The Skylake CPU base frequency and TSC frequency may differ
by up to 2%.

Enumerate CPU and TSC frequencies separately, allowing
cpu_khz and tsc_khz to differ.

The existing CPU frequency calibration mechanism is unchanged.
However, the CPUID extensions are preferred when they are available.

CPUID.0x16 is preferred over MSR and timer calibration
for CPU frequency discovery.

CPUID.0x15 takes precedence over the CPU frequency
for TSC frequency discovery.
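
For illustration only (not part of this patch), a minimal user-space
sketch of the same CPUID arithmetic, assuming a GCC/Clang toolchain
that provides <cpuid.h> and an Intel CPU that enumerates leaves
0x15/0x16; the kernel code in the diff below is authoritative and
additionally hard-codes a 24 MHz crystal for Skylake models that
report ECX=0 in leaf 0x15:

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0x15: EBX/EAX = TSC/crystal ratio, ECX = crystal Hz (0 if not enumerated) */
	if (__get_cpuid(0x15, &eax, &ebx, &ecx, &edx) && eax && ebx && ecx)
		printf("tsc_khz: %u\n", (ecx / 1000) * ebx / eax);

	/* Leaf 0x16: EAX = CPU base frequency in MHz */
	if (__get_cpuid(0x16, &eax, &ebx, &ecx, &edx) && eax)
		printf("cpu_khz: %u\n", eax * 1000);

	return 0;
  }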

Signed-off-by: Len Brown <len.brown@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/b27ec289fd005833b27d694d9c2dbb716c5cdff7.1466138954.git.len.brown@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/tsc.h
arch/x86/include/asm/x86_init.h
arch/x86/kernel/tsc.c
arch/x86/kernel/x86_init.c

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index db1f779a37663b9f45791cd68222871daeafd753..a30591e1567c87d1829ba2a22ca557960ecbc939 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -36,6 +36,7 @@ extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
 extern int check_tsc_disabled(void);
+extern unsigned long native_calibrate_cpu(void);
 extern unsigned long native_calibrate_tsc(void);
 extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 4dcdf74dfed8606ebd0a948bd733a4d721c0d1b8..08a08a800e17315a8a1898e911265744fd7732cf 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -181,7 +181,8 @@ struct x86_legacy_features {
 
 /**
  * struct x86_platform_ops - platform specific runtime functions
- * @calibrate_tsc:             calibrate TSC
+ * @calibrate_cpu:             calibrate CPU
+ * @calibrate_tsc:             calibrate TSC, if different from CPU
  * @get_wallclock:             get time from HW clock like RTC etc.
  * @set_wallclock:             set time back to HW clock
  * @is_untracked_pat_range     exclude from PAT logic
@@ -200,6 +201,7 @@ struct x86_legacy_features {
  *                             semantics.
  */
 struct x86_platform_ops {
+       unsigned long (*calibrate_cpu)(void);
        unsigned long (*calibrate_tsc)(void);
        void (*get_wallclock)(struct timespec *ts);
        int (*set_wallclock)(const struct timespec *ts);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 35a3976c19cc5e5fd0ac91686d40f8a8bb5efd9c..e1496b79c28ac874713fb6a2ea51a434ad42d026 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -239,7 +239,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
        return ns;
 }
 
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+static void set_cyc2ns_scale(unsigned long khz, int cpu)
 {
        unsigned long long tsc_now, ns_now;
        struct cyc2ns_data *data;
@@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
        local_irq_save(flags);
        sched_clock_idle_sleep_event();
 
-       if (!cpu_khz)
+       if (!khz)
                goto done;
 
        data = cyc2ns_write_begin(cpu);
@@ -261,7 +261,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
         * time function is continuous; see the comment near struct
         * cyc2ns_data.
         */
-       clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz,
+       clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
                               NSEC_PER_MSEC, 0);
 
        /*
@@ -665,15 +665,72 @@ success:
 }
 
 /**
- * native_calibrate_tsc - calibrate the tsc on boot
+ * native_calibrate_tsc
+ * Determine TSC frequency via CPUID, else return 0.
  */
 unsigned long native_calibrate_tsc(void)
+{
+       unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
+       unsigned int crystal_khz;
+
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return 0;
+
+       if (boot_cpu_data.cpuid_level < 0x15)
+               return 0;
+
+       eax_denominator = ebx_numerator = ecx_hz = edx = 0;
+
+       /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
+       cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
+
+       if (ebx_numerator == 0 || eax_denominator == 0)
+               return 0;
+
+       crystal_khz = ecx_hz / 1000;
+
+       if (crystal_khz == 0) {
+               switch (boot_cpu_data.x86_model) {
+               case 0x4E:      /* SKL */
+               case 0x5E:      /* SKL */
+                       crystal_khz = 24000;    /* 24 MHz */
+               }
+       }
+
+       return crystal_khz * ebx_numerator / eax_denominator;
+}
+
+static unsigned long cpu_khz_from_cpuid(void)
+{
+       unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;
+
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return 0;
+
+       if (boot_cpu_data.cpuid_level < 0x16)
+               return 0;
+
+       eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;
+
+       cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);
+
+       return eax_base_mhz * 1000;
+}
+
+/**
+ * native_calibrate_cpu - calibrate the cpu on boot
+ */
+unsigned long native_calibrate_cpu(void)
 {
        u64 tsc1, tsc2, delta, ref1, ref2;
        unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
        unsigned long flags, latch, ms, fast_calibrate;
        int hpet = is_hpet_enabled(), i, loopmin;
 
+       fast_calibrate = cpu_khz_from_cpuid();
+       if (fast_calibrate)
+               return fast_calibrate;
+
        fast_calibrate = cpu_khz_from_msr();
        if (fast_calibrate)
                return fast_calibrate;
@@ -834,8 +891,10 @@ int recalibrate_cpu_khz(void)
        if (!boot_cpu_has(X86_FEATURE_TSC))
                return -ENODEV;
 
+       cpu_khz = x86_platform.calibrate_cpu();
        tsc_khz = x86_platform.calibrate_tsc();
-       cpu_khz = tsc_khz;
+       if (tsc_khz == 0)
+               tsc_khz = cpu_khz;
        cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
                                                    cpu_khz_old, cpu_khz);
 
@@ -1241,8 +1300,10 @@ void __init tsc_init(void)
                return;
        }
 
+       cpu_khz = x86_platform.calibrate_cpu();
        tsc_khz = x86_platform.calibrate_tsc();
-       cpu_khz = tsc_khz;
+       if (tsc_khz == 0)
+               tsc_khz = cpu_khz;
 
        if (!tsc_khz) {
                mark_tsc_unstable("could not calculate TSC khz");
@@ -1262,7 +1323,7 @@ void __init tsc_init(void)
         */
        for_each_possible_cpu(cpu) {
                cyc2ns_init(cpu);
-               set_cyc2ns_scale(cpu_khz, cpu);
+               set_cyc2ns_scale(tsc_khz, cpu);
        }
 
        if (tsc_disabled > 0)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index dad5fe9633a37e03215892c60386a779745fdb61..58b459296e13d007655e3ba8854d11832a535154 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -92,6 +92,7 @@ static void default_nmi_init(void) { };
 static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform = {
+       .calibrate_cpu                  = native_calibrate_cpu,
        .calibrate_tsc                  = native_calibrate_tsc,
        .get_wallclock                  = mach_get_cmos_time,
        .set_wallclock                  = mach_set_rtc_mmss,