KVM: x86: Make cpu_tsc_khz updates use local CPU

author Zachary Amsden <zamsden@redhat.com>
Fri, 20 Aug 2010 08:07:21 +0000 (22:07 -1000)
committer Avi Kivity <avi@redhat.com>
Sun, 24 Oct 2010 08:51:22 +0000 (10:51 +0200)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7da14c317e6571930cec64e8e6af4853d820913..699c6b89c1b48ba7d911d20f2513a868a21313fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -895,6 +895,15 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
  
  static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
  
+static inline int kvm_tsc_changes_freq(void)
+{
+       int cpu = get_cpu();
+       int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+                 cpufreq_quick_get(cpu) != 0;
+       put_cpu();
+       return ret;
+}
+
  void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
  {
         struct kvm *kvm = vcpu->kvm;
@@ -940,7 +949,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
  }
  EXPORT_SYMBOL_GPL(kvm_write_tsc);
  
-static void kvm_write_guest_time(struct kvm_vcpu *v)
+static int kvm_write_guest_time(struct kvm_vcpu *v)
  {
         struct timespec ts;
         unsigned long flags;
@@ -949,24 +958,27 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
         unsigned long this_tsc_khz;
  
         if ((!vcpu->time_page))
-               return;
-
-       this_tsc_khz = get_cpu_var(cpu_tsc_khz);
-       if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
-               kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
-               vcpu->hv_clock_tsc_khz = this_tsc_khz;
-       }
-       put_cpu_var(cpu_tsc_khz);
+               return 0;
  
         /* Keep irq disabled to prevent changes to the clock */
         local_irq_save(flags);
         kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
         ktime_get_ts(&ts);
         monotonic_to_bootbased(&ts);
+       this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
         local_irq_restore(flags);
  
-       /* With all the info we got, fill in the values */
+       if (unlikely(this_tsc_khz == 0)) {
+               kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
+               return 1;
+       }
  
+       if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
+               kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
+               vcpu->hv_clock_tsc_khz = this_tsc_khz;
+       }
+
+       /* With all the info we got, fill in the values */
         vcpu->hv_clock.system_time = ts.tv_nsec +
                                      (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
  
@@ -987,6 +999,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
         kunmap_atomic(shared_kaddr, KM_USER0);
  
         mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+       return 0;
  }
  
  static int kvm_request_guest_time_update(struct kvm_vcpu *v)
@@ -1853,12 +1866,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
         }
  
         kvm_x86_ops->vcpu_load(vcpu, cpu);
-       if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
-               unsigned long khz = cpufreq_quick_get(cpu);
-               if (!khz)
-                       khz = tsc_khz;
-               per_cpu(cpu_tsc_khz, cpu) = khz;
-       }
         kvm_request_guest_time_update(vcpu);
  }
  
@@ -4152,9 +4159,23 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
  }
  EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
  
-static void bounce_off(void *info)
+static void tsc_bad(void *info)
+{
+       __get_cpu_var(cpu_tsc_khz) = 0;
+}
+
+static void tsc_khz_changed(void *data)
  {
-       /* nothing */
+       struct cpufreq_freqs *freq = data;
+       unsigned long khz = 0;
+
+       if (data)
+               khz = freq->new;
+       else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+               khz = cpufreq_quick_get(raw_smp_processor_id());
+       if (!khz)
+               khz = tsc_khz;
+       __get_cpu_var(cpu_tsc_khz) = khz;
  }
  
  static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
@@ -4165,11 +4186,51 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
         struct kvm_vcpu *vcpu;
         int i, send_ipi = 0;
  
+       /*
+        * We allow guests to temporarily run on slowing clocks,
+        * provided we notify them after, or to run on accelerating
+        * clocks, provided we notify them before.  Thus time never
+        * goes backwards.
+        *
+        * However, we have a problem.  We can't atomically update
+        * the frequency of a given CPU from this function; it is
+        * merely a notifier, which can be called from any CPU.
+        * Changing the TSC frequency at arbitrary points in time
+        * requires a recomputation of local variables related to
+        * the TSC for each VCPU.  We must flag these local variables
+        * to be updated and be sure the update takes place with the
+        * new frequency before any guests proceed.
+        *
+        * Unfortunately, the combination of hotplug CPU and frequency
+        * change creates an intractable locking scenario; the order
+        * of when these callouts happen is undefined with respect to
+        * CPU hotplug, and they can race with each other.  As such,
+        * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is
+        * undefined; you can actually have a CPU frequency change take
+        * place in between the computation of X and the setting of the
+        * variable.  To protect against this problem, all updates of
+        * the per_cpu tsc_khz variable are done in an interrupt
+        * protected IPI, and all callers wishing to update the value
+        * must wait for a synchronous IPI to complete (which is trivial
+        * if the caller is on the CPU already).  This establishes the
+        * necessary total order on variable updates.
+        *
+        * Note that because a guest time update may take place
+        * anytime after the setting of the VCPU's request bit, the
+        * correct TSC value must be set before the request.  However,
+        * to ensure the update actually makes it to any guest which
+        * starts running in hardware virtualization between the set
+        * and the acquisition of the spinlock, we must also ping the
+        * CPU after setting the request bit.
+        *
+        */
+
         if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
                 return 0;
         if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
                 return 0;
-       per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
+
+       smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
  
         spin_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -4179,7 +4240,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                         if (!kvm_request_guest_time_update(vcpu))
                                 continue;
                         if (vcpu->cpu != smp_processor_id())
-                               send_ipi++;
+                               send_ipi = 1;
                 }
         }
         spin_unlock(&kvm_lock);
@@ -4197,32 +4258,48 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                  * guest context is entered kvmclock will be updated,
                  * so the guest will not see stale values.
                  */
-               smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+               smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
         }
         return 0;
  }
  
  static struct notifier_block kvmclock_cpufreq_notifier_block = {
-        .notifier_call  = kvmclock_cpufreq_notifier
+       .notifier_call  = kvmclock_cpufreq_notifier
+};
+
+static int kvmclock_cpu_notifier(struct notifier_block *nfb,
+                                       unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (unsigned long)hcpu;
+
+       switch (action) {
+               case CPU_ONLINE:
+               case CPU_DOWN_FAILED:
+                       smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
+                       break;
+               case CPU_DOWN_PREPARE:
+                       smp_call_function_single(cpu, tsc_bad, NULL, 1);
+                       break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block kvmclock_cpu_notifier_block = {
+       .notifier_call  = kvmclock_cpu_notifier,
+       .priority = -INT_MAX
  };
  
  static void kvm_timer_init(void)
  {
         int cpu;
  
+       register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
         if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
                 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
                                           CPUFREQ_TRANSITION_NOTIFIER);
-               for_each_online_cpu(cpu) {
-                       unsigned long khz = cpufreq_get(cpu);
-                       if (!khz)
-                               khz = tsc_khz;
-                       per_cpu(cpu_tsc_khz, cpu) = khz;
-               }
-       } else {
-               for_each_possible_cpu(cpu)
-                       per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
         }
+       for_each_online_cpu(cpu)
+               smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
  }
  
  static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -4324,6 +4401,7 @@ void kvm_arch_exit(void)
         if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
                                             CPUFREQ_TRANSITION_NOTIFIER);
+       unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
         kvm_x86_ops = NULL;
         kvm_mmu_module_exit();
  }
@@ -4739,8 +4817,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                         kvm_mmu_unload(vcpu);
                 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
                         __kvm_migrate_timers(vcpu);
-               if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
-                       kvm_write_guest_time(vcpu);
+               if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) {
+                       r = kvm_write_guest_time(vcpu);
+                       if (unlikely(r))
+                               goto out;
+               }
                 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
                         kvm_mmu_sync_roots(vcpu);
                 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
@@ -5423,17 +5504,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
  
  int kvm_arch_hardware_enable(void *garbage)
  {
-       /*
-        * Since this may be called from a hotplug notifcation,
-        * we can't get the CPU frequency directly.
-        */
-       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-               int cpu = raw_smp_processor_id();
-               per_cpu(cpu_tsc_khz, cpu) = 0;
-       }
-
         kvm_shared_msr_cpu_online();
-
         return kvm_x86_ops->hardware_enable(garbage);
  }
author	Zachary Amsden <zamsden@redhat.com>
	Fri, 20 Aug 2010 08:07:21 +0000 (22:07 -1000)
committer	Avi Kivity <avi@redhat.com>
	Sun, 24 Oct 2010 08:51:22 +0000 (10:51 +0200)