KVM: x86: require matched TSC offsets for master clock
author     Marcelo Tosatti <mtosatti@redhat.com>
           Wed, 28 Nov 2012 01:29:03 +0000 (23:29 -0200)
committer  Marcelo Tosatti <mtosatti@redhat.com>
           Wed, 28 Nov 2012 01:29:15 +0000 (23:29 -0200)
With the master clock, a pvclock read calculates:

ret = system_timestamp + [ (rdtsc + tsc_offset) - tsc_timestamp ]

Where 'rdtsc' is the host TSC.

system_timestamp and tsc_timestamp form a single tuple
per VM: the "master clock".

Given a host with synchronized TSCs, it's obvious that the guest TSC
offsets must be matched for the above to guarantee monotonicity.
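
For illustration only (not part of the patch): a minimal userspace C sketch of
the read formula above, showing how an unmatched per-vCPU tsc_offset lets a
later read return an earlier time. The helper name and all values are invented
for the example.

#include <stdint.h>
#include <stdio.h>

/* ret = system_timestamp + [ (rdtsc + tsc_offset) - tsc_timestamp ] */
static uint64_t pvclock_read(uint64_t host_tsc, uint64_t tsc_offset,
                             uint64_t system_timestamp, uint64_t tsc_timestamp)
{
        return system_timestamp + ((host_tsc + tsc_offset) - tsc_timestamp);
}

int main(void)
{
        uint64_t system_timestamp = 1000, tsc_timestamp = 5000;

        /* vcpu0 reads at host TSC 6000 with offset 500; vcpu1 reads
         * later, at host TSC 6100, but with an unmatched offset of 0. */
        uint64_t t0 = pvclock_read(6000, 500, system_timestamp, tsc_timestamp);
        uint64_t t1 = pvclock_read(6100,   0, system_timestamp, tsc_timestamp);

        /* Prints 2500 then 2100: the later read went backwards. */
        printf("vcpu0 %llu vcpu1 %llu\n",
               (unsigned long long)t0, (unsigned long long)t1);
        return 0;
}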

Allow master clock usage only if guest TSCs are synchronized.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/trace.h
arch/x86/kvm/x86.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 32f0e4a063b7dc835975b2e6f17513b63628ce7e..9fb6d8da7a435be70afce19211cc4b8c9a33320b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -561,6 +561,7 @@ struct kvm_arch {
        u64 cur_tsc_write;
        u64 cur_tsc_offset;
        u8  cur_tsc_generation;
+       int nr_vcpus_matched_tsc;
 
        spinlock_t pvclock_gtod_sync_lock;
        bool use_master_clock;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 1d6526856080f9f5902727c1ccadde363c2a340e..fe5e00ed70369c129442a61df519bfe1823f6112 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -764,21 +764,54 @@ TRACE_EVENT(
        {VCLOCK_HPET, "hpet"}                           \
 
 TRACE_EVENT(kvm_update_master_clock,
-       TP_PROTO(bool use_master_clock, unsigned int host_clock),
-       TP_ARGS(use_master_clock, host_clock),
+       TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
+       TP_ARGS(use_master_clock, host_clock, offset_matched),
 
        TP_STRUCT__entry(
                __field(                bool,   use_master_clock        )
                __field(        unsigned int,   host_clock              )
+               __field(                bool,   offset_matched          )
        ),
 
        TP_fast_assign(
                __entry->use_master_clock       = use_master_clock;
                __entry->host_clock             = host_clock;
+               __entry->offset_matched         = offset_matched;
        ),
 
-       TP_printk("masterclock %d hostclock %s",
+       TP_printk("masterclock %d hostclock %s offsetmatched %u",
                  __entry->use_master_clock,
+                 __print_symbolic(__entry->host_clock, host_clocks),
+                 __entry->offset_matched)
+);
+
+TRACE_EVENT(kvm_track_tsc,
+       TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched,
+                unsigned int online_vcpus, bool use_master_clock,
+                unsigned int host_clock),
+       TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock,
+               host_clock),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   vcpu_id                 )
+               __field(        unsigned int,   nr_vcpus_matched_tsc    )
+               __field(        unsigned int,   online_vcpus            )
+               __field(        bool,           use_master_clock        )
+               __field(        unsigned int,   host_clock              )
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id                = vcpu_id;
+               __entry->nr_vcpus_matched_tsc   = nr_matched;
+               __entry->online_vcpus           = online_vcpus;
+               __entry->use_master_clock       = use_master_clock;
+               __entry->host_clock             = host_clock;
+       ),
+
+       TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u"
+                 " hostclock %s",
+                 __entry->vcpu_id, __entry->use_master_clock,
+                 __entry->nr_vcpus_matched_tsc, __entry->online_vcpus,
                  __print_symbolic(__entry->host_clock, host_clocks))
 );
 
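For reference, with the TP_printk format above, a kvm_track_tsc event would
render roughly as follows (field values invented for illustration):

    kvm_track_tsc: vcpu_id 1 masterclock 0 offsetmatched 1 nr_online 2 hostclock tsc
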
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f3c069efc72a1f732ef599880d923236297ef111..422ef5ed219443c8ebd61ec6638163fca4c4a644 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1103,12 +1103,40 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
        return tsc;
 }
 
+void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+       bool vcpus_matched;
+       bool do_request = false;
+       struct kvm_arch *ka = &vcpu->kvm->arch;
+       struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+       vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+                        atomic_read(&vcpu->kvm->online_vcpus));
+
+       if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
+               if (!ka->use_master_clock)
+                       do_request = 1;
+
+       if (!vcpus_matched && ka->use_master_clock)
+                       do_request = 1;
+
+       if (do_request)
+               kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+
+       trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
+                           atomic_read(&vcpu->kvm->online_vcpus),
+                           ka->use_master_clock, gtod->clock.vclock_mode);
+#endif
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
        struct kvm *kvm = vcpu->kvm;
        u64 offset, ns, elapsed;
        unsigned long flags;
        s64 usdiff;
+       bool matched;
 
        raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
        offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
@@ -1151,6 +1179,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
                        offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
                        pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
                }
+               matched = true;
        } else {
                /*
                 * We split periods of matched TSC writes into generations.
@@ -1165,6 +1194,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
                kvm->arch.cur_tsc_nsec = ns;
                kvm->arch.cur_tsc_write = data;
                kvm->arch.cur_tsc_offset = offset;
+               matched = false;
                pr_debug("kvm: new tsc generation %u, clock %llu\n",
                         kvm->arch.cur_tsc_generation, data);
        }
@@ -1188,6 +1218,15 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 
        kvm_x86_ops->write_tsc_offset(vcpu, offset);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+
+       spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+       if (matched)
+               kvm->arch.nr_vcpus_matched_tsc++;
+       else
+               kvm->arch.nr_vcpus_matched_tsc = 0;
+
+       kvm_track_tsc_matching(vcpu);
+       spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
 }
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
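
Illustrative note, not part of the patch: nr_vcpus_matched_tsc counts only
writes that matched the current TSC generation, so the vCPU that started the
generation is never counted; that is where the "+ 1" in the matching check
comes from. A minimal sketch of that predicate, with an invented helper name:

/* Sketch only: the "all guest TSC offsets matched" check used by
 * kvm_track_tsc_matching() and pvclock_update_vm_gtod_copy(). */
static inline bool all_tsc_offsets_matched(unsigned int nr_vcpus_matched_tsc,
                                           unsigned int online_vcpus)
{
        /* e.g. a 4-vCPU guest: the first write starts a generation
         * (counter reset to 0), the next three matching writes bump it
         * to 3, and 3 + 1 == 4 allows the master clock. */
        return nr_vcpus_matched_tsc + 1 == online_vcpus;
}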
@@ -1279,8 +1318,9 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
 
 /*
  *
- * Assuming a stable TSC across physical CPUS, the following condition
- * is possible. Each numbered line represents an event visible to both
+ * Assuming a stable TSC across physical CPUS, and a stable TSC
+ * across virtual CPUs, the following condition is possible.
+ * Each numbered line represents an event visible to both
  * CPUs at the next numbered event.
  *
  * "timespecX" represents host monotonic time. "tscX" represents
@@ -1313,7 +1353,7 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
  * copy of host monotonic time values. Update that master copy
  * in lockstep.
  *
- * Rely on synchronization of host TSCs for monotonicity.
+ * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
  *
  */
 
@@ -1322,20 +1362,27 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 #ifdef CONFIG_X86_64
        struct kvm_arch *ka = &kvm->arch;
        int vclock_mode;
+       bool host_tsc_clocksource, vcpus_matched;
+
+       vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+                       atomic_read(&kvm->online_vcpus));
 
        /*
         * If the host uses TSC clock, then passthrough TSC as stable
         * to the guest.
         */
-       ka->use_master_clock = kvm_get_time_and_clockread(
+       host_tsc_clocksource = kvm_get_time_and_clockread(
                                        &ka->master_kernel_ns,
                                        &ka->master_cycle_now);
 
+       ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
+
        if (ka->use_master_clock)
                atomic_set(&kvm_guest_has_master_clock, 1);
 
        vclock_mode = pvclock_gtod_data.clock.vclock_mode;
-       trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode);
+       trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
+                                       vcpus_matched);
 #endif
 }