kvm: Add a tracepoint write_tsc_offset
author: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
Wed, 12 Jun 2013 07:43:44 +0000 (16:43 +0900)
committer: Gleb Natapov <gleb@redhat.com>
Thu, 27 Jun 2013 11:20:51 +0000 (14:20 +0300)
Add a tracepoint write_tsc_offset for tracing TSC offset change.
We want to merge ftrace's trace data of guest OSs and the host OS in
chronological order, using the TSC for timestamps. To merge them, we need the
"TSC offset" value of each guest, because the TSC value on a guest is always
the host TSC plus the guest's TSC offset. Given the TSC offset, we can
calculate the host TSC value of each guest event from the offset and the
event's TSC value. These host TSC values are then used to merge the trace data
of the guests and the host in chronological order.
(Note: the trace_clock of both the host and the guest must be set to x86-tsc
in this case.)

This tracepoint also records vcpu_id, which can be used to merge trace data for
SMP guests. A merge tool will read the TSC offset for each vcpu and then
convert guest TSC values to host TSC values for each vcpu.

The TSC offset is stored in the VMCS by vmx_write_tsc_offset() or
vmx_adjust_tsc_offset(). KVM executes the former function when a guest boots.
The latter function is executed when the kvm clock is updated. Only the host
can read the TSC offset value from the VMCS, so the host needs to output the
TSC offset value whenever the TSC offset is changed.

Since the TSC offset does not change often, its event could be overwritten by
other, more frequent events while tracing. To avoid that, I recommend using a
dedicated instance for capturing this event:

1. Set up an instance before booting a guest
 # cd /sys/kernel/debug/tracing/instances
 # mkdir tsc_offset
 # cd tsc_offset
 # echo x86-tsc > trace_clock
 # echo 1 > events/kvm/kvm_write_tsc_offset/enable

2. boot a guest

Signed-off-by: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
arch/x86/kvm/svm.c
arch/x86/kvm/trace.h
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c

index a14a6eaf871d9ea312d6dbcdd1bccb5a3c9846ac..c0bc80391e40a9bd1dfbf97243160ddcef61f145 100644 (file)
@@ -1026,7 +1026,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
                g_tsc_offset = svm->vmcb->control.tsc_offset -
                               svm->nested.hsave->control.tsc_offset;
                svm->nested.hsave->control.tsc_offset = offset;
-       }
+       } else
+               trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+                                          svm->vmcb->control.tsc_offset,
+                                          offset);
 
        svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
 
@@ -1044,6 +1047,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
        svm->vmcb->control.tsc_offset += adjustment;
        if (is_guest_mode(vcpu))
                svm->nested.hsave->control.tsc_offset += adjustment;
+       else
+               trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+                                    svm->vmcb->control.tsc_offset - adjustment,
+                                    svm->vmcb->control.tsc_offset);
+
        mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
index fe5e00ed70369c129442a61df519bfe1823f6112..545245d7cc63f3128879724a1fc2811c31bdf55c 100644 (file)
@@ -756,6 +756,27 @@ TRACE_EVENT(
                  __entry->gpa_match ? "GPA" : "GVA")
 );
 
+TRACE_EVENT(kvm_write_tsc_offset,
+       TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset,
+                __u64 next_tsc_offset),
+       TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset),
+
+       TP_STRUCT__entry(
+               __field( unsigned int,  vcpu_id                         )
+               __field(        __u64,  previous_tsc_offset             )
+               __field(        __u64,  next_tsc_offset                 )
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id                = vcpu_id;
+               __entry->previous_tsc_offset    = previous_tsc_offset;
+               __entry->next_tsc_offset        = next_tsc_offset;
+       ),
+
+       TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id,
+                 __entry->previous_tsc_offset, __entry->next_tsc_offset)
+);
+
 #ifdef CONFIG_X86_64
 
 #define host_clocks                                    \
index f4a5b3f552fa83b04fa556f888efea99a5ea9eeb..036e8636f68591efdad24a32395382d0000ba99b 100644 (file)
@@ -2096,6 +2096,8 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
                        (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
                         vmcs12->tsc_offset : 0));
        } else {
+               trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+                                          vmcs_read64(TSC_OFFSET), offset);
                vmcs_write64(TSC_OFFSET, offset);
        }
 }
@@ -2103,11 +2105,14 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
 {
        u64 offset = vmcs_read64(TSC_OFFSET);
+
        vmcs_write64(TSC_OFFSET, offset + adjustment);
        if (is_guest_mode(vcpu)) {
                /* Even when running L2, the adjustment needs to apply to L1 */
                to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
-       }
+       } else
+               trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
+                                          offset + adjustment);
 }
 
 static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
index aac5ffcc8f8d9aa6a13e79f61c51cc27065c1cb2..7d71c0fb11deb801b933101785bb320a0e0a8d24 100644 (file)
@@ -7303,3 +7303,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);