KVM: Add kvm trace userspace interface
author      Feng (Eric) Liu <eric.e.liu@intel.com>
            Thu, 10 Apr 2008 12:47:53 +0000 (08:47 -0400)
committer   Avi Kivity <avi@qumranet.com>
            Sun, 27 Apr 2008 09:01:22 +0000 (12:01 +0300)
This interface allows a userspace application to read a trace of kvm-related
events through relayfs (see the usage sketch after the file list below).

Signed-off-by: Feng (Eric) Liu <eric.e.liu@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
arch/x86/kvm/Kconfig
arch/x86/kvm/Makefile
include/linux/kvm_host.h
virt/kvm/kvm_main.c
virt/kvm/kvm_trace.c [new file with mode: 0644]
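For reference, a minimal userspace sketch of driving this interface. The
KVM_TRACE_ENABLE/PAUSE/DISABLE ioctls and struct kvm_user_trace_setup come from
the <linux/kvm.h> side of this series (not in this diff); the debugfs path and
the per-cpu file name "trace0" are assumptions based on the
relay_open("trace", debugfs_dir, ...) call below.

/*
 * Hypothetical userspace sketch.  Ioctl numbers and struct layout come from
 * the matching <linux/kvm.h> update, which is not part of this diff; the
 * debugfs mount point and the per-cpu file name are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_user_trace_setup kuts;
	char buf[65536];
	int kvm_fd, trace_fd;
	ssize_t n;

	kvm_fd = open("/dev/kvm", O_RDWR);
	if (kvm_fd < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* arbitrary example sizing: 8 sub-buffers of 4 KiB per cpu */
	memset(&kuts, 0, sizeof(kuts));
	kuts.buf_size = 4096;
	kuts.buf_nr   = 8;
	if (ioctl(kvm_fd, KVM_TRACE_ENABLE, &kuts) < 0) {
		perror("KVM_TRACE_ENABLE");
		return 1;
	}

	/* relay_open("trace", ...) creates one file per cpu under the kvm
	 * debugfs directory; read cpu 0's stream, assuming debugfs is
	 * mounted at /sys/kernel/debug. */
	trace_fd = open("/sys/kernel/debug/kvm/trace0", O_RDONLY);
	if (trace_fd >= 0) {
		n = read(trace_fd, buf, sizeof(buf));	/* raw kvm_trace_rec records */
		if (n > 0)
			fwrite(buf, 1, n, stdout);
		close(trace_fd);
	}

	ioctl(kvm_fd, KVM_TRACE_DISABLE, 0);
	close(kvm_fd);
	return 0;
}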

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 76c70ab44382a9441af17848fdfcd6ff1d9f44ce..8d45fabc5f3baf556b1da2558a42df183ab09e80 100644
@@ -50,6 +50,17 @@ config KVM_AMD
          Provides support for KVM on AMD processors equipped with the AMD-V
          (SVM) extensions.
 
+config KVM_TRACE
+       bool "KVM trace support"
+       depends on KVM && MARKERS && SYSFS
+       select RELAY
+       select DEBUG_FS
+       default n
+       ---help---
+         This option allows reading a trace of kvm-related events through
+         relayfs.  Note the ABI is not considered stable and will be
+         modified in future updates.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/lguest/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4d0c22e11f1abfd1276cf6d0c43896985d04ce62..c97d35c218dbe0ad4827776ff30c20e13d107852 100644
@@ -3,6 +3,9 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+ifeq ($(CONFIG_KVM_TRACE),y)
+common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
+endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 578c3638bbbaf695bce347c68e1b1aed12432990..bd0c2d2d840f38e4cb63cd27fe867df159ece047 100644
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/preempt.h>
+#include <linux/marker.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -309,5 +310,18 @@ struct kvm_stats_debugfs_item {
        struct dentry *dentry;
 };
 extern struct kvm_stats_debugfs_item debugfs_entries[];
+extern struct dentry *debugfs_dir;
+
+#ifdef CONFIG_KVM_TRACE
+int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg);
+void kvm_trace_cleanup(void);
+#else
+static inline
+int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
+{
+       return -EINVAL;
+}
+#define kvm_trace_cleanup() ((void)0)
+#endif
 
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6a52c084e0680ec5c0487ad3352b464cf9daab8f..d5911d9895c3d6867d8e289dc520b8a6a6a52c62 100644
@@ -60,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
 
 static __read_mostly struct preempt_ops kvm_preempt_ops;
 
-static struct dentry *debugfs_dir;
+struct dentry *debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                           unsigned long arg);
@@ -1191,6 +1191,11 @@ static long kvm_dev_ioctl(struct file *filp,
                r += PAGE_SIZE;    /* pio data page */
 #endif
                break;
+       case KVM_TRACE_ENABLE:
+       case KVM_TRACE_PAUSE:
+       case KVM_TRACE_DISABLE:
+               r = kvm_trace_ioctl(ioctl, arg);
+               break;
        default:
                return kvm_arch_dev_ioctl(filp, ioctl, arg);
        }
@@ -1519,6 +1524,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
+       kvm_trace_cleanup();
        misc_deregister(&kvm_dev);
        kmem_cache_destroy(kvm_vcpu_cache);
        sysdev_unregister(&kvm_sysdev);
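kvm_trace.c below relies on a setup structure and a record layout that live on
the include/linux/kvm.h side of this series and are not part of this diff. For
orientation, they look roughly like the sketch that follows; the exact bit
widths and the KVM_TRC_* size constants are assumptions inferred from how
calc_rec_size() and kvm_add_trace() use them, so the real header is
authoritative.

/*
 * Rough sketch of the uapi definitions kvm_trace.c depends on; the
 * authoritative versions live in include/linux/kvm.h (not in this diff).
 */

/* Assumed sizes: 3 leading __u32s, a 2-__u32 cycle stamp, a 3-bit count. */
#define KVM_TRC_HEAD_SIZE	12
#define KVM_TRC_CYCLE_SIZE	8
#define KVM_TRC_EXTRA_MAX	7

/* argument for KVM_TRACE_ENABLE */
struct kvm_user_trace_setup {
	__u32 buf_size;		/* size of each per-cpu relay sub-buffer */
	__u32 buf_nr;		/* number of sub-buffers per cpu */
};

/* one record in the per-cpu relay stream */
struct kvm_trace_rec {
	__u32 event:28;
	__u32 extra_u32:3;	/* how many entries of extra_u32[] are valid */
	__u32 cycle_in:1;	/* record carries a cycle (TSC) stamp */
	__u32 pid;
	__u32 vcpu_id;
	union {
		struct {
			__u32 cycle_lo, cycle_hi;
			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
		} cycle;
		struct {
			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
		} nocycle;
	} u;
};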
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
new file mode 100644
index 0000000..5425440
--- /dev/null
@@ -0,0 +1,276 @@
+/*
+ * kvm trace
+ *
+ * It is designed to allow debugging traces of kvm to be generated
+ * on UP / SMP machines.  Each trace entry can be timestamped so that
+ * it's possible to reconstruct a chronological record of trace events.
+ * The implementation refers to blktrace kernel support.
+ *
+ * Copyright (c) 2008 Intel Corporation
+ * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
+ *
+ * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
+ *
+ * Date:    Feb 2008
+ */
+
+#include <linux/module.h>
+#include <linux/relay.h>
+#include <linux/debugfs.h>
+
+#include <linux/kvm_host.h>
+
+#define KVM_TRACE_STATE_RUNNING        (1 << 0)
+#define KVM_TRACE_STATE_PAUSE          (1 << 1)
+#define KVM_TRACE_STATE_CLEARUP        (1 << 2)
+
+struct kvm_trace {
+       int trace_state;
+       struct rchan *rchan;
+       struct dentry *lost_file;
+       atomic_t lost_records;
+};
+static struct kvm_trace *kvm_trace;
+
+struct kvm_trace_probe {
+       const char *name;
+       const char *format;
+       u32 cycle_in;
+       marker_probe_func *probe_func;
+};
+
+static inline int calc_rec_size(int cycle, int extra)
+{
+       int rec_size = KVM_TRC_HEAD_SIZE;
+
+       rec_size += extra;
+       return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
+}
+
+static void kvm_add_trace(void *probe_private, void *call_data,
+                         const char *format, va_list *args)
+{
+       struct kvm_trace_probe *p = probe_private;
+       struct kvm_trace *kt = kvm_trace;
+       struct kvm_trace_rec rec;
+       struct kvm_vcpu *vcpu;
+       int    i, extra, size;
+
+       if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
+               return;
+
+       rec.event       = va_arg(*args, u32);
+       vcpu            = va_arg(*args, struct kvm_vcpu *);
+       rec.pid         = current->tgid;
+       rec.vcpu_id     = vcpu->vcpu_id;
+
+       extra           = va_arg(*args, u32);
+       WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
+       extra           = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
+       rec.extra_u32   = extra;
+
+       rec.cycle_in    = p->cycle_in;
+
+       if (rec.cycle_in) {
+               u64 cycle = 0;
+
+               cycle = get_cycles();
+               rec.u.cycle.cycle_lo = (u32)cycle;
+               rec.u.cycle.cycle_hi = (u32)(cycle >> 32);
+
+               for (i = 0; i < rec.extra_u32; i++)
+                       rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
+       } else {
+               for (i = 0; i < rec.extra_u32; i++)
+                       rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
+       }
+
+       size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
+       relay_write(kt->rchan, &rec, size);
+}
+
+static struct kvm_trace_probe kvm_trace_probes[] = {
+       { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
+       { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
+};
+
+static int lost_records_get(void *data, u64 *val)
+{
+       struct kvm_trace *kt = data;
+
+       *val = atomic_read(&kt->lost_records);
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
+
+/*
+ *  The relay channel is used in "no-overwrite" mode; it keeps track of how
+ *  many times we encountered a full subbuffer, so the userspace app can be
+ *  told how many records were lost.
+ */
+static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
+                                    void *prev_subbuf, size_t prev_padding)
+{
+       struct kvm_trace *kt;
+
+       if (!relay_buf_full(buf))
+               return 1;
+
+       kt = buf->chan->private_data;
+       atomic_inc(&kt->lost_records);
+
+       return 0;
+}
+
+static struct dentry *kvm_create_buf_file_callack(const char *filename,
+                                                struct dentry *parent,
+                                                int mode,
+                                                struct rchan_buf *buf,
+                                                int *is_global)
+{
+       return debugfs_create_file(filename, mode, parent, buf,
+                                  &relay_file_operations);
+}
+
+static int kvm_remove_buf_file_callback(struct dentry *dentry)
+{
+       debugfs_remove(dentry);
+       return 0;
+}
+
+static struct rchan_callbacks kvm_relay_callbacks = {
+       .subbuf_start           = kvm_subbuf_start_callback,
+       .create_buf_file        = kvm_create_buf_file_callack,
+       .remove_buf_file        = kvm_remove_buf_file_callback,
+};
+
+static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
+{
+       struct kvm_trace *kt;
+       int i, r = -ENOMEM;
+
+       if (!kuts->buf_size || !kuts->buf_nr)
+               return -EINVAL;
+
+       kt = kzalloc(sizeof(*kt), GFP_KERNEL);
+       if (!kt)
+               goto err;
+
+       r = -EIO;
+       atomic_set(&kt->lost_records, 0);
+       kt->lost_file = debugfs_create_file("lost_records", 0444, debugfs_dir,
+                                           kt, &kvm_trace_lost_ops);
+       if (!kt->lost_file)
+               goto err;
+
+       kt->rchan = relay_open("trace", debugfs_dir, kuts->buf_size,
+                               kuts->buf_nr, &kvm_relay_callbacks, kt);
+       if (!kt->rchan)
+               goto err;
+
+       kvm_trace = kt;
+
+       for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
+               struct kvm_trace_probe *p = &kvm_trace_probes[i];
+
+               r = marker_probe_register(p->name, p->format, p->probe_func, p);
+               if (r)
+                       printk(KERN_INFO "Unable to register probe %s\n",
+                              p->name);
+       }
+
+       kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
+
+       return 0;
+err:
+       if (kt) {
+               if (kt->lost_file)
+                       debugfs_remove(kt->lost_file);
+               if (kt->rchan)
+                       relay_close(kt->rchan);
+               kfree(kt);
+       }
+       return r;
+}
+
+static int kvm_trace_enable(char __user *arg)
+{
+       struct kvm_user_trace_setup kuts;
+       int ret;
+
+       ret = copy_from_user(&kuts, arg, sizeof(kuts));
+       if (ret)
+               return -EFAULT;
+
+       ret = do_kvm_trace_enable(&kuts);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int kvm_trace_pause(void)
+{
+       struct kvm_trace *kt = kvm_trace;
+       int r = -EINVAL;
+
+       if (kt == NULL)
+               return r;
+
+       if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
+               kt->trace_state = KVM_TRACE_STATE_PAUSE;
+               relay_flush(kt->rchan);
+               r = 0;
+       }
+
+       return r;
+}
+
+void kvm_trace_cleanup(void)
+{
+       struct kvm_trace *kt = kvm_trace;
+       int i;
+
+       if (kt == NULL)
+               return;
+
+       if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
+           kt->trace_state == KVM_TRACE_STATE_PAUSE) {
+
+               kt->trace_state = KVM_TRACE_STATE_CLEARUP;
+
+               for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
+                       struct kvm_trace_probe *p = &kvm_trace_probes[i];
+                       marker_probe_unregister(p->name, p->probe_func, p);
+               }
+
+               relay_close(kt->rchan);
+               debugfs_remove(kt->lost_file);
+               kfree(kt);
+       }
+}
+
+int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
+{
+       void __user *argp = (void __user *)arg;
+       long r = -EINVAL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       switch (ioctl) {
+       case KVM_TRACE_ENABLE:
+               r = kvm_trace_enable(argp);
+               break;
+       case KVM_TRACE_PAUSE:
+               r = kvm_trace_pause();
+               break;
+       case KVM_TRACE_DISABLE:
+               r = 0;
+               kvm_trace_cleanup();
+               break;
+       }
+
+       return r;
+}
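This patch wires up the consumer side only; nothing in it fires the markers
yet. For a sense of what the probe table above expects, a caller has to pass
arguments in the order kvm_add_trace() pulls them out of the va_list: event
id, vcpu pointer, count of extra words, then the extra u32s. The KVMTRACE_2D
helper and the KVM_TRC_VMEXIT id below are illustrative sketches of the
instrumentation added later in this series, not definitions from this patch.

/*
 * Illustrative only: a wrapper whose argument order matches the
 * "%u %p %u %u %u %u %u %u" format consumed by kvm_add_trace().
 * The KVM_TRC_##evt ids are assumed to come from <linux/kvm.h>.
 */
#include <linux/marker.h>
#include <linux/kvm_host.h>

#define KVMTRACE_2D(evt, vcpu, d1, d2, name)				\
	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u",		\
		   KVM_TRC_##evt, (vcpu), 2, (d1), (d2), 0, 0, 0)

/* e.g. in an arch exit handler: record the exit reason and guest rip */
static void example_trace_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, u32 rip)
{
	KVMTRACE_2D(VMEXIT, vcpu, exit_reason, rip, entryexit);
}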