perf_counter: Allow sharing of output channels
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Wed, 19 Aug 2009 09:18:27 +0000 (11:18 +0200)
committer Ingo Molnar <mingo@elte.hu>
Tue, 25 Aug 2009 07:36:13 +0000 (09:36 +0200)
Provide the ability to configure a counter to send its output
to another (already existing) counter's output stream.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090819092023.980284148@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/linux/perf_counter.h
kernel/perf_counter.c

index b53f7006cc4e0bb5e1ad5f09ae76249d150d2ca4..e022b847c90deb5525ee3d48f59810891ad57565 100644 (file)
@@ -216,6 +216,7 @@ struct perf_counter_attr {
 #define PERF_COUNTER_IOC_REFRESH       _IO ('$', 2)
 #define PERF_COUNTER_IOC_RESET         _IO ('$', 3)
 #define PERF_COUNTER_IOC_PERIOD                _IOW('$', 4, u64)
+#define PERF_COUNTER_IOC_SET_OUTPUT    _IO ('$', 5)
 
 enum perf_counter_ioc_flags {
        PERF_IOC_FLAG_GROUP             = 1U << 0,
@@ -415,6 +416,9 @@ enum perf_callchain_context {
        PERF_CONTEXT_MAX                = (__u64)-4095,
 };
 
+/*
+ * Bits accepted in the 'flags' argument of the perf_counter_open() syscall;
+ * any other bit makes the syscall fail with -EINVAL.
+ *
+ * PERF_FLAG_FD_NO_GROUP - do not look up group_fd as a group leader
+ * PERF_FLAG_FD_OUTPUT   - hand group_fd to perf_counter_set_output() so the
+ *                         new counter writes into that counter's output
+ */
+#define PERF_FLAG_FD_NO_GROUP  (1U << 0)
+#define PERF_FLAG_FD_OUTPUT    (1U << 1)
+
 #ifdef __KERNEL__
 /*
  * Kernel-internal data types and definitions:
@@ -536,6 +540,7 @@ struct perf_counter {
        struct list_head                sibling_list;
        int                             nr_siblings;
        struct perf_counter             *group_leader;
+       struct perf_counter             *output;
        const struct pmu                *pmu;
 
        enum perf_counter_active_state  state;
index 06bf6a4f260842f20dadd844ffad7fe9d2a03dbb..53abcbefa0bf64e6c0d7abea64174b59595345db 100644 (file)
@@ -1692,6 +1692,11 @@ static void free_counter(struct perf_counter *counter)
                        atomic_dec(&nr_task_counters);
        }
 
+       if (counter->output) {
+               fput(counter->output->filp);
+               counter->output = NULL;
+       }
+
        if (counter->destroy)
                counter->destroy(counter);
 
@@ -1977,6 +1982,8 @@ unlock:
        return ret;
 }
 
+int perf_counter_set_output(struct perf_counter *counter, int output_fd);
+
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
        struct perf_counter *counter = file->private_data;
@@ -2000,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case PERF_COUNTER_IOC_PERIOD:
                return perf_counter_period(counter, (u64 __user *)arg);
 
+       case PERF_COUNTER_IOC_SET_OUTPUT:
+               return perf_counter_set_output(counter, arg);
+
        default:
                return -ENOTTY;
        }
@@ -2270,6 +2280,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
        WARN_ON_ONCE(counter->ctx->parent_ctx);
        mutex_lock(&counter->mmap_mutex);
+       if (counter->output) {
+               ret = -EINVAL;
+               goto unlock;
+       }
+
        if (atomic_inc_not_zero(&counter->mmap_count)) {
                if (nr_pages != counter->data->nr_pages)
                        ret = -EINVAL;
@@ -2655,6 +2670,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
                             struct perf_counter *counter, unsigned int size,
                             int nmi, int sample)
 {
+       struct perf_counter *output_counter;
        struct perf_mmap_data *data;
        unsigned int offset, head;
        int have_lost;
@@ -2664,13 +2680,17 @@ static int perf_output_begin(struct perf_output_handle *handle,
                u64                      lost;
        } lost_event;
 
+       rcu_read_lock();
        /*
         * For inherited counters we send all the output towards the parent.
         */
        if (counter->parent)
                counter = counter->parent;
 
-       rcu_read_lock();
+       output_counter = rcu_dereference(counter->output);
+       if (output_counter)
+               counter = output_counter;
+
        data = rcu_dereference(counter->data);
        if (!data)
                goto out;
@@ -4218,6 +4238,57 @@ err_size:
        goto out;
 }
 
+/*
+ * Redirect @counter's output into the buffer of the counter behind @output_fd.
+ *
+ * @counter:   counter whose output is being redirected
+ * @output_fd: fd of an existing perf counter to write into, or 0 to drop
+ *             the current redirection
+ *
+ * A reference is held on the target's file for as long as it is installed
+ * as counter->output; the reference on a replaced target is dropped after
+ * an RCU grace period.
+ *
+ * Returns 0 on success, -EBADF if @output_fd is not an open fd, and -EINVAL
+ * if it is not a perf counter, is @counter itself, already redirects its own
+ * output, or if @counter already has a buffer of its own.
+ */
+int perf_counter_set_output(struct perf_counter *counter, int output_fd)
+{
+       struct perf_counter *output_counter = NULL;
+       struct file *output_file = NULL;
+       struct perf_counter *old_output;
+       int fput_needed = 0;
+       int ret = -EINVAL;
+
+       /* fd 0 means "no redirection": just clear any existing target */
+       if (!output_fd)
+               goto set;
+
+       output_file = fget_light(output_fd, &fput_needed);
+       if (!output_file)
+               return -EBADF;
+
+       if (output_file->f_op != &perf_fops)
+               goto out;
+
+       output_counter = output_file->private_data;
+
+       /*
+        * Don't allow circular references: a counter pointing at itself
+        * would pin its own file and could never be released.
+        */
+       if (output_counter == counter)
+               goto out;
+
+       /* Don't chain output fds */
+       if (output_counter->output)
+               goto out;
+
+       /* long-term reference, owned by counter->output once installed */
+       atomic_long_inc(&output_file->f_count);
+
+set:
+       mutex_lock(&counter->mmap_mutex);
+       /*
+        * Don't set an output fd when we already have an output channel.
+        * perf_mmap() tests counter->output under mmap_mutex, so test
+        * counter->data under the same lock.
+        */
+       if (output_counter && counter->data) {
+               mutex_unlock(&counter->mmap_mutex);
+               /* drop the long-term reference taken above */
+               fput(output_file);
+               goto out;
+       }
+       old_output = counter->output;
+       rcu_assign_pointer(counter->output, output_counter);
+       mutex_unlock(&counter->mmap_mutex);
+
+       if (old_output) {
+               /*
+                * we need to make sure no existing perf_output_*()
+                * is still referencing this counter.
+                */
+               synchronize_rcu();
+               fput(old_output->filp);
+       }
+
+       ret = 0;
+out:
+       /* releases only the lookup reference, if fget_light() took one */
+       fput_light(output_file, fput_needed);
+       return ret;
+}
+
+
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
@@ -4240,7 +4311,7 @@ SYSCALL_DEFINE5(perf_counter_open,
        int ret;
 
        /* for future expandability... */
-       if (flags)
+       if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
                return -EINVAL;
 
        ret = perf_copy_attr(attr_uptr, &attr);
@@ -4268,7 +4339,7 @@ SYSCALL_DEFINE5(perf_counter_open,
         * Look up the group leader (we will attach this counter to it):
         */
        group_leader = NULL;
-       if (group_fd != -1) {
+       if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
                ret = -EINVAL;
                group_file = fget_light(group_fd, &fput_needed);
                if (!group_file)
@@ -4310,6 +4381,12 @@ SYSCALL_DEFINE5(perf_counter_open,
        if (!counter_file)
                goto err_free_put_context;
 
+       if (flags & PERF_FLAG_FD_OUTPUT) {
+               ret = perf_counter_set_output(counter, group_fd);
+               if (ret)
+                       goto err_free_put_context;
+       }
+
        counter->filp = counter_file;
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);