perf: Add a pmu capability for "exclusive" events
author Alexander Shishkin <alexander.shishkin@linux.intel.com>
Fri, 30 Jan 2015 10:31:06 +0000 (12:31 +0200)
committer Ingo Molnar <mingo@kernel.org>
Thu, 2 Apr 2015 15:14:12 +0000 (17:14 +0200)
Usually, pmus that do, for example, instruction tracing, would only ever
be able to have one event per task per cpu (or per perf_event_context). For
such pmus it makes sense to disallow creating conflicting events early on,
so as to provide consistent behavior for the user.

This patch adds a pmu capability that indicates such a constraint on event
creation.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1422613866-113186-1-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/perf_event.h
kernel/events/core.c

index 13a1eb3a2a2dc3570f76984e734b2dfb82f553ce..f936a1e51f2999b8bde5b27a4bbc955bd669093b 100644 (file)
@@ -176,6 +176,7 @@ struct perf_event;
 #define PERF_PMU_CAP_NO_NMI                    0x02
 #define PERF_PMU_CAP_AUX_NO_SG                 0x04
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF          0x08
+#define PERF_PMU_CAP_EXCLUSIVE                 0x10
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -196,6 +197,7 @@ struct pmu {
 
        int * __percpu                  pmu_disable_count;
        struct perf_cpu_context * __percpu pmu_cpu_context;
+       atomic_t                        exclusive_cnt; /* < 0: cpu; > 0: tsk */
        int                             task_ctx_nr;
        int                             hrtimer_interval_ms;
 
index da51128c337a8418de226acc4bababd4b2adc1f7..6d9fdaef7b574a5e021e580416130f705746ae4d 100644 (file)
@@ -3459,6 +3459,91 @@ static void unaccount_event(struct perf_event *event)
        unaccount_event_cpu(event, event->cpu);
 }
 
+/*
+ * The following implement mutual exclusion of events on "exclusive" pmus
+ * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
+ * at a time, so we disallow creating events that might conflict, namely:
+ *
+ *  1) cpu-wide events in the presence of per-task events,
+ *  2) per-task events in the presence of cpu-wide events,
+ *  3) two matching events on the same context.
+ *
+ * The first two cases are handled in the allocation path (perf_event_alloc(),
+ * __free_event()), the third -- before the first perf_install_in_context().
+ */
+static int exclusive_event_init(struct perf_event *event)
+{
+       struct pmu *pmu = event->pmu;
+       bool claimed;
+
+       /* Pmus without PERF_PMU_CAP_EXCLUSIVE are not accounted at all. */
+       if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+               return 0;
+
+       /*
+        * pmu::exclusive_cnt is both a counter and a mode flag: cpu-wide
+        * events push it below zero, per-task events push it above zero.
+        * An update is refused once the value sits on the other side of
+        * zero, so the two kinds never co-exist on one exclusive pmu.
+        *
+        * This runs in the perf_event_alloc() path, where event::ctx
+        * doesn't exist yet. PERF_ATTACH_TASK is still a safe way to
+        * tell "per-task event", because unlike other attach states it
+        * never gets cleared.
+        */
+       if (event->attach_state & PERF_ATTACH_TASK)
+               claimed = atomic_inc_unless_negative(&pmu->exclusive_cnt);
+       else
+               claimed = atomic_dec_unless_positive(&pmu->exclusive_cnt);
+
+       /* A refused update means events of the other kind are present. */
+       if (!claimed)
+               return -EBUSY;
+       return 0;
+}
+
+static void exclusive_event_destroy(struct perf_event *event)
+{
+       struct pmu *pmu = event->pmu;
+       bool per_task = event->attach_state & PERF_ATTACH_TASK;
+
+       if (pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) {
+               /* Undo exclusive_event_init(): move the count back towards 0. */
+               if (per_task)
+                       atomic_dec(&pmu->exclusive_cnt);
+               else
+                       atomic_inc(&pmu->exclusive_cnt);
+       }
+}
+
+static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
+{
+       /* Exclusivity is per pmu: events on different pmus never conflict. */
+       if (e1->pmu != e2->pmu ||
+           !(e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+               return false;
+       /* cpu == -1 is treated as overlapping every cpu. */
+       return e1->cpu == e2->cpu || e1->cpu == -1 || e2->cpu == -1;
+}
+
+/* Must run under the same ctx::mutex as perf_install_in_context() */
+static bool exclusive_event_installable(struct perf_event *event,
+                                       struct perf_event_context *ctx)
+{
+       struct perf_event *other;
+
+       if (event->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) {
+               /* Refuse if any event already on @ctx conflicts. */
+               list_for_each_entry(other, &ctx->event_list, event_entry) {
+                       if (exclusive_event_match(other, event))
+                               return false;
+               }
+       }
+
+       /* Non-exclusive pmu, or nothing on @ctx gets in the way. */
+       return true;
+}
+
 static void __free_event(struct perf_event *event)
 {
        if (!event->parent) {
@@ -3472,8 +3557,10 @@ static void __free_event(struct perf_event *event)
        if (event->ctx)
                put_ctx(event->ctx);
 
-       if (event->pmu)
+       if (event->pmu) {
+               exclusive_event_destroy(event);
                module_put(event->pmu->module);
+       }
 
        call_rcu(&event->rcu_head, free_event_rcu);
 }
@@ -7150,6 +7237,7 @@ got_cpu_context:
                pmu->event_idx = perf_event_idx_default;
 
        list_add_rcu(&pmu->entry, &pmus);
+       atomic_set(&pmu->exclusive_cnt, 0);
        ret = 0;
 unlock:
        mutex_unlock(&pmus_lock);
@@ -7405,16 +7493,23 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                goto err_ns;
        }
 
+       err = exclusive_event_init(event);
+       if (err)
+               goto err_pmu;
+
        if (!event->parent) {
                if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
                        err = get_callchain_buffers();
                        if (err)
-                               goto err_pmu;
+                               goto err_per_task;
                }
        }
 
        return event;
 
+err_per_task:
+       exclusive_event_destroy(event);
+
 err_pmu:
        if (event->destroy)
                event->destroy(event);
@@ -7819,6 +7914,11 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_alloc;
        }
 
+       if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) {
+               err = -EBUSY;
+               goto err_context;
+       }
+
        if (task) {
                put_task_struct(task);
                task = NULL;
@@ -7941,6 +8041,13 @@ SYSCALL_DEFINE5(perf_event_open,
                get_ctx(ctx);
        }
 
+       if (!exclusive_event_installable(event, ctx)) {
+               err = -EBUSY;
+               mutex_unlock(&ctx->mutex);
+               fput(event_file);
+               goto err_context;
+       }
+
        perf_install_in_context(ctx, event, event->cpu);
        perf_unpin_context(ctx);
 
@@ -8032,6 +8139,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);
+       if (!exclusive_event_installable(event, ctx)) {
+               mutex_unlock(&ctx->mutex);
+               perf_unpin_context(ctx);
+               put_ctx(ctx);
+               err = -EBUSY;
+               goto err_free;
+       }
+
        perf_install_in_context(ctx, event, cpu);
        perf_unpin_context(ctx);
        mutex_unlock(&ctx->mutex);