Vince's perf-trinity fuzzer found yet another 'interesting' problem.
When we sample the irq_work_exit tracepoint with period==1 (or
PERF_SAMPLE_PERIOD) and we add an fasync SIGNAL handler we create an
infinite event generation loop:
,-> <IPI>
| irq_work_exit() ->
| trace_irq_work_exit() ->
| ...
| __perf_event_overflow() -> (due to fasync)
| irq_work_queue() -> (irq_work_list must be empty)
'--------- arch_irq_work_raise()
Similar things can happen due to regular poll() wakeups if we exceed
the ring-buffer wakeup watermark, or have an event_limit.
To avoid this, disallow sampling this particular tracepoint.
In order to achieve this, add a special perf_perm function pointer
to each event and call it (when set) whenever a tracepoint perf event
is being created.
[ rostedt: use expr... to allow for ',' in your expression ]
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20131114152304.GC5364@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
*/
DEFINE_IRQ_VECTOR_EVENT(irq_work);
+/*
+ * We must disallow sampling irq_work_exit() because perf event sampling
+ * itself can cause irq_work, which would lead to an infinite loop:
+ *
+ * 1) irq_work_exit happens
+ * 2) generates perf sample
+ * 3) generates irq_work
+ * 4) goto 1
+ *
+ * The permission expression below therefore yields -EPERM when
+ * is_sampling_event(p_event) is true, and 0 otherwise.
+ */
+TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0);
+
/*
* call_function - called when entering/exiting a call function interrupt
* vector handler
#ifdef CONFIG_PERF_EVENTS
int perf_refcount;
struct hlist_head __percpu *perf_events;
+
+ int (*perf_perm)(struct ftrace_event_call *,
+ struct perf_event *);
#endif
};
} \
early_initcall(trace_init_flags_##name);
+#define __TRACE_EVENT_PERF_PERM(name, expr...) /* generate and install a perf permission hook for event 'name' */ \
+ static int perf_perm_##name(struct ftrace_event_call *tp_event, \
+ struct perf_event *p_event) \
+ { \
+ return ({ expr; }); /* expr may use tp_event and p_event; its value is the hook's return value */ \
+ } \
+ static int __init trace_init_perf_perm_##name(void) \
+ { \
+ event_##name.perf_perm = &perf_perm_##name; /* point the event's perf_perm at the generated function */ \
+ return 0; \
+ } \
+ early_initcall(trace_init_perf_perm_##name); /* run the installer as an early initcall */
+
#define PERF_MAX_TRACE_SIZE 2048
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
#define TRACE_EVENT_FLAGS(event, flag)
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
#endif /* DECLARE_TRACE */
#ifndef TRACE_EVENT
#define TRACE_EVENT_FLAGS(event, flag)
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
#endif /* ifdef TRACE_EVENT (see note above) */
#define TRACE_EVENT_FLAGS(name, value) \
__TRACE_EVENT_FLAGS(name, value)
+#undef TRACE_EVENT_PERF_PERM
+#define TRACE_EVENT_PERF_PERM(name, expr...) \
+ __TRACE_EVENT_PERF_PERM(name, expr)
+
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#undef TRACE_EVENT_FLAGS
#define TRACE_EVENT_FLAGS(event, flag)
+#undef TRACE_EVENT_PERF_PERM
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
/*
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
+ if (tp_event->perf_perm) {
+ int ret = tp_event->perf_perm(tp_event, p_event);
+ if (ret)
+ return ret;
+ }
+
/* The ftrace function trace is allowed only for root. */
if (ftrace_event_is_function(tp_event) &&
perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))