ftrace, perf: Avoid infinite event generation loop
authorPeter Zijlstra <peterz@infradead.org>
Thu, 14 Nov 2013 15:23:04 +0000 (16:23 +0100)
committerIngo Molnar <mingo@kernel.org>
Tue, 19 Nov 2013 15:57:40 +0000 (16:57 +0100)
Vince's perf-trinity fuzzer found yet another 'interesting' problem.

When we sample the irq_work_exit tracepoint with period==1 (or
PERF_SAMPLE_PERIOD) and we add an fasync SIGNAL handler we create an
infinite event generation loop:

  ,-> <IPI>
  |     irq_work_exit() ->
  |       trace_irq_work_exit() ->
  |         ...
  |           __perf_event_overflow() -> (due to fasync)
  |             irq_work_queue() -> (irq_work_list must be empty)
  '---------      arch_irq_work_raise()

Similar things can happen due to regular poll() wakeups if we exceed
the ring-buffer wakeup watermark, or have an event_limit.

To avoid this, dis-allow sampling this particular tracepoint.

In order to achieve this, create a special perf_perm function pointer
for each event and call this (when set) on trying to create a
tracepoint perf event.

[ roasted: use expr... to allow for ',' in your expression ]

Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20131114152304.GC5364@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/trace/irq_vectors.h
include/linux/ftrace_event.h
include/linux/tracepoint.h
include/trace/ftrace.h
kernel/trace/trace_event_perf.c

index 2874df24e7a448cd56631d87f5ac9e0fdd00cbc7..4cab890007a7267ecc7ce148eaa1fdfbee4a49df 100644 (file)
@@ -71,6 +71,17 @@ DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
  */
 DEFINE_IRQ_VECTOR_EVENT(irq_work);
 
+/*
+ * We must dis-allow sampling irq_work_exit() because perf event sampling
+ * itself can cause irq_work, which would lead to an infinite loop;
+ *
+ *  1) irq_work_exit happens
+ *  2) generates perf sample
+ *  3) generates irq_work
+ *  4) goto 1
+ */
+TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0);
+
 /*
  * call_function - called when entering/exiting a call function interrupt
  * vector handler
index 9abbe630c456775b19cc999f36887efa94bbdd41..8c9b7a1c4138b87a79833bc400370a13e94cf270 100644 (file)
@@ -248,6 +248,9 @@ struct ftrace_event_call {
 #ifdef CONFIG_PERF_EVENTS
        int                             perf_refcount;
        struct hlist_head __percpu      *perf_events;
+
+       int     (*perf_perm)(struct ftrace_event_call *,
+                            struct perf_event *);
 #endif
 };
 
@@ -317,6 +320,19 @@ struct ftrace_event_file {
        }                                                               \
        early_initcall(trace_init_flags_##name);
 
+#define __TRACE_EVENT_PERF_PERM(name, expr...)                         \
+       static int perf_perm_##name(struct ftrace_event_call *tp_event, \
+                                   struct perf_event *p_event)         \
+       {                                                               \
+               return ({ expr; });                                     \
+       }                                                               \
+       static int __init trace_init_perf_perm_##name(void)             \
+       {                                                               \
+               event_##name.perf_perm = &perf_perm_##name;             \
+               return 0;                                               \
+       }                                                               \
+       early_initcall(trace_init_perf_perm_##name);
+
 #define PERF_MAX_TRACE_SIZE    2048
 
 #define MAX_FILTER_STR_VAL     256     /* Should handle KSYM_SYMBOL_LEN */
index ebeab360d851c1ece4952b3f5f4f294aa9673db0..f16dc0a4004976376c9036a244e1121ed52ed020 100644 (file)
@@ -267,6 +267,8 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define TRACE_EVENT_FLAGS(event, flag)
 
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
 #endif /* DECLARE_TRACE */
 
 #ifndef TRACE_EVENT
@@ -399,4 +401,6 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define TRACE_EVENT_FLAGS(event, flag)
 
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
 #endif /* ifdef TRACE_EVENT (see note above) */
index 52594b20179e0d8459a0810c7fbcc75fceda84ff..6b852f60f8ae24136c075eff90aa9488297e9490 100644 (file)
 #define TRACE_EVENT_FLAGS(name, value)                                 \
        __TRACE_EVENT_FLAGS(name, value)
 
+#undef TRACE_EVENT_PERF_PERM
+#define TRACE_EVENT_PERF_PERM(name, expr...)                           \
+       __TRACE_EVENT_PERF_PERM(name, expr)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 
 #undef TRACE_EVENT_FLAGS
 #define TRACE_EVENT_FLAGS(event, flag)
 
+#undef TRACE_EVENT_PERF_PERM
+#define TRACE_EVENT_PERF_PERM(event, expr...)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
index 78e27e3b52ac2ee0b9e86f544b77a45e12865d95..630889f68b1d34808049694405850476f5c7a4d9 100644 (file)
@@ -24,6 +24,12 @@ static int   total_ref_count;
 static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
 {
+       if (tp_event->perf_perm) {
+               int ret = tp_event->perf_perm(tp_event, p_event);
+               if (ret)
+                       return ret;
+       }
+
        /* The ftrace function trace is allowed only for root. */
        if (ftrace_event_is_function(tp_event) &&
            perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))