tracing/filters: add run-time field descriptions to TRACE_EVENT_FORMAT events
authorTom Zanussi <tzanussi@gmail.com>
Tue, 31 Mar 2009 05:48:49 +0000 (00:48 -0500)
committerIngo Molnar <mingo@elte.hu>
Mon, 13 Apr 2009 22:00:50 +0000 (00:00 +0200)
This patch adds run-time field descriptions to all the event formats
exported using TRACE_EVENT_FORMAT.  It also hooks up all the tracers
that use them (i.e. the tracers in the 'ftrace subsystem') so they can
also have their output filtered by the event-filtering mechanism.

When I was testing this, there were a couple of things that fooled me
into thinking the filters weren't working, when actually they were -
I'll mention them here so others don't make the same mistakes (and file
bug reports. ;-)

One is that some of the tracers trace multiple events e.g. the
sched_switch tracer uses the context_switch and wakeup events, and if
you don't set filters on all of the traced events, the unfiltered output
from the events without filters on them can make it look like the
filtering as a whole isn't working properly, when actually it is doing
what it was asked to do - it just wasn't asked to do the right thing.

The other is that for the really high-volume tracers e.g. the function
tracer, the volume of filtered events can be so high that it pushes the
unfiltered events out of the ring buffer before they can be read so e.g.
cat'ing the trace file repeatedly shows either no output, or once in
awhile some output but that isn't there the next time you read the
trace, which isn't what you normally expect when reading the trace file.
If you read from the trace_pipe file though, you can catch them before
they disappear.

Changes from v1:

As suggested by Frederic Weisbecker:

- get rid of externs in functions
- added unlikely() to filter_check_discard()

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/trace/kmemtrace.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_branch.c
kernel/trace/trace_event_types.h
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_events_stage_2.h
kernel/trace/trace_export.c
kernel/trace/trace_hw_branches.c
kernel/trace/trace_power.c

index 7a0aa0e260db86225d86ac8b14215861cea716bc..9419ad10541b673f2b6452d82eab7b7709f78b17 100644 (file)
@@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
                                   gfp_t gfp_flags,
                                   int node)
 {
+       struct ftrace_event_call *call = &event_kmem_alloc;
        struct trace_array *tr = kmemtrace_array;
        struct kmemtrace_alloc_entry *entry;
        struct ring_buffer_event *event;
@@ -62,6 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
        entry->gfp_flags        = gfp_flags;
        entry->node             = node;
 
+       filter_check_discard(call, entry, event);
+
        ring_buffer_unlock_commit(tr->buffer, event);
 
        trace_wake_up();
@@ -71,6 +74,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
                                  unsigned long call_site,
                                  const void *ptr)
 {
+       struct ftrace_event_call *call = &event_kmem_free;
        struct trace_array *tr = kmemtrace_array;
        struct kmemtrace_free_entry *entry;
        struct ring_buffer_event *event;
@@ -86,6 +90,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
        entry->call_site        = call_site;
        entry->ptr              = ptr;
 
+       filter_check_discard(call, entry, event);
+
        ring_buffer_unlock_commit(tr->buffer, event);
 
        trace_wake_up();
index 4865459f609f94c8cc42601d6f1e8c245582594c..962e6179994aede80cffa33939b6598fafb368a3 100644 (file)
@@ -898,6 +898,7 @@ trace_function(struct trace_array *tr,
               unsigned long ip, unsigned long parent_ip, unsigned long flags,
               int pc)
 {
+       struct ftrace_event_call *call = &event_function;
        struct ring_buffer_event *event;
        struct ftrace_entry *entry;
 
@@ -912,6 +913,9 @@ trace_function(struct trace_array *tr,
        entry   = ring_buffer_event_data(event);
        entry->ip                       = ip;
        entry->parent_ip                = parent_ip;
+
+       filter_check_discard(call, entry, event);
+
        ring_buffer_unlock_commit(tr->buffer, event);
 }
 
@@ -921,6 +925,7 @@ static int __trace_graph_entry(struct trace_array *tr,
                                unsigned long flags,
                                int pc)
 {
+       struct ftrace_event_call *call = &event_funcgraph_entry;
        struct ring_buffer_event *event;
        struct ftrace_graph_ent_entry *entry;
 
@@ -933,6 +938,7 @@ static int __trace_graph_entry(struct trace_array *tr,
                return 0;
        entry   = ring_buffer_event_data(event);
        entry->graph_ent                        = *trace;
+       filter_check_discard(call, entry, event);
        ring_buffer_unlock_commit(global_trace.buffer, event);
 
        return 1;
@@ -943,6 +949,7 @@ static void __trace_graph_return(struct trace_array *tr,
                                unsigned long flags,
                                int pc)
 {
+       struct ftrace_event_call *call = &event_funcgraph_exit;
        struct ring_buffer_event *event;
        struct ftrace_graph_ret_entry *entry;
 
@@ -955,6 +962,7 @@ static void __trace_graph_return(struct trace_array *tr,
                return;
        entry   = ring_buffer_event_data(event);
        entry->ret                              = *trace;
+       filter_check_discard(call, entry, event);
        ring_buffer_unlock_commit(global_trace.buffer, event);
 }
 #endif
@@ -973,6 +981,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
                                 int skip, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+       struct ftrace_event_call *call = &event_kernel_stack;
        struct ring_buffer_event *event;
        struct stack_entry *entry;
        struct stack_trace trace;
@@ -990,6 +999,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
        trace.entries           = entry->caller;
 
        save_stack_trace(&trace);
+       filter_check_discard(call, entry, event);
        ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
@@ -1015,6 +1025,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
                                   unsigned long flags, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+       struct ftrace_event_call *call = &event_user_stack;
        struct ring_buffer_event *event;
        struct userstack_entry *entry;
        struct stack_trace trace;
@@ -1036,6 +1047,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
        trace.entries           = entry->caller;
 
        save_stack_trace_user(&trace);
+       filter_check_discard(call, entry, event);
        ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
@@ -1052,6 +1064,7 @@ ftrace_trace_special(void *__tr,
                     unsigned long arg1, unsigned long arg2, unsigned long arg3,
                     int pc)
 {
+       struct ftrace_event_call *call = &event_special;
        struct ring_buffer_event *event;
        struct trace_array *tr = __tr;
        struct special_entry *entry;
@@ -1064,6 +1077,7 @@ ftrace_trace_special(void *__tr,
        entry->arg1                     = arg1;
        entry->arg2                     = arg2;
        entry->arg3                     = arg3;
+       filter_check_discard(call, entry, event);
        trace_buffer_unlock_commit(tr, event, 0, pc);
 }
 
@@ -1080,6 +1094,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
                           struct task_struct *next,
                           unsigned long flags, int pc)
 {
+       struct ftrace_event_call *call = &event_context_switch;
        struct ring_buffer_event *event;
        struct ctx_switch_entry *entry;
 
@@ -1095,6 +1110,9 @@ tracing_sched_switch_trace(struct trace_array *tr,
        entry->next_prio                = next->prio;
        entry->next_state               = next->state;
        entry->next_cpu = task_cpu(next);
+
+       filter_check_discard(call, entry, event);
+
        trace_buffer_unlock_commit(tr, event, flags, pc);
 }
 
@@ -1104,6 +1122,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
                           struct task_struct *curr,
                           unsigned long flags, int pc)
 {
+       struct ftrace_event_call *call = &event_wakeup;
        struct ring_buffer_event *event;
        struct ctx_switch_entry *entry;
 
@@ -1120,6 +1139,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
        entry->next_state               = wakee->state;
        entry->next_cpu                 = task_cpu(wakee);
 
+       filter_check_discard(call, entry, event);
+
        ring_buffer_unlock_commit(tr->buffer, event);
        ftrace_trace_stack(tr, flags, 6, pc);
        ftrace_trace_userstack(tr, flags, pc);
@@ -1221,6 +1242,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
                (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        static u32 trace_buf[TRACE_BUF_SIZE];
 
+       struct ftrace_event_call *call = &event_bprint;
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
@@ -1260,6 +1282,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
        entry->fmt                      = fmt;
 
        memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+       filter_check_discard(call, entry, event);
        ring_buffer_unlock_commit(tr->buffer, event);
 
 out_unlock:
@@ -1279,6 +1302,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
        static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
        static char trace_buf[TRACE_BUF_SIZE];
 
+       struct ftrace_event_call *call = &event_print;
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
@@ -1314,6 +1338,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 
        memcpy(&entry->buf, trace_buf, len);
        entry->buf[len] = 0;
+       filter_check_discard(call, entry, event);
        ring_buffer_unlock_commit(tr->buffer, event);
 
  out_unlock:
index 34b94c3f40adba1f6c236cb5db569141afa5de5e..e7737281953fa4e5bcbabd46651fd7f7723fa479 100644 (file)
@@ -866,6 +866,21 @@ extern void filter_free_subsystem_preds(struct event_subsystem *system);
 extern int filter_add_subsystem_pred(struct event_subsystem *system,
                                     struct filter_pred *pred);
 
+static inline void
+filter_check_discard(struct ftrace_event_call *call, void *rec,
+                    struct ring_buffer_event *event)
+{
+       if (unlikely(call->preds) && !filter_match_preds(call, rec))
+               ring_buffer_event_discard(event);
+}
+
+#define __common_field(type, item)                                     \
+       ret = trace_define_field(event_call, #type, "common_" #item,    \
+                                offsetof(typeof(field.ent), item),     \
+                                sizeof(field.ent.item));               \
+       if (ret)                                                        \
+               return ret;
+
 void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
@@ -897,4 +912,9 @@ do {                                                                        \
                __trace_printk(ip, fmt, ##args);                        \
 } while (0)
 
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
+       extern struct ftrace_event_call event_##call;
+#include "trace_event_types.h"
+
 #endif /* _LINUX_KERNEL_TRACE_H */
index e6e32912ffb8102d94e6773a126a92da7988eccd..c95c25d838ef0aa09e5333c34816857d9405b051 100644 (file)
@@ -30,6 +30,7 @@ static struct trace_array *branch_tracer;
 static void
 probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 {
+       struct ftrace_event_call *call = &event_branch;
        struct trace_array *tr = branch_tracer;
        struct ring_buffer_event *event;
        struct trace_branch *entry;
@@ -73,6 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
        entry->line = f->line;
        entry->correct = val == expect;
 
+       filter_check_discard(call, entry, event);
+
        ring_buffer_unlock_commit(tr->buffer, event);
 
  out:
index fd78bee71dd7519e03d45d38e3de9e58579af234..95b147aac2299b6dadc7c66192dc079c26c8692a 100644 (file)
@@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
 TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
        TRACE_STRUCT(
                TRACE_FIELD(unsigned int, line, line)
-               TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func)
-               TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file)
+               TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
+                                   TRACE_FUNC_SIZE+1, func)
+               TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
+                                   TRACE_FUNC_SIZE+1, file)
                TRACE_FIELD(char, correct, correct)
        ),
        TP_RAW_FMT("%u:%s:%s (%u)")
index 64ec4d278ffbcd56b94d0d161b855914756f3e0b..be9299a53e2a8a783d56478f6e16b18431575b88 100644 (file)
@@ -680,6 +680,7 @@ static struct dentry *
 event_subsystem_dir(const char *name, struct dentry *d_events)
 {
        struct event_subsystem *system;
+       struct dentry *entry;
 
        /* First see if we did not already create this dir */
        list_for_each_entry(system, &event_subsystems, list) {
@@ -708,6 +709,12 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
        system->preds = NULL;
 
+       entry = debugfs_create_file("filter", 0644, system->entry, system,
+                                   &ftrace_subsystem_filter_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs "
+                          "'%s/filter' entry\n", name);
+
        return system->entry;
 }
 
index 026be412f356d9d6642480a3db01770611f7337f..470ad9487eccc71fab90e1f640dfa2cf3b7aa355 100644 (file)
@@ -185,7 +185,7 @@ void filter_free_subsystem_preds(struct event_subsystem *system)
        }
 
        events_for_each(call) {
-               if (!call->name || !call->regfunc)
+               if (!call->define_fields)
                        continue;
 
                if (!strcmp(call->system, system->name))
@@ -324,7 +324,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
        events_for_each(call) {
                int err;
 
-               if (!call->name || !call->regfunc)
+               if (!call->define_fields)
                        continue;
 
                if (strcmp(call->system, system->name))
index 30743f7d41108b942cc596f06856ab633b792c47..1c94b87c7180d0865600dbdb2ef1682a355f6a69 100644 (file)
@@ -146,13 +146,6 @@ ftrace_format_##call(struct trace_seq *s)                          \
        if (ret)                                                        \
                return ret;
 
-#define __common_field(type, item)                                     \
-       ret = trace_define_field(event_call, #type, "common_" #item,    \
-                                offsetof(typeof(field.ent), item),     \
-                                sizeof(field.ent.item));               \
-       if (ret)                                                        \
-               return ret;
-
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)           \
 int                                                                    \
index 07a22c33ebf3c31b4d07b7125a9062118ca90490..f4e46616c48eae3af477092bf51daf908ae4486c 100644 (file)
@@ -30,7 +30,7 @@
 
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd)                      \
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)                 \
        ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t"   \
                               "offset:%u;\tsize:%u;\n",                \
                               (unsigned int)offsetof(typeof(field), item), \
@@ -85,18 +85,69 @@ ftrace_format_##call(struct trace_seq *s)                           \
 #define TRACE_ENTRY    entry
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
        cmd;
 
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
+int ftrace_define_fields_##call(void);                                 \
+static int ftrace_raw_init_event_##call(void);                         \
                                                                        \
-static struct ftrace_event_call __used                                 \
+struct ftrace_event_call __used                                                \
 __attribute__((__aligned__(4)))                                                \
 __attribute__((section("_ftrace_events"))) event_##call = {            \
        .name                   = #call,                                \
        .id                     = proto,                                \
        .system                 = __stringify(TRACE_SYSTEM),            \
+       .raw_init               = ftrace_raw_init_event_##call,         \
        .show_format            = ftrace_format_##call,                 \
+       .define_fields          = ftrace_define_fields_##call,          \
+};                                                                     \
+static int ftrace_raw_init_event_##call(void)                          \
+{                                                                      \
+       INIT_LIST_HEAD(&event_##call.fields);                           \
+       return 0;                                                       \
+}                                                                      \
+
+#include "trace_event_types.h"
+
+#undef TRACE_FIELD
+#define TRACE_FIELD(type, item, assign)                                        \
+       ret = trace_define_field(event_call, #type, #item,              \
+                                offsetof(typeof(field), item),         \
+                                sizeof(field.item));                   \
+       if (ret)                                                        \
+               return ret;
+
+#undef TRACE_FIELD_SPECIAL
+#define TRACE_FIELD_SPECIAL(type, item, len, cmd)                      \
+       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+                                offsetof(typeof(field), item),         \
+                                sizeof(field.item));                   \
+       if (ret)                                                        \
+               return ret;
+
+#undef TRACE_FIELD_ZERO_CHAR
+#define TRACE_FIELD_ZERO_CHAR(item)
+
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
+int                                                                    \
+ftrace_define_fields_##call(void)                                      \
+{                                                                      \
+       struct ftrace_event_call *event_call = &event_##call;           \
+       struct args field;                                              \
+       int ret;                                                        \
+                                                                       \
+       __common_field(unsigned char, type);                            \
+       __common_field(unsigned char, flags);                           \
+       __common_field(unsigned char, preempt_count);                   \
+       __common_field(int, pid);                                       \
+       __common_field(int, tgid);                                      \
+                                                                       \
+       tstruct;                                                        \
+                                                                       \
+       return ret;                                                     \
 }
+
 #include "trace_event_types.h"
index 7bfdf4c2347f38251acb30e6d3ef3ad588c89e30..e6b275b22ac08526d959d5f3b288219636bc5cb2 100644 (file)
@@ -168,6 +168,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
 
 void trace_hw_branch(u64 from, u64 to)
 {
+       struct ftrace_event_call *call = &event_hw_branch;
        struct trace_array *tr = hw_branch_trace;
        struct ring_buffer_event *event;
        struct hw_branch_entry *entry;
@@ -194,6 +195,7 @@ void trace_hw_branch(u64 from, u64 to)
        entry->ent.type = TRACE_HW_BRANCHES;
        entry->from = from;
        entry->to   = to;
+       filter_check_discard(call, entry, event);
        trace_buffer_unlock_commit(tr, event, 0, 0);
 
  out:
index bae791ebcc516ada0ef814e36afa6949178ebb2c..8ce7d7d62c0760701af61d19831c87e77abc3778 100644 (file)
@@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type,
 
 static void probe_power_end(struct power_trace *it)
 {
+       struct ftrace_event_call *call = &event_power;
        struct ring_buffer_event *event;
        struct trace_power *entry;
        struct trace_array_cpu *data;
@@ -54,6 +55,7 @@ static void probe_power_end(struct power_trace *it)
                goto out;
        entry   = ring_buffer_event_data(event);
        entry->state_data = *it;
+       filter_check_discard(call, entry, event);
        trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
        preempt_enable();
@@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it)
 static void probe_power_mark(struct power_trace *it, unsigned int type,
                                unsigned int level)
 {
+       struct ftrace_event_call *call = &event_power;
        struct ring_buffer_event *event;
        struct trace_power *entry;
        struct trace_array_cpu *data;
@@ -84,6 +87,7 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
                goto out;
        entry   = ring_buffer_event_data(event);
        entry->state_data = *it;
+       filter_check_discard(call, entry, event);
        trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
        preempt_enable();