tracing: Implement event pid filtering
author Steven Rostedt (Red Hat) <rostedt@goodmis.org>
Fri, 25 Sep 2015 16:58:44 +0000 (12:58 -0400)
committer Steven Rostedt <rostedt@goodmis.org>
Mon, 26 Oct 2015 01:33:56 +0000 (21:33 -0400)
Add the necessary hooks to use the pids loaded in set_event_pid to filter
all the events enabled in the tracing instance that match the pids listed.

Two probes are added to each of the sched_switch and sched_wakeup
tracepoints: one to be called before all other probes and one to be called
after all other probes. The first is used to set the necessary flags to let
the other probes know whether they should be traced or not.

The sched_switch pre probe will set the "ignore_pid" flag if neither the
previous nor the next task has a matching pid.

The sched_switch post probe will set the "ignore_pid" flag if the next task
does not have a matching pid.

The pre probe allows for probes tracing sched_switch to be traced if
necessary.

The sched_wakeup pre probe will set the "ignore_pid" flag if neither the
current task nor the wakee task has a matching pid.

The sched_wakeup post probe will set the "ignore_pid" flag if the current
task does not have a matching pid.

Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
include/linux/trace_events.h
kernel/trace/trace.h
kernel/trace/trace_events.c

index f85693bbcdc317e1bdcc693e63253be60787f23c..429fdfc3baf59e018d0e198062f894642af8a5bc 100644 (file)
@@ -328,6 +328,7 @@ enum {
        EVENT_FILE_FL_SOFT_DISABLED_BIT,
        EVENT_FILE_FL_TRIGGER_MODE_BIT,
        EVENT_FILE_FL_TRIGGER_COND_BIT,
+       EVENT_FILE_FL_PID_FILTER_BIT,
 };
 
 /*
@@ -341,6 +342,7 @@ enum {
  *                   tracepoint may be enabled)
  *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
  *  TRIGGER_COND  - When set, one or more triggers has an associated filter
+ *  PID_FILTER    - When set, the event is filtered based on pid
  */
 enum {
        EVENT_FILE_FL_ENABLED           = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -351,6 +353,7 @@ enum {
        EVENT_FILE_FL_SOFT_DISABLED     = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
        EVENT_FILE_FL_TRIGGER_MODE      = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
        EVENT_FILE_FL_TRIGGER_COND      = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
+       EVENT_FILE_FL_PID_FILTER        = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
 };
 
 struct trace_event_file {
@@ -429,6 +432,8 @@ extern enum event_trigger_type event_triggers_call(struct trace_event_file *file
 extern void event_triggers_post_call(struct trace_event_file *file,
                                     enum event_trigger_type tt);
 
+bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
+
 /**
  * trace_trigger_soft_disabled - do triggers and test if soft disabled
  * @file: The file pointer of the event to test
@@ -448,6 +453,8 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
                        event_triggers_call(file, NULL);
                if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
                        return true;
+               if (eflags & EVENT_FILE_FL_PID_FILTER)
+                       return trace_event_ignore_this_pid(file);
        }
        return false;
 }
index 250481043bb531356b77aa723fb2d618e5a36301..89ffdaf3e3718b55db75e938394936d92d61b563 100644 (file)
@@ -156,6 +156,8 @@ struct trace_array_cpu {
        pid_t                   pid;
        kuid_t                  uid;
        char                    comm[TASK_COMM_LEN];
+
+       bool                    ignore_pid;
 };
 
 struct tracer;
index 2ad7014707eec009cc3bbc0af4e5565f45035bb6..ab07058e27c1d3f9d188387abd51bb2bec9ce86e 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 
+#include <trace/events/sched.h>
+
 #include <asm/setup.h>
 
 #include "trace_output.h"
@@ -212,12 +214,32 @@ int trace_event_raw_init(struct trace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_event_raw_init);
 
+bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
+{
+       struct trace_array *tr = trace_file->tr;
+       struct trace_array_cpu *data;
+       struct trace_pid_list *pid_list;
+
+       pid_list = rcu_dereference_sched(tr->filtered_pids);
+       if (!pid_list)
+               return false;
+
+       data = this_cpu_ptr(tr->trace_buffer.data);
+
+       return data->ignore_pid;
+}
+EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
+
 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
                                 struct trace_event_file *trace_file,
                                 unsigned long len)
 {
        struct trace_event_call *event_call = trace_file->event_call;
 
+       if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
+           trace_event_ignore_this_pid(trace_file))
+               return NULL;
+
        local_save_flags(fbuffer->flags);
        fbuffer->pc = preempt_count();
        fbuffer->trace_file = trace_file;
@@ -459,15 +481,114 @@ static int cmp_pid(const void *key, const void *elt)
        return 1;
 }
 
+static bool
+check_ignore_pid(struct trace_pid_list *filtered_pids, struct task_struct *task)
+{
+       pid_t search_pid;
+       pid_t *pid;
+
+       /*
+        * Return false, because if filtered_pids does not exist,
+        * all pids are good to trace.
+        */
+       if (!filtered_pids)
+               return false;
+
+       search_pid = task->pid;
+
+       pid = bsearch(&search_pid, filtered_pids->pids,
+                     filtered_pids->nr_pids, sizeof(pid_t),
+                     cmp_pid);
+       if (!pid)
+               return true;
+
+       return false;
+}
+
+static void
+event_filter_pid_sched_switch_probe_pre(void *data,
+                   struct task_struct *prev, struct task_struct *next)
+{
+       struct trace_array *tr = data;
+       struct trace_pid_list *pid_list;
+
+       pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+       this_cpu_write(tr->trace_buffer.data->ignore_pid,
+                      check_ignore_pid(pid_list, prev) &&
+                      check_ignore_pid(pid_list, next));
+}
+
+static void
+event_filter_pid_sched_switch_probe_post(void *data,
+                   struct task_struct *prev, struct task_struct *next)
+{
+       struct trace_array *tr = data;
+       struct trace_pid_list *pid_list;
+
+       pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+       this_cpu_write(tr->trace_buffer.data->ignore_pid,
+                      check_ignore_pid(pid_list, next));
+}
+
+static void
+event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
+{
+       struct trace_array *tr = data;
+       struct trace_pid_list *pid_list;
+
+       /* Nothing to do if we are already tracing */
+       if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
+               return;
+
+       pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+       this_cpu_write(tr->trace_buffer.data->ignore_pid,
+                      check_ignore_pid(pid_list, task));
+}
+
+static void
+event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
+{
+       struct trace_array *tr = data;
+       struct trace_pid_list *pid_list;
+
+       /* Nothing to do if we are not tracing */
+       if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
+               return;
+
+       pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+       /* Set tracing if current is enabled */
+       this_cpu_write(tr->trace_buffer.data->ignore_pid,
+                      check_ignore_pid(pid_list, current));
+}
+
 static void __ftrace_clear_event_pids(struct trace_array *tr)
 {
        struct trace_pid_list *pid_list;
+       struct trace_event_file *file;
+       int cpu;
 
        pid_list = rcu_dereference_protected(tr->filtered_pids,
                                             lockdep_is_held(&event_mutex));
        if (!pid_list)
                return;
 
+       unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
+       unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
+
+       unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
+       unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
+
+       list_for_each_entry(file, &tr->events, list) {
+               clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+       }
+
+       for_each_possible_cpu(cpu)
+               per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
+
        rcu_assign_pointer(tr->filtered_pids, NULL);
 
        /* Wait till all users are no longer using pid filtering */
@@ -1429,13 +1550,14 @@ static int max_pids(struct trace_pid_list *pid_list)
 }
 
 static ssize_t
-ftrace_event_pid_write(struct file *file, const char __user *ubuf,
+ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
                       size_t cnt, loff_t *ppos)
 {
-       struct seq_file *m = file->private_data;
+       struct seq_file *m = filp->private_data;
        struct trace_array *tr = m->private;
        struct trace_pid_list *filtered_pids = NULL;
        struct trace_pid_list *pid_list = NULL;
+       struct trace_event_file *file;
        struct trace_parser parser;
        unsigned long val;
        loff_t this_pos;
@@ -1564,15 +1686,35 @@ ftrace_event_pid_write(struct file *file, const char __user *ubuf,
 
        rcu_assign_pointer(tr->filtered_pids, pid_list);
 
-       mutex_unlock(&event_mutex);
+       list_for_each_entry(file, &tr->events, list) {
+               set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+       }
 
        if (filtered_pids) {
                synchronize_sched();
 
                free_pages((unsigned long)filtered_pids->pids, filtered_pids->order);
                kfree(filtered_pids);
+       } else {
+               /*
+                * Register a probe that is called before all other probes
+                * to set ignore_pid if neither next nor prev matches.
+                * Register a probe that is called after all other probes
+                * to only keep ignore_pid set if next pid does not match.
+                */
+               register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
+                                                tr, INT_MAX);
+               register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
+                                                tr, 0);
+
+               register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
+                                                tr, INT_MAX);
+               register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
+                                                tr, 0);
        }
 
+       mutex_unlock(&event_mutex);
+
        ret = read;
        *ppos += read;