tracing: Have dynamic size event stack traces
author		Steven Rostedt <srostedt@redhat.com>
		Thu, 14 Jul 2011 20:36:53 +0000 (16:36 -0400)
committer	Steven Rostedt <rostedt@goodmis.org>
		Thu, 14 Jul 2011 20:36:53 +0000 (16:36 -0400)
Currently the stack trace per event in ftrace is only 8 frames.
This can be quite limiting and sometimes useless, especially when
the "ignore frames" count is wrong and we also use up stack frames
for the event processing itself.

Change this to be dynamic by adding a percpu buffer that we can
write a large stack trace into and then copy into the ring buffer.

Interrupts and NMIs that come in while another event is being
processed will only get to use the 8 frame stack. That should be
enough, as the task they interrupted will have the full stack
trace anyway.
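
In outline, the reservation logic added to __ftrace_trace_stack()
below works like this (a simplified sketch of the patch, with the
stack saving itself elided into comments):

	preempt_disable_notrace();

	/* Count nesting on this CPU; only the first user gets
	 * the large per-cpu buffer. */
	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
	barrier();	/* keep gcc from reordering around the counter */

	if (use_stack == 1) {
		/* Sole user: save up to a page worth of entries into
		 * the per-cpu ftrace_stack, then copy them into a
		 * dynamically sized ring buffer event. */
	} else {
		/* Nested in an interrupt or NMI: fall back to saving
		 * 8 (FTRACE_STACK_ENTRIES) frames directly into the
		 * ring buffer event. */
	}

	barrier();
	__get_cpu_var(ftrace_stack_reserve)--;
	preempt_enable_notrace();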

Requested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
include/linux/ftrace_event.h
kernel/trace/trace.c
kernel/trace/trace_entries.h
kernel/trace/trace_output.c

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index b1e69eefc2035b95a5a5baac92cc55bfbe5f41e3..96efa6794ea5293a59f40638d344005b2497f0bc 100644
@@ -76,6 +76,7 @@ struct trace_iterator {
        struct trace_entry      *ent;
        unsigned long           lost_events;
        int                     leftover;
+       int                     ent_size;
        int                     cpu;
        u64                     ts;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d9c16123f6e22d69973b2bc9e4ef5a587fdb6f26..e5df02c69b1d6d0d29c64cd9e2165ddfc6d9a3f6 100644
@@ -1248,6 +1248,15 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
 }
 
 #ifdef CONFIG_STACKTRACE
+
+#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+struct ftrace_stack {
+       unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
+};
+
+static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+static DEFINE_PER_CPU(int, ftrace_stack_reserve);
+
 static void __ftrace_trace_stack(struct ring_buffer *buffer,
                                 unsigned long flags,
                                 int skip, int pc, struct pt_regs *regs)
@@ -1256,25 +1265,77 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
        struct ring_buffer_event *event;
        struct stack_entry *entry;
        struct stack_trace trace;
+       int use_stack;
+       int size = FTRACE_STACK_ENTRIES;
+
+       trace.nr_entries        = 0;
+       trace.skip              = skip;
+
+       /*
+        * Since events can happen in NMIs there's no safe way to
+        * use the per cpu ftrace_stacks. We reserve it and if an interrupt
+        * or NMI comes in, it will just have to use the default
+        * FTRACE_STACK_ENTRIES (8 frames).
+        */
+       preempt_disable_notrace();
+
+       use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+       /*
+        * We don't need any atomic variables, just a barrier.
+        * If an interrupt comes in, we don't care, because it would
+        * have exited and put the counter back to what we want.
+        * We just need a barrier to keep gcc from moving things
+        * around.
+        */
+       barrier();
+       if (use_stack == 1) {
+               trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
+               trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
+
+               if (regs)
+                       save_stack_trace_regs(regs, &trace);
+               else
+                       save_stack_trace(&trace);
+
+               if (trace.nr_entries > size)
+                       size = trace.nr_entries;
+       } else
+               /* From now on, use_stack is a boolean */
+               use_stack = 0;
+
+       size *= sizeof(unsigned long);
 
        event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
-                                         sizeof(*entry), flags, pc);
+                                         sizeof(*entry) + size, flags, pc);
        if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       memset(&entry->caller, 0, sizeof(entry->caller));
+               goto out;
+       entry = ring_buffer_event_data(event);
 
-       trace.nr_entries        = 0;
-       trace.max_entries       = FTRACE_STACK_ENTRIES;
-       trace.skip              = skip;
-       trace.entries           = entry->caller;
+       memset(&entry->caller, 0, size);
+
+       if (use_stack)
+               memcpy(&entry->caller, trace.entries,
+                      trace.nr_entries * sizeof(unsigned long));
+       else {
+               trace.max_entries       = FTRACE_STACK_ENTRIES;
+               trace.entries           = entry->caller;
+               if (regs)
+                       save_stack_trace_regs(regs, &trace);
+               else
+                       save_stack_trace(&trace);
+       }
+
+       entry->size = trace.nr_entries;
 
-       if (regs)
-               save_stack_trace_regs(regs, &trace);
-       else
-               save_stack_trace(&trace);
        if (!filter_check_discard(call, entry, buffer, event))
                ring_buffer_unlock_commit(buffer, event);
+
+ out:
+       /* Again, don't let gcc optimize things here */
+       barrier();
+       __get_cpu_var(ftrace_stack_reserve)--;
+       preempt_enable_notrace();
+
 }
 
 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
@@ -1562,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
 
        ftrace_enable_cpu();
 
-       return event ? ring_buffer_event_data(event) : NULL;
+       if (event) {
+               iter->ent_size = ring_buffer_event_length(event);
+               return ring_buffer_event_data(event);
+       }
+       iter->ent_size = 0;
+       return NULL;
 }
 
 static struct trace_entry *
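
With iter->ent_size now recorded on every peek, readers of
variable-size entries can bound their walk by the event length.
For illustration only, a hypothetical helper (not part of this
patch) could recover the number of stored frames like so:

	/* Hypothetical helper: frames stored in a stack_entry,
	 * derived from the event length (modulo any alignment
	 * padding the ring buffer adds). */
	static int stack_entry_frames(struct trace_iterator *iter)
	{
		int bytes = iter->ent_size - sizeof(struct stack_entry);

		return bytes / sizeof(unsigned long);
	}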
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e32744c84d9497bd041a2e612d82f1f859406af9..93365907f219e71c446c0cf7d3a742d6eb83880a 100644
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
        TRACE_STACK,
 
        F_STRUCT(
-               __array(        unsigned long,  caller, FTRACE_STACK_ENTRIES    )
+               __field(        int,            size    )
+               __dynamic_array(unsigned long,  caller  )
        ),
 
        F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
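
For reference, after this change the F_STRUCT() above roughly
expands to the following generated structure (a sketch):

	struct stack_entry {
		struct trace_entry	ent;
		int			size;		/* entries saved */
		unsigned long		caller[];	/* dynamically sized */
	};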
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index e37de492a9e18c7d41b629c8267ce0f78f03fff9..51999309a6cf5d71347da29c3406e45f1b1e98d0 100644
@@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 {
        struct stack_entry *field;
        struct trace_seq *s = &iter->seq;
-       int i;
+       unsigned long *p;
+       unsigned long *end;
 
        trace_assign_type(field, iter->ent);
+       end = (unsigned long *)((long)iter->ent + iter->ent_size);
 
        if (!trace_seq_puts(s, "<stack trace>\n"))
                goto partial;
-       for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-               if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
-                       break;
+
+       for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
                if (!trace_seq_puts(s, " => "))
                        goto partial;
 
-               if (!seq_print_ip_sym(s, field->caller[i], flags))
+               if (!seq_print_ip_sym(s, *p, flags))
                        goto partial;
                if (!trace_seq_puts(s, "\n"))
                        goto partial;
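
The printed output keeps its old shape, just no longer capped at
8 frames; with illustrative (made-up) symbols it looks roughly
like:

	<stack trace>
	 => sys_open
	 => do_sys_open
	 => system_call_fastpath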