tracing: add support for userspace stacktraces in tracing/iter_ctrl
author Török Edwin <edwintorok@gmail.com>
Sat, 22 Nov 2008 11:28:47 +0000 (13:28 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sun, 23 Nov 2008 08:25:15 +0000 (09:25 +0100)
Impact: add new (default-off) tracing visualization feature

Usage example:

 mount -t debugfs nodev /sys/kernel/debug
 cd /sys/kernel/debug/tracing
 echo userstacktrace >iter_ctrl
 echo sched_switch >current_tracer
 echo 1 >tracing_enabled
 .... run application ...
 echo 0 >tracing_enabled

Then read one of 'trace','latency_trace','trace_pipe'.

To get the best output you can compile your userspace programs with
frame pointers (at least glibc + the app you are tracing).

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Documentation/ftrace.txt
arch/x86/kernel/stacktrace.c
include/linux/stacktrace.h
kernel/trace/trace.c
kernel/trace/trace.h

index 753f4de4b1752cd9c0fe7c84d4cbd6cf665e50cd..79a80f79c062c8723d4279e56212ec07d20e434b 100644 (file)
@@ -324,7 +324,7 @@ output. To see what is available, simply cat the file:
 
   cat /debug/tracing/trace_options
   print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
- noblock nostacktrace nosched-tree
+ noblock nostacktrace nosched-tree nouserstacktrace
 
 To disable one of the options, echo in the option prepended with "no".
 
@@ -378,6 +378,9 @@ Here are the available options:
                When a trace is recorded, so is the stack of functions.
                This allows for back traces of trace sites.
 
+  userstacktrace - This option changes the trace.
+                  It records a stacktrace of the current userspace thread.
+
   sched-tree - TBD (any users??)
 
 
index a03e7f6d90c35af5f4638f6394e4a39f6e2e4020..b15153060417371fed5954dd3e2e8257cc63d79c 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
 static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,59 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
+
+struct stack_frame {   /* record copied from user memory at a frame pointer */
+       const void __user       *next_fp;       /* frame the walker visits next */
+       unsigned long           return_address; /* value stored into the trace */
+};
+
+/* Read the frame at user address fp into *frame with page faults disabled;
+ * returns 1 when the whole frame was copied and 0 otherwise. */
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+       unsigned long not_copied;
+
+       if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+               return 0;
+
+       pagefault_disable();
+       not_copied = __copy_from_user_inatomic(frame, fp, sizeof(*frame));
+       pagefault_enable();
+
+       return not_copied ? 0 : 1;
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+       /*
+        * Trace user stack if we are not a kernel thread: store the user
+        * ip, then each frame's return address, then ULONG_MAX if room.
+        */
+       if (current->mm) {
+               const struct pt_regs *regs = task_pt_regs(current);
+               const void __user *fp = (const void __user *)regs->bp;
+
+               if (trace->nr_entries < trace->max_entries)
+                       trace->entries[trace->nr_entries++] = regs->ip;
+
+               while (trace->nr_entries < trace->max_entries) {
+                       struct stack_frame frame = { NULL, 0 };
+                       if (!copy_stack_frame(fp, &frame) ||
+                           (unsigned long)fp < regs->sp)
+                               break;
+                       /* Frames lacking a return address may loop forever */
+                       if (!frame.return_address)
+                               break;
+                       trace->entries[trace->nr_entries++] =
+                               frame.return_address;
+                       if (fp == frame.next_fp)
+                               break;
+                       fp = frame.next_fp;
+               }
+       }
+       if (trace->nr_entries < trace->max_entries)
+               trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
index b106fd8e0d5c4298d6495e1d48cefdd81d21910d..68de51468f5d980e9fd4e9865eafeba25eebf01d 100644 (file)
@@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
                                struct stack_trace *trace);
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
+
+#ifdef CONFIG_X86
+extern void save_stack_trace_user(struct stack_trace *trace);
+#else
+# define save_stack_trace_user(trace)              do { } while (0)
+#endif
+
 #else
 # define save_stack_trace(trace)                       do { } while (0)
 # define save_stack_trace_tsk(tsk, trace)              do { } while (0)
+# define save_stack_trace_user(trace)              do { } while (0)
 # define print_stack_trace(trace, spaces)              do { } while (0)
 #endif
 
index 4ee6f0375222e5d274a045f3ea6617836dab093c..ced8b4fa9f5138bba68118b30b83d0170b208d3c 100644 (file)
@@ -275,6 +275,7 @@ static const char *trace_options[] = {
        "ftrace_preempt",
        "branch",
        "annotate",
+       "userstacktrace",
        NULL
 };
 
@@ -918,6 +919,44 @@ void __trace_stack(struct trace_array *tr,
        ftrace_trace_stack(tr, data, flags, skip, preempt_count());
 }
 
+/* Add a TRACE_USER_STACK event to tr's ring buffer and let
+ * save_stack_trace_user() fill it; no-op unless the option flag is set. */
+static void ftrace_trace_userstack(struct trace_array *tr,
+                  struct trace_array_cpu *data,
+                  unsigned long flags, int pc)
+{
+       struct ring_buffer_event *ev;
+       struct userstack_entry *ustack;
+       struct stack_trace walk;
+       unsigned long rb_flags;
+
+       if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
+               return;
+
+       ev = ring_buffer_lock_reserve(tr->buffer, sizeof(*ustack), &rb_flags);
+       if (!ev)
+               return;
+
+       ustack = ring_buffer_event_data(ev);
+       tracing_generic_entry_update(&ustack->ent, flags, pc);
+       ustack->ent.type = TRACE_USER_STACK;
+       memset(&ustack->caller, 0, sizeof(ustack->caller));
+
+       walk.entries     = ustack->caller;
+       walk.max_entries = FTRACE_STACK_ENTRIES;
+       walk.nr_entries  = 0;
+       walk.skip        = 0;
+       save_stack_trace_user(&walk);
+       ring_buffer_unlock_commit(tr->buffer, ev, rb_flags);
+}
+
+/* Emit a user stack entry for tr, supplying the current preempt count. */
+void __trace_userstack(struct trace_array *tr, struct trace_array_cpu *data,
+                  unsigned long flags)
+{
+       ftrace_trace_userstack(tr, data, flags, preempt_count());
+}
+
 static void
 ftrace_trace_special(void *__tr, void *__data,
                     unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -941,6 +980,7 @@ ftrace_trace_special(void *__tr, void *__data,
        entry->arg3                     = arg3;
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+       ftrace_trace_userstack(tr, data, irq_flags, pc);
 
        trace_wake_up();
 }
@@ -979,6 +1019,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
        entry->next_cpu = task_cpu(next);
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, flags, 5, pc);
+       ftrace_trace_userstack(tr, data, flags, pc);
 }
 
 void
@@ -1008,6 +1049,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
        entry->next_cpu                 = task_cpu(wakee);
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, flags, 6, pc);
+       ftrace_trace_userstack(tr, data, flags, pc);
 
        trace_wake_up();
 }
@@ -1387,6 +1429,31 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
        return ret;
 }
 
+/* Print entry's addresses up to the ULONG_MAX marker, joined by " <- " and
+ * with "??" for zero; returns the last trace_seq result (1 if none made). */
+static int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+               unsigned long sym_flags)
+{
+       unsigned int idx;
+       int ok = 1;
+
+       for (idx = 0; ok && idx < FTRACE_STACK_ENTRIES; idx++) {
+               const unsigned long addr = entry->caller[idx];
+
+               if (addr == ULONG_MAX)
+                       break;
+               if (idx > 0)
+                       ok = trace_seq_puts(s, " <- ");
+               if (addr == 0)
+                       ok = trace_seq_puts(s, "??");
+               else if (ok)
+                       ok = trace_seq_printf(s, " <" IP_FMT ">", addr);
+       }
+
+       return ok;
+}
+
 static void print_lat_help_header(struct seq_file *m)
 {
        seq_puts(m, "#                  _------=> CPU#            \n");
@@ -1702,6 +1769,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
                                 field->line);
                break;
        }
+       case TRACE_USER_STACK: {
+               struct userstack_entry *field;
+
+               trace_assign_type(field, entry);
+
+               seq_print_userip_objs(field, s, sym_flags);
+               if (entry->flags & TRACE_FLAG_CONT)
+                       trace_seq_print_cont(s, iter);
+               break;
+       }
        default:
                trace_seq_printf(s, "Unknown type %d\n", entry->type);
        }
@@ -1853,6 +1930,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
                                 field->line);
                break;
        }
+       case TRACE_USER_STACK: {
+               struct userstack_entry *field;
+
+               trace_assign_type(field, entry);
+
+               ret = seq_print_userip_objs(field, s, sym_flags);
+               if (!ret)
+                       return TRACE_TYPE_PARTIAL_LINE;
+               ret = trace_seq_putc(s, '\n');
+               if (!ret)
+                       return TRACE_TYPE_PARTIAL_LINE;
+               break;
+       }
        }
        return TRACE_TYPE_HANDLED;
 }
@@ -1912,6 +2002,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
                break;
        }
        case TRACE_SPECIAL:
+       case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
 
@@ -2000,6 +2091,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
                break;
        }
        case TRACE_SPECIAL:
+       case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
 
@@ -2054,6 +2146,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
                break;
        }
        case TRACE_SPECIAL:
+       case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
 
index 2cb12fd98f6b395dbdc1083d01a96b527633f858..17bb4c830b0197af923250b316057cbaf98f88d4 100644 (file)
@@ -26,6 +26,7 @@ enum trace_type {
        TRACE_BOOT_CALL,
        TRACE_BOOT_RET,
        TRACE_FN_RET,
+       TRACE_USER_STACK,
 
        __TRACE_LAST_TYPE
 };
@@ -42,6 +43,7 @@ struct trace_entry {
        unsigned char           flags;
        unsigned char           preempt_count;
        int                     pid;
+       int                     tgid;
 };
 
 /*
@@ -99,6 +101,11 @@ struct stack_entry {
        unsigned long           caller[FTRACE_STACK_ENTRIES];
 };
 
+struct userstack_entry {       /* payload of a TRACE_USER_STACK event */
+       struct trace_entry      ent;    /* generic entry fields (type, flags, pid, ...) */
+       unsigned long           caller[FTRACE_STACK_ENTRIES];   /* filled by save_stack_trace_user() */
+};
+
 /*
  * ftrace_printk entry:
  */
@@ -240,6 +247,7 @@ extern void __ftrace_bad_type(void);
                IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);        \
                IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
                IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);   \
+               IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
                IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);   \
                IF_ASSIGN(var, ent, struct special_entry, 0);           \
                IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,          \
@@ -500,6 +508,7 @@ enum trace_iterator_flags {
        TRACE_ITER_PREEMPTONLY          = 0x800,
        TRACE_ITER_BRANCH               = 0x1000,
        TRACE_ITER_ANNOTATE             = 0x2000,
+       TRACE_ITER_USERSTACKTRACE       = 0x4000
 };
 
 /*