tracing/syscalls: core infrastructure for syscalls tracing, enhancements
authorFrederic Weisbecker <fweisbec@gmail.com>
Fri, 13 Mar 2009 14:42:11 +0000 (15:42 +0100)
committerIngo Molnar <mingo@elte.hu>
Fri, 13 Mar 2009 15:57:42 +0000 (16:57 +0100)
Impact: new feature

This adds generic support for syscall tracing. It is currently
used through a dedicated tracer, but other tracing engines can
use it too. (They just have to call
{start,stop}_ftrace_syscalls() and use the display callbacks,
unless they want to override them.)

The syscall prototype definitions are abused here to steal
some metadata:

- syscall name, param types, param names, number of params

The syscall address is not directly saved during this definition
because we don't know if its prototype is available in the
namespace. But we don't really need it. The arch just has to
build a function able to resolve the syscall number to its
metadata struct.

The current tracer prints the syscall names, parameter names
and values (and optionally their types). Currently the value is
printed as raw hex, but higher-level value display is on my TODO list.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1236955332-10133-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/asm-generic/vmlinux.lds.h
include/linux/ftrace.h
include/linux/syscalls.h
kernel/trace/trace.h
kernel/trace/trace_syscalls.c

index 0e0f39be6c8b668811c168ab28335108cd76c0dc..d3bc3c86df6a76efaaf2c025470661dc44d37e4c 100644 (file)
 #define TRACE_PRINTKS()
 #endif
 
+#ifdef CONFIG_FTRACE_SYSCALLS /* place all __syscalls_metadata input sections between the start/stop symbols */
+#define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .;        \
+                        *(__syscalls_metadata)                         \
+                        VMLINUX_SYMBOL(__stop_syscalls_metadata) = .;
+#else /* !CONFIG_FTRACE_SYSCALLS: expands to nothing */
+#define TRACE_SYSCALLS()
+#endif /* CONFIG_FTRACE_SYSCALLS */
+
 /* .data section */
 #define DATA_DATA                                                      \
        *(.data)                                                        \
        LIKELY_PROFILE()                                                \
        BRANCH_PROFILE()                                                \
        TRACE_PRINTKS()                                                 \
-       FTRACE_EVENTS()
+       FTRACE_EVENTS()                                                 \
+       TRACE_SYSCALLS()
 
 #define RO_DATA(align)                                                 \
        . = ALIGN((align));                                             \
index c146c1021a29c2ee34560e5a1a0ff54f41387ab4..6dc1c652447ede2f33a589efed9e7c359c32942e 100644 (file)
@@ -506,13 +506,21 @@ static inline void trace_hw_branch_oops(void) {}
 /*
  * A syscall entry in the ftrace syscalls array.
  *
- * @syscall_nr: syscall number
+ * @name: name of the syscall
+ * @nb_args: number of parameters it takes
+ * @types: list of types as strings
+ * @args: list of args as strings (args[i] matches types[i])
  */
-struct syscall_trace_entry {
-       int             syscall_nr;
+struct syscall_metadata {
+       const char      *name;
+       int             nb_args;
+       const char      **types;
+       const char      **args;
 };
 
 #ifdef CONFIG_FTRACE_SYSCALLS
+extern void arch_init_ftrace_syscalls(void);
+extern struct syscall_metadata *syscall_nr_to_meta(int nr);
 extern void start_ftrace_syscalls(void);
 extern void stop_ftrace_syscalls(void);
 extern void ftrace_syscall_enter(struct pt_regs *regs);
index f9f900cfd066f1ad19ac979dba0c48f2384b95e2..0cff9bb80b028034d39c9014785cc386a3edd862 100644 (file)
@@ -65,6 +65,7 @@ struct old_linux_dirent;
 #include <asm/signal.h>
 #include <linux/quota.h>
 #include <linux/key.h>
+#include <linux/ftrace.h>
 
 #define __SC_DECL1(t1, a1)     t1 a1
 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
@@ -95,7 +96,46 @@ struct old_linux_dirent;
 #define __SC_TEST5(t5, a5, ...)        __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
 #define __SC_TEST6(t6, a6, ...)        __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
+#ifdef CONFIG_FTRACE_SYSCALLS
+#define __SC_STR_ADECL1(t, a)          #a /* stringify the name of the last (type, name) pair */
+#define __SC_STR_ADECL2(t, a, ...)     #a, __SC_STR_ADECL1(__VA_ARGS__) /* stringify one name, recurse on the remaining pairs */
+#define __SC_STR_ADECL3(t, a, ...)     #a, __SC_STR_ADECL2(__VA_ARGS__)
+#define __SC_STR_ADECL4(t, a, ...)     #a, __SC_STR_ADECL3(__VA_ARGS__)
+#define __SC_STR_ADECL5(t, a, ...)     #a, __SC_STR_ADECL4(__VA_ARGS__)
+#define __SC_STR_ADECL6(t, a, ...)     #a, __SC_STR_ADECL5(__VA_ARGS__)
+
+#define __SC_STR_TDECL1(t, a)          #t /* stringify the type of the last (type, name) pair */
+#define __SC_STR_TDECL2(t, a, ...)     #t, __SC_STR_TDECL1(__VA_ARGS__) /* stringify one type, recurse on the remaining pairs */
+#define __SC_STR_TDECL3(t, a, ...)     #t, __SC_STR_TDECL2(__VA_ARGS__)
+#define __SC_STR_TDECL4(t, a, ...)     #t, __SC_STR_TDECL3(__VA_ARGS__)
+#define __SC_STR_TDECL5(t, a, ...)     #t, __SC_STR_TDECL4(__VA_ARGS__)
+#define __SC_STR_TDECL6(t, a, ...)     #t, __SC_STR_TDECL5(__VA_ARGS__)
+
+#define SYSCALL_METADATA(sname, nb) /* define one syscall_metadata record for syscall sname with nb arguments */ \
+       static const struct syscall_metadata __used             \
+         __attribute__((__aligned__(4)))                       \
+         __attribute__((section("__syscalls_metadata")))       \
+         __syscall_meta_##sname = {                            \
+               .name           = "sys"#sname, /* SYSCALL_DEFINEn passes _##name, so this reads "sys_<name>" */ \
+               .nb_args        = nb,                           \
+               .types          = types_##sname, /* types_/args_ arrays are defined by SYSCALL_DEFINEx */ \
+               .args           = args_##sname,                 \
+       }
+
+#define SYSCALL_DEFINE0(sname) /* zero-argument syscall: metadata record, then the sys_<sname>(void) declarator */ \
+       static const struct syscall_metadata __used             \
+         __attribute__((__aligned__(4)))                       \
+         __attribute__((section("__syscalls_metadata")))       \
+         __syscall_meta_##sname = {                            \
+               .name           = "sys_"#sname, /* sname has no leading underscore here */ \
+               .nb_args        = 0, /* .types and .args are left unset */ \
+       };                                                      \
+       asmlinkage long sys_##sname(void)
+
+#else
 #define SYSCALL_DEFINE0(name)     asmlinkage long sys_##name(void)
+#endif
+
 #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
 #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
 #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
@@ -117,10 +157,26 @@ struct old_linux_dirent;
 #endif
 #endif
 
+#ifdef CONFIG_FTRACE_SYSCALLS /* emit the type/name string tables and metadata before the real definition */
+#define SYSCALL_DEFINEx(x, sname, ...)                         \
+       static const char *types_##sname[] = {                  \
+               __SC_STR_TDECL##x(__VA_ARGS__)                  \
+       };                                                      \
+       static const char *args_##sname[] = {                   \
+               __SC_STR_ADECL##x(__VA_ARGS__)                  \
+       };                                                      \
+       SYSCALL_METADATA(sname, x);                             \
+       __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
+#else /* !CONFIG_FTRACE_SYSCALLS: forward straight to __SYSCALL_DEFINEx */
+#define SYSCALL_DEFINEx(x, sname, ...)                         \
+       __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
+#endif /* CONFIG_FTRACE_SYSCALLS */
+
 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 
 #define SYSCALL_DEFINE(name) static inline long SYSC_##name
-#define SYSCALL_DEFINEx(x, name, ...)                                  \
+
+#define __SYSCALL_DEFINEx(x, name, ...)                                        \
        asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__));           \
        static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__));       \
        asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__))            \
@@ -134,7 +190,7 @@ struct old_linux_dirent;
 #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */
 
 #define SYSCALL_DEFINE(name) asmlinkage long sys_##name
-#define SYSCALL_DEFINEx(x, name, ...)                                  \
+#define __SYSCALL_DEFINEx(x, name, ...)                                        \
        asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))
 
 #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
index 3d49daae47dcca70bd2d83f227dceb6e2c0b04ce..d80ca0d464d970675030b622a8025c3af601765c 100644 (file)
@@ -194,6 +194,19 @@ struct kmemtrace_free_entry {
        const void *ptr;
 };
 
+struct syscall_trace_enter { /* record written by ftrace_syscall_enter() for TRACE_SYSCALL_ENTER */
+       struct trace_entry      ent;
+       int                     nr; /* syscall number */
+       unsigned long           args[]; /* nb_args argument values, filled by syscall_get_arguments() */
+};
+
+struct syscall_trace_exit { /* record written by ftrace_syscall_exit() for TRACE_SYSCALL_EXIT */
+       struct trace_entry      ent;
+       int                     nr; /* syscall number */
+       unsigned long           ret; /* value from syscall_get_return_value() */
+};
+
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -306,6 +319,10 @@ extern void __ftrace_bad_type(void);
                          TRACE_KMEM_ALLOC);    \
                IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
                          TRACE_KMEM_FREE);     \
+               IF_ASSIGN(var, ent, struct syscall_trace_enter,         \
+                         TRACE_SYSCALL_ENTER);                         \
+               IF_ASSIGN(var, ent, struct syscall_trace_exit,          \
+                         TRACE_SYSCALL_EXIT);                          \
                __ftrace_bad_type();                                    \
        } while (0)
 
index 66cf97449af30f7b73b80b536b6d7a2d5742173b..c72e599230ff58d8a238501a5e734ea29fe65b38 100644 (file)
@@ -1,6 +1,5 @@
-#include <linux/ftrace.h>
 #include <linux/kernel.h>
-
+#include <linux/ftrace.h>
 #include <asm/syscall.h>
 
 #include "trace_output.h"
@@ -8,6 +7,90 @@
 
 static atomic_t refcount;
 
+/* Tracer option bits (only one is defined so far) */
+enum {
+       TRACE_SYSCALLS_OPT_TYPES = 0x1, /* also print each parameter's type */
+};
+
+static struct tracer_opt syscalls_opts[] = {
+       { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
+       { } /* empty last entry; presumably the list terminator - confirm against tracer_opt users */
+};
+
+static struct tracer_flags syscalls_flags = {
+       .val = 0, /* By default: no argument types */
+       .opts = syscalls_opts
+};
+
+/*
+ * Print a syscall entry record as "name(arg: value, ...)".
+ *
+ * @iter:  iterator whose ->ent is the record to print and whose ->seq
+ *         receives the text
+ * @flags: unused here
+ *
+ * Values are printed in hex; each parameter's type is printed as well
+ * when TRACE_SYSCALLS_OPT_TYPES is set in syscalls_flags.  If no
+ * metadata is found for the syscall number, only a newline is emitted.
+ *
+ * Returns TRACE_TYPE_PARTIAL_LINE as soon as a checked
+ * trace_seq_printf() call returns 0, TRACE_TYPE_HANDLED otherwise.
+ */
+enum print_line_t
+print_syscall_enter(struct trace_iterator *iter, int flags)
+{
+       struct trace_seq *s = &iter->seq;
+       struct trace_entry *ent = iter->ent;
+       struct syscall_trace_enter *trace;
+       struct syscall_metadata *entry;
+       int i, ret, syscall;
+
+       trace_assign_type(trace, ent);
+
+       syscall = trace->nr;
+
+       entry = syscall_nr_to_meta(syscall);
+       if (!entry)
+               goto end;
+
+       ret = trace_seq_printf(s, "%s(", entry->name);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /*
+        * The closing parenthesis is normally printed together with the
+        * last argument.  A syscall without arguments never enters the
+        * loop below, so close the parenthesis here for it.
+        */
+       if (!entry->nb_args) {
+               ret = trace_seq_printf(s, ")");
+               if (!ret)
+                       return TRACE_TYPE_PARTIAL_LINE;
+       }
+
+       for (i = 0; i < entry->nb_args; i++) {
+               /* parameter types */
+               if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) {
+                       ret = trace_seq_printf(s, "%s ", entry->types[i]);
+                       if (!ret)
+                               return TRACE_TYPE_PARTIAL_LINE;
+               }
+               /* parameter values; the last one also closes the list */
+               ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i],
+                                      trace->args[i],
+                                      i == entry->nb_args - 1 ? ")" : ",");
+               if (!ret)
+                       return TRACE_TYPE_PARTIAL_LINE;
+       }
+
+end:
+       trace_seq_printf(s, "\n");
+       return TRACE_TYPE_HANDLED;
+}
+
+/*
+ * Print a syscall exit record as "name -> 0x<return value>".
+ *
+ * When no metadata is found for the syscall number, only a newline is
+ * emitted.  Returns TRACE_TYPE_PARTIAL_LINE if printing the record
+ * returns 0, TRACE_TYPE_HANDLED otherwise.
+ */
+enum print_line_t
+print_syscall_exit(struct trace_iterator *iter, int flags)
+{
+       struct trace_entry *raw = iter->ent;
+       struct trace_seq *seq = &iter->seq;
+       struct syscall_trace_exit *rec;
+       struct syscall_metadata *meta;
+
+       trace_assign_type(rec, raw);
+
+       meta = syscall_nr_to_meta(rec->nr);
+       if (meta) {
+               if (!trace_seq_printf(seq, "%s -> 0x%lx\n", meta->name,
+                                     rec->ret))
+                       return TRACE_TYPE_PARTIAL_LINE;
+       } else {
+               /* Unknown syscall number: leave an empty line */
+               trace_seq_printf(seq, "\n");
+       }
+
+       return TRACE_TYPE_HANDLED;
+}
+
 void start_ftrace_syscalls(void)
 {
        unsigned long flags;
@@ -16,6 +99,7 @@ void start_ftrace_syscalls(void)
        if (atomic_inc_return(&refcount) != 1)
                goto out;
 
+       arch_init_ftrace_syscalls();
        read_lock_irqsave(&tasklist_lock, flags);
 
        do_each_thread(g, t) {
@@ -48,20 +132,63 @@ out:
 
 void ftrace_syscall_enter(struct pt_regs *regs)
 {
+       struct syscall_trace_enter *entry;
+       struct syscall_metadata *sys_data;
+       struct ring_buffer_event *event;
+       int size;
        int syscall_nr;
+       int cpu;
 
        syscall_nr = syscall_get_nr(current, regs);
 
-       trace_printk("syscall %d enter\n", syscall_nr);
+       cpu = raw_smp_processor_id();
+
+       sys_data = syscall_nr_to_meta(syscall_nr);
+       if (!sys_data)
+               return;
+
+       size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
+
+       event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size,
+                                                       0, 0);
+       if (!event)
+               return;
+
+       entry = ring_buffer_event_data(event);
+       entry->nr = syscall_nr;
+       syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
+
+       trace_current_buffer_unlock_commit(event, 0, 0);
+       trace_wake_up();
 }
 
 void ftrace_syscall_exit(struct pt_regs *regs)
 {
+       struct syscall_trace_exit *entry;
+       struct syscall_metadata *sys_data;
+       struct ring_buffer_event *event;
        int syscall_nr;
+       int cpu;
 
        syscall_nr = syscall_get_nr(current, regs);
 
-       trace_printk("syscall %d exit\n", syscall_nr);
+       cpu = raw_smp_processor_id();
+
+       sys_data = syscall_nr_to_meta(syscall_nr);
+       if (!sys_data)
+               return;
+
+       event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT,
+                               sizeof(*entry), 0, 0);
+       if (!event)
+               return;
+
+       entry = ring_buffer_event_data(event);
+       entry->nr = syscall_nr;
+       entry->ret = syscall_get_return_value(current, regs);
+
+       trace_current_buffer_unlock_commit(event, 0, 0);
+       trace_wake_up();
 }
 
 static int init_syscall_tracer(struct trace_array *tr)
@@ -77,17 +204,20 @@ static void reset_syscall_tracer(struct trace_array *tr)
 }
 
 static struct trace_event syscall_enter_event = {
-       .type           = TRACE_SYSCALL_ENTER,
+       .type           = TRACE_SYSCALL_ENTER,
+       .trace          = print_syscall_enter,
 };
 
 static struct trace_event syscall_exit_event = {
-       .type           = TRACE_SYSCALL_EXIT,
+       .type           = TRACE_SYSCALL_EXIT,
+       .trace          = print_syscall_exit,
 };
 
 static struct tracer syscall_tracer __read_mostly = {
-       .name           = "syscall",
+       .name           = "syscall",
        .init           = init_syscall_tracer,
-       .reset          = reset_syscall_tracer
+       .reset          = reset_syscall_tracer,
+       .flags          = &syscalls_flags,
 };
 
 __init int register_ftrace_syscalls(void)