perf trace: Add support for printing call chains on sys_exit events.
authorMilian Wolff <milian.wolff@kdab.com>
Fri, 8 Apr 2016 11:34:15 +0000 (13:34 +0200)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Tue, 12 Apr 2016 01:18:16 +0000 (22:18 -0300)
Now, one can print the call chain for every encountered sys_exit event,
e.g.:

    $ perf trace -e nanosleep --call-graph dwarf path/to/ex_sleep
    1005.757 (1000.090 ms): ex_sleep/13167 nanosleep(...) = 0
                                             syscall_slow_exit_work ([kernel.kallsyms])
                                             syscall_return_slowpath ([kernel.kallsyms])
                                             int_ret_from_sys_call ([kernel.kallsyms])
                                             __nanosleep (/usr/lib/libc-2.23.so)
                                             [unknown] (/usr/lib/libQt5Core.so.5.6.0)
                                             QThread::sleep (/usr/lib/libQt5Core.so.5.6.0)
                                             main (path/to/ex_sleep)
                                             __libc_start_main (/usr/lib/libc-2.23.so)
                                             _start (path/to/ex_sleep)

Note that it is advised to increase the number of mmap pages to prevent
event losses when using this new feature. Often, adding `-m 10M` to the
`perf trace` invocation is enough.

This feature is also available in strace when built with libunwind via
`strace -k`. Performance wise, this solution is much better:

    $ time find path/to/linux &> /dev/null

    real    0m0.051s
    user    0m0.013s
    sys     0m0.037s

    $ time perf trace -m 800M --call-graph dwarf find path/to/linux &> /dev/null

    real    0m2.624s
    user    0m1.203s
    sys     0m1.333s

    $ time strace -k find path/to/linux  &> /dev/null

    real    0m35.398s
    user    0m10.403s
    sys     0m23.173s

Note that it is currently not possible to configure the print output.
Adding such a feature, similar to what is available in `perf script` via
its `--fields` knob can be added later on.

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
LPU-Reference: 1460115255-17648-1-git-send-email-milian.wolff@kdab.com
[ Split from a larger patch, do not print the IP, left align,
  remove dup call symbol__init(), added man page entry ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-trace.txt
tools/perf/builtin-trace.c

index 13293de8869fe932c9610f3a3c5612c838a65bc4..ed485df16409e632e5389eeab2ea8d3a5ea47313 100644 (file)
@@ -117,6 +117,12 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --syscalls::
        Trace system calls. This options is enabled by default.
 
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+        Setup and enable call-graph (stack chain/backtrace) recording.
+        See `--call-graph` section in perf-record and perf-report
+        man pages for details. The ones that are most useful in 'perf trace'
+        are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.
+
 --event::
        Trace other events, see 'perf list' for a complete list.
 
index 27d98703062796268363639804606abc89b5befd..8c587a8d3742ae265ddb39c4e502fb5c4b0e44e9 100644 (file)
@@ -34,6 +34,7 @@
 #include "trace-event.h"
 #include "util/parse-events.h"
 #include "util/bpf-loader.h"
+#include "callchain.h"
 #include "syscalltbl.h"
 
 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
@@ -2190,6 +2191,21 @@ signed_print:
                goto signed_print;
 
        fputc('\n', trace->output);
+
+       if (sample->callchain) {
+               struct addr_location al;
+               /* TODO: user-configurable print_opts */
+               const unsigned int print_opts = PRINT_IP_OPT_SYM
+                                             | PRINT_IP_OPT_DSO;
+
+               if (machine__resolve(trace->host, &al, sample) < 0) {
+                       pr_err("problem processing %d event, skipping it.\n",
+                              event->header.type);
+                       goto out_put;
+               }
+               perf_evsel__print_ip(evsel, sample, &al, 38, print_opts,
+                                    scripting_max_stack, trace->output);
+       }
 out:
        ttrace->entry_pending = false;
        err = 0;
@@ -3250,6 +3266,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                     "Trace pagefaults", parse_pagefaults, "maj"),
        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
        OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+       OPT_CALLBACK(0, "call-graph", &trace.opts,
+                    "record_mode[,record_size]", record_callchain_help,
+                    &record_parse_callchain_opt),
        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_END()
@@ -3285,6 +3304,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                trace.opts.sample_time = true;
        }
 
+       if (trace.opts.callgraph_set)
+               symbol_conf.use_callchain = true;
+
        if (trace.evlist->nr_entries > 0)
                evlist__set_evsel_handler(trace.evlist, trace__event_handler);