perf top: Add --max-stack option to limit callchain stack scan
author Waiman Long <Waiman.Long@hp.com>
Fri, 18 Oct 2013 14:38:49 +0000 (10:38 -0400)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 21 Oct 2013 20:36:25 +0000 (17:36 -0300)
When call-graph recording is enabled (-G), it may take a long time to
scan all the stack data and merge it accordingly.

This patch adds a new --max-stack option to perf-top to limit the depth
of callchain stack data to look at to reduce the time it takes for
perf-top to finish its processing. It reduces the amount of information
provided to the user in exchange for speed.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Acked-by: David Ahern <dsahern@gmail.com>
Tested-by: Davidlohr Bueso <davidlohr@hp.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1382107129-2010-5-git-send-email-Waiman.Long@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-top.txt
tools/perf/builtin-top.c
tools/perf/util/top.h

index f65777c1f723ba3c2226eceb9ad26f36e4fdcb88..c16a09e2f182333cfa710d7f11df76240ad4634f 100644 (file)
@@ -158,6 +158,14 @@ Default is to monitor all CPUS.
 
        Default: fractal,0.5,callee.
 
+--max-stack::
+       Set the stack depth limit when parsing the callchain; anything
+       beyond the specified depth will be ignored. This is a trade-off
+       between information loss and faster processing, especially for
+       workloads that can have a very long callchain stack.
+
+       Default: 127
+
 --ignore-callees=<regex>::
         Ignore callees of the function(s) matching the given regex.
         This has the effect of collecting the callers of each such
index 112cb7d68e649beac8b760015119908b1ae5bbea..386d83324a8d0d519800c8b3cba7a6b5defa95f5 100644 (file)
@@ -771,7 +771,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
                        err = machine__resolve_callchain(machine, evsel,
                                                         al.thread, sample,
                                                         &parent, &al,
-                                                        PERF_MAX_STACK_DEPTH);
+                                                        top->max_stack);
                        if (err)
                                return;
                }
@@ -1048,10 +1048,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                        .user_freq      = UINT_MAX,
                        .user_interval  = ULLONG_MAX,
                        .freq           = 4000, /* 4 KHz */
-                       .target              = {
+                       .target         = {
                                .uses_mmap   = true,
                        },
                },
+               .max_stack           = PERF_MAX_STACK_DEPTH,
                .sym_pcnt_filter     = 5,
        };
        struct perf_record_opts *opts = &top.record_opts;
@@ -1110,6 +1111,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
                             "mode[,dump_size]", record_callchain_help,
                             &parse_callchain_opt, "fp"),
+       OPT_INTEGER(0, "max-stack", &top.max_stack,
+                   "Set the maximum stack depth when parsing the callchain. "
+                   "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
                   "ignore callees of these functions in call graphs",
                   report_parse_ignore_callees_opt),
index b554ffc462b653e73b7e8de84cfa53e2609be989..88cfeaff600b334390842e725b9a7f792b0fd9af 100644 (file)
@@ -24,6 +24,7 @@ struct perf_top {
        u64                exact_samples;
        u64                guest_us_samples, guest_kernel_samples;
        int                print_entries, count_filter, delay_secs;
+       int                max_stack;
        bool               hide_kernel_symbols, hide_user_symbols, zero;
        bool               use_tui, use_stdio;
        bool               kptr_restrict_warned;