perf tools: Set the maximum allowed stack from /proc/sys/kernel/perf_event_max_stack
author Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 27 Apr 2016 13:16:24 +0000 (10:16 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 27 Apr 2016 13:29:07 +0000 (10:29 -0300)
There is an upper limit to what tooling considers a valid callchain, and
it was tied to the hardcoded value in the kernel, PERF_MAX_STACK_DEPTH
(127). Now that this limit can be tuned via a sysctl, make the tools read
/proc/sys/kernel/perf_event_max_stack and use that value as the upper
limit, falling back to PERF_MAX_STACK_DEPTH for kernels where this sysctl
isn't present.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-yjqsd30nnkogvj5oyx9ghir9@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
16 files changed:
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perf-trace.txt
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/perf.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_output.c
tools/perf/util/machine.c
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/util.c
tools/perf/util/util.h

index 496d42cdf02b153593d22abc28366fd16f6090ce..ebaf849e30efd15bce04edb37252fb4aed66441f 100644 (file)
@@ -248,7 +248,7 @@ OPTIONS
        Note that when using the --itrace option the synthesized callchain size
        will override this value if the synthesized callchain size is bigger.
 
-       Default: 127
+       Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 -G::
 --inverted::
index 4fc44c75263fdb803315394e8353d77063f69e6a..a856a1095893cab0a0d3f7fdff39820090c99422 100644 (file)
@@ -267,7 +267,7 @@ include::itrace.txt[]
         Note that when using the --itrace option the synthesized callchain size
         will override this value if the synthesized callchain size is bigger.
 
-        Default: 127
+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --ns::
        Use 9 decimal places when displaying time (i.e. show the nanoseconds)
index 19f046f027cd81e42c5696ab3172539baaeb745d..91d638df3a6bb9e6e9e8c10bb7ad3d2fc1e9efd0 100644 (file)
@@ -177,7 +177,7 @@ Default is to monitor all CPUS.
        between information loss and faster processing especially for
        workloads that can have a very long callchain stack.
 
-       Default: 127
+       Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --ignore-callees=<regex>::
         Ignore callees of the function(s) matching the given regex.
index c075c002eaa407f7cb9cb8b41dff36a3c69757e2..6afe20121bc06d671931a3d22d6eeca2ca35c0a6 100644 (file)
@@ -143,7 +143,7 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
         Implies '--call-graph dwarf' when --call-graph not present on the
         command line, on systems where DWARF unwinding was built in.
 
-        Default: 127
+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --min-stack::
         Set the stack depth limit when parsing the callchain, anything
index 1d5be0bd426f789018af5217fae3373ed5981bd8..8d9b88af901dd6d187af0fbbcd241675eea77199 100644 (file)
@@ -691,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                        .ordered_events  = true,
                        .ordering_requires_timestamps = true,
                },
-               .max_stack               = PERF_MAX_STACK_DEPTH,
+               .max_stack               = sysctl_perf_event_max_stack,
                .pretty_printing_style   = "normal",
                .socket_filter           = -1,
        };
@@ -744,7 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_INTEGER(0, "max-stack", &report.max_stack,
                    "Set the maximum stack depth when parsing the callchain, "
                    "anything beyond the specified depth will be ignored. "
-                   "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                   "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
                    "alias for inverted call graph"),
        OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
index f43b0c6f88f45548a7e84073f473f28d569fedcd..efca81679bb314624b88d024c52b63f2fc54729c 100644 (file)
@@ -2031,7 +2031,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
                     "Set the maximum stack depth when parsing the callchain, "
                     "anything beyond the specified depth will be ignored. "
-                    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_BOOLEAN('I', "show-info", &show_full_info,
                    "display extended information from perf.data file"),
        OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -2067,6 +2067,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                NULL
        };
 
+       scripting_max_stack = sysctl_perf_event_max_stack;
+
        setup_scripting();
 
        argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
index c130a11d3a0d7987d4720d52a0091d5ff46d5541..da18517b1d400cfd31d08ee16410e4b6fb6ba0fa 100644 (file)
@@ -1103,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                        },
                        .proc_map_timeout    = 500,
                },
-               .max_stack           = PERF_MAX_STACK_DEPTH,
+               .max_stack           = sysctl_perf_event_max_stack,
                .sym_pcnt_filter     = 5,
        };
        struct record_opts *opts = &top.record_opts;
@@ -1171,7 +1171,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                    "Accumulate callchains of children and show total overhead as well"),
        OPT_INTEGER(0, "max-stack", &top.max_stack,
                    "Set the maximum stack depth when parsing the callchain. "
-                   "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                   "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
                   "ignore callees of these functions in call graphs",
                   report_parse_ignore_callees_opt),
index 48b00f0425991926b9f844678715b008f57714b2..f4f3389c92c7b986bba5e9464aa4989030c18bc8 100644 (file)
@@ -3106,7 +3106,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_UINTEGER(0, "max-stack", &trace.max_stack,
                     "Set the maximum stack depth when parsing the callchain, "
                     "anything beyond the specified depth will be ignored. "
-                    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_END()
@@ -3150,7 +3150,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                mmap_pages_user_set = false;
 
        if (trace.max_stack == UINT_MAX) {
-               trace.max_stack = PERF_MAX_STACK_DEPTH;
+               trace.max_stack = sysctl_perf_event_max_stack;
                max_stack_user_set = false;
        }
 
index 7b2df2b46525f1ec19352ee21ada917908063d52..83ffe7cd73301f8990b23c200e135f62aa57cbac 100644 (file)
@@ -17,6 +17,7 @@
 #include <subcmd/parse-options.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
+#include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <pthread.h>
 #include <stdlib.h>
@@ -533,6 +534,7 @@ int main(int argc, const char **argv)
 {
        const char *cmd;
        char sbuf[STRERR_BUFSIZE];
+       int value;
 
        /* libsubcmd init */
        exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -542,6 +544,9 @@ int main(int argc, const char **argv)
        page_size = sysconf(_SC_PAGE_SIZE);
        cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
 
+       if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+               sysctl_perf_event_max_stack = value;
+
        cmd = extract_argv0_path(argv[0]);
        if (!cmd)
                cmd = "perf-help";
index ed5aa9eaeb6cf51d113e75620737641737a85fb6..4a2bbff9b1ee6f0190d73175668b5ea2bfb8f439 100644 (file)
@@ -101,7 +101,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                if (machine__resolve(machine, &al, &sample) < 0)
                        goto out;
 
-               if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+               if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
                                         NULL) < 0) {
                        addr_location__put(&al);
                        goto out;
index b825d24f81866b8756903ce8975beb74e932e326..e846f8c420136426fae224ce658946923b126f83 100644 (file)
@@ -81,7 +81,7 @@ static int add_hist_entries(struct perf_evlist *evlist,
 
                        al.socket = fake_samples[i].socket;
                        if (hist_entry_iter__add(&iter, &al,
-                                                PERF_MAX_STACK_DEPTH, NULL) < 0) {
+                                                sysctl_perf_event_max_stack, NULL) < 0) {
                                addr_location__put(&al);
                                goto out;
                        }
index d3556fbe8c5caeaa0dee938615417c64346e8551..7cd8738e842f0fa7b53a05e4a69d6e50e097909d 100644 (file)
@@ -67,7 +67,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                if (machine__resolve(machine, &al, &sample) < 0)
                        goto out;
 
-               if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+               if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
                                         NULL) < 0) {
                        addr_location__put(&al);
                        goto out;
index 656c1d7ee7d46f771d473873823ae4c716bfaf06..2cb95bbf9ea67cb85ee3ce53240dab0bcb984f14 100644 (file)
@@ -1764,7 +1764,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
                 */
                int mix_chain_nr = i + 1 + lbr_nr + 1;
 
-               if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) {
+               if (mix_chain_nr > (int)sysctl_perf_event_max_stack + PERF_MAX_BRANCH_DEPTH) {
                        pr_warning("corrupted callchain. skipping...\n");
                        return 0;
                }
@@ -1825,7 +1825,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
         * Based on DWARF debug information, some architectures skip
         * a callchain entry saved by the kernel.
         */
-       if (chain->nr < PERF_MAX_STACK_DEPTH)
+       if (chain->nr < sysctl_perf_event_max_stack)
                skip_idx = arch_skip_callchain_idx(thread, chain);
 
        /*
@@ -1886,7 +1886,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
        }
 
 check_calls:
-       if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) {
+       if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) {
                pr_warning("corrupted callchain. skipping...\n");
                return 0;
        }
index ae1cebc307c5bc09a2d4d30d8d733aa2d50b8ac8..62c7f6988e0e584cb16f6deab30a84b3b8672971 100644 (file)
@@ -265,7 +265,7 @@ static SV *perl_process_callchain(struct perf_sample *sample,
 
        if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
                                      sample, NULL, NULL,
-                                     PERF_MAX_STACK_DEPTH) != 0) {
+                                     sysctl_perf_event_max_stack) != 0) {
                pr_err("Failed to resolve callchain. Skipping\n");
                goto exit;
        }
index 9473d46c00bba0985c28eafa0cf687f589c63dfa..619ba2061b62fcd9574da5c24ad2dadd54581120 100644 (file)
@@ -33,6 +33,8 @@ struct callchain_param        callchain_param = {
 unsigned int page_size;
 int cacheline_size;
 
+unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
+
 bool test_attr__enabled;
 
 bool perf_host  = true;
index 26a924651e7be18e1683b5e36d741d3231f3e567..88f607af1f47036842f6ff77ad2c203b68dcb7c3 100644 (file)
@@ -267,6 +267,7 @@ void sighandler_dump_stack(int sig);
 
 extern unsigned int page_size;
 extern int cacheline_size;
+extern unsigned int sysctl_perf_event_max_stack;
 
 struct parse_tag {
        char tag;