perf tools: Enable LBR call stack support
author Kan Liang <kan.liang@intel.com>
Mon, 5 Jan 2015 18:23:04 +0000 (13:23 -0500)
committer Ingo Molnar <mingo@kernel.org>
Wed, 18 Feb 2015 16:16:17 +0000 (17:16 +0100)
Currently, there are two call chain recording options, fp and dwarf.

Haswell has a new feature that utilizes the existing LBR facility to
record call chains. Kernel side LBR support code provides this as a
third option to record call chains. This patch enables the lbr call
stack support on the tooling side.

LBR call stack has some limitations:

 - It reuses the current LBR facility, so LBR call stack and branch
   record cannot be enabled at the same time.

 - It is only available for user-space callchains.

However, it also offers some advantages:

 - LBR call stack can work on user apps which don't have frame-pointers
   or dwarf debug info compiled. It is a good alternative when nothing
   else works.

Tested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Jacob Shin <jacob.w.shin@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masanari Iida <standby24x7@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rodrigo Campos <rodrigo@sdfg.com.ar>
Cc: Stephane Eranian <eranian@google.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1420482185-29830-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
tools/perf/Documentation/perf-record.txt
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/evsel.c

index 31e977459c519d933473d7a46d4bc1b14263ef7e..1c7e50f62b1fe221edd7d97966c52633a375eb71 100644 (file)
@@ -115,13 +115,19 @@ OPTIONS
        implies -g.
 
        Allows specifying "fp" (frame pointer) or "dwarf"
-       (DWARF's CFI - Call Frame Information) as the method to collect
+       (DWARF's CFI - Call Frame Information) or "lbr"
+       (Hardware Last Branch Record facility) as the method to collect
        the information used to show the call graphs.
 
        In some systems, where binaries are build with gcc
        --fomit-frame-pointer, using the "fp" method will produce bogus
        call graphs, using "dwarf", if available (perf tools linked to
        the libunwind library) should be used instead.
+       Using the "lbr" method doesn't require any compiler options. It
+       will produce call graphs from the hardware LBR registers. The
+       main limitation is that it is only available on new Intel
+       platforms, such as Haswell. It can only get user call chain. It
+       doesn't work with branch stack sampling at the same time.
 
 -q::
 --quiet::
index 404ab34340523f934abc76fcfc6053907708e4fc..d0d02a811ecd69aa3c6cc58fdb4fb51315bafe9b 100644 (file)
@@ -658,7 +658,7 @@ error:
 
 static void callchain_debug(void)
 {
-       static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
+       static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
 
        pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
 
@@ -751,9 +751,9 @@ static struct record record = {
 #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr";
 #else
-const char record_callchain_help[] = CALLCHAIN_HELP "fp";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr";
 #endif
 
 /*
index 2f91094e228b6010527c84b66e1172dd616819be..0ba5f07906fb8c85452bad6e9934761a2294b002 100644 (file)
@@ -249,6 +249,8 @@ static int report__setup_sample_type(struct report *rep)
                if ((sample_type & PERF_SAMPLE_REGS_USER) &&
                    (sample_type & PERF_SAMPLE_STACK_USER))
                        callchain_param.record_mode = CALLCHAIN_DWARF;
+               else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+                       callchain_param.record_mode = CALLCHAIN_LBR;
                else
                        callchain_param.record_mode = CALLCHAIN_FP;
        }
index 14e7a123d43b3f4ab4e04a5aba7448bd5d1106cd..9f643ee770010811d83dc88897c57a17861f4317 100644 (file)
@@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg)
                                callchain_param.dump_size = size;
                        }
 #endif /* HAVE_DWARF_UNWIND_SUPPORT */
+               } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+                       if (!strtok_r(NULL, ",", &saveptr)) {
+                               callchain_param.record_mode = CALLCHAIN_LBR;
+                               ret = 0;
+                       } else
+                               pr_err("callchain: No more arguments "
+                                       "needed for --call-graph lbr\n");
+                       break;
                } else {
                        pr_err("callchain: Unknown --call-graph option "
                               "value: %s\n", arg);
index c0ec1acc38e404aa599b5b6635d004ac2f0e204f..6033a0a212ca5c255434ae2cf34c7370d785d858 100644 (file)
@@ -11,6 +11,7 @@ enum perf_call_graph_mode {
        CALLCHAIN_NONE,
        CALLCHAIN_FP,
        CALLCHAIN_DWARF,
+       CALLCHAIN_LBR,
        CALLCHAIN_MAX
 };
 
index ea51a90e20a0e9daa1a3f57c7dcf289b83299061..f93e5208c76260c8dc40aafd7b9c241dc62a79d2 100644 (file)
@@ -537,13 +537,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
 }
 
 static void
-perf_evsel__config_callgraph(struct perf_evsel *evsel)
+perf_evsel__config_callgraph(struct perf_evsel *evsel,
+                            struct record_opts *opts)
 {
        bool function = perf_evsel__is_function_event(evsel);
        struct perf_event_attr *attr = &evsel->attr;
 
        perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
+       if (callchain_param.record_mode == CALLCHAIN_LBR) {
+               if (!opts->branch_stack) {
+                       if (attr->exclude_user) {
+                               pr_warning("LBR callstack option is only available "
+                                          "to get user callchain information. "
+                                          "Falling back to framepointers.\n");
+                       } else {
+                               perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+                               attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+                                                       PERF_SAMPLE_BRANCH_CALL_STACK;
+                       }
+               } else
+                        pr_warning("Cannot use LBR callstack with branch stack. "
+                                   "Falling back to framepointers.\n");
+       }
+
        if (callchain_param.record_mode == CALLCHAIN_DWARF) {
                if (!function) {
                        perf_evsel__set_sample_bit(evsel, REGS_USER);
@@ -667,7 +684,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
                evsel->attr.exclude_callchain_user = 1;
 
        if (callchain_param.enabled && !evsel->no_aux_samples)
-               perf_evsel__config_callgraph(evsel);
+               perf_evsel__config_callgraph(evsel, opts);
 
        if (opts->sample_intr_regs) {
                attr->sample_regs_intr = PERF_REGS_MASK;