perf evsel: Disable branch flags/cycles for --callgraph lbr
author Andi Kleen <ak@linux.intel.com>
Sat, 12 Dec 2015 00:12:24 +0000 (16:12 -0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 14 Dec 2015 15:11:22 +0000 (12:11 -0300)
[The kernel patch needed for this is in tip now (b16a5b52eb9 perf/x86:
Add option to disable ...), so this user-tools patch that makes use of it
should be merged now.]

Automatically disable collecting branch flags and cycles with
--call-graph lbr. This allows avoiding a bunch of extra MSR
reads in the PMI on Skylake.

When the kernel doesn't support the new flags they are automatically
cleared in the fallback code.

v2: Switch to use branch_sample_type instead of sample_type.
Adjust description.
Fix the fallback logic.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1449879144-29074-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/evsel.c

index 47f03308934916ef6316eed7345832a358f9c957..544e4400de133b64d43be7ca185482f0e26599ce 100644 (file)
@@ -36,6 +36,7 @@ static struct {
        bool cloexec;
        bool clockid;
        bool clockid_wrong;
+       bool lbr_flags;
 } perf_missing_features;
 
 static clockid_t clockid;
@@ -574,7 +575,9 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
                        } else {
                                perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
                                attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
-                                                       PERF_SAMPLE_BRANCH_CALL_STACK;
+                                                       PERF_SAMPLE_BRANCH_CALL_STACK |
+                                                       PERF_SAMPLE_BRANCH_NO_CYCLES |
+                                                       PERF_SAMPLE_BRANCH_NO_FLAGS;
                        }
                } else
                         pr_warning("Cannot use LBR callstack with branch stack. "
@@ -1337,6 +1340,9 @@ fallback_missing_features:
                evsel->attr.mmap2 = 0;
        if (perf_missing_features.exclude_guest)
                evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
+       if (perf_missing_features.lbr_flags)
+               evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
+                                    PERF_SAMPLE_BRANCH_NO_CYCLES);
 retry_sample_id:
        if (perf_missing_features.sample_id_all)
                evsel->attr.sample_id_all = 0;
@@ -1455,6 +1461,12 @@ try_fallback:
        } else if (!perf_missing_features.sample_id_all) {
                perf_missing_features.sample_id_all = true;
                goto retry_sample_id;
+       } else if (!perf_missing_features.lbr_flags &&
+                       (evsel->attr.branch_sample_type &
+                        (PERF_SAMPLE_BRANCH_NO_CYCLES |
+                         PERF_SAMPLE_BRANCH_NO_FLAGS))) {
+               perf_missing_features.lbr_flags = true;
+               goto fallback_missing_features;
        }
 
 out_close: