perf auxtrace: Add option to feed branches to the thread stack
author: Adrian Hunter <adrian.hunter@intel.com>
Thu, 23 Jun 2016 13:40:57 +0000 (16:40 +0300)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 23 Jun 2016 20:02:59 +0000 (17:02 -0300)
In preparation for using the thread stack to print an indent
representing the stack depth in perf script, add an option to tell
decoders to feed branches to the thread stack. Add support for that
option to Intel PT and Intel BTS.

The advantage of using the decoder to feed the thread stack is that it
happens before branch filtering and so can be used with different itrace
options (e.g. it still works when only showing calls, even though the
thread stack needs to see calls and returns). Also it does not conflict
with using the thread stack to get callchains.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1466689258-28493-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/auxtrace.h
tools/perf/util/intel-bts.c
tools/perf/util/intel-pt.c

index 767989e0e3126714fd956df13df7709098aadc5c..ac5f0d7167e657e505615736cd1875d3226d1317 100644 (file)
@@ -63,6 +63,7 @@ enum itrace_period_type {
  * @calls: limit branch samples to calls (can be combined with @returns)
  * @returns: limit branch samples to returns (can be combined with @calls)
  * @callchain: add callchain to 'instructions' events
+ * @thread_stack: feed branches to the thread_stack
  * @last_branch: add branch context to 'instruction' events
  * @callchain_sz: maximum callchain size
  * @last_branch_sz: branch context size
@@ -82,6 +83,7 @@ struct itrace_synth_opts {
        bool                    calls;
        bool                    returns;
        bool                    callchain;
+       bool                    thread_stack;
        bool                    last_branch;
        unsigned int            callchain_sz;
        unsigned int            last_branch_sz;
index ecec73f6fe5a9623662c735219f0755a47e0cb7a..749e6f2e37ca800b6fb4ab5009ed13cfbe2ab956 100644 (file)
@@ -422,7 +422,8 @@ static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
 }
 
 static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
-                                   struct auxtrace_buffer *buffer)
+                                   struct auxtrace_buffer *buffer,
+                                   struct thread *thread)
 {
        struct branch *branch;
        size_t sz, bsz = sizeof(struct branch);
@@ -444,6 +445,12 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
                if (!branch->from && !branch->to)
                        continue;
                intel_bts_get_branch_type(btsq, branch);
+               if (btsq->bts->synth_opts.thread_stack)
+                       thread_stack__event(thread, btsq->sample_flags,
+                                           le64_to_cpu(branch->from),
+                                           le64_to_cpu(branch->to),
+                                           btsq->intel_pt_insn.length,
+                                           buffer->buffer_nr + 1);
                if (filter && !(filter & btsq->sample_flags))
                        continue;
                err = intel_bts_synth_branch_sample(btsq, branch);
@@ -507,12 +514,13 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
                goto out_put;
        }
 
-       if (!btsq->bts->synth_opts.callchain && thread &&
+       if (!btsq->bts->synth_opts.callchain &&
+           !btsq->bts->synth_opts.thread_stack && thread &&
            (!old_buffer || btsq->bts->sampling_mode ||
             (btsq->bts->snapshot_mode && !buffer->consecutive)))
                thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
 
-       err = intel_bts_process_buffer(btsq, buffer);
+       err = intel_bts_process_buffer(btsq, buffer, thread);
 
        auxtrace_buffer__drop_data(buffer);
 
@@ -905,10 +913,14 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
        if (dump_trace)
                return 0;
 
-       if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+       if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
                bts->synth_opts = *session->itrace_synth_opts;
-       else
+       } else {
                itrace_synth_opts__set_default(&bts->synth_opts);
+               if (session->itrace_synth_opts)
+                       bts->synth_opts.thread_stack =
+                               session->itrace_synth_opts->thread_stack;
+       }
 
        if (bts->synth_opts.calls)
                bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
index dc243b19197b10f912008ad333788ee1edeb2699..551ff6f640be85fef9362034a487ff5d4ea0c807 100644 (file)
@@ -1234,7 +1234,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
        if (!(state->type & INTEL_PT_BRANCH))
                return 0;
 
-       if (pt->synth_opts.callchain)
+       if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
                thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
                                    state->to_ip, ptq->insn_len,
                                    state->trace_nr);
@@ -2137,6 +2137,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
                        pt->synth_opts.branches = false;
                        pt->synth_opts.callchain = true;
                }
+               if (session->itrace_synth_opts)
+                       pt->synth_opts.thread_stack =
+                               session->itrace_synth_opts->thread_stack;
        }
 
        if (pt->synth_opts.log)