perf intel-pt/bts: Report instruction bytes and length in sample
author Andi Kleen <ak@linux.intel.com>
Fri, 7 Oct 2016 13:42:26 +0000 (16:42 +0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 24 Oct 2016 13:31:32 +0000 (10:31 -0300)
Change Intel PT and BTS to pass up the length and the instruction
bytes of the decoded or sampled instruction in the perf sample.

The decoder already knows this information; we just need to pass it
up. Since it is only a couple of movs, it is not very expensive.

Handle the instruction cache too. Make sure the instruction length (ilen) is always initialized.

Used in the next patch.

[Adrian: re-base on top (and adjust for) instruction buffer size tidy-up]
[Adrian: add BTS support and adjust commit message accordingly]

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Link: http://lkml.kernel.org/r/1475847747-30994-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/event.h
tools/perf/util/intel-bts.c
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
tools/perf/util/intel-pt.c

index 8d363d5e65a2e14c019fd18a6129ceef7b1538c3..c735c53a26f8f6dcb37727299e0560d07a5f6ddb 100644 (file)
@@ -177,6 +177,8 @@ enum {
        PERF_IP_FLAG_TRACE_BEGIN        |\
        PERF_IP_FLAG_TRACE_END)
 
+#define MAX_INSN 16
+
 struct perf_sample {
        u64 ip;
        u32 pid, tid;
@@ -193,6 +195,7 @@ struct perf_sample {
        u32 flags;
        u16 insn_len;
        u8  cpumode;
+       char insn[MAX_INSN];
        void *raw_data;
        struct ip_callchain *callchain;
        struct branch_stack *branch_stack;
index 8bc7fec817d7b71bdba215765de0b92188afeb88..6c2eb5da4afc0d290786881ebd9a7d9c5f83775f 100644 (file)
@@ -295,6 +295,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
        sample.cpu = btsq->cpu;
        sample.flags = btsq->sample_flags;
        sample.insn_len = btsq->intel_pt_insn.length;
+       memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
 
        if (bts->synth_opts.inject) {
                event.sample.header.size = bts->branches_event_size;
index 16c06d3ae577297ff4e312e911428e0b5c985b61..e4e7dc781d21d17b309513c54ff7c0cf0ab9d383 100644 (file)
@@ -980,6 +980,8 @@ out:
 out_no_progress:
        decoder->state.insn_op = intel_pt_insn->op;
        decoder->state.insn_len = intel_pt_insn->length;
+       memcpy(decoder->state.insn, intel_pt_insn->buf,
+              INTEL_PT_INSN_BUF_SZ);
 
        if (decoder->tx_flags & INTEL_PT_IN_TX)
                decoder->state.flags |= INTEL_PT_IN_TX;
index 89399985fa4d4bfec664a1630ebc13c37da3b6d0..e90619a43c0cefdd6edebb0369e77dbf46017c21 100644 (file)
@@ -66,6 +66,7 @@ struct intel_pt_state {
        uint32_t flags;
        enum intel_pt_insn_op insn_op;
        int insn_len;
+       char insn[INTEL_PT_INSN_BUF_SZ];
 };
 
 struct intel_pt_insn;
index 5f95cd44207596b6761b1c5a0c30f245edac937d..7913363bde5c0407fded864f62a839c8b28056ea 100644 (file)
@@ -27,7 +27,7 @@
 
 #include "intel-pt-insn-decoder.h"
 
-#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE
+#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
 #error Instruction buffer size too small
 #endif
 
index 815a14d8904b7c2e6e9e99a486a1dcb32c2cac8c..85d5eeb66c75339fc7c230bfcd7bcabd975ee402 100644 (file)
@@ -143,6 +143,7 @@ struct intel_pt_queue {
        u32 flags;
        u16 insn_len;
        u64 last_insn_cnt;
+       char insn[INTEL_PT_INSN_BUF_SZ];
 };
 
 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
@@ -315,6 +316,7 @@ struct intel_pt_cache_entry {
        enum intel_pt_insn_branch       branch;
        int                             length;
        int32_t                         rel;
+       char                            insn[INTEL_PT_INSN_BUF_SZ];
 };
 
 static int intel_pt_config_div(const char *var, const char *value, void *data)
@@ -400,6 +402,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
        e->branch = intel_pt_insn->branch;
        e->length = intel_pt_insn->length;
        e->rel = intel_pt_insn->rel;
+       memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
 
        err = auxtrace_cache__add(c, offset, &e->entry);
        if (err)
@@ -436,6 +439,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
        u64 insn_cnt = 0;
        bool one_map = true;
 
+       intel_pt_insn->length = 0;
+
        if (to_ip && *ip == to_ip)
                goto out_no_cache;
 
@@ -475,6 +480,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
                                intel_pt_insn->branch = e->branch;
                                intel_pt_insn->length = e->length;
                                intel_pt_insn->rel = e->rel;
+                               memcpy(intel_pt_insn->buf, e->insn,
+                                      INTEL_PT_INSN_BUF_SZ);
                                intel_pt_log_insn_no_data(intel_pt_insn, *ip);
                                return 0;
                        }
@@ -898,6 +905,7 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
                if (ptq->state->flags & INTEL_PT_IN_TX)
                        ptq->flags |= PERF_IP_FLAG_IN_TX;
                ptq->insn_len = ptq->state->insn_len;
+               memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
        }
 }
 
@@ -1078,6 +1086,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;
+       memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
 
        /*
         * perf report cannot handle events without a branch stack when using
@@ -1139,6 +1148,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;
+       memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
 
        ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
 
@@ -1201,6 +1211,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;
+       memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
 
        if (pt->synth_opts.callchain) {
                thread_stack__sample(ptq->thread, ptq->chain,