perf tools: Add support for skipping itrace instructions
author: Andi Kleen <ak@linux.intel.com>
Mon, 28 Mar 2016 17:45:38 +0000 (10:45 -0700)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 30 Mar 2016 14:14:09 +0000 (11:14 -0300)
When using 'perf script' to look at PT traces it is often useful to
ignore the initialization code at the beginning.

On larger traces, which may have many millions of instructions in
initialization code, doing that in a pipeline can be very slow, with perf
script spending a lot of CPU time calling printf and writing data.

This patch adds an extension to the --itrace argument that skips 'n'
events (instructions, branches or transactions) at the beginning. This
is much more efficient.

v2:
Add support for BTS (Adrian Hunter)
Document in itrace.txt
Fix branch check
Check transactions and instructions too

Committer note:

To test intel_pt one needs to make sure VT-x isn't active, i.e.
stopping KVM guests on the test machine, as described by Andi Kleen
at http://lkml.kernel.org/r/20160301234953.GD23621@tassilo.jf.intel.com

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1459187142-20035-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/intel-pt.txt
tools/perf/Documentation/itrace.txt
tools/perf/util/auxtrace.c
tools/perf/util/auxtrace.h
tools/perf/util/intel-bts.c
tools/perf/util/intel-pt.c

index be764f9ec7691a3d2357214cbe1af9c6c333ad92..c6c8318e38a2efbf52d264b4bf945aefba40272a 100644 (file)
@@ -672,6 +672,7 @@ The letters are:
        d       create a debug log
        g       synthesize a call chain (use with i or x)
        l       synthesize last branch entries (use with i or x)
+       s       skip initial number of events
 
 "Instructions" events look like they were recorded by "perf record -e
 instructions".
@@ -730,6 +731,12 @@ from one sample to the next.
 
 To disable trace decoding entirely, use the option --no-itrace.
 
+It is also possible to skip the first N generated events (instructions, branches
+or transactions). This is useful for ignoring initialization code.
+
+       --itrace=i0nss1000000
+
+skips the first million instructions.
 
 dump option
 -----------
index 65453f4c700604f8a259df384c6e52988ce81b14..e2a4c5e0dbe5b078a4a54b38a65007d00b06c94a 100644 (file)
@@ -7,6 +7,7 @@
                d       create a debug log
                g       synthesize a call chain (use with i or x)
                l       synthesize last branch entries (use with i or x)
+               s       skip initial number of events
 
        The default is all events i.e. the same as --itrace=ibxe
 
 
        Also the number of last branch entries (default 64, max. 1024) for
        instructions or transactions events can be specified.
+
+       It is also possible to skip the first N generated events (instructions, branches
+       or transactions). This is useful for ignoring initialization code.
+
+       --itrace=i0nss1000000
+
+       skips the first million instructions.
index ec164fe70718df1480b02733d8701c7ab2b74297..c9169011e55ef84bf52728f3a0853d68c8120702 100644 (file)
@@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
        synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
        synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
        synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+       synth_opts->initial_skip = 0;
 }
 
 /*
@@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
                                synth_opts->last_branch_sz = val;
                        }
                        break;
+               case 's':
+                       synth_opts->initial_skip = strtoul(p, &endptr, 10);
+                       if (p == endptr)
+                               goto out_err;
+                       p = endptr;
+                       break;
                case ' ':
                case ',':
                        break;
index 57ff31ecb8e40f85bd60b876925394172d6843d8..767989e0e3126714fd956df13df7709098aadc5c 100644 (file)
@@ -68,6 +68,7 @@ enum itrace_period_type {
  * @last_branch_sz: branch context size
  * @period: 'instructions' events period
  * @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
  */
 struct itrace_synth_opts {
        bool                    set;
@@ -86,6 +87,7 @@ struct itrace_synth_opts {
        unsigned int            last_branch_sz;
        unsigned long long      period;
        enum itrace_period_type period_type;
+       unsigned long           initial_skip;
 };
 
 /**
index abf1366e2a24d3bcf439434f96075320000e49a9..9df99608556332289b499622f11a5a402e6b875a 100644 (file)
@@ -66,6 +66,7 @@ struct intel_bts {
        u64                             branches_id;
        size_t                          branches_event_size;
        bool                            synth_needs_swap;
+       unsigned long                   num_events;
 };
 
 struct intel_bts_queue {
@@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
        union perf_event event;
        struct perf_sample sample = { .ip = 0, };
 
+       if (bts->synth_opts.initial_skip &&
+           bts->num_events++ <= bts->synth_opts.initial_skip)
+               return 0;
+
        event.sample.header.type = PERF_RECORD_SAMPLE;
        event.sample.header.misc = PERF_RECORD_MISC_USER;
        event.sample.header.size = sizeof(struct perf_event_header);
index 407f11b97c8dc9dbaaf74409bbd8dab363966e24..ddec87f6e61654eb56687cb8a547526c5c91a474 100644 (file)
@@ -100,6 +100,8 @@ struct intel_pt {
        u64 cyc_bit;
        u64 noretcomp_bit;
        unsigned max_non_turbo_ratio;
+
+       unsigned long num_events;
 };
 
 enum switch_state {
@@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
        if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
                return 0;
 
+       if (pt->synth_opts.initial_skip &&
+           pt->num_events++ < pt->synth_opts.initial_skip)
+               return 0;
+
        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);
@@ -1029,6 +1035,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };
 
+       if (pt->synth_opts.initial_skip &&
+           pt->num_events++ < pt->synth_opts.initial_skip)
+               return 0;
+
        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);
@@ -1087,6 +1097,10 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };
 
+       if (pt->synth_opts.initial_skip &&
+           pt->num_events++ < pt->synth_opts.initial_skip)
+               return 0;
+
        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);
@@ -1199,14 +1213,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
        ptq->have_sample = false;
 
        if (pt->sample_instructions &&
-           (state->type & INTEL_PT_INSTRUCTION)) {
+           (state->type & INTEL_PT_INSTRUCTION) &&
+           (!pt->synth_opts.initial_skip ||
+            pt->num_events++ >= pt->synth_opts.initial_skip)) {
                err = intel_pt_synth_instruction_sample(ptq);
                if (err)
                        return err;
        }
 
        if (pt->sample_transactions &&
-           (state->type & INTEL_PT_TRANSACTION)) {
+           (state->type & INTEL_PT_TRANSACTION) &&
+           (!pt->synth_opts.initial_skip ||
+            pt->num_events++ >= pt->synth_opts.initial_skip)) {
                err = intel_pt_synth_transaction_sample(ptq);
                if (err)
                        return err;