perf record,report,annotate,diff: Process events in order
author Ian Munsie <imunsie@au1.ibm.com>
Thu, 9 Dec 2010 05:33:53 +0000 (16:33 +1100)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 21 Dec 2010 22:17:51 +0000 (20:17 -0200)
This patch changes perf record to ask for the ID info on all events by
default if recording from multiple CPUs.

Perf report, annotate and diff will now process the events in order if
the kernel is able to provide timestamps on all events. This ensures
that events such as COMM and MMAP which are necessary to correctly
interpret samples are processed prior to those samples so that they are
attributed correctly.

Before:
 # perf record ./cachetest
 # perf report

 # Events: 6K cycles
 #
 # Overhead  Command      Shared Object                           Symbol
 # ........  .......  .................  ...............................
 #
     74.11%    :3259  [unknown]          [k] 0x4a6c
      1.50%  cachetest  ld-2.11.2.so       [.] 0x1777c
      1.46%    :3259  [kernel.kallsyms]  [k] .perf_event_mmap_ctx
      1.25%    :3259  [kernel.kallsyms]  [k] restore
      0.74%    :3259  [kernel.kallsyms]  [k] ._raw_spin_lock
      0.71%    :3259  [kernel.kallsyms]  [k] .filemap_fault
      0.66%    :3259  [kernel.kallsyms]  [k] .memset
      0.54%  cachetest  [kernel.kallsyms]  [k] .sha_transform
      0.54%    :3259  [kernel.kallsyms]  [k] .copy_4K_page
      0.54%    :3259  [kernel.kallsyms]  [k] .find_get_page
      0.52%    :3259  [kernel.kallsyms]  [k] .trace_hardirqs_off
      0.50%    :3259  [kernel.kallsyms]  [k] .__do_fault
<SNIP>

After:
 # perf report

 # Events: 6K cycles
 #
 # Overhead  Command      Shared Object                           Symbol
 # ........  .......  .................  ...............................
 #
     44.28%  cachetest  cachetest          [.] sumArrayNaive
     22.53%  cachetest  cachetest          [.] sumArrayOptimal
      6.59%  cachetest  ld-2.11.2.so       [.] 0x1777c
      2.13%  cachetest  [unknown]          [k] 0x340
      1.46%  cachetest  [kernel.kallsyms]  [k] .perf_event_mmap_ctx
      1.25%  cachetest  [kernel.kallsyms]  [k] restore
      0.74%  cachetest  [kernel.kallsyms]  [k] ._raw_spin_lock
      0.71%  cachetest  [kernel.kallsyms]  [k] .filemap_fault
      0.66%  cachetest  [kernel.kallsyms]  [k] .memset
      0.54%  cachetest  [kernel.kallsyms]  [k] .copy_4K_page
      0.54%  cachetest  [kernel.kallsyms]  [k] .find_get_page
      0.54%  cachetest  [kernel.kallsyms]  [k] .sha_transform
      0.52%  cachetest  [kernel.kallsyms]  [k] .trace_hardirqs_off
      0.50%  cachetest  [kernel.kallsyms]  [k] .__do_fault
<SNIP>

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <1291872833-839-1-git-send-email-imunsie@au1.ibm.com>
Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-annotate.c
tools/perf/builtin-diff.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c

index 48dbab4b482f700f7894fd5581301547d3da7e02..c056cdc0691258b159665ca3e8c74d2963543ccf 100644 (file)
@@ -375,6 +375,8 @@ static struct perf_event_ops event_ops = {
        .mmap   = event__process_mmap,
        .comm   = event__process_comm,
        .fork   = event__process_task,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 static int __cmd_annotate(void)
index af84e1c0519d72c5d6efd2e3c4c94641bb39c216..97846dcafc63dd9c32d7928128426594149d417a 100644 (file)
@@ -61,6 +61,8 @@ static struct perf_event_ops event_ops = {
        .exit   = event__process_task,
        .fork   = event__process_task,
        .lost   = event__process_lost,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
index efd1b3c3d4a03cc33f61b4b615af720f0c46fdd4..5149e3deb7bc7381d2713f66210f0ac288762cc9 100644 (file)
@@ -285,7 +285,7 @@ static void create_counter(int counter, int cpu)
        if (system_wide)
                attr->sample_type       |= PERF_SAMPLE_CPU;
 
-       if (sample_time)
+       if (sample_time || system_wide || !no_inherit || cpu_list)
                attr->sample_type       |= PERF_SAMPLE_TIME;
 
        if (raw_samples) {
@@ -327,6 +327,9 @@ try_again:
                                 * Old kernel, no attr->sample_id_type_all field
                                 */
                                sample_id_all_avail = false;
+                               if (!sample_time && !raw_samples)
+                                       attr->sample_type &= ~PERF_SAMPLE_TIME;
+
                                goto retry_sample_id;
                        }
 
index fd4c4500cd15ee6712385bf1b1fdff32f7969fa8..4af7ce6e155546ac3ec4dabcb2d01cbf35440fcc 100644 (file)
@@ -244,6 +244,8 @@ static struct perf_event_ops event_ops = {
        .event_type = event__process_event_type,
        .tracing_data = event__process_tracing_data,
        .build_id = event__process_build_id,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 extern volatile int session_done;