perf diff: Introduce tool to show performance difference
authorArnaldo Carvalho de Melo <acme@redhat.com>
Mon, 14 Dec 2009 22:09:31 +0000 (20:09 -0200)
committerIngo Molnar <mingo@elte.hu>
Tue, 15 Dec 2009 07:50:29 +0000 (08:50 +0100)
I guess it is enough to show some examples:

[root@doppio linux-2.6-tip]# rm -f perf.data*
[root@doppio linux-2.6-tip]# ls -la perf.data*
ls: cannot access perf.data*: No such file or directory
[root@doppio linux-2.6-tip]# perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699 samples) ]
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root 74440 2009-12-14 20:03 perf.data
[root@doppio linux-2.6-tip]# perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2692 samples) ]
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root 74280 2009-12-14 20:03 perf.data
-rw------- 1 root root 74440 2009-12-14 20:03 perf.data.old
[root@doppio linux-2.6-tip]# perf diff | head -5
   1        -34994580     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        -15307806         [kernel.kallsyms]   __kmalloc
   3    +1   +3665941     /lib64/libc-2.10.1.so   __GI_memmove
   4    +4  +23508995     /lib64/libc-2.10.1.so   _int_malloc
   5    +7  +38538813         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -p | head -5
   1        +1.00%     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2                       [kernel.kallsyms]   __kmalloc
   3    +1             /lib64/libc-2.10.1.so   __GI_memmove
   4    +4             /lib64/libc-2.10.1.so   _int_malloc
   5    +7  -1.00%         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -v | head -5
   1        361449551 326454971 -34994580     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        151009241 135701435 -15307806         [kernel.kallsyms]   __kmalloc
   3    +1  101805328 105471269  +3665941     /lib64/libc-2.10.1.so   __GI_memmove
   4    +4   78041440 101550435 +23508995     /lib64/libc-2.10.1.so   _int_malloc
   5    +7   59536172  98074985 +38538813         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -vp | head -5
   1        9.00% 8.00% +1.00%     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        3.00% 3.00%                [kernel.kallsyms]   __kmalloc
   3    +1  2.00% 2.00%            /lib64/libc-2.10.1.so   __GI_memmove
   4    +4  2.00% 2.00%            /lib64/libc-2.10.1.so   _int_malloc
   5    +7  1.00% 2.00% -1.00%         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]#

This should be enough for diffs where the system is non
volatile, i.e. when one doesn't updates binaries.

For volatile environments, stay tuned for the next perf tool
feature: a buildid cache populated by 'perf record', managed by
'perf buildid-cache' a-la ccache, and used by all the report
tools.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
LKML-Reference: <1260828571-3613-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
tools/perf/Documentation/perf-diff.txt [new file with mode: 0644]
tools/perf/Makefile
tools/perf/builtin-diff.c [new file with mode: 0644]
tools/perf/builtin.h
tools/perf/command-list.txt
tools/perf/perf.c
tools/perf/util/sort.h

diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
new file mode 100644 (file)
index 0000000..bd1ee55
--- /dev/null
@@ -0,0 +1,31 @@
+perf-diff(1)
+==============
+
+NAME
+----
+perf-diff - Read perf.data (created by perf record) and display the profile
+
+SYNOPSIS
+--------
+[verse]
+'perf diff' [oldfile] [newfile]
+
+DESCRIPTION
+-----------
+This command displays the performance difference among two perf.data files
+captured via perf record.
+
+If no parameters are passed it will assume perf.data.old and perf.data.
+
+OPTIONS
+-------
+-p::
+--percentage::
+       Show percentages instead of raw counters
+-v::
+--verbose::
+       Be verbose, for instance, show the raw counters in addition to the
+       diff.
+SEE ALSO
+--------
+linkperf:perf-record[1]
index a4cb79255383039dab29993fef2f0d020e4d825a..87a424e4cdae503fa9d2ef2687c8c9e7a24a4846 100644 (file)
@@ -427,6 +427,7 @@ BUILTIN_OBJS += bench/sched-messaging.o
 BUILTIN_OBJS += bench/sched-pipe.o
 BUILTIN_OBJS += bench/mem-memcpy.o
 
+BUILTIN_OBJS += builtin-diff.o
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-sched.o
 BUILTIN_OBJS += builtin-buildid-list.o
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
new file mode 100644 (file)
index 0000000..0d52801
--- /dev/null
@@ -0,0 +1,288 @@
+/*
+ * builtin-diff.c
+ *
+ * Builtin diff command: Analyze two perf.data input files, look up and read
+ * DSOs and symbol information, sort them and produce a diff.
+ */
+#include "builtin.h"
+
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/hist.h"
+#include "util/session.h"
+#include "util/sort.h"
+#include "util/symbol.h"
+#include "util/util.h"
+
+#include <stdlib.h>
+
+static char       const *input_old = "perf.data.old",
+                        *input_new = "perf.data";
+static int        force;
+static bool       show_percent;
+
+struct symbol_conf symbol_conf;
+
+static int perf_session__add_hist_entry(struct perf_session *self,
+                                       struct addr_location *al, u64 count)
+{
+       bool hit;
+       struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL,
+                                                              count, &hit);
+       if (he == NULL)
+               return -ENOMEM;
+
+       if (hit)
+               he->count += count;
+
+       return 0;
+}
+
+static int diff__process_sample_event(event_t *event, struct perf_session *session)
+{
+       struct addr_location al;
+       struct sample_data data = { .period = 1, };
+
+       dump_printf("(IP, %d): %d: %p\n", event->header.misc,
+                   event->ip.pid, (void *)(long)event->ip.ip);
+
+       if (event__preprocess_sample(event, session, &al, NULL) < 0) {
+               pr_warning("problem processing %d event, skipping it.\n",
+                          event->header.type);
+               return -1;
+       }
+
+       event__parse_sample(event, session->sample_type, &data);
+
+       if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) {
+               pr_warning("problem incrementing symbol count, skipping event\n");
+               return -1;
+       }
+
+       session->events_stats.total += data.period;
+       return 0;
+}
+
+static struct perf_event_ops event_ops = {
+       .process_sample_event = diff__process_sample_event,
+       .process_mmap_event   = event__process_mmap,
+       .process_comm_event   = event__process_comm,
+       .process_exit_event   = event__process_task,
+       .process_fork_event   = event__process_task,
+       .process_lost_event   = event__process_lost,
+};
+
+static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
+                                                   struct hist_entry *he)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct hist_entry *iter;
+
+       while (*p != NULL) {
+               int cmp;
+               parent = *p;
+               iter = rb_entry(parent, struct hist_entry, rb_node);
+
+               cmp = strcmp(he->map->dso->name, iter->map->dso->name);
+               if (cmp > 0)
+                       p = &(*p)->rb_left;
+               else if (cmp < 0)
+                       p = &(*p)->rb_right;
+               else {
+                       cmp = strcmp(he->sym->name, iter->sym->name);
+                       if (cmp > 0)
+                               p = &(*p)->rb_left;
+                       else
+                               p = &(*p)->rb_right;
+               }
+       }
+
+       rb_link_node(&he->rb_node, parent, p);
+       rb_insert_color(&he->rb_node, root);
+}
+
+static void perf_session__resort_by_name(struct perf_session *self)
+{
+       unsigned long position = 1;
+       struct rb_root tmp = RB_ROOT;
+       struct rb_node *next = rb_first(&self->hists);
+
+       while (next != NULL) {
+               struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
+
+               next = rb_next(&n->rb_node);
+               rb_erase(&n->rb_node, &self->hists);
+               n->position = position++;
+               perf_session__insert_hist_entry_by_name(&tmp, n);
+       }
+
+       self->hists = tmp;
+}
+
+static struct hist_entry *
+perf_session__find_hist_entry_by_name(struct perf_session *self,
+                                     struct hist_entry *he)
+{
+       struct rb_node *n = self->hists.rb_node;
+
+       while (n) {
+               struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
+               int cmp = strcmp(he->map->dso->name, iter->map->dso->name);
+
+               if (cmp > 0)
+                       n = n->rb_left;
+               else if (cmp < 0)
+                       n = n->rb_right;
+               else {
+                       cmp = strcmp(he->sym->name, iter->sym->name);
+                       if (cmp > 0)
+                               n = n->rb_left;
+                       else if (cmp < 0)
+                               n = n->rb_right;
+                       else
+                               return iter;
+               }
+       }
+
+       return NULL;
+}
+
+static void perf_session__match_hists(struct perf_session *old_session,
+                                     struct perf_session *new_session)
+{
+       struct rb_node *nd;
+
+       perf_session__resort_by_name(old_session);
+
+       for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) {
+               struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
+               pos->pair = perf_session__find_hist_entry_by_name(old_session, pos);
+       }
+}
+
+static size_t hist_entry__fprintf_matched(struct hist_entry *self,
+                                         unsigned long pos,
+                                         struct perf_session *session,
+                                         struct perf_session *pair_session,
+                                         FILE *fp)
+{
+       u64 old_count = 0;
+       char displacement[16];
+       size_t printed;
+
+       if (self->pair != NULL) {
+               long pdiff = (long)self->pair->position - (long)pos;
+               old_count = self->pair->count;
+               if (pdiff == 0)
+                       goto blank;
+               snprintf(displacement, sizeof(displacement), "%+4ld", pdiff);
+       } else {
+blank:         memset(displacement, ' ', sizeof(displacement));
+       }
+
+       printed = fprintf(fp, "%4lu %5.5s ", pos, displacement);
+
+       if (show_percent) {
+               double old_percent = (old_count * 100) / pair_session->events_stats.total,
+                      new_percent = (self->count * 100) / session->events_stats.total;
+               double diff = old_percent - new_percent;
+
+               if (verbose)
+                       printed += fprintf(fp, " %3.2f%% %3.2f%%", old_percent, new_percent);
+
+               if ((u64)diff != 0)
+                       printed += fprintf(fp, " %+4.2F%%", diff);
+               else
+                       printed += fprintf(fp, "       ");
+       } else {
+               if (verbose)
+                       printed += fprintf(fp, " %9Lu %9Lu", old_count, self->count);
+               printed += fprintf(fp, " %+9Ld", (s64)self->count - (s64)old_count);
+       }
+
+       return printed + fprintf(fp, " %25.25s   %s\n",
+                                self->map->dso->name, self->sym->name);
+}
+
+static size_t perf_session__fprintf_matched_hists(struct perf_session *self,
+                                                 struct perf_session *pair,
+                                                 FILE *fp)
+{
+       struct rb_node *nd;
+       size_t printed = 0;
+       unsigned long pos = 1;
+
+       for (nd = rb_first(&self->hists); nd; nd = rb_next(nd)) {
+               struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+               printed += hist_entry__fprintf_matched(he, pos++, self, pair, fp);
+       }
+
+       return printed;
+}
+
+static int __cmd_diff(void)
+{
+       int ret, i;
+       struct perf_session *session[2];
+
+       session[0] = perf_session__new(input_old, O_RDONLY, force, &symbol_conf);
+       session[1] = perf_session__new(input_new, O_RDONLY, force, &symbol_conf);
+       if (session[0] == NULL || session[1] == NULL)
+               return -ENOMEM;
+
+       for (i = 0; i < 2; ++i) {
+               ret = perf_session__process_events(session[i], &event_ops);
+               if (ret)
+                       goto out_delete;
+               perf_session__output_resort(session[i], session[i]->events_stats.total);
+       }
+
+       perf_session__match_hists(session[0], session[1]);
+       perf_session__fprintf_matched_hists(session[1], session[0], stdout);
+out_delete:
+       for (i = 0; i < 2; ++i)
+               perf_session__delete(session[i]);
+       return ret;
+}
+
+static const char *const diff_usage[] = {
+       "perf diff [<options>] [old_file] [new_file]",
+};
+
+static const struct option options[] = {
+       OPT_BOOLEAN('v', "verbose", &verbose,
+                   "be more verbose (show symbol address, etc)"),
+       OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+                   "dump raw trace in ASCII"),
+       OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+       OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
+                   "load module symbols - WARNING: use only with -k and LIVE kernel"),
+       OPT_BOOLEAN('p', "percentages", &show_percent,
+                   "Don't shorten the pathnames taking into account the cwd"),
+       OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+                   "Don't shorten the pathnames taking into account the cwd"),
+       OPT_END()
+};
+
+int cmd_diff(int argc, const char **argv, const char *prefix __used)
+{
+       if (symbol__init(&symbol_conf) < 0)
+               return -1;
+
+       setup_sorting(diff_usage, options);
+
+       argc = parse_options(argc, argv, options, diff_usage, 0);
+       if (argc) {
+               if (argc > 2)
+                       usage_with_options(diff_usage, options);
+               if (argc == 2) {
+                       input_old = argv[0];
+                       input_new = argv[1];
+               } else
+                       input_new = argv[0];
+       }
+
+       setup_pager();
+       return __cmd_diff();
+}
index a3d8bf65f26c2a0466bd76ab02d5e48db02ce0dc..18035b1f16c79eca09add0eb9a34c5540823faf1 100644 (file)
@@ -17,6 +17,7 @@ extern int check_pager_config(const char *cmd);
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
 extern int cmd_bench(int argc, const char **argv, const char *prefix);
 extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
+extern int cmd_diff(int argc, const char **argv, const char *prefix);
 extern int cmd_help(int argc, const char **argv, const char *prefix);
 extern int cmd_sched(int argc, const char **argv, const char *prefix);
 extern int cmd_list(int argc, const char **argv, const char *prefix);
index 02b09ea17a3ecad604cd970edf28b386fb22a15b..71dc7c3fe7b266c1ba8bca6cb05dc34de3044b1f 100644 (file)
@@ -5,6 +5,7 @@
 perf-annotate                  mainporcelain common
 perf-bench                     mainporcelain common
 perf-buildid-list              mainporcelain common
+perf-diff                      mainporcelain common
 perf-list                      mainporcelain common
 perf-sched                     mainporcelain common
 perf-record                    mainporcelain common
index cf64049bc9bdd71e4c97c68a59e5b754bf9311ad..873e55fab375d253d4b80d4fb1862053f21080a4 100644 (file)
@@ -286,6 +286,7 @@ static void handle_internal_command(int argc, const char **argv)
        const char *cmd = argv[0];
        static struct cmd_struct commands[] = {
                { "buildid-list", cmd_buildid_list, 0 },
+               { "diff",       cmd_diff,       0 },
                { "help",       cmd_help,       0 },
                { "list",       cmd_list,       0 },
                { "record",     cmd_record,     0 },
index cb6151c026f2e9da35ef4ca7080891f1830b33e1..925f083e1eeee43c7a6dbb75abfc2974449f9762 100644 (file)
@@ -49,9 +49,13 @@ struct hist_entry {
        struct symbol           *sym;
        u64                     ip;
        char                    level;
-       struct symbol           *parent;
+       struct symbol     *parent;
        struct callchain_node   callchain;
-       struct rb_root          sorted_chain;
+       union {
+               unsigned long     position;
+               struct hist_entry *pair;
+               struct rb_root    sorted_chain;
+       };
 };
 
 enum sort_type {