perf report: Add branch flag to callchain cursor node
author Jin Yao <yao.jin@linux.intel.com>
Mon, 31 Oct 2016 01:19:49 +0000 (09:19 +0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 14 Nov 2016 16:15:56 +0000 (13:15 -0300)
Since the branch ip has been added to the call stack for easier browsing,
this patch adds more branch information: a flag indicating whether the
ip is a branch, together with the branch flags of that entry.

Then we can know whether the cursor node represents a branch and which
branch flags it has.

The branch history code has a loop detection pass that removes loops. It
would be nice to know how many loops were removed, so that in the next
steps we can compute the average number of iterations.

For example:

Before remove_loops(),
entry0: from = 0x100, to = 0x200
entry1: from = 0x300, to = 0x250
entry2: from = 0x300, to = 0x250
entry3: from = 0x300, to = 0x250
entry4: from = 0x700, to = 0x800

After remove_loops()
entry0: from = 0x100, to = 0x200
entry1: from = 0x300, to = 0x250
entry2: from = 0x700, to = 0x800

The original entry2 and entry3 are removed. So the number of iterations
of the loop (from = 0x300, to = 0x250) is equal to the number of removed
entries + 1 (2 + 1).

iterations = removed number + 1;
average iterations = Sum(iterations) / number of samples

This formula ignores other cases, for example, iterations that cross
multiple buffers or a buffer that contains 2+ loops, because in practice
it is good enough.

Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linux-kernel@vger.kernel.org
Cc: Yao Jin <yao.jin@linux.intel.com>
Link: http://lkml.kernel.org/n/1477876794-30749-2-git-send-email-yao.jin@linux.intel.com
[ Renamed 'iter' to 'nr_loop_iter' for clarity ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/machine.c

index ae58b493af4549d07558b9b730b9cc7f05759a7e..138a415fad0d9b15e1cdc2ef6b73eefb351753e0 100644 (file)
@@ -728,7 +728,8 @@ merge_chain_branch(struct callchain_cursor *cursor,
 
        list_for_each_entry_safe(list, next_list, &src->val, list) {
                callchain_cursor_append(cursor, list->ip,
-                                       list->ms.map, list->ms.sym);
+                                       list->ms.map, list->ms.sym,
+                                       false, NULL, 0, 0);
                list_del(&list->list);
                free(list);
        }
@@ -765,7 +766,9 @@ int callchain_merge(struct callchain_cursor *cursor,
 }
 
 int callchain_cursor_append(struct callchain_cursor *cursor,
-                           u64 ip, struct map *map, struct symbol *sym)
+                           u64 ip, struct map *map, struct symbol *sym,
+                           bool branch, struct branch_flags *flags,
+                           int nr_loop_iter, int samples)
 {
        struct callchain_cursor_node *node = *cursor->last;
 
@@ -780,6 +783,13 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
        node->ip = ip;
        node->map = map;
        node->sym = sym;
+       node->branch = branch;
+       node->nr_loop_iter = nr_loop_iter;
+       node->samples = samples;
+
+       if (flags)
+               memcpy(&node->branch_flags, flags,
+                       sizeof(struct branch_flags));
 
        cursor->nr++;
 
index 47cfd10809755f9f2ed3dfd9fdd5b13b46ad9330..df6329d1c3503010d0e3e9cbbeeb7eec3ba60d18 100644 (file)
@@ -125,6 +125,10 @@ struct callchain_cursor_node {
        u64                             ip;
        struct map                      *map;
        struct symbol                   *sym;
+       bool                            branch;
+       struct branch_flags             branch_flags;
+       int                             nr_loop_iter;
+       int                             samples;
        struct callchain_cursor_node    *next;
 };
 
@@ -179,7 +183,9 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 }
 
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
-                           struct map *map, struct symbol *sym);
+                           struct map *map, struct symbol *sym,
+                           bool branch, struct branch_flags *flags,
+                           int nr_loop_iter, int samples);
 
 /* Close a cursor writing session. Initialize for the reader */
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
index df85b9efd80f4c6027a79d925e4b7f63a6569e37..9b33bef545818cd530eea80b8790ac0a6fcc026e 100644 (file)
@@ -1616,7 +1616,11 @@ static int add_callchain_ip(struct thread *thread,
                            struct symbol **parent,
                            struct addr_location *root_al,
                            u8 *cpumode,
-                           u64 ip)
+                           u64 ip,
+                           bool branch,
+                           struct branch_flags *flags,
+                           int nr_loop_iter,
+                           int samples)
 {
        struct addr_location al;
 
@@ -1668,7 +1672,8 @@ static int add_callchain_ip(struct thread *thread,
 
        if (symbol_conf.hide_unresolved && al.sym == NULL)
                return 0;
-       return callchain_cursor_append(cursor, al.addr, al.map, al.sym);
+       return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
+                                      branch, flags, nr_loop_iter, samples);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1757,7 +1762,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
        /* LBR only affects the user callchain */
        if (i != chain_nr) {
                struct branch_stack *lbr_stack = sample->branch_stack;
-               int lbr_nr = lbr_stack->nr, j;
+               int lbr_nr = lbr_stack->nr, j, k;
+               bool branch;
+               struct branch_flags *flags;
                /*
                 * LBR callstack can only get user call chain.
                 * The mix_chain_nr is kernel call chain
@@ -1772,23 +1779,41 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 
                for (j = 0; j < mix_chain_nr; j++) {
                        int err;
+                       branch = false;
+                       flags = NULL;
+
                        if (callchain_param.order == ORDER_CALLEE) {
                                if (j < i + 1)
                                        ip = chain->ips[j];
-                               else if (j > i + 1)
-                                       ip = lbr_stack->entries[j - i - 2].from;
-                               else
+                               else if (j > i + 1) {
+                                       k = j - i - 2;
+                                       ip = lbr_stack->entries[k].from;
+                                       branch = true;
+                                       flags = &lbr_stack->entries[k].flags;
+                               } else {
                                        ip = lbr_stack->entries[0].to;
+                                       branch = true;
+                                       flags = &lbr_stack->entries[0].flags;
+                               }
                        } else {
-                               if (j < lbr_nr)
-                                       ip = lbr_stack->entries[lbr_nr - j - 1].from;
+                               if (j < lbr_nr) {
+                                       k = lbr_nr - j - 1;
+                                       ip = lbr_stack->entries[k].from;
+                                       branch = true;
+                                       flags = &lbr_stack->entries[k].flags;
+                               }
                                else if (j > lbr_nr)
                                        ip = chain->ips[i + 1 - (j - lbr_nr)];
-                               else
+                               else {
                                        ip = lbr_stack->entries[0].to;
+                                       branch = true;
+                                       flags = &lbr_stack->entries[0].flags;
+                               }
                        }
 
-                       err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
+                       err = add_callchain_ip(thread, cursor, parent,
+                                              root_al, &cpumode, ip,
+                                              branch, flags, 0, 0);
                        if (err)
                                return (err < 0) ? err : 0;
                }
@@ -1813,6 +1838,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
        int i, j, err, nr_entries;
        int skip_idx = -1;
        int first_call = 0;
+       int nr_loop_iter;
 
        if (perf_evsel__has_branch_callstack(evsel)) {
                err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
@@ -1868,14 +1894,37 @@ static int thread__resolve_callchain_sample(struct thread *thread,
                                be[i] = branch->entries[branch->nr - i - 1];
                }
 
+               nr_loop_iter = nr;
                nr = remove_loops(be, nr);
 
+               /*
+                * Get the number of iterations.
+                * It's only approximation, but good enough in practice.
+                */
+               if (nr_loop_iter > nr)
+                       nr_loop_iter = nr_loop_iter - nr + 1;
+               else
+                       nr_loop_iter = 0;
+
                for (i = 0; i < nr; i++) {
-                       err = add_callchain_ip(thread, cursor, parent, root_al,
-                                              NULL, be[i].to);
+                       if (i == nr - 1)
+                               err = add_callchain_ip(thread, cursor, parent,
+                                                      root_al,
+                                                      NULL, be[i].to,
+                                                      true, &be[i].flags,
+                                                      nr_loop_iter, 1);
+                       else
+                               err = add_callchain_ip(thread, cursor, parent,
+                                                      root_al,
+                                                      NULL, be[i].to,
+                                                      true, &be[i].flags,
+                                                      0, 0);
+
                        if (!err)
                                err = add_callchain_ip(thread, cursor, parent, root_al,
-                                                      NULL, be[i].from);
+                                                      NULL, be[i].from,
+                                                      true, &be[i].flags,
+                                                      0, 0);
                        if (err == -EINVAL)
                                break;
                        if (err)
@@ -1903,7 +1952,9 @@ check_calls:
                if (ip < PERF_CONTEXT_MAX)
                        ++nr_entries;
 
-               err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
+               err = add_callchain_ip(thread, cursor, parent,
+                                      root_al, &cpumode, ip,
+                                      false, NULL, 0, 0);
 
                if (err)
                        return (err < 0) ? err : 0;
@@ -1919,7 +1970,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
        if (symbol_conf.hide_unresolved && entry->sym == NULL)
                return 0;
        return callchain_cursor_append(cursor, entry->ip,
-                                      entry->map, entry->sym);
+                                      entry->map, entry->sym,
+                                      false, NULL, 0, 0);
 }
 
 static int thread__resolve_callchain_unwind(struct thread *thread,