perf c2c report: Add 'node' sort key
author Jiri Olsa <jolsa@kernel.org>
Fri, 3 Jun 2016 13:40:28 +0000 (15:40 +0200)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 19 Oct 2016 16:18:31 +0000 (13:18 -0300)
It is to be displayed in the single cacheline output:

  node

It displays node hits related to cacheline accesses.

The node field comes in 3 flavors:
  - node IDs separated by ','
  - node IDs with stats for each ID, in the following format:
      Node{cpus %hitms %stores}
  - node IDs with the list of affected CPUs, in the following format:
      Node{cpu list}

The user can switch the flavor with the -N option (-NN, -NNN).
In the TUI it will be possible to switch the flavor with the 'n' key.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-6742e6g0r7n63y5wc4rrgxx5@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-c2c.c

index ffd41744886e4d55b730b0ebc06c2b91d5c12d62..ca2f37479e6dc0991331f6d4b78272c095c5310b 100644 (file)
@@ -1,6 +1,7 @@
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/stringify.h>
+#include <asm/bug.h>
 #include "util.h"
 #include "debug.h"
 #include "builtin.h"
@@ -22,6 +23,8 @@ struct c2c_hists {
 struct c2c_hist_entry {
        struct c2c_hists        *hists;
        struct c2c_stats         stats;
+       unsigned long           *cpuset;
+       struct c2c_stats        *node_stats;
        /*
         * must be at the end,
         * because of its callchain dynamic entry
@@ -32,6 +35,12 @@ struct c2c_hist_entry {
 struct perf_c2c {
        struct perf_tool        tool;
        struct c2c_hists        hists;
+
+       /* one CPU bitmap per NUMA node, filled in by setup_nodes() */
+       unsigned long           **nodes;
+       /* number of NUMA nodes, taken from the session header env */
+       int                      nodes_cnt;
+       /* number of online CPUs, taken from the session header env */
+       int                      cpus_cnt;
+       /* CPU id -> node id, -1 for a CPU that no node map mentions */
+       int                     *cpu2node;
+       /* 'node' column flavor (0..2), incremented by each -N option */
+       int                      node_info;
 };
 
 static struct perf_c2c c2c;
@@ -44,6 +53,14 @@ static void *c2c_he_zalloc(size_t size)
        if (!c2c_he)
                return NULL;
 
+       c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
+       if (!c2c_he->cpuset)
+               return NULL;
+
+       c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
+       if (!c2c_he->node_stats)
+               return NULL;
+
        return &c2c_he->he;
 }
 
@@ -57,6 +74,8 @@ static void c2c_he_free(void *he)
                free(c2c_he->hists);
        }
 
+       free(c2c_he->cpuset);
+       free(c2c_he->node_stats);
        free(c2c_he);
 }
 
@@ -93,6 +112,16 @@ he__get_c2c_hists(struct hist_entry *he,
        return hists;
 }
 
+/*
+ * Mark the sample's CPU in the hist entry's cpuset bitmap.
+ *
+ * Samples that carry no CPU value ((unsigned int) -1) are ignored after
+ * a one-time warning. The bitmap is allocated with c2c.cpus_cnt bits,
+ * so a CPU id at or above that count is ignored as well (also with a
+ * one-time warning) rather than written past the end of the bitmap.
+ */
+static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
+                           struct perf_sample *sample)
+{
+       if (WARN_ONCE(sample->cpu == (unsigned int) -1,
+                     "WARNING: no sample cpu value"))
+               return;
+
+       /* cpuset holds c2c.cpus_cnt bits; never set a bit beyond it */
+       if (WARN_ONCE(sample->cpu >= (unsigned int) c2c.cpus_cnt,
+                     "WARNING: sample cpu value out of range"))
+               return;
+
+       set_bit(sample->cpu, c2c_he->cpuset);
+}
+
 static int process_sample_event(struct perf_tool *tool __maybe_unused,
                                union perf_event *event,
                                struct perf_sample *sample,
@@ -133,10 +162,23 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
        c2c_add_stats(&c2c_he->stats, &stats);
        c2c_add_stats(&c2c_hists->stats, &stats);
 
+       c2c_he__set_cpu(c2c_he, sample);
+
        hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
        ret = hist_entry__append_callchain(he, sample);
 
        if (!ret) {
+               /*
+                * There's already been warning about missing
+                * sample's cpu value. Let's account all to
+                * node 0 in this case, without any further
+                * warning.
+                *
+                * Doing node stats only for single callchain data.
+                */
+               int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
+               int node = c2c.cpu2node[cpu];
+
                mi = mi_dup;
 
                mi_dup = memdup(mi, sizeof(*mi));
@@ -156,6 +198,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
                c2c_he = container_of(he, struct c2c_hist_entry, he);
                c2c_add_stats(&c2c_he->stats, &stats);
                c2c_add_stats(&c2c_hists->stats, &stats);
+               c2c_add_stats(&c2c_he->node_stats[node], &stats);
+
+               c2c_he__set_cpu(c2c_he, sample);
 
                hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
                ret = hist_entry__append_callchain(he, sample);
@@ -826,6 +871,97 @@ pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
        return left->thread->pid_ - right->thread->pid_;
 }
 
+/*
+ * Comparison callback that treats every pair of entries as equal:
+ * it ignores all of its arguments and reports 0.
+ */
+static int64_t empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+                        struct hist_entry *left __maybe_unused,
+                        struct hist_entry *right __maybe_unused)
+{
+       const int64_t equal = 0;
+
+       return equal;
+}
+
+/*
+ * Format the 'node' column of a hist entry into hpp->buf.
+ *
+ * For every node whose CPU bitmap intersects the CPUs recorded in the
+ * entry, one item is printed in the flavor selected by c2c.node_info:
+ *   0 - node ID
+ *   1 - node ID{cpus %hitms %stores}
+ *   2 - node ID{cpu list}
+ * Items are separated by a single space. In flavor 1 a node without any
+ * recorded CPU is replaced by 21 blanks (presumably to keep the per-node
+ * items aligned).
+ *
+ * Always returns 0.
+ */
+static int
+node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+          struct hist_entry *he)
+{
+       struct c2c_hist_entry *c2c_he;
+       bool first = true;
+       int node;
+       int ret;
+
+       c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+       for (node = 0; node < c2c.nodes_cnt; node++) {
+               DECLARE_BITMAP(set, c2c.cpus_cnt);
+
+               /* set = CPUs of this node that touched the entry */
+               bitmap_zero(set, c2c.cpus_cnt);
+               bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt);
+
+               if (!bitmap_weight(set, c2c.cpus_cnt)) {
+                       if (c2c.node_info == 1) {
+                               ret = scnprintf(hpp->buf, hpp->size, "%21s", " ");
+                               advance_hpp(hpp, ret);
+                       }
+                       continue;
+               }
+
+               if (!first) {
+                       ret = scnprintf(hpp->buf, hpp->size, " ");
+                       advance_hpp(hpp, ret);
+               }
+
+               switch (c2c.node_info) {
+               case 0:
+                       ret = scnprintf(hpp->buf, hpp->size, "%2d", node);
+                       advance_hpp(hpp, ret);
+                       break;
+               case 1:
+               {
+                       /*
+                        * Count only this node's CPUs: the item is
+                        * per node, so the whole-entry cpuset would
+                        * repeat the same number for every node.
+                        */
+                       int num = bitmap_weight(set, c2c.cpus_cnt);
+                       struct c2c_stats *stats = &c2c_he->node_stats[node];
+
+                       ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
+                       advance_hpp(hpp, ret);
+
+                       /* node's share of the entry's remote HITMs */
+                       if (c2c_he->stats.rmt_hitm > 0) {
+                               ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
+                                               percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm));
+                       } else {
+                               ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
+                       }
+
+                       advance_hpp(hpp, ret);
+
+                       /* node's share of the entry's stores */
+                       if (c2c_he->stats.store > 0) {
+                               ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}",
+                                               percent(stats->store, c2c_he->stats.store));
+                       } else {
+                               ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a");
+                       }
+
+                       advance_hpp(hpp, ret);
+                       break;
+               }
+               case 2:
+                       ret = scnprintf(hpp->buf, hpp->size, "%2d{", node);
+                       advance_hpp(hpp, ret);
+
+                       ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size);
+                       advance_hpp(hpp, ret);
+
+                       ret = scnprintf(hpp->buf, hpp->size, "}");
+                       advance_hpp(hpp, ret);
+                       break;
+               default:
+                       break;
+               }
+
+               first = false;
+       }
+
+       return 0;
+}
+
 #define HEADER_LOW(__h)                        \
        {                               \
                .line[1] = {            \
@@ -1115,6 +1251,19 @@ static struct c2c_dimension dim_dso = {
        .se             = &sort_dso,
 };
 
+/*
+ * Column headers for the 'node' dimension, one per output flavor.
+ * setup_nodes_header() indexes this table with c2c.node_info.
+ */
+static struct c2c_header header_node[3] = {
+       HEADER_LOW("Node"),
+       HEADER_LOW("Node{cpus %hitms %stores}"),
+       HEADER_LOW("Node{cpu list}"),
+};
+
+/*
+ * The 'node' dimension: rendered by node_entry() and compared with
+ * empty_cmp(), which reports every pair of entries as equal.
+ * No header is set here; setup_nodes_header() assigns it.
+ */
+static struct c2c_dimension dim_node = {
+       .name           = "node",
+       .cmp            = empty_cmp,
+       .entry          = node_entry,
+       .width          = 4,
+};
+
 static struct c2c_dimension *dimensions[] = {
        &dim_dcacheline,
        &dim_offset,
@@ -1148,6 +1297,7 @@ static struct c2c_dimension *dimensions[] = {
        &dim_tid,
        &dim_symbol,
        &dim_dso,
+       &dim_node,
        NULL,
 };
 
@@ -1374,6 +1524,68 @@ static int resort_cl_cb(struct hist_entry *he)
        return 0;
 }
 
+/* Pick the 'node' column header that matches the current c2c.node_info. */
+static void setup_nodes_header(void)
+{
+       int flavor = c2c.node_info;
+
+       dim_node.header = header_node[flavor];
+}
+
+/*
+ * Build the node/CPU lookup data from the session header:
+ *   c2c.nodes[node]   - bitmap of the CPUs listed for that node
+ *   c2c.cpu2node[cpu] - node the CPU is listed under, -1 if none
+ * and select the 'node' column header for the requested flavor.
+ *
+ * c2c.node_info is clamped to 2, the highest supported flavor.
+ *
+ * Returns 0 on success, -EINVAL when the header carries no NUMA node
+ * data, a CPU id falls outside [0, c2c.cpus_cnt) or a CPU is listed
+ * under more than one node, and -ENOMEM on allocation failure.
+ * Arrays already stored in c2c stay there on failure; nothing in this
+ * function releases them.
+ */
+static int setup_nodes(struct perf_session *session)
+{
+       struct numa_node *n;
+       unsigned long **nodes;
+       int node, cpu;
+       int *cpu2node;
+
+       if (c2c.node_info > 2)
+               c2c.node_info = 2;
+
+       c2c.nodes_cnt = session->header.env.nr_numa_nodes;
+       c2c.cpus_cnt  = session->header.env.nr_cpus_online;
+
+       n = session->header.env.numa_nodes;
+       if (!n)
+               return -EINVAL;
+
+       nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt);
+       if (!nodes)
+               return -ENOMEM;
+
+       c2c.nodes = nodes;
+
+       cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt);
+       if (!cpu2node)
+               return -ENOMEM;
+
+       /* -1 marks a CPU that has not been assigned to a node yet */
+       for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
+               cpu2node[cpu] = -1;
+
+       c2c.cpu2node = cpu2node;
+
+       for (node = 0; node < c2c.nodes_cnt; node++) {
+               struct cpu_map *map = n[node].map;
+               unsigned long *set;
+
+               set = bitmap_alloc(c2c.cpus_cnt);
+               if (!set)
+                       return -ENOMEM;
+
+               /*
+                * Store the bitmap right away so it stays reachable
+                * through c2c.nodes on every exit path below.
+                */
+               nodes[node] = set;
+
+               /* node without a CPU map: nothing to mark */
+               if (!map)
+                       continue;
+
+               for (cpu = 0; cpu < map->nr; cpu++) {
+                       int id = map->map[cpu];
+
+                       /* both set and cpu2node hold c2c.cpus_cnt slots */
+                       if (WARN_ONCE(id < 0 || id >= c2c.cpus_cnt, "node/cpu topology bug"))
+                               return -EINVAL;
+
+                       set_bit(id, set);
+
+                       /* a CPU must not show up under two nodes */
+                       if (WARN_ONCE(cpu2node[id] != -1, "node/cpu topology bug"))
+                               return -EINVAL;
+
+                       cpu2node[id] = node;
+               }
+       }
+
+       setup_nodes_header();
+       return 0;
+}
+
+
 static int perf_c2c__report(int argc, const char **argv)
 {
        struct perf_session *session;
@@ -1388,6 +1600,8 @@ static int perf_c2c__report(int argc, const char **argv)
                 "be more verbose (show counter open errors, etc)"),
        OPT_STRING('i', "input", &input_name, "file",
                   "the input file to process"),
+       OPT_INCR('N', "node-info", &c2c.node_info,
+                "show extra node info in report (repeat for more info)"),
        OPT_END()
        };
        int err = 0;
@@ -1413,6 +1627,11 @@ static int perf_c2c__report(int argc, const char **argv)
                pr_debug("No memory for session\n");
                goto out;
        }
+       err = setup_nodes(session);
+       if (err) {
+               pr_err("Failed setup nodes\n");
+               goto out;
+       }
 
        if (symbol__init(&session->header.env) < 0)
                goto out_session;