perf report: Fix and improve the displaying of per-thread event counters
author Brice Goglin <Brice.Goglin@inria.fr>
Fri, 7 Aug 2009 11:55:24 +0000 (13:55 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sun, 9 Aug 2009 11:04:20 +0000 (13:04 +0200)
Improve and fix the handling of per-thread counter stats
recorded via perf record -s. Previously we only displayed
it in debug printouts (-D) and even that output was hard
to disambiguate.

I moved everything to utils/values.[ch] so that we may reuse
it in perf stat.

We get something like this now:

 #  PID   TID  cache-misses  cache-references
   4658  4659        495581           3238779
   4658  4662        498246           3236823
   4658  4663        499531           3243162

Then it'll be easy to add --pretty=raw to display a single line per thread/event.

By the way, -S was already used for --symbol, so I used -T/--threads here.

perf report: Add -T/--threads to display per-thread counter values

 We get something like this now:
 #  PID   TID  cache-misses  cache-references
   4658  4659        495581           3238779
   4658  4662        498246           3236823
   4658  4663        499531           3243162

Per-thread arrays of counter values are managed in utils/values.[ch]

Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus@samba.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
tools/perf/Documentation/perf-report.txt
tools/perf/Makefile
tools/perf/builtin-report.c
tools/perf/util/values.c [new file with mode: 0644]
tools/perf/util/values.h [new file with mode: 0644]

index e72e931107826e069d5bfabc3677dca7a2745af6..370344afb5b29884d7c615dbf2adb43eb7a59f70 100644 (file)
@@ -27,6 +27,9 @@ OPTIONS
 -n
 --show-nr-samples
        Show the number of samples for each symbol
+-T
+--threads
+       Show per-thread event counters
 -C::
 --comms=::
        Only consider symbols in these comms. CSV that understands
index 60411e94113be47ee778f2d5f3e54b396945850e..de7beac1095ebb9562c42c3696f28deb76c39c7a 100644 (file)
@@ -310,6 +310,7 @@ LIB_H += util/sigchain.h
 LIB_H += util/symbol.h
 LIB_H += util/module.h
 LIB_H += util/color.h
+LIB_H += util/values.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
@@ -337,6 +338,7 @@ LIB_OBJS += util/color.o
 LIB_OBJS += util/pager.o
 LIB_OBJS += util/header.o
 LIB_OBJS += util/callchain.o
+LIB_OBJS += util/values.o
 
 BUILTIN_OBJS += builtin-annotate.o
 BUILTIN_OBJS += builtin-help.o
index 99274cec0adb7515e00802000445bb548778ee04..41639182fb3fd67acbb44fc8a7a4135f418f147f 100644 (file)
@@ -17,6 +17,7 @@
 #include "util/string.h"
 #include "util/callchain.h"
 #include "util/strlist.h"
+#include "util/values.h"
 
 #include "perf.h"
 #include "util/header.h"
@@ -53,6 +54,9 @@ static int            modules;
 static int             full_paths;
 static int             show_nr_samples;
 
+static int             show_threads;
+static struct perf_read_values show_threads_values;
+
 static unsigned long   page_size;
 static unsigned long   mmap_window = 32;
 
@@ -1473,6 +1477,9 @@ print_entries:
 
        free(rem_sq_bracket);
 
+       if (show_threads)
+               perf_read_values_display(fp, &show_threads_values);
+
        return ret;
 }
 
@@ -1758,6 +1765,16 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head)
 {
        struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
 
+       if (show_threads) {
+               char *name = attr ? __event_name(attr->type, attr->config)
+                                  : "unknown";
+               perf_read_values_add_value(&show_threads_values,
+                                          event->read.pid, event->read.tid,
+                                          event->read.id,
+                                          name,
+                                          event->read.value);
+       }
+
        dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
                        (void *)(offset + head),
                        (void *)(long)(event->header.size),
@@ -1839,6 +1856,9 @@ static int __cmd_report(void)
 
        register_idle_thread();
 
+       if (show_threads)
+               perf_read_values_init(&show_threads_values);
+
        input = open(input_name, O_RDONLY);
        if (input < 0) {
                fprintf(stderr, " failed to open file: %s", input_name);
@@ -1993,6 +2013,9 @@ done:
        output__resort(total);
        output__fprintf(stdout, total);
 
+       if (show_threads)
+               perf_read_values_destroy(&show_threads_values);
+
        return rc;
 }
 
@@ -2066,6 +2089,8 @@ static const struct option options[] = {
                    "load module symbols - WARNING: use only with -k and LIVE kernel"),
        OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
                    "Show a column with the number of samples"),
+       OPT_BOOLEAN('T', "threads", &show_threads,
+                   "Show per-thread event counters"),
        OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
                   "sort by key(s): pid, comm, dso, symbol, parent"),
        OPT_BOOLEAN('P', "full-paths", &full_paths,
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
new file mode 100644 (file)
index 0000000..8551c0b
--- /dev/null
@@ -0,0 +1,171 @@
+#include <stdlib.h>
+
+#include "util.h"
+#include "values.h"
+
+/*
+ * Prepare an empty table with room for 16 threads and 16 counters and
+ * both usage counts set to zero.  Calls die() if any of the arrays
+ * cannot be allocated.
+ */
+void perf_read_values_init(struct perf_read_values *values)
+{
+       const int initial_slots = 16;
+
+       /* Per-thread arrays: pid, tid and the per-thread row pointers. */
+       values->threads_max = initial_slots;
+       values->pid = malloc(initial_slots * sizeof(*values->pid));
+       values->tid = malloc(initial_slots * sizeof(*values->tid));
+       values->value = malloc(initial_slots * sizeof(*values->value));
+       if (values->pid == NULL || values->tid == NULL ||
+           values->value == NULL)
+               die("failed to allocate read_values threads arrays");
+       values->threads = 0;
+
+       /* Per-counter arrays: raw id and display name. */
+       values->counters_max = initial_slots;
+       values->counterrawid = malloc(initial_slots *
+                                     sizeof(*values->counterrawid));
+       values->countername = malloc(initial_slots *
+                                    sizeof(*values->countername));
+       if (values->counterrawid == NULL || values->countername == NULL)
+               die("failed to allocate read_values counters arrays");
+       values->counters = 0;
+}
+
+/*
+ * Release every allocation owned by @values and reset it to the
+ * all-zero state.  A structure whose threads_max or counters_max is
+ * zero (never initialised, or already destroyed) is left untouched.
+ */
+void perf_read_values_destroy(struct perf_read_values *values)
+{
+       int i;
+
+       if (!values->threads_max || !values->counters_max)
+               return;
+
+       for (i = 0; i < values->threads; i++)
+               free(values->value[i]);
+       /* The array of row pointers is a separate allocation. */
+       free(values->value);
+       free(values->pid);
+       free(values->tid);
+       free(values->counterrawid);
+       for (i = 0; i < values->counters; i++)
+               free(values->countername[i]);
+       free(values->countername);
+
+       /* Drop the dangling pointers so a second destroy is a no-op. */
+       values->value = NULL;
+       values->pid = NULL;
+       values->tid = NULL;
+       values->counterrawid = NULL;
+       values->countername = NULL;
+       values->threads = 0;
+       values->threads_max = 0;
+       values->counters = 0;
+       values->counters_max = 0;
+}
+
+/*
+ * Double the capacity of the per-thread arrays (pid, tid and the row
+ * pointer array).  Calls die() if any reallocation fails.
+ */
+static void perf_read_values__enlarge_threads(struct perf_read_values *values)
+{
+       int capacity;
+
+       values->threads_max *= 2;
+       capacity = values->threads_max;
+
+       values->pid = realloc(values->pid, capacity * sizeof(*values->pid));
+       values->tid = realloc(values->tid, capacity * sizeof(*values->tid));
+       values->value = realloc(values->value,
+                               capacity * sizeof(*values->value));
+
+       if (values->pid == NULL || values->tid == NULL ||
+           values->value == NULL)
+               die("failed to enlarge read_values threads arrays");
+}
+
+/*
+ * Return the row index for the (pid, tid) pair, appending a new row if
+ * the pair has not been seen yet.  The thread arrays are enlarged when
+ * full.  Calls die() on allocation failure.
+ */
+static int perf_read_values__findnew_thread(struct perf_read_values *values,
+                                           u32 pid, u32 tid)
+{
+       int i;
+
+       for (i = 0; i < values->threads; i++)
+               if (values->pid[i] == pid && values->tid[i] == tid)
+                       return i;
+
+       if (values->threads == values->threads_max)
+               perf_read_values__enlarge_threads(values);
+
+       i = values->threads++;
+       values->pid[i] = pid;
+       values->tid[i] = tid;
+       /*
+        * Zero the row: the display code prints every counter column for
+        * every thread, including counters this thread never reported.
+        */
+       values->value[i] = calloc(values->counters_max, sizeof(**values->value));
+       if (!values->value[i])
+               die("failed to allocate read_values counters array");
+
+       return i;
+}
+
+/*
+ * Double the capacity of the per-counter arrays and of every existing
+ * thread row.  The slots added to each row are zeroed so that counters
+ * a thread never reports read back as 0.  Calls die() if any
+ * reallocation fails.
+ */
+static void perf_read_values__enlarge_counters(struct perf_read_values *values)
+{
+       int i, j;
+       int old_max = values->counters_max;
+
+       values->counters_max *= 2;
+       values->counterrawid = realloc(values->counterrawid,
+                                      values->counters_max * sizeof(*values->counterrawid));
+       values->countername = realloc(values->countername,
+                                     values->counters_max * sizeof(*values->countername));
+       if (!values->counterrawid || !values->countername)
+               die("failed to enlarge read_values counters arrays");
+
+       for (i = 0; i < values->threads; i++) {
+               values->value[i] = realloc(values->value[i],
+                                          values->counters_max * sizeof(**values->value));
+               if (!values->value[i])
+                       die("failed to enlarge read_values counters arrays");
+               /* realloc() leaves the newly added tail uninitialised. */
+               for (j = old_max; j < values->counters_max; j++)
+                       values->value[i][j] = 0;
+       }
+}
+
+/*
+ * Return the column index for the counter identified by @rawid,
+ * appending a new column (with a private copy of @name) if the id has
+ * not been seen yet.  The counter arrays are enlarged when full.
+ * Calls die() on allocation failure.
+ */
+static int perf_read_values__findnew_counter(struct perf_read_values *values,
+                                            u64 rawid, char *name)
+{
+       int i;
+
+       for (i = 0; i < values->counters; i++)
+               if (values->counterrawid[i] == rawid)
+                       return i;
+
+       if (values->counters == values->counters_max)
+               perf_read_values__enlarge_counters(values);
+
+       i = values->counters++;
+       values->counterrawid[i] = rawid;
+       values->countername[i] = strdup(name);
+       /* The name is later passed to strlen() and "%s"; never store NULL. */
+       if (!values->countername[i])
+               die("failed to allocate read_values counter name");
+
+       return i;
+}
+
+/*
+ * Store @value in the cell for thread (@pid, @tid) and counter @rawid,
+ * creating the thread row and/or counter column on first use.  @name
+ * is only consulted when the counter column is created.
+ */
+void perf_read_values_add_value(struct perf_read_values *values,
+                               u32 pid, u32 tid,
+                               u64 rawid, char *name, u64 value)
+{
+       /* Look the thread up first, then the counter, as before. */
+       int row = perf_read_values__findnew_thread(values, pid, tid);
+       int column = perf_read_values__findnew_counter(values, rawid, name);
+
+       values->value[row][column] = value;
+}
+
+/*
+ * Print the table to @fp: a "# PID TID <counter names...>" header line
+ * followed by one line per thread.  Every column is right-aligned and
+ * as wide as its widest entry (header included; PID/TID at least 3).
+ * Calls die() if the temporary width array cannot be allocated.
+ */
+void perf_read_values_display(FILE *fp, struct perf_read_values *values)
+{
+       int i, j;
+       int pidwidth, tidwidth;
+       int *counterwidth;
+
+       counterwidth = malloc(values->counters * sizeof(*counterwidth));
+       /* malloc(0) may return NULL; that is only an error with counters. */
+       if (!counterwidth && values->counters)
+               die("failed to allocate counterwidth array");
+       tidwidth = 3;
+       pidwidth = 3;
+       for (j = 0; j < values->counters; j++)
+               counterwidth[j] = strlen(values->countername[j]);
+       /* snprintf(NULL, 0, ...) returns the printed length of each cell. */
+       for (i = 0; i < values->threads; i++) {
+               int width;
+
+               width = snprintf(NULL, 0, "%d", values->pid[i]);
+               if (width > pidwidth)
+                       pidwidth = width;
+               width = snprintf(NULL, 0, "%d", values->tid[i]);
+               if (width > tidwidth)
+                       tidwidth = width;
+               for (j = 0; j < values->counters; j++) {
+                       width = snprintf(NULL, 0, "%Lu", values->value[i][j]);
+                       if (width > counterwidth[j])
+                               counterwidth[j] = width;
+               }
+       }
+
+       fprintf(fp, "# %*s  %*s", pidwidth, "PID", tidwidth, "TID");
+       for (j = 0; j < values->counters; j++)
+               fprintf(fp, "  %*s", counterwidth[j], values->countername[j]);
+       fprintf(fp, "\n");
+
+       for (i = 0; i < values->threads; i++) {
+               fprintf(fp, "  %*d  %*d", pidwidth, values->pid[i],
+                       tidwidth, values->tid[i]);
+               for (j = 0; j < values->counters; j++)
+                       fprintf(fp, "  %*Lu",
+                               counterwidth[j], values->value[i][j]);
+               fprintf(fp, "\n");
+       }
+
+       /* The width array is scratch space for this call only. */
+       free(counterwidth);
+}
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
new file mode 100644 (file)
index 0000000..e41be5e
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef _PERF_VALUES_H
+#define _PERF_VALUES_H
+
+#include "types.h"
+
+/*
+ * Table of counter values indexed by thread (row) and counter (column).
+ * Rows and columns are appended on first use by
+ * perf_read_values_add_value(); the arrays grow by doubling.
+ */
+struct perf_read_values {
+       /* number of thread rows in use */
+       int threads;
+       /* allocated capacity of pid[], tid[] and value[] */
+       int threads_max;
+       /* pid[i] / tid[i] identify thread row i */
+       u32 *pid, *tid;
+       /* number of counter columns in use */
+       int counters;
+       /* allocated capacity of counterrawid[], countername[] and each value[i][] */
+       int counters_max;
+       /* counterrawid[j] is the raw id that selects counter column j */
+       u64 *counterrawid;
+       /* countername[j] is a strdup()ed display name for column j */
+       char **countername;
+       /* value[i][j] is the value stored for thread i, counter j */
+       u64 **value;
+};
+
+void perf_read_values_init(struct perf_read_values *values);
+void perf_read_values_destroy(struct perf_read_values *values);
+
+void perf_read_values_add_value(struct perf_read_values *values,
+                               u32 pid, u32 tid,
+                               u64 rawid, char *name, u64 value);
+
+void perf_read_values_display(FILE *fp, struct perf_read_values *values);
+
+#endif /* _PERF_VALUES_H */