perf stat: Basic support for TopDown in perf stat

author Andi Kleen <ak@linux.intel.com>

Mon, 30 May 2016 15:49:42 +0000 (12:49 -0300)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Mon, 6 Jun 2016 20:04:15 +0000 (17:04 -0300)
author Andi Kleen <ak@linux.intel.com>
Mon, 30 May 2016 15:49:42 +0000 (12:49 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 6 Jun 2016 20:04:15 +0000 (17:04 -0300)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt

index 04f23b404bbc5a7bd4b5bcdfb7e833d79ba1234e..d96ccd4844df9a49f33b05c6c0384b5f8e6eef05 100644 (file)
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements.
  --no-aggr::
  Do not aggregate counts across all monitored CPUs.
  
+--topdown::
+Print top down level 1 metrics if supported by the CPU. This makes it
+possible to determine bottlenecks in the CPU pipeline for CPU bound
+workloads, by breaking the cycles consumed down into frontend bound,
+backend bound, bad speculation and retiring.
+
+Frontend bound means that the CPU cannot fetch and decode instructions fast
+enough. Backend bound means that computation or memory access is the
+bottleneck. Bad Speculation means that the CPU wasted cycles due to branch
+mispredictions and similar issues. Retiring means that the CPU computed without
+an apparent bottleneck. The bottleneck is only the real bottleneck
+if the workload is actually bound by the CPU and not by something else.
+
+For best results it is usually a good idea to use it with interval
+mode like -I 1000, as the bottleneck of workloads can change often.
+
+The top down metrics are collected per core instead of per
+CPU thread. Per core mode is automatically enabled
+and -a (global monitoring) is needed, requiring root rights or
+kernel.perf_event_paranoid=-1.
+
+Topdown uses the full Performance Monitoring Unit, and needs
+disabling of the NMI watchdog (as root):
+echo 0 > /proc/sys/kernel/nmi_watchdog
+for best results. Otherwise the bottlenecks may be inconsistent
+on workloads with changing phases.
+
+This enables --metric-only, unless overridden with --no-metric-only.
+
+To interpret the results it is usually necessary to know which
+CPUs the workload runs on. If needed the CPUs can be forced using
+taskset.
  
  EXAMPLES
  --------
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build

index 465970370f3ed4db72ca525ae5f56f55150d7b12..4cd8a16b1b7b8dabccf245b9469647db30cf3f48 100644 (file)
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -3,6 +3,7 @@ libperf-y += tsc.o
  libperf-y += pmu.o
  libperf-y += kvm-stat.o
  libperf-y += perf_regs.o
+libperf-y += group.o
  
  libperf-$(CONFIG_DWARF) += dwarf-regs.o
  libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c

new file mode 100644 (file)

index 0000000..37f92aa
--- /dev/null
+++ b/tools/perf/arch/x86/util/group.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/group.h"
+
+/*
+ * Check whether we can use a group for top down.
+ * Without a group we may get bad results due to multiplexing.
+ *
+ * Returns true only when the kernel/nmi_watchdog sysctl can be read and
+ * its value is not positive. When the value is positive, *warn is set to
+ * true and false is returned. If the sysctl cannot be read, false is
+ * returned and *warn is left untouched.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+       int n;
+
+       if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+               return false;
+       if (n > 0) {
+               *warn = true;
+               return false;
+       }
+       return true;
+}
+
+/*
+ * Print a message on stderr telling the user that the NMI watchdog is
+ * enabled while topdown is in use, and how to disable it.
+ */
+void arch_topdown_group_warn(void)
+{
+       fprintf(stderr,
+               "nmi_watchdog enabled with topdown. May give wrong results.\n"
+               "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
+}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c

index ee7ada78d86f81c5f788feed43f42ec49e33bb96..fd76bb0b18d1bded9ab45c25177ff23956b1d003 100644 (file)
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,10 +59,13 @@
  #include "util/thread.h"
  #include "util/thread_map.h"
  #include "util/counts.h"
+#include "util/group.h"
  #include "util/session.h"
  #include "util/tool.h"
+#include "util/group.h"
  #include "asm/bug.h"
  
+#include <api/fs/fs.h>
  #include <stdlib.h>
  #include <sys/prctl.h>
  #include <locale.h>
@@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = {
         "}"
  };
  
+/*
+ * NULL-terminated list of event names requested for --topdown.
+ * The array itself is writable: topdown_filter_events() compacts it in
+ * place, keeping only the names that pmu_have_event("cpu", ...) accepts.
+ */
+static const char * topdown_attrs[] = {
+       "topdown-total-slots",
+       "topdown-slots-retired",
+       "topdown-recovery-bubbles",
+       "topdown-fetch-bubbles",
+       "topdown-slots-issued",
+       NULL,
+};
+
  static struct perf_evlist      *evsel_list;
  
  static struct target target = {
@@ -112,6 +124,7 @@ static volatile pid_t               child_pid                       = -1;
  static bool                    null_run                        =  false;
  static int                     detailed_run                    =  0;
  static bool                    transaction_run;
+static bool                    topdown_run                     = false;
  static bool                    big_num                         =  true;
  static int                     big_num_opt                     =  -1;
  static const char              *csv_sep                        = NULL;
@@ -124,6 +137,7 @@ static unsigned int         initial_delay                   = 0;
  static unsigned int            unit_width                      = 4; /* strlen("unit") */
  static bool                    forever                         = false;
  static bool                    metric_only                     = false;
+static bool                    force_metric_only               = false;
  static struct timespec         ref_time;
  static struct cpu_map          *aggr_map;
  static aggr_get_id_t           aggr_get_id;
@@ -1520,6 +1534,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
         return 0;
  }
  
+/*
+ * Option callback for --metric-only.
+ *
+ * Sets force_metric_only to record that the option was given explicitly,
+ * so the --topdown handling does not override the user's choice, and sets
+ * metric_only to true unless the option was passed in its negated (unset)
+ * form. Always returns 0.
+ */
+static int enable_metric_only(const struct option *opt __maybe_unused,
+                             const char *s __maybe_unused, int unset)
+{
+       force_metric_only = true;
+       metric_only = !unset;
+       return 0;
+}
+
  static const struct option stat_options[] = {
         OPT_BOOLEAN('T', "transaction", &transaction_run,
                     "hardware transaction statistics"),
@@ -1578,8 +1600,10 @@ static const struct option stat_options[] = {
                      "aggregate counts per thread", AGGR_THREAD),
         OPT_UINTEGER('D', "delay", &initial_delay,
                      "ms to wait before starting measurement after program start"),
-       OPT_BOOLEAN(0, "metric-only", &metric_only,
-                       "Only print computed metrics. No raw values"),
+       OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+                       "Only print computed metrics. No raw values", enable_metric_only),
+       OPT_BOOLEAN(0, "topdown", &topdown_run,
+                       "measure topdown level 1 statistics"),
         OPT_END()
  };
  
@@ -1772,12 +1796,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
         return 0;
  }
  
+/*
+ * Build the event string for the topdown events.
+ *
+ * @attr:      NULL-terminated array of event names. It is compacted in
+ *             place so that only the names for which
+ *             pmu_have_event("cpu", name) is true remain.
+ * @str:       set to a newly malloc()ed, comma-separated list of the
+ *             remaining names, or to an empty string if none remain.
+ *             The caller is expected to free() it.
+ * @use_group: when true the list is wrapped in braces, i.e. "{a,b,c}".
+ *
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+static int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+       int off = 0;
+       int i;
+       int len = 0;
+       char *s;
+
+       /* Drop unsupported events, shifting the supported ones down by 'off'. */
+       for (i = 0; attr[i]; i++) {
+               if (pmu_have_event("cpu", attr[i])) {
+                       /* name plus one byte for the separator that follows it */
+                       len += strlen(attr[i]) + 1;
+                       attr[i - off] = attr[i];
+               } else
+                       off++;
+       }
+       attr[i - off] = NULL;
+
+       /* Extra bytes leave room for the optional '{' and the terminating NUL. */
+       *str = malloc(len + 1 + 2);
+       if (!*str)
+               return -1;
+       s = *str;
+       /* No supported event at all: hand back an empty string. */
+       if (i - off == 0) {
+               *s = 0;
+               return 0;
+       }
+       if (use_group)
+               *s++ = '{';
+       for (i = 0; attr[i]; i++) {
+               strcpy(s, attr[i]);
+               s += strlen(s);
+               *s++ = ',';
+       }
+       /* s[-1] is the trailing comma: turn it into '}' or the terminator. */
+       if (use_group) {
+               s[-1] = '}';
+               *s = 0;
+       } else
+               s[-1] = 0;
+       return 0;
+}
+
+/*
+ * Weak default for architectures that do not supply their own check
+ * (arch/x86/util/group.c defines a non-weak version): never ask for
+ * a group and never request a warning.
+ */
+__weak bool arch_topdown_check_group(bool *warn)
+{
+       *warn = false;
+       return false;
+}
+
+/*
+ * Weak default: prints nothing. arch/x86/util/group.c defines a
+ * non-weak version that prints the NMI watchdog warning.
+ */
+__weak void arch_topdown_group_warn(void)
+{
+}
+
  /*
   * Add default attributes, if there were no attributes specified or
   * if -d/--detailed, -d -d or -d -d -d is used:
   */
  static int add_default_attributes(void)
  {
+       int err;
         struct perf_event_attr default_attrs0[] = {
  
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
@@ -1896,7 +1970,6 @@ static int add_default_attributes(void)
                 return 0;
  
         if (transaction_run) {
-               int err;
                 if (pmu_have_event("cpu", "cycles-ct") &&
                     pmu_have_event("cpu", "el-start"))
                         err = parse_events(evsel_list, transaction_attrs, NULL);
@@ -1909,6 +1982,46 @@ static int add_default_attributes(void)
                 return 0;
         }
  
+       if (topdown_run) {
+               char *str = NULL;
+               bool warn = false;
+
+               if (stat_config.aggr_mode != AGGR_GLOBAL &&
+                   stat_config.aggr_mode != AGGR_CORE) {
+                       pr_err("top down event configuration requires --per-core mode\n");
+                       return -1;
+               }
+               stat_config.aggr_mode = AGGR_CORE;
+               if (nr_cgroups || !target__has_cpu(&target)) {
+                       pr_err("top down event configuration requires system-wide mode (-a)\n");
+                       return -1;
+               }
+
+               if (!force_metric_only)
+                       metric_only = true;
+               if (topdown_filter_events(topdown_attrs, &str,
+                               arch_topdown_check_group(&warn)) < 0) {
+                       pr_err("Out of memory\n");
+                       return -1;
+               }
+               if (topdown_attrs[0] && str) {
+                       if (warn)
+                               arch_topdown_group_warn();
+                       err = parse_events(evsel_list, str, NULL);
+                       if (err) {
+                               fprintf(stderr,
+                                       "Cannot set up top down events %s: %d\n",
+                                       str, err);
+                               free(str);
+                               return -1;
+                       }
+               } else {
+                       fprintf(stderr, "System does not support topdown\n");
+                       return -1;
+               }
+               free(str);
+       }
+
         if (!evsel_list->nr_entries) {
                 if (target__has_cpu(&target))
                         default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h

new file mode 100644 (file)

index 0000000..116debe
--- /dev/null
+++ b/tools/perf/util/group.h
@@ -0,0 +1,7 @@
+#ifndef GROUP_H
+#define GROUP_H 1
+
+/* Both prototypes use bool; include it here so the header is self-contained. */
+#include <stdbool.h>
+
+/*
+ * Report whether the topdown events may be put into one event group.
+ * An implementation may set *warn to ask the caller to invoke
+ * arch_topdown_group_warn().
+ */
+bool arch_topdown_check_group(bool *warn);
+
+/* Emit the architecture specific warning requested via *warn above. */
+void arch_topdown_group_warn(void);
+
+#endif
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l

index 01af1ee90a2767ab6c22942255b7840eacd36d69..3c15b33b2e84aceb85eb651364f2c15bbd76d6fc 100644 (file)
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -260,6 +260,7 @@ cycles-ct                                   { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
  cycles-t                                       { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
  mem-loads                                      { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
  mem-stores                                     { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+                                        { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
  
  L1-dcache|l1-d|l1d|L1-data             |
  L1-icache|l1-i|l1i|L1-instruction      |
author	Andi Kleen <ak@linux.intel.com>
	Mon, 30 May 2016 15:49:42 +0000 (12:49 -0300)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Mon, 6 Jun 2016 20:04:15 +0000 (17:04 -0300)
tools/perf/Documentation/perf-stat.txt		patch \| blob \| blame \| history
tools/perf/arch/x86/util/Build		patch \| blob \| blame \| history
tools/perf/arch/x86/util/group.c	[new file with mode: 0644]	patch \| blob
tools/perf/builtin-stat.c		patch \| blob \| blame \| history
tools/perf/util/group.h	[new file with mode: 0644]	patch \| blob
tools/perf/util/parse-events.l		patch \| blob \| blame \| history