perf stat record: Add record command
authorJiri Olsa <jolsa@kernel.org>
Thu, 5 Nov 2015 14:40:46 +0000 (15:40 +0100)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Thu, 17 Dec 2015 18:15:15 +0000 (15:15 -0300)
Add 'perf stat record' command support. It creates simple (header only)
perf.data file ATM.

The record command can be specified anywhere among the stat options. All
stat command options are valid for the stat record command, with the
exception of '-o': when specified for the record command, it denotes the
perf data file name.

Committer note:

Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
while avoiding that older tools show confusing messages, for instance,
with sample_type = 0, we get:

  $ perf stat record usleep 1

   Performance counter stats for 'usleep 1':

          0.630237      task-clock (msec)         #    0.528 CPUs utilized
                 1      context-switches          #    0.002 M/sec
                 0      cpu-migrations            #    0.000 K/sec
                52      page-faults               #    0.083 M/sec
           978,312      cycles                    #    1.552 GHz
           671,931      stalled-cycles-frontend   #   68.68% frontend cycles idle
   <not supported>      stalled-cycles-backend
           646,379      instructions              #    0.66  insns per cycle
                                                  #    1.04  stalled cycles per insn
           131,046      branches                  #  207.931 M/sec
             7,073      branch-misses             #    5.40% of all branches

       0.001193240 seconds time elapsed

  $ oldperf evlist
  WARNING: The perf.data file's data size field is 0 which is unexpected.
  Was the 'perf record' command properly terminated?
  non matching sample_type
  $

While with sample_type set to PERF_SAMPLE_IDENTIFIER, after we re-run 'perf
stat record usleep' we get:

  $ oldperf evlist
  WARNING: The perf.data file's data size field is 0 which is unexpected.
  Was the 'perf record' command properly terminated?
  task-clock
  context-switches
  cpu-migrations
  page-faults
  cycles
  stalled-cycles-frontend
  stalled-cycles-backend
  instructions
  branches
  branch-misses
  $

Which at least shows the names of the events in the perf.data file.

Additionally, such files, when passed to 'perf report' will produce:

  $ oldperf report --stdio
  WARNING: The perf.data file's data size field is 0 which is unexpected.
  Was the 'perf record' command properly terminated?
  Warning:
  Kernel address maps (/proc/{kallsyms,modules}) were restricted.

  Check /proc/sys/kernel/kptr_restrict before running 'perf record'.

  As no suitable kallsyms nor vmlinux was found, kernel samples
  can't be resolved.

  Samples in kernel modules can't be resolved as well.

  Error:
  The perf.data file has no samples!
  # To display the perf.data header info, please use --header/--header-only options.
  #
  $

Which is confusing and can be solved by just adding the kernel mmap record,
which will also remove that warning about the data size field being equal to
zero, after generating the mmap record:

  $ perf stat record usleep 1

   Performance counter stats for 'usleep 1':

          0.600796      task-clock (msec)         #    0.478 CPUs utilized
                 1      context-switches          #    0.002 M/sec
                 0      cpu-migrations            #    0.000 K/sec
                54      page-faults               #    0.090 M/sec
           886,844      cycles                    #    1.476 GHz
           582,169      stalled-cycles-frontend   #   65.65% frontend cycles idle
   <not supported>      stalled-cycles-backend
           638,344      instructions              #    0.72  insns per cycle
                                                  #    0.91  stalled cycles per insn
           130,204      branches                  #  216.719 M/sec
             7,500      branch-misses             #    5.76% of all branches

       0.001255897 seconds time elapsed

  $ oldperf evlist
  task-clock
  context-switches
  cpu-migrations
  page-faults
  cycles
  stalled-cycles-frontend
  stalled-cycles-backend
  instructions
  branches
  branch-misses
  $ oldperf report --stdio
  Error:
  The perf.data file has no samples!
  # To display the perf.data header info, please use --header/--header-only options.
  #
  [acme@zoo linux]$

No warnings, sensible output about what are the events in the perf.data file and also
a "file has no samples" message, which indeed it doesn't.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1446734469-11352-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-stat.txt
tools/perf/builtin-stat.c

index 4e074a6608269793d52a2ea4f28061532bb9798b..70eee1c2c444e762c75405edfe9ee93e768416f0 100644 (file)
@@ -10,6 +10,7 @@ SYNOPSIS
 [verse]
 'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
 'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>]
 
 DESCRIPTION
 -----------
@@ -22,6 +23,8 @@ OPTIONS
 <command>...::
        Any command you can specify in a shell.
 
+record::
+       See STAT RECORD.
 
 -e::
 --event=::
@@ -159,6 +162,15 @@ filter out the startup phase of the program, which is often very different.
 
 Print statistics of transactional execution if supported.
 
+STAT RECORD
+-----------
+Stores stat data into perf data file.
+
+-o file::
+--output file::
+Output file name.
+
+
 EXAMPLES
 --------
 
index bbf42eefd5e5b708f419e8c3e24a7d1f8aaf9dde..af2a3bf659f76e096ca53a09f922cc3e4cf77f5e 100644 (file)
@@ -59,6 +59,7 @@
 #include "util/thread.h"
 #include "util/thread_map.h"
 #include "util/counts.h"
+#include "util/session.h"
 
 #include <stdlib.h>
 #include <sys/prctl.h>
@@ -126,6 +127,16 @@ static bool                        append_file;
 static const char              *output_name;
 static int                     output_fd;
 
+struct perf_stat {
+       bool                     record;
+       struct perf_data_file    file;
+       struct perf_session     *session;
+       u64                      bytes_written;
+};
+
+static struct perf_stat                perf_stat;
+#define STAT_RECORD            perf_stat.record
+
 static volatile int done = 0;
 
 static struct perf_stat_config stat_config = {
@@ -166,7 +177,11 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
         * like tracepoints. Clear it up for counting.
         */
        attr->sample_period = 0;
-       attr->sample_type   = 0;
+       /*
+        * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
+        * while avoiding that older tools show confusing messages.
+        */
+       attr->sample_type   = PERF_SAMPLE_IDENTIFIER;
 
        /*
         * Disabling all counters initially, they will be enabled
@@ -202,6 +217,26 @@ static inline int nsec_counter(struct perf_evsel *evsel)
        return 0;
 }
 
+static int perf_stat__write(struct perf_stat *stat, void *bf, size_t size)
+{
+       if (perf_data_file__write(stat->session->file, bf, size) < 0) {
+               pr_err("failed to write perf data, error: %m\n");
+               return -1;
+       }
+
+       stat->bytes_written += size;
+       return 0;
+}
+
+static int process_synthesized_event(struct perf_tool *tool,
+                                    union perf_event *event,
+                                    struct perf_sample *sample __maybe_unused,
+                                    struct machine *machine __maybe_unused)
+{
+       struct perf_stat *stat = (void *)tool;
+       return perf_stat__write(stat, event, event->header.size);
+}
+
 /*
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
@@ -361,6 +396,15 @@ static int __run_perf_stat(int argc, const char **argv)
                return -1;
        }
 
+       if (STAT_RECORD) {
+               int err, fd = perf_data_file__fd(&perf_stat.file);
+
+               err = perf_session__write_header(perf_stat.session, evsel_list,
+                                                fd, false);
+               if (err < 0)
+                       return err;
+       }
+
        /*
         * Enable counters and exec the command:
         */
@@ -1261,6 +1305,38 @@ static int add_default_attributes(void)
        return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
 }
 
+static const char * const record_usage[] = {
+       "perf stat record [<options>]",
+       NULL,
+};
+
+static int __cmd_record(int argc, const char **argv)
+{
+       struct perf_session *session;
+       struct perf_data_file *file = &perf_stat.file;
+
+       argc = parse_options(argc, argv, stat_options, record_usage,
+                            PARSE_OPT_STOP_AT_NON_OPTION);
+
+       if (output_name)
+               file->path = output_name;
+
+       session = perf_session__new(file, false, NULL);
+       if (session == NULL) {
+               pr_err("Perf session creation failed.\n");
+               return -1;
+       }
+
+       /* No pipe support ATM */
+       if (perf_stat.file.is_pipe)
+               return -EINVAL;
+
+       session->evlist   = evsel_list;
+       perf_stat.session = session;
+       perf_stat.record  = true;
+       return argc;
+}
+
 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 {
        const char * const stat_usage[] = {
@@ -1271,6 +1347,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        const char *mode;
        FILE *output = stderr;
        unsigned int interval;
+       const char * const stat_subcommands[] = { "record" };
 
        setlocale(LC_ALL, "");
 
@@ -1278,12 +1355,22 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        if (evsel_list == NULL)
                return -ENOMEM;
 
-       argc = parse_options(argc, argv, stat_options, stat_usage,
-               PARSE_OPT_STOP_AT_NON_OPTION);
+       argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
+                                       (const char **) stat_usage,
+                                       PARSE_OPT_STOP_AT_NON_OPTION);
+
+       if (argc && !strncmp(argv[0], "rec", 3)) {
+               argc = __cmd_record(argc, argv);
+               if (argc < 0)
+                       return -1;
+       }
 
        interval = stat_config.interval;
 
-       if (output_name && strcmp(output_name, "-"))
+       /*
+        * For record command the -o is already taken care of.
+        */
+       if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
                output = NULL;
 
        if (output_name && output_fd) {
@@ -1450,6 +1537,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        if (!forever && status != -1 && !interval)
                print_counters(NULL, argc, argv);
 
+       if (STAT_RECORD) {
+               /*
+                * We synthesize the kernel mmap record just so that older tools
+                * don't emit warnings about not being able to resolve symbols
+                * due to /proc/sys/kernel/kptr_restrict settings and instead provide
+                * a saner message about no samples being in the perf.data file.
+                *
+                * This also serves to suppress a warning about f_header.data.size == 0
+                * in header.c.  -acme
+                */
+               int fd = perf_data_file__fd(&perf_stat.file);
+               int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
+                                                            process_synthesized_event,
+                                                            &perf_stat.session->machines.host);
+               if (err) {
+                       pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
+                                  "older tools may produce warnings about this file.\n");
+               }
+
+               perf_stat.session->header.data_size += perf_stat.bytes_written;
+               perf_session__write_header(perf_stat.session, evsel_list, fd, true);
+
+               perf_session__delete(perf_stat.session);
+       }
+
        perf_stat__exit_aggr_mode();
        perf_evlist__free_stats(evsel_list);
 out: