perf tests: Add Intel CQM test
author: Matt Fleming <matt.fleming@intel.com>
Mon, 5 Oct 2015 14:40:21 +0000 (15:40 +0100)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 5 Oct 2015 19:56:07 +0000 (16:56 -0300)
Peter reports that it's possible to trigger a WARN_ON_ONCE() in the
Intel CQM code by combining a hardware event and an Intel CQM
(software) event into a group. Unfortunately, the perf tools are not
able to create this bundle and we need to manually construct a test
case.

For posterity, record Peter's proof of concept test case in tools/perf
so that it presents a model for how we can perform architecture
specific tests, or "arch tests", in perf in the future.

The particular issue triggered in the test case is that when the
counter for the hardware event overflows and triggers a PMI we'll read
both the hardware event and the software event counters.
Unfortunately, for CQM that involves performing an IPI to read the CQM
event counters on all sockets, which in NMI context triggers the
WARN_ON_ONCE().

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Vikas Shivappa <vikas.shivappa@intel.com>
Cc: Vince Weaver <vince@deater.net>
Link: http://lkml.kernel.org/r/1437490509-15373-1-git-send-email-matt@codeblueprint.co.uk
Link: http://lkml.kernel.org/n/tip-3p4ra0u8vzm7m289a1m799kf@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/arch/x86/include/arch-tests.h
tools/perf/arch/x86/tests/Build
tools/perf/arch/x86/tests/arch-tests.c
tools/perf/arch/x86/tests/intel-cqm.c [new file with mode: 0644]

index 5927cf224325c47027d14d052725eb033d145624..7ed00f4b09080fdb5379129d8ed952d43b258f1e 100644 (file)
@@ -5,6 +5,7 @@
 int test__rdpmc(void);
 int test__perf_time_to_tsc(void);
 int test__insn_x86(void);
+int test__intel_cqm_count_nmi_context(void);
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
index 8e2c5a38c3b90c18c2159b8f91e54500734cd9f3..cbb7e978166bcc67209f31a9600149c6a5fe30bc 100644 (file)
@@ -5,3 +5,4 @@ libperf-y += arch-tests.o
 libperf-y += rdpmc.o
 libperf-y += perf-time-to-tsc.o
 libperf-$(CONFIG_AUXTRACE) += insn-x86.o
+libperf-y += intel-cqm.o
index d116c217af993277df85009af2586642aa9e8e21..2218cb64f8409c4b7f487f846f766b11dadcf722 100644 (file)
@@ -23,6 +23,10 @@ struct test arch_tests[] = {
                .func = test__insn_x86,
        },
 #endif
+       {
+               .desc = "Test intel cqm nmi context read",
+               .func = test__intel_cqm_count_nmi_context,
+       },
        {
                .func = NULL,
        },
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
new file mode 100644 (file)
index 0000000..d28c1b6
--- /dev/null
@@ -0,0 +1,124 @@
+#include "tests/tests.h"
+#include "perf.h"
+#include "cloexec.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "arch-tests.h"
+
+#include <sys/mman.h>
+#include <string.h>
+
+/*
+ * Fork a child that spins forever so that it keeps consuming CPU
+ * cycles; the caller attaches its sampled cpu-cycles event to this
+ * child.
+ *
+ * Returns the child's pid in the parent, or -1 if fork() failed. The
+ * child never returns from this function; the parent is responsible
+ * for killing and reaping it.
+ */
+static pid_t spawn(void)
+{
+       pid_t pid;
+
+       pid = fork();
+       if (pid)
+               return pid;
+
+       /*
+        * Child: busy-loop until killed. The loop body is intentionally
+        * empty -- sleeping here would stop the cycles counter from
+        * advancing.
+        */
+       while (1)
+               ;
+
+       return 0; /* not reached */
+}
+
+/*
+ * Create an event group that contains both a sampled hardware
+ * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then
+ * wait for the hardware perf counter to overflow and generate a PMI,
+ * which triggers an event read for both of the events in the group.
+ *
+ * Since reading Intel CQM event counters requires sending SMP IPIs, the
+ * CQM pmu needs to handle the above situation gracefully, and return
+ * the last read counter value to avoid triggering a WARN_ON_ONCE() in
+ * smp_call_function_many() caused by sending IPIs from NMI context.
+ *
+ * Returns TEST_SKIP if the intel_cqm event cannot be parsed, TEST_FAIL
+ * if any setup step (evlist allocation, fork, perf_event_open, mmap)
+ * fails, and TEST_OK once the group has been left running for a second.
+ * The test does not inspect the sampled data itself.
+ */
+int test__intel_cqm_count_nmi_context(void)
+{
+       struct perf_evlist *evlist = NULL;
+       struct perf_evsel *evsel = NULL;
+       struct perf_event_attr pe;
+       int i, fd[2] = { -1, -1 }, flag, ret;
+       size_t mmap_len;
+       void *event;
+       pid_t pid;
+       int err = TEST_FAIL;
+
+       flag = perf_event_open_cloexec_flag();
+
+       evlist = perf_evlist__new();
+       if (!evlist) {
+               pr_debug("perf_evlist__new failed\n");
+               return TEST_FAIL;
+       }
+
+       /* Only used to look up the CQM pmu's type and config values. */
+       ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
+       if (ret) {
+               pr_debug("parse_events failed\n");
+               err = TEST_SKIP;
+               goto out;
+       }
+
+       evsel = perf_evlist__first(evlist);
+       if (!evsel) {
+               pr_debug("perf_evlist__first failed\n");
+               goto out;
+       }
+
+       /* Group leader: sampled cpu-cycles with a short period. */
+       memset(&pe, 0, sizeof(pe));
+       pe.size = sizeof(pe);
+
+       pe.type = PERF_TYPE_HARDWARE;
+       pe.config = PERF_COUNT_HW_CPU_CYCLES;
+       pe.read_format = PERF_FORMAT_GROUP;
+
+       pe.sample_period = 128;
+       pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;
+
+       pid = spawn();
+       if (pid < 0) {
+               pr_debug("failed to fork\n");
+               goto out;
+       }
+
+       /*
+        * From here on a spinning child exists, so every failure must
+        * go through out_child to kill and reap it.
+        */
+       fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag);
+       if (fd[0] < 0) {
+               pr_debug("failed to open event\n");
+               goto out_child;
+       }
+
+       /* Group member: the CQM event, attached to the leader fd[0]. */
+       memset(&pe, 0, sizeof(pe));
+       pe.size = sizeof(pe);
+
+       pe.type = evsel->attr.type;
+       pe.config = evsel->attr.config;
+
+       fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag);
+       if (fd[1] < 0) {
+               pr_debug("failed to open event\n");
+               goto out_child;
+       }
+
+       /*
+        * Pick a power-of-two number of pages + 1 for the meta-data
+        * page (struct perf_event_mmap_page). See tools/perf/design.txt.
+        */
+       mmap_len = page_size * 65;
+
+       event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0);
+       if (event == MAP_FAILED) {
+               pr_debug("failed to mmap %d\n", errno);
+               goto out_child;
+       }
+
+       /* Give the child time to overflow the counter and raise PMIs. */
+       sleep(1);
+
+       err = TEST_OK;
+
+       munmap(event, mmap_len);
+
+out_child:
+       for (i = 0; i < 2; i++) {
+               if (fd[i] >= 0)
+                       close(fd[i]);
+       }
+
+       kill(pid, SIGKILL);
+       wait(NULL);
+out:
+       perf_evlist__delete(evlist);
+       return err;
+}