perf/x86/ibs: Fix race with IBS_STARTING state
authorPeter Zijlstra <peterz@infradead.org>
Wed, 16 Mar 2016 22:55:21 +0000 (23:55 +0100)
committerIngo Molnar <mingo@kernel.org>
Mon, 21 Mar 2016 08:08:15 +0000 (09:08 +0100)
While tracing the IBS bits I saw the NMI hitting between clearing
IBS_STARTING and the actual MSR writes to disable the counter.

Since IBS_STARTING was cleared, the handler assumed these were spurious
NMIs and because STOPPING wasn't set yet either, insta-triggered an
"Unknown NMI".

Cure this by clearing IBS_STARTING after disabling the hardware.

Tested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/events/amd/ibs.c

index 7956d29762ef5123ba54d2c51bc19edf412ab067..c43a36d5fd6f0807c53a353756cf60260d7b14a1 100644 (file)
@@ -376,7 +376,13 @@ static void perf_ibs_start(struct perf_event *event, int flags)
        hwc->state = 0;
 
        perf_ibs_set_period(perf_ibs, hwc, &period);
+       /*
+        * Set STARTED before enabling the hardware, such that
+        * a subsequent NMI must observe it. Then clear STOPPING
+        * such that we don't consume NMIs by accident.
+        */
        set_bit(IBS_STARTED, pcpu->state);
+       clear_bit(IBS_STOPPING, pcpu->state);
        perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
 
        perf_event_update_userpage(event);
@@ -390,7 +396,7 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
        u64 config;
        int stopping;
 
-       stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+       stopping = test_bit(IBS_STARTED, pcpu->state);
 
        if (!stopping && (hwc->state & PERF_HES_UPTODATE))
                return;
@@ -398,8 +404,24 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
        rdmsrl(hwc->config_base, config);
 
        if (stopping) {
+               /*
+                * Set STOPPING before disabling the hardware, such that it
+                * must be visible to NMIs the moment we clear the EN bit,
+                * at which point we can generate an !VALID sample which
+                * we need to consume.
+                */
                set_bit(IBS_STOPPING, pcpu->state);
                perf_ibs_disable_event(perf_ibs, hwc, config);
+               /*
+                * Clear STARTED after disabling the hardware; if it were
+                * cleared before an NMI hitting after the clear but before
+                * clearing the EN bit might think it a spurious NMI and not
+                * handle it.
+                *
+                * Clearing it after, however, creates the problem of the NMI
+                * handler seeing STARTED but not having a valid sample.
+                */
+               clear_bit(IBS_STARTED, pcpu->state);
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;
        }
@@ -527,20 +549,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
        u64 *buf, *config, period;
 
        if (!test_bit(IBS_STARTED, pcpu->state)) {
+fail:
                /*
                 * Catch spurious interrupts after stopping IBS: After
                 * disabling IBS there could be still incoming NMIs
                 * with samples that even have the valid bit cleared.
                 * Mark all this NMIs as handled.
                 */
-               return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+               if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
+                       return 1;
+
+               return 0;
        }
 
        msr = hwc->config_base;
        buf = ibs_data.regs;
        rdmsrl(msr, *buf);
        if (!(*buf++ & perf_ibs->valid_mask))
-               return 0;
+               goto fail;
 
        config = &ibs_data.regs[0];
        perf_ibs_event_update(perf_ibs, event, config);