From 5a50f52917011365a4a2c0f1d2cea0990c4465cd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Mar 2016 23:55:21 +0100 Subject: [PATCH] perf/x86/ibs: Fix race with IBS_STARTING state While tracing the IBS bits I saw the NMI hitting between clearing IBS_STARTING and the actual MSR writes to disable the counter. Since IBS_STARTING was cleared, the handler assumed these were spurious NMIs and because STOPPING wasn't set yet either, insta-triggered an "Unknown NMI". Cure this by clearing IBS_STARTING after disabling the hardware. Tested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: David Ahern Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/amd/ibs.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 7956d29762ef..c43a36d5fd6f 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -376,7 +376,13 @@ static void perf_ibs_start(struct perf_event *event, int flags) hwc->state = 0; perf_ibs_set_period(perf_ibs, hwc, &period); + /* + * Set STARTED before enabling the hardware, such that + * a subsequent NMI must observe it. Then clear STOPPING + * such that we don't consume NMIs by accident. + */ set_bit(IBS_STARTED, pcpu->state); + clear_bit(IBS_STOPPING, pcpu->state); perf_ibs_enable_event(perf_ibs, hwc, period >> 4); perf_event_update_userpage(event); @@ -390,7 +396,7 @@ static void perf_ibs_stop(struct perf_event *event, int flags) u64 config; int stopping; - stopping = test_and_clear_bit(IBS_STARTED, pcpu->state); + stopping = test_bit(IBS_STARTED, pcpu->state); if (!stopping && (hwc->state & PERF_HES_UPTODATE)) return; @@ -398,8 +404,24 @@ static void perf_ibs_stop(struct perf_event *event, int flags) rdmsrl(hwc->config_base, config); if (stopping) { + /* + * Set STOPPING before disabling the hardware, such that it + * must be visible to NMIs the moment we clear the EN bit, + * at which point we can generate an !VALID sample which + * we need to consume. + */ set_bit(IBS_STOPPING, pcpu->state); perf_ibs_disable_event(perf_ibs, hwc, config); + /* + * Clear STARTED after disabling the hardware; if it were + * cleared before an NMI hitting after the clear but before + * clearing the EN bit might think it a spurious NMI and not + * handle it. + * + * Clearing it after, however, creates the problem of the NMI + * handler seeing STARTED but not having a valid sample. + */ + clear_bit(IBS_STARTED, pcpu->state); WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; } @@ -527,20 +549,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) u64 *buf, *config, period; if (!test_bit(IBS_STARTED, pcpu->state)) { +fail: /* * Catch spurious interrupts after stopping IBS: After * disabling IBS there could be still incoming NMIs * with samples that even have the valid bit cleared. * Mark all this NMIs as handled. */ - return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0; + if (test_and_clear_bit(IBS_STOPPING, pcpu->state)) + return 1; + + return 0; } msr = hwc->config_base; buf = ibs_data.regs; rdmsrl(msr, *buf); if (!(*buf++ & perf_ibs->valid_mask)) - return 0; + goto fail; config = &ibs_data.regs[0]; perf_ibs_event_update(perf_ibs, event, config); -- 2.20.1