From 34102009580a047c02b21f089f7fc7f65e605887 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 11 May 2016 14:58:23 +0200 Subject: [PATCH] x86/mce/AMD: Log Deferred Errors using SMCA MCA_DE{STAT,ADDR} registers Scalable MCA provides new registers for all banks for logging deferred errors: MCA_DESTAT and MCA_DEADDR. Deferred errors are always logged to these registers. Update the AMD deferred error handler to use these registers, if available. Signed-off-by: Yazen Ghannam [ Sanity-check __log_error() args, massage a bit. ] Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1462971509-3856-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 4 ++++ arch/x86/kernel/cpu/mcheck/mce_amd.c | 32 +++++++++++++++++++++------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 53ab69704771..8bf766ef0e18 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -110,6 +110,8 @@ #define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 #define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 +#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 +#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a #define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) @@ -117,6 +119,8 @@ #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) /* diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index c594680c36f2..d1b1e62f7cb9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -430,12 +430,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) deferred_error_interrupt_enable(c); } -static void __log_error(unsigned int bank, bool threshold_err, u64 misc) +static void +__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) { + u32 msr_status = msr_ops.status(bank); + u32 msr_addr = msr_ops.addr(bank); struct mce m; u64 status; - rdmsrl(msr_ops.status(bank), status); + WARN_ON_ONCE(deferred_err && threshold_err); + + if (deferred_err && mce_flags.smca) { + msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank); + msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank); + } + + rdmsrl(msr_status, status); + if (!(status & MCI_STATUS_VAL)) return; @@ -448,10 +459,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc) m.misc = misc; if (m.status & MCI_STATUS_ADDRV) - rdmsrl(msr_ops.addr(bank), m.addr); + rdmsrl(msr_addr, m.addr); mce_log(&m); - wrmsrl(msr_ops.status(bank), 0); + + wrmsrl(msr_status, 0); } static inline void __smp_deferred_error_interrupt(void) @@ -479,17 +491,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void) /* APIC interrupt handler for deferred errors */ static void amd_deferred_error_interrupt(void) { - u64 status; unsigned int bank; + u32 msr_status; + u64 status; for (bank = 0; bank < mca_cfg.banks; ++bank) { - rdmsrl(msr_ops.status(bank), status); + msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank) + : msr_ops.status(bank); + + rdmsrl(msr_status, status); if (!(status & MCI_STATUS_VAL) || !(status & MCI_STATUS_DEFERRED)) continue; - __log_error(bank, false, 0); + __log_error(bank, true, false, 0); break; } } @@ -544,7 +560,7 @@ static void amd_threshold_interrupt(void) return; log: - __log_error(bank, true, ((u64)high << 32) | low); + __log_error(bank, false, true, ((u64)high << 32) | low); } /* -- 2.20.1