From f05c41a9c6057a0d5851ebc9589e3834fde1a4b6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 11 Sep 2012 18:57:43 +0200 Subject: [PATCH] MCE, AMD: Remove functional unit references Having the functional unit names in each bank decode is only misleading as this code supports multiple families and there's no guarantee the mapping between FUs and MCE banks will stay the same. And also, knowing the functional unit name doesn't help much since you end up looking at the respective BKDG anyway. So drop all FU references and use the MC bank numbers instead. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 186 ++++++++++++++++++++--------------------- drivers/edac/mce_amd.h | 5 +- 2 files changed, 94 insertions(+), 97 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index d0c372e30de4..6b38c1186922 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -64,7 +64,7 @@ EXPORT_SYMBOL_GPL(to_msgs); const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; EXPORT_SYMBOL_GPL(ii_msgs); -static const char * const f15h_ic_mce_desc[] = { +static const char * const f15h_mc1_mce_desc[] = { "UC during a demand linefill from L2", "Parity error during data load from IC", "Parity error for IC valid bit", @@ -84,7 +84,7 @@ static const char * const f15h_ic_mce_desc[] = { "fetch address FIFO" }; -static const char * const f15h_cu_mce_desc[] = { +static const char * const f15h_mc2_mce_desc[] = { "Fill ECC error on data fills", /* xec = 0x4 */ "Fill parity error on insn fills", "Prefetcher request FIFO parity error", @@ -101,7 +101,7 @@ static const char * const f15h_cu_mce_desc[] = { "PRB address parity error" }; -static const char * const nb_mce_desc[] = { +static const char * const mc4_mce_desc[] = { "DRAM ECC error detected on the NB", "CRC error detected on HT link", "Link-defined sync error packets detected on HT link", @@ -123,7 +123,7 @@ static const char * const nb_mce_desc[] = { "ECC Error in the Probe Filter directory" }; -static const char * const fr_ex_mce_desc[] = { +static const char * const mc5_mce_desc[] = { "CPU Watchdog timer expire", "Wakeup array dest tag", "AG payload array", @@ -139,7 +139,7 @@ static const char * const fr_ex_mce_desc[] = { "DE error occurred" }; -static bool f12h_dc_mce(u16 ec, u8 xec) +static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -157,26 +157,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec) return ret; } -static bool f10h_dc_mce(u16 ec, u8 xec) +static bool f10h_mc0_mce(u16 ec, u8 xec) { if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { pr_cont("during data scrub.\n"); return true; } - return f12h_dc_mce(ec, xec); + return f12h_mc0_mce(ec, xec); } -static bool k8_dc_mce(u16 ec, u8 xec) +static bool k8_mc0_mce(u16 ec, u8 xec) { if (BUS_ERROR(ec)) { pr_cont("during system linefill.\n"); return true; } - return f10h_dc_mce(ec, xec); + return f10h_mc0_mce(ec, xec); } -static bool f14h_dc_mce(u16 ec, u8 xec) +static bool f14h_mc0_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; @@ -228,7 +228,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec) return ret; } -static bool f15h_dc_mce(u16 ec, u8 xec) +static bool f15h_mc0_mce(u16 ec, u8 xec) { bool ret = true; @@ -275,12 +275,12 @@ static bool f15h_dc_mce(u16 ec, u8 xec) return ret; } -static void amd_decode_dc_mce(struct mce *m) +static void decode_mc0_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Data Cache Error: "); + pr_emerg(HW_ERR "MC0 Error: "); /* TLB error signatures are the same across families */ if (TLB_ERROR(ec)) { @@ -290,13 +290,13 @@ static void amd_decode_dc_mce(struct mce *m) : (xec ? "multimatch" : "parity"))); return; } - } else if (fam_ops->dc_mce(ec, xec)) + } else if (fam_ops->mc0_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); + pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); } -static bool k8_ic_mce(u16 ec, u8 xec) +static bool k8_mc1_mce(u16 ec, u8 xec) { u8 ll = LL(ec); bool ret = true; @@ -330,7 +330,7 @@ static bool k8_ic_mce(u16 ec, u8 xec) return ret; } -static bool f14h_ic_mce(u16 ec, u8 xec) +static bool f14h_mc1_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; @@ -349,7 +349,7 @@ static bool f14h_ic_mce(u16 ec, u8 xec) return ret; } -static bool f15h_ic_mce(u16 ec, u8 xec) +static bool f15h_mc1_mce(u16 ec, u8 xec) { bool ret = true; @@ -358,19 +358,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec) switch (xec) { case 0x0 ... 0xa: - pr_cont("%s.\n", f15h_ic_mce_desc[xec]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec]); break; case 0xd: - pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]); break; case 0x10: - pr_cont("%s.\n", f15h_ic_mce_desc[xec-4]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]); break; case 0x11 ... 0x14: - pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); + pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]); break; default: @@ -379,12 +379,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec) return ret; } -static void amd_decode_ic_mce(struct mce *m) +static void decode_mc1_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Instruction Cache Error: "); + pr_emerg(HW_ERR "MC1 Error: "); if (TLB_ERROR(ec)) pr_cont("%s TLB %s.\n", LL_MSG(ec), @@ -393,18 +393,18 @@ static void amd_decode_ic_mce(struct mce *m) bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); - } else if (fam_ops->ic_mce(ec, xec)) + } else if (fam_ops->mc1_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); + pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n"); } -static void amd_decode_bu_mce(struct mce *m) +static void decode_mc2_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Bus Unit Error"); + pr_emerg(HW_ERR "MC2 Error"); if (xec == 0x1) pr_cont(" in the write data buffers.\n"); @@ -429,24 +429,24 @@ static void amd_decode_bu_mce(struct mce *m) pr_cont(": %s parity/ECC error during data " "access from L2.\n", R4_MSG(ec)); else - goto wrong_bu_mce; + goto wrong_mc2_mce; } else - goto wrong_bu_mce; + goto wrong_mc2_mce; } else - goto wrong_bu_mce; + goto wrong_mc2_mce; return; -wrong_bu_mce: - pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); + wrong_mc2_mce: + pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n"); } -static void amd_decode_cu_mce(struct mce *m) +static void decode_f15_mc2_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Combined Unit Error: "); + pr_emerg(HW_ERR "MC2 Error: "); if (TLB_ERROR(ec)) { if (xec == 0x0) @@ -454,63 +454,63 @@ static void amd_decode_cu_mce(struct mce *m) else if (xec == 0x1) pr_cont("Poison data provided for TLB fill.\n"); else - goto wrong_cu_mce; + goto wrong_f15_mc2_mce; } else if (BUS_ERROR(ec)) { if (xec > 2) - goto wrong_cu_mce; + goto wrong_f15_mc2_mce; pr_cont("Error during attempted NB data read.\n"); } else if (MEM_ERROR(ec)) { switch (xec) { case 0x4 ... 0xc: - pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); + pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]); break; case 0x10 ... 0x14: - pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); + pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]); break; default: - goto wrong_cu_mce; + goto wrong_f15_mc2_mce; } } return; -wrong_cu_mce: - pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); + wrong_f15_mc2_mce: + pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n"); } -static void amd_decode_ls_mce(struct mce *m) +static void decode_mc3_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); if (boot_cpu_data.x86 >= 0x14) { - pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," + pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family," " please report on LKML.\n"); return; } - pr_emerg(HW_ERR "Load Store Error"); + pr_emerg(HW_ERR "MC3 Error"); if (xec == 0x0) { u8 r4 = R4(ec); if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) - goto wrong_ls_mce; + goto wrong_mc3_mce; pr_cont(" during %s.\n", R4_MSG(ec)); } else - goto wrong_ls_mce; + goto wrong_mc3_mce; return; -wrong_ls_mce: - pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); + wrong_mc3_mce: + pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n"); } -void amd_decode_nb_mce(struct mce *m) +static void decode_mc4_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; int node_id = amd_get_nb_id(m->extcpu); @@ -518,7 +518,7 @@ void amd_decode_nb_mce(struct mce *m) u8 xec = XEC(m->status, 0x1f); u8 offset = 0; - pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id); + pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id); switch (xec) { case 0x0 ... 0xe: @@ -527,9 +527,9 @@ void amd_decode_nb_mce(struct mce *m) if (xec == 0x0 || xec == 0x8) { /* no ECCs on F11h */ if (c->x86 == 0x11) - goto wrong_nb_mce; + goto wrong_mc4_mce; - pr_cont("%s.\n", nb_mce_desc[xec]); + pr_cont("%s.\n", mc4_mce_desc[xec]); if (nb_bus_decoder) nb_bus_decoder(node_id, m); @@ -543,14 +543,14 @@ void amd_decode_nb_mce(struct mce *m) else if (BUS_ERROR(ec)) pr_cont("DMA Exclusion Vector Table Walk error.\n"); else - goto wrong_nb_mce; + goto wrong_mc4_mce; return; case 0x19: if (boot_cpu_data.x86 == 0x15) pr_cont("Compute Unit Data Error.\n"); else - goto wrong_nb_mce; + goto wrong_mc4_mce; return; case 0x1c ... 0x1f: @@ -558,46 +558,44 @@ void amd_decode_nb_mce(struct mce *m) break; default: - goto wrong_nb_mce; + goto wrong_mc4_mce; } - pr_cont("%s.\n", nb_mce_desc[xec - offset]); + pr_cont("%s.\n", mc4_mce_desc[xec - offset]); return; -wrong_nb_mce: - pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); + wrong_mc4_mce: + pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n"); } -EXPORT_SYMBOL_GPL(amd_decode_nb_mce); -static void amd_decode_fr_mce(struct mce *m) +static void decode_mc5_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; u8 xec = XEC(m->status, xec_mask); if (c->x86 == 0xf || c->x86 == 0x11) - goto wrong_fr_mce; + goto wrong_mc5_mce; - pr_emerg(HW_ERR "%s Error: ", - (c->x86 == 0x15 ? "Execution Unit" : "FIROB")); + pr_emerg(HW_ERR "MC5 Error: "); if (xec == 0x0 || xec == 0xc) - pr_cont("%s.\n", fr_ex_mce_desc[xec]); + pr_cont("%s.\n", mc5_mce_desc[xec]); else if (xec < 0xd) - pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); + pr_cont("%s parity error.\n", mc5_mce_desc[xec]); else - goto wrong_fr_mce; + goto wrong_mc5_mce; return; -wrong_fr_mce: - pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); + wrong_mc5_mce: + pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n"); } -static void amd_decode_fp_mce(struct mce *m) +static void decode_mc6_mce(struct mce *m) { u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Floating Point Unit Error: "); + pr_emerg(HW_ERR "MC6 Error: "); switch (xec) { case 0x1: @@ -621,7 +619,7 @@ static void amd_decode_fp_mce(struct mce *m) break; default: - goto wrong_fp_mce; + goto wrong_mc6_mce; break; } @@ -629,8 +627,8 @@ static void amd_decode_fp_mce(struct mce *m) return; -wrong_fp_mce: - pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); + wrong_mc6_mce: + pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); } static inline void amd_decode_err_code(u16 ec) @@ -703,34 +701,34 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) switch (m->bank) { case 0: - amd_decode_dc_mce(m); + decode_mc0_mce(m); break; case 1: - amd_decode_ic_mce(m); + decode_mc1_mce(m); break; case 2: if (c->x86 == 0x15) - amd_decode_cu_mce(m); + decode_f15_mc2_mce(m); else - amd_decode_bu_mce(m); + decode_mc2_mce(m); break; case 3: - amd_decode_ls_mce(m); + decode_mc3_mce(m); break; case 4: - amd_decode_nb_mce(m); + decode_mc4_mce(m); break; case 5: - amd_decode_fr_mce(m); + decode_mc5_mce(m); break; case 6: - amd_decode_fp_mce(m); + decode_mc6_mce(m); break; default: @@ -763,35 +761,35 @@ static int __init mce_amd_init(void) switch (c->x86) { case 0xf: - fam_ops->dc_mce = k8_dc_mce; - fam_ops->ic_mce = k8_ic_mce; + fam_ops->mc0_mce = k8_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; break; case 0x10: - fam_ops->dc_mce = f10h_dc_mce; - fam_ops->ic_mce = k8_ic_mce; + fam_ops->mc0_mce = f10h_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; break; case 0x11: - fam_ops->dc_mce = k8_dc_mce; - fam_ops->ic_mce = k8_ic_mce; + fam_ops->mc0_mce = k8_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; break; case 0x12: - fam_ops->dc_mce = f12h_dc_mce; - fam_ops->ic_mce = k8_ic_mce; + fam_ops->mc0_mce = f12h_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; break; case 0x14: nb_err_cpumask = 0x3; - fam_ops->dc_mce = f14h_dc_mce; - fam_ops->ic_mce = f14h_ic_mce; + fam_ops->mc0_mce = f14h_mc0_mce; + fam_ops->mc1_mce = f14h_mc1_mce; break; case 0x15: xec_mask = 0x1f; - fam_ops->dc_mce = f15h_dc_mce; - fam_ops->ic_mce = f15h_ic_mce; + fam_ops->mc0_mce = f15h_mc0_mce; + fam_ops->mc1_mce = f15h_mc1_mce; break; default: diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 8c87a5e87057..942f382ecb64 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -78,14 +78,13 @@ extern const char * const ii_msgs[]; * per-family decoder ops */ struct amd_decoder_ops { - bool (*dc_mce)(u16, u8); - bool (*ic_mce)(u16, u8); + bool (*mc0_mce)(u16, u8); + bool (*mc1_mce)(u16, u8); }; void amd_report_gart_errors(bool); void amd_register_ecc_decoder(void (*f)(int, struct mce *)); void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)); -void amd_decode_nb_mce(struct mce *); int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); #endif /* _EDAC_MCE_AMD_H */ -- 2.20.1