x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types
authorYazen Ghannam <Yazen.Ghannam@amd.com>
Mon, 12 Sep 2016 07:59:34 +0000 (09:59 +0200)
committerThomas Gleixner <tglx@linutronix.de>
Tue, 13 Sep 2016 13:23:10 +0000 (15:23 +0200)
Scalable MCA defines a number of IP types. An MCA bank on an SMCA
system is defined as one of these IP types. A bank's type is uniquely
identified by the combination of the HWID and MCATYPE values read from
its MCA_IPID register.

Add the required tables in order to be able to lookup error descriptions
based on a bank's type and the error's extended error code.

[ bp: Align comments, simplify a bit. ]

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1472741832-1690-1-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/x86/include/asm/mce.h
arch/x86/kernel/cpu/mcheck/mce_amd.c
drivers/edac/mce_amd.c

index 21bc5a3a4c890756105cba931bb4798aa8b01071..9bd7ff5ffbcccd61ed36e2615a877d68003c222c 100644 (file)
@@ -337,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected,
  * Scalable MCA.
  */
 #ifdef CONFIG_X86_MCE_AMD
-enum amd_ip_types {
-       SMCA_F17H_CORE = 0,     /* Core errors */
-       SMCA_DF,                /* Data Fabric */
-       SMCA_UMC,               /* Unified Memory Controller */
-       SMCA_PB,                /* Parameter Block */
-       SMCA_PSP,               /* Platform Security Processor */
-       SMCA_SMU,               /* System Management Unit */
-       N_AMD_IP_TYPES
-};
-
-struct amd_hwid {
-       const char *name;
-       unsigned int hwid;
-};
 
-extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
-
-enum amd_core_mca_blocks {
+/* These may be used by multiple smca_hwid_mcatypes */
+enum smca_bank_types {
        SMCA_LS = 0,    /* Load Store */
        SMCA_IF,        /* Instruction Fetch */
-       SMCA_L2_CACHE,  /* L2 cache */
-       SMCA_DE,        /* Decoder unit */
-       RES,            /* Reserved */
-       SMCA_EX,        /* Execution unit */
+       SMCA_L2_CACHE,  /* L2 Cache */
+       SMCA_DE,        /* Decoder Unit */
+       SMCA_EX,        /* Execution Unit */
        SMCA_FP,        /* Floating Point */
-       SMCA_L3_CACHE,  /* L3 cache */
-       N_CORE_MCA_BLOCKS
+       SMCA_L3_CACHE,  /* L3 Cache */
+       SMCA_CS,        /* Coherent Slave */
+       SMCA_PIE,       /* Power, Interrupts, etc. */
+       SMCA_UMC,       /* Unified Memory Controller */
+       SMCA_PB,        /* Parameter Block */
+       SMCA_PSP,       /* Platform Security Processor */
+       SMCA_SMU,       /* System Management Unit */
+       N_SMCA_BANK_TYPES
+};
+
+struct smca_bank_name {
+       const char *name;       /* Short name for sysfs */
+       const char *long_name;  /* Long name for pretty-printing */
 };
 
-extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
+
+#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
 
-enum amd_df_mca_blocks {
-       SMCA_CS = 0,    /* Coherent Slave */
-       SMCA_PIE,       /* Power management, Interrupts, etc */
-       N_DF_BLOCKS
+struct smca_hwid_mcatype {
+       unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
+       u32 hwid_mcatype;       /* (hwid,mcatype) tuple */
+       u32 xec_bitmap;         /* Bitmap of valid ExtErrorCodes; current max is 21. */
 };
 
-extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+struct smca_bank_info {
+       struct smca_hwid_mcatype *type;
+       u32 type_instance;
+};
+
+extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
+
 #endif
 
 #endif /* _ASM_X86_MCE_H */
index 9da92fb2e07378291c266c441ada28ce2e89143c..3b74b62d0808d6443091201f83a66403fb37ad3c 100644 (file)
@@ -63,34 +63,55 @@ static const char * const th_names[] = {
        "execution_unit",
 };
 
-/* Define HWID to IP type mappings for Scalable MCA */
-struct amd_hwid amd_hwids[] = {
-       [SMCA_F17H_CORE]        = { "f17h_core",        0xB0 },
-       [SMCA_DF]               = { "data_fabric",      0x2E },
-       [SMCA_UMC]              = { "umc",              0x96 },
-       [SMCA_PB]               = { "param_block",      0x5 },
-       [SMCA_PSP]              = { "psp",              0xFF },
-       [SMCA_SMU]              = { "smu",              0x1 },
+struct smca_bank_name smca_bank_names[] = {
+       [SMCA_LS]       = { "load_store",       "Load Store Unit" },
+       [SMCA_IF]       = { "insn_fetch",       "Instruction Fetch Unit" },
+       [SMCA_L2_CACHE] = { "l2_cache",         "L2 Cache" },
+       [SMCA_DE]       = { "decode_unit",      "Decode Unit" },
+       [SMCA_EX]       = { "execution_unit",   "Execution Unit" },
+       [SMCA_FP]       = { "floating_point",   "Floating Point Unit" },
+       [SMCA_L3_CACHE] = { "l3_cache",         "L3 Cache" },
+       [SMCA_CS]       = { "coherent_slave",   "Coherent Slave" },
+       [SMCA_PIE]      = { "pie",              "Power, Interrupts, etc." },
+       [SMCA_UMC]      = { "umc",              "Unified Memory Controller" },
+       [SMCA_PB]       = { "param_block",      "Parameter Block" },
+       [SMCA_PSP]      = { "psp",              "Platform Security Processor" },
+       [SMCA_SMU]      = { "smu",              "System Management Unit" },
 };
-EXPORT_SYMBOL_GPL(amd_hwids);
-
-const char * const amd_core_mcablock_names[] = {
-       [SMCA_LS]               = "load_store",
-       [SMCA_IF]               = "insn_fetch",
-       [SMCA_L2_CACHE]         = "l2_cache",
-       [SMCA_DE]               = "decode_unit",
-       [RES]                   = "",
-       [SMCA_EX]               = "execution_unit",
-       [SMCA_FP]               = "floating_point",
-       [SMCA_L3_CACHE]         = "l3_cache",
-};
-EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+EXPORT_SYMBOL_GPL(smca_bank_names);
+
+static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
+       /* { bank_type, hwid_mcatype, xec_bitmap } */
+
+       /* ZN Core (HWID=0xB0) MCA types */
+       { SMCA_LS,       HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+       { SMCA_IF,       HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
+       { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
+       { SMCA_DE,       HWID_MCATYPE(0xB0, 0x3), 0x1FF },
+       /* HWID 0xB0 MCATYPE 0x4 is Reserved */
+       { SMCA_EX,       HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+       { SMCA_FP,       HWID_MCATYPE(0xB0, 0x6), 0x7F },
+       { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
+
+       /* Data Fabric MCA types */
+       { SMCA_CS,       HWID_MCATYPE(0x2E, 0x0), 0x1FF },
+       { SMCA_PIE,      HWID_MCATYPE(0x2E, 0x1), 0xF },
+
+       /* Unified Memory Controller MCA type */
+       { SMCA_UMC,      HWID_MCATYPE(0x96, 0x0), 0x3F },
+
+       /* Parameter Block MCA type */
+       { SMCA_PB,       HWID_MCATYPE(0x05, 0x0), 0x1 },
 
-const char * const amd_df_mcablock_names[] = {
-       [SMCA_CS]               = "coherent_slave",
-       [SMCA_PIE]              = "pie",
+       /* Platform Security Processor MCA type */
+       { SMCA_PSP,      HWID_MCATYPE(0xFF, 0x0), 0x1 },
+
+       /* System Management Unit MCA type */
+       { SMCA_SMU,      HWID_MCATYPE(0x01, 0x0), 0x1 },
 };
-EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
+struct smca_bank_info smca_banks[MAX_NR_BANKS];
+EXPORT_SYMBOL_GPL(smca_banks);
 
 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
@@ -108,6 +129,36 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
  * CPU Initialization
  */
 
+static void get_smca_bank_info(unsigned int bank)
+{
+       unsigned int i, hwid_mcatype, cpu = smp_processor_id();
+       struct smca_hwid_mcatype *type;
+       u32 high, instanceId;
+       u16 hwid, mcatype;
+
+       /* Collect bank_info using CPU 0 for now. */
+       if (cpu)
+               return;
+
+       if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
+               pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
+               return;
+       }
+
+       hwid = high & MCI_IPID_HWID;
+       mcatype = (high & MCI_IPID_MCATYPE) >> 16;
+       hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
+
+       for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
+               type = &smca_hwid_mcatypes[i];
+               if (hwid_mcatype == type->hwid_mcatype) {
+                       smca_banks[bank].type = type;
+                       smca_banks[bank].type_instance = instanceId;
+                       break;
+               }
+       }
+}
+
 struct thresh_restart {
        struct threshold_block  *b;
        int                     reset;
@@ -425,6 +476,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
        int offset = -1;
 
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
+               if (mce_flags.smca)
+                       get_smca_bank_info(bank);
+
                for (block = 0; block < NR_BLOCKS; ++block) {
                        address = get_block_address(cpu, address, low, high, bank, block);
                        if (!address)
index ea549a94361bd6119935170c35d72db2292cd2ed..99b3bf3f4182ecf70e02be949ad8d7a05e7e6314 100644 (file)
@@ -283,6 +283,27 @@ static const char * const smca_smu_mce_desc[] = {
        "SMU RAM ECC or parity error",
 };
 
+struct smca_mce_desc {
+       const char * const *descs;
+       unsigned int num_descs;
+};
+
+static struct smca_mce_desc smca_mce_descs[] = {
+       [SMCA_LS]       = { smca_ls_mce_desc,   ARRAY_SIZE(smca_ls_mce_desc)    },
+       [SMCA_IF]       = { smca_if_mce_desc,   ARRAY_SIZE(smca_if_mce_desc)    },
+       [SMCA_L2_CACHE] = { smca_l2_mce_desc,   ARRAY_SIZE(smca_l2_mce_desc)    },
+       [SMCA_DE]       = { smca_de_mce_desc,   ARRAY_SIZE(smca_de_mce_desc)    },
+       [SMCA_EX]       = { smca_ex_mce_desc,   ARRAY_SIZE(smca_ex_mce_desc)    },
+       [SMCA_FP]       = { smca_fp_mce_desc,   ARRAY_SIZE(smca_fp_mce_desc)    },
+       [SMCA_L3_CACHE] = { smca_l3_mce_desc,   ARRAY_SIZE(smca_l3_mce_desc)    },
+       [SMCA_CS]       = { smca_cs_mce_desc,   ARRAY_SIZE(smca_cs_mce_desc)    },
+       [SMCA_PIE]      = { smca_pie_mce_desc,  ARRAY_SIZE(smca_pie_mce_desc)   },
+       [SMCA_UMC]      = { smca_umc_mce_desc,  ARRAY_SIZE(smca_umc_mce_desc)   },
+       [SMCA_PB]       = { smca_pb_mce_desc,   ARRAY_SIZE(smca_pb_mce_desc)    },
+       [SMCA_PSP]      = { smca_psp_mce_desc,  ARRAY_SIZE(smca_psp_mce_desc)   },
+       [SMCA_SMU]      = { smca_smu_mce_desc,  ARRAY_SIZE(smca_smu_mce_desc)   },
+};
+
 static bool f12h_mc0_mce(u16 ec, u8 xec)
 {
        bool ret = false;
@@ -827,175 +848,32 @@ static void decode_mc6_mce(struct mce *m)
        pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
 }
 
-static void decode_f17h_core_errors(const char *ip_name, u8 xec,
-                                  unsigned int mca_type)
-{
-       const char * const *error_desc_array;
-       size_t len;
-
-       pr_emerg(HW_ERR "%s Error: ", ip_name);
-
-       switch (mca_type) {
-       case SMCA_LS:
-               error_desc_array = smca_ls_mce_desc;
-               len = ARRAY_SIZE(smca_ls_mce_desc) - 1;
-
-               if (xec == 0x4) {
-                       pr_cont("Unrecognized LS MCA error code.\n");
-                       return;
-               }
-               break;
-
-       case SMCA_IF:
-               error_desc_array = smca_if_mce_desc;
-               len = ARRAY_SIZE(smca_if_mce_desc) - 1;
-               break;
-
-       case SMCA_L2_CACHE:
-               error_desc_array = smca_l2_mce_desc;
-               len = ARRAY_SIZE(smca_l2_mce_desc) - 1;
-               break;
-
-       case SMCA_DE:
-               error_desc_array = smca_de_mce_desc;
-               len = ARRAY_SIZE(smca_de_mce_desc) - 1;
-               break;
-
-       case SMCA_EX:
-               error_desc_array = smca_ex_mce_desc;
-               len = ARRAY_SIZE(smca_ex_mce_desc) - 1;
-               break;
-
-       case SMCA_FP:
-               error_desc_array = smca_fp_mce_desc;
-               len = ARRAY_SIZE(smca_fp_mce_desc) - 1;
-               break;
-
-       case SMCA_L3_CACHE:
-               error_desc_array = smca_l3_mce_desc;
-               len = ARRAY_SIZE(smca_l3_mce_desc) - 1;
-               break;
-
-       default:
-               pr_cont("Corrupted MCA core error info.\n");
-               return;
-       }
-
-       if (xec > len) {
-               pr_cont("Unrecognized %s MCA bank error code.\n",
-                        amd_core_mcablock_names[mca_type]);
-               return;
-       }
-
-       pr_cont("%s.\n", error_desc_array[xec]);
-}
-
-static void decode_df_errors(u8 xec, unsigned int mca_type)
-{
-       const char * const *error_desc_array;
-       size_t len;
-
-       pr_emerg(HW_ERR "Data Fabric Error: ");
-
-       switch (mca_type) {
-       case  SMCA_CS:
-               error_desc_array = smca_cs_mce_desc;
-               len = ARRAY_SIZE(smca_cs_mce_desc) - 1;
-               break;
-
-       case SMCA_PIE:
-               error_desc_array = smca_pie_mce_desc;
-               len = ARRAY_SIZE(smca_pie_mce_desc) - 1;
-               break;
-
-       default:
-               pr_cont("Corrupted MCA Data Fabric info.\n");
-               return;
-       }
-
-       if (xec > len) {
-               pr_cont("Unrecognized %s MCA bank error code.\n",
-                        amd_df_mcablock_names[mca_type]);
-               return;
-       }
-
-       pr_cont("%s.\n", error_desc_array[xec]);
-}
-
 /* Decode errors according to Scalable MCA specification */
 static void decode_smca_errors(struct mce *m)
 {
-       u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
-       unsigned int hwid, mca_type, i;
-       u8 xec = XEC(m->status, xec_mask);
-       const char * const *error_desc_array;
+       struct smca_hwid_mcatype *type;
+       unsigned int bank_type;
        const char *ip_name;
-       u32 low, high;
-       size_t len;
+       u8 xec = XEC(m->status, xec_mask);
 
-       if (rdmsr_safe(addr, &low, &high)) {
-               pr_emerg(HW_ERR "Invalid IP block specified.\n");
+       if (m->bank >= ARRAY_SIZE(smca_banks))
                return;
-       }
-
-       hwid = high & MCI_IPID_HWID;
-       mca_type = (high & MCI_IPID_MCATYPE) >> 16;
 
-       pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
-
-       /*
-        * Based on hwid and mca_type values, decode errors from respective IPs.
-        * Note: mca_type values make sense only in the context of an hwid.
-        */
-       for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
-               if (amd_hwids[i].hwid == hwid)
-                       break;
-
-       switch (i) {
-       case SMCA_F17H_CORE:
-               ip_name = (mca_type == SMCA_L3_CACHE) ?
-                         "L3 Cache" : "F17h Core";
-               return decode_f17h_core_errors(ip_name, xec, mca_type);
-               break;
-
-       case SMCA_DF:
-               return decode_df_errors(xec, mca_type);
-               break;
-
-       case SMCA_UMC:
-               error_desc_array = smca_umc_mce_desc;
-               len = ARRAY_SIZE(smca_umc_mce_desc) - 1;
-               break;
-
-       case SMCA_PB:
-               error_desc_array = smca_pb_mce_desc;
-               len = ARRAY_SIZE(smca_pb_mce_desc) - 1;
-               break;
-
-       case SMCA_PSP:
-               error_desc_array = smca_psp_mce_desc;
-               len = ARRAY_SIZE(smca_psp_mce_desc) - 1;
-               break;
-
-       case SMCA_SMU:
-               error_desc_array = smca_smu_mce_desc;
-               len = ARRAY_SIZE(smca_smu_mce_desc) - 1;
-               break;
-
-       default:
-               pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+       type = smca_banks[m->bank].type;
+       if (!type)
                return;
-       }
 
-       ip_name = amd_hwids[i].name;
-       pr_emerg(HW_ERR "%s Error: ", ip_name);
+       bank_type = type->bank_type;
+       ip_name = smca_bank_names[bank_type].long_name;
 
-       if (xec > len) {
-               pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
-               return;
-       }
+       pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
 
-       pr_cont("%s.\n", error_desc_array[xec]);
+       /* Only print the decode of valid error codes */
+       if (xec < smca_mce_descs[bank_type].num_descs &&
+                       (type->xec_bitmap & BIT_ULL(xec))) {
+               pr_emerg(HW_ERR "%s Error: ", ip_name);
+               pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+       }
 }
 
 static inline void amd_decode_err_code(u16 ec)