string = "Failed to map error addr to a csrow";
break;
case ERR_CHANNEL:
- string = "unknown syndrome - possible error reporting race";
+ string = "Unknown syndrome - possible error reporting race";
+ break;
+ case ERR_SYND:
+ string = "MCA_SYND not valid - unknown syndrome and csrow";
+ break;
+ case ERR_NORM_ADDR:
+ string = "Cannot decode normalized address";
break;
default:
string = "WTF error";
__log_ecc_error(mci, &err, ecc_type);
}
+/*
+ * To find the UMC channel represented by this bank we need to match on its
+ * instance_id. The instance_id of a bank is held in the lower 32 bits of its
+ * IPID.
+ */
+static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+{
+ u32 umc_instance_id[] = {0x50f00, 0x150f00};
+ u32 instance_id = m->ipid & GENMASK(31, 0);
+ int i, channel = -1;
+
+ for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
+ if (umc_instance_id[i] == instance_id)
+ channel = i;
+
+ return channel;
+}
+
+static void decode_umc_error(int node_id, struct mce *m)
+{
+ u8 ecc_type = (m->status >> 45) & 0x3;
+ struct mem_ctl_info *mci;
+ struct amd64_pvt *pvt;
+ struct err_info err;
+ u64 sys_addr;
+
+ mci = edac_mc_find(node_id);
+ if (!mci)
+ return;
+
+ pvt = mci->pvt_info;
+
+ memset(&err, 0, sizeof(err));
+
+ if (m->status & MCI_STATUS_DEFERRED)
+ ecc_type = 3;
+
+ err.channel = find_umc_channel(pvt, m);
+ if (err.channel < 0) {
+ err.err_code = ERR_CHANNEL;
+ goto log_error;
+ }
+
+ if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+ err.err_code = ERR_NORM_ADDR;
+ goto log_error;
+ }
+
+ error_address_to_page_and_offset(sys_addr, &err);
+
+ if (!(m->status & MCI_STATUS_SYNDV)) {
+ err.err_code = ERR_SYND;
+ goto log_error;
+ }
+
+ if (ecc_type == 2) {
+ u8 length = (m->synd >> 18) & 0x3f;
+
+ if (length)
+ err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0);
+ else
+ err.err_code = ERR_CHANNEL;
+ }
+
+ err.csrow = m->synd & 0x7;
+
+log_error:
+ __log_ecc_error(mci, &err, ecc_type);
+}
+
/*
* Use pvt->F3 which contains the F3 CPU PCI device to get the related
* F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
if (report_gart_errors)
amd_report_gart_errors(true);
- amd_register_ecc_decoder(decode_bus_error);
+ if (pvt->umc)
+ amd_register_ecc_decoder(decode_umc_error);
+ else
+ amd_register_ecc_decoder(decode_bus_error);
return 0;
/* unregister from EDAC MCE */
amd_report_gart_errors(false);
- amd_unregister_ecc_decoder(decode_bus_error);
+
+ if (pvt->umc)
+ amd_unregister_ecc_decoder(decode_umc_error);
+ else
+ amd_unregister_ecc_decoder(decode_bus_error);
kfree(ecc_stngs[nid]);
ecc_stngs[nid] = NULL;