ACPI, APEI, GHES: Add hardware memory error recovery support
author Huang Ying <ying.huang@intel.com>
Wed, 13 Jul 2011 05:14:28 +0000 (13:14 +0800)
committer Len Brown <len.brown@intel.com>
Wed, 3 Aug 2011 15:15:58 +0000 (11:15 -0400)
When firmware notifies the kernel of a recoverable memory error,
memory_failure_queue() is called to do the recovery work.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
drivers/acpi/apei/Kconfig
drivers/acpi/apei/ghes.c

index 35596eaaca179d8c584a751bc55c9cbd69097492..c34aa51af4eed85a99bc9aad7720e4dd582f47a8 100644 (file)
@@ -32,6 +32,13 @@ config ACPI_APEI_PCIEAER
          PCIe AER errors may be reported via APEI firmware first mode.
          Turn on this option to enable the corresponding support.
 
+config ACPI_APEI_MEMORY_FAILURE
+       bool "APEI memory error recovering support"
+       depends on ACPI_APEI && MEMORY_FAILURE
+       help
+         Memory errors may be reported via APEI firmware first mode.
+         Turn on this option to enable the memory recovering support.
+
 config ACPI_APEI_EINJ
        tristate "APEI Error INJection (EINJ)"
        depends on ACPI_APEI && DEBUG_FS
index 931410d31a966a92e69c7cd5ff0162c1bd9d843d..e92c47c46f91b7cc840a4320c89db426205f9273 100644 (file)
@@ -451,20 +451,30 @@ static void ghes_clear_estatus(struct ghes *ghes)
 
 static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
 {
-       int sev, processed = 0;
+       int sev, sec_sev;
        struct acpi_hest_generic_data *gdata;
 
        sev = ghes_severity(estatus->error_severity);
        apei_estatus_for_each_section(estatus, gdata) {
-#ifdef CONFIG_X86_MCE
+               sec_sev = ghes_severity(gdata->error_severity);
                if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
                                 CPER_SEC_PLATFORM_MEM)) {
-                       apei_mce_report_mem_error(
-                               sev == GHES_SEV_CORRECTED,
-                               (struct cper_sec_mem_err *)(gdata+1));
-                       processed = 1;
-               }
+                       struct cper_sec_mem_err *mem_err;
+                       mem_err = (struct cper_sec_mem_err *)(gdata+1);
+#ifdef CONFIG_X86_MCE
+                       apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
+                                                 mem_err);
 #endif
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+                       if (sev == GHES_SEV_RECOVERABLE &&
+                           sec_sev == GHES_SEV_RECOVERABLE &&
+                           mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
+                               unsigned long pfn;
+                               pfn = mem_err->physical_addr >> PAGE_SHIFT;
+                               memory_failure_queue(pfn, 0, 0);
+                       }
+#endif
+               }
        }
 }