ACPI, APEI, Use ERST for persistent storage of MCE

author Huang Ying <ying.huang@intel.com>

Tue, 18 May 2010 06:35:22 +0000 (14:35 +0800)

committer Len Brown <len.brown@intel.com>

Thu, 20 May 2010 02:41:40 +0000 (22:41 -0400)
author Huang Ying <ying.huang@intel.com>
Tue, 18 May 2010 06:35:22 +0000 (14:35 +0800)
committer Len Brown <len.brown@intel.com>
Thu, 20 May 2010 02:41:40 +0000 (22:41 -0400)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c

index 4eccd1fadb1481e2a402b9fb826a407b15bf00b7..745b54f9be89fe02d3043bdb24032016deb2d7db 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -8,6 +8,9 @@
   * the error memory page can be offlined by /sbin/mcelog if the error
   * count for one page is beyond the threshold.
   *
+ * For fatal MCE, save MCE record into persistent storage via ERST, so
+ * that the MCE record can be logged after reboot via ERST.
+ *
   * Copyright 2010 Intel Corp.
   *   Author: Huang Ying <ying.huang@intel.com>
   *
@@ -50,3 +53,86 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
         mce_notify_irq();
  }
  EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
+
+/*
+ * Creator ID stored in the CPER record header of every record written by
+ * apei_write_mce(); apei_read_mce() rejects records carrying any other ID.
+ */
+#define CPER_CREATOR_MCE                                               \
+       UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,     \
+               0x64, 0x90, 0xb8, 0x9d)
+/* Section type stored in the section descriptor of the MCE record. */
+#define CPER_SECTION_TYPE_MCE                                          \
+       UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,     \
+               0x04, 0x4a, 0x38, 0xfc)
+
+/*
+ * Layout of one MCE record as handed to / read back from ERST: a CPER
+ * record header, one section descriptor, and the struct mce itself as
+ * the body of that single section.
+ *
+ * The CPER specification (UEFI specification 2.3, appendix N) requires
+ * the record to be byte-packed, hence __packed.
+ */
+struct cper_mce_record {
+       struct cper_record_header hdr;
+       struct cper_section_descriptor sec_hdr;
+       struct mce mce;
+} __packed;
+
+/*
+ * Wrap one MCE in a CPER record and hand it to ERST.
+ *
+ * The record is built on the stack: it is zeroed, the header and the
+ * single section descriptor are filled in with fatal severity, and *m
+ * is copied in as the section body.
+ *
+ * Returns whatever erst_write() returns.
+ */
+int apei_write_mce(struct mce *m)
+{
+       struct cper_mce_record rcd;
+
+       memset(&rcd, 0, sizeof(rcd));
+       memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+       rcd.hdr.revision = CPER_RECORD_REV;
+       rcd.hdr.signature_end = CPER_SIG_END;
+       rcd.hdr.section_count = 1;
+       rcd.hdr.error_severity = CPER_SER_FATAL;
+       /* timestamp, platform_id, partition_id are all invalid */
+       rcd.hdr.validation_bits = 0;
+       rcd.hdr.record_length = sizeof(rcd);
+       rcd.hdr.creator_id = CPER_CREATOR_MCE;
+       rcd.hdr.notification_type = CPER_NOTIFY_MCE;
+       rcd.hdr.record_id = cper_next_record_id();
+       rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+       /* Byte offset of the mce payload from the start of the record */
+       rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
+       rcd.sec_hdr.section_length = sizeof(rcd.mce);
+       rcd.sec_hdr.revision = CPER_SEC_REV;
+       /* fru_id and fru_text are invalid */
+       rcd.sec_hdr.validation_bits = 0;
+       rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
+       rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+       rcd.sec_hdr.section_severity = CPER_SER_FATAL;
+
+       memcpy(&rcd.mce, m, sizeof(*m));
+
+       return erst_write(&rcd.hdr);
+}
+
+/*
+ * Read the next record from ERST and extract the MCE it carries.
+ *
+ * @m:         filled with the MCE from the record on success
+ * @record_id: filled with the record's ID on success, for use with
+ *             apei_clear_mce()
+ *
+ * Returns sizeof(*m) on success.  A zero or negative result from
+ * erst_read_next() is passed through unchanged.  -EIO is returned when
+ * the record read is not exactly one struct cper_mce_record or its
+ * creator ID is not CPER_CREATOR_MCE; *m and *record_id are left
+ * untouched in that case.
+ */
+ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+       struct cper_mce_record rcd;
+       ssize_t len;
+
+       len = erst_read_next(&rcd.hdr, sizeof(rcd));
+       if (len <= 0)
+               return len;
+       /* Cannot skip other records in storage via ERST unless they are cleared */
+       else if (len != sizeof(rcd) ||
+                uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
+               /* Terminate the message so it does not merge with later output */
+               if (printk_ratelimit())
+                       pr_warning(
+                       "MCE-APEI: Can not skip the unknown record in ERST\n");
+               return -EIO;
+       }
+
+       memcpy(m, &rcd.mce, sizeof(*m));
+       *record_id = rcd.hdr.record_id;
+
+       return sizeof(*m);
+}
+
+/*
+ * Check whether there is any record in ERST.
+ *
+ * Returns the result of erst_get_record_count(); mce_poll() treats any
+ * nonzero value as "data available".
+ */
+int apei_check_mce(void)
+{
+       return erst_get_record_count();
+}
+
+/*
+ * Clear the ERST record identified by record_id (as reported by
+ * apei_read_mce()).  Returns whatever erst_clear() returns.
+ */
+int apei_clear_mce(u64 record_id)
+{
+       return erst_clear(record_id);
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h

index 32996f9fab670be274b925e7ec3a4bd52e8650c3..fefcc69ee8b5a937bdcce8f551b4d3ce585db585 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,3 +28,26 @@ extern int mce_ser;
  
  extern struct mce_bank *mce_banks;
  
+/*
+ * Persistent storage of MCE records via APEI ERST.
+ *
+ * Without CONFIG_ACPI_APEI the inline stubs below are used instead:
+ * writing and clearing fail with -EINVAL, while reading and checking
+ * return 0.
+ */
+#ifdef CONFIG_ACPI_APEI
+int apei_write_mce(struct mce *m);
+ssize_t apei_read_mce(struct mce *m, u64 *record_id);
+int apei_check_mce(void);
+int apei_clear_mce(u64 record_id);
+#else
+static inline int apei_write_mce(struct mce *m)
+{
+       return -EINVAL;
+}
+static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+       return 0;
+}
+static inline int apei_check_mce(void)
+{
+       return 0;
+}
+static inline int apei_clear_mce(u64 record_id)
+{
+       return -EINVAL;
+}
+#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c

index 8a6f0afa767ec804c0a1ffa2f6bfee6e87168cb1..09535ca9b9d7d0ac14025abaa0d94e70f094499b 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -264,7 +264,7 @@ static void wait_for_panic(void)
  
  static void mce_panic(char *msg, struct mce *final, char *exp)
  {
-       int i;
+       int i, apei_err = 0;
  
         if (!fake_panic) {
                 /*
@@ -287,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
                 struct mce *m = &mcelog.entry[i];
                 if (!(m->status & MCI_STATUS_VAL))
                         continue;
-               if (!(m->status & MCI_STATUS_UC))
+               if (!(m->status & MCI_STATUS_UC)) {
                         print_mce(m);
+                       if (!apei_err)
+                               apei_err = apei_write_mce(m);
+               }
         }
         /* Now print uncorrected but with the final one last */
         for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -297,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
                         continue;
                 if (!(m->status & MCI_STATUS_UC))
                         continue;
-               if (!final || memcmp(m, final, sizeof(struct mce)))
+               if (!final || memcmp(m, final, sizeof(struct mce))) {
                         print_mce(m);
+                       if (!apei_err)
+                               apei_err = apei_write_mce(m);
+               }
         }
-       if (final)
+       if (final) {
                 print_mce(final);
+               if (!apei_err)
+                       apei_err = apei_write_mce(final);
+       }
         if (cpu_missing)
                 printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
         print_mce_tail();
@@ -1493,6 +1502,43 @@ static void collect_tscs(void *data)
         rdtscll(cpu_tsc[smp_processor_id()]);
  }
  
+/*
+ * Set to 1 once reading from ERST has hit an error or run out of
+ * records; mce_read() and mce_poll() stop consulting ERST after that.
+ */
+static int mce_apei_read_done;
+
+/*
+ * Collect MCE record of previous boot in persistent storage via APEI ERST.
+ *
+ * Copies at most one record to *ubuf per call.
+ *
+ * Returns 0 with *ubuf advanced by sizeof(struct mce) when a record was
+ * delivered and cleared.  Returns 0 with *ubuf unchanged when there are
+ * no more records.  Returns -EINVAL when usize cannot hold one struct
+ * mce, -EFAULT when the copy to user space fails, or the error from
+ * apei_read_mce() / apei_clear_mce().
+ */
+static int __mce_read_apei(char __user **ubuf, size_t usize)
+{
+       int rc;
+       u64 record_id;
+       struct mce m;
+
+       if (usize < sizeof(struct mce))
+               return -EINVAL;
+
+       rc = apei_read_mce(&m, &record_id);
+       /* Error or no more MCE record */
+       if (rc <= 0) {
+               mce_apei_read_done = 1;
+               return rc;
+       }
+       /* On a failed copy the record stays in ERST and the done flag is not set */
+       rc = -EFAULT;
+       if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
+               return rc;
+       /*
+        * In fact, we should have cleared the record after that has
+        * been flushed to the disk or sent to network in
+        * /sbin/mcelog, but we have no interface to support that now,
+        * so just clear it to avoid duplication.
+        */
+       rc = apei_clear_mce(record_id);
+       if (rc) {
+               /* *ubuf is not advanced, so the caller reports the error */
+               mce_apei_read_done = 1;
+               return rc;
+       }
+       *ubuf += sizeof(struct mce);
+
+       return 0;
+}
+
  static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
                         loff_t *off)
  {
@@ -1506,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
                 return -ENOMEM;
  
         mutex_lock(&mce_read_mutex);
+
+       if (!mce_apei_read_done) {
+               err = __mce_read_apei(&buf, usize);
+               if (err || buf != ubuf)
+                       goto out;
+       }
+
         next = rcu_dereference_check_mce(mcelog.next);
  
         /* Only supports full reads right now */
-       if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
-               mutex_unlock(&mce_read_mutex);
-               kfree(cpu_tsc);
-
-               return -EINVAL;
-       }
+       err = -EINVAL;
+       if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
+               goto out;
  
         err = 0;
         prev = 0;
@@ -1562,10 +1612,15 @@ timeout:
                         memset(&mcelog.entry[i], 0, sizeof(struct mce));
                 }
         }
+
+       if (err)
+               err = -EFAULT;
+
+out:
         mutex_unlock(&mce_read_mutex);
         kfree(cpu_tsc);
  
-       return err ? -EFAULT : buf - ubuf;
+       return err ? err : buf - ubuf;
  }
  
  static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -1573,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
         poll_wait(file, &mce_wait, wait);
         if (rcu_dereference_check_mce(mcelog.next))
                 return POLLIN | POLLRDNORM;
+       if (!mce_apei_read_done && apei_check_mce())
+               return POLLIN | POLLRDNORM;
         return 0;
  }
author	Huang Ying <ying.huang@intel.com>
	Tue, 18 May 2010 06:35:22 +0000 (14:35 +0800)
committer	Len Brown <len.brown@intel.com>
	Thu, 20 May 2010 02:41:40 +0000 (22:41 -0400)
arch/x86/kernel/cpu/mcheck/mce-apei.c		patch \| blob \| blame \| history
arch/x86/kernel/cpu/mcheck/mce-internal.h		patch \| blob \| blame \| history
arch/x86/kernel/cpu/mcheck/mce.c		patch \| blob \| blame \| history