edac: Cleanup the logs for i7core and sb edac drivers
author: Mauro Carvalho Chehab <mchehab@redhat.com>
Fri, 11 May 2012 14:41:45 +0000 (11:41 -0300)
committer: Mauro Carvalho Chehab <mchehab@redhat.com>
Mon, 28 May 2012 22:13:51 +0000 (19:13 -0300)
Remove some information that is duplicated in the MCE log
and is of little use for the error report. This data will be
added back when a trace function that outputs both memory
errors and MCE fields is created.

Cc: Aristeu Rozanski <arozansk@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
drivers/edac/i7core_edac.c
drivers/edac/sb_edac.c

index 6d89c78a9b7a31a25bdb7034205a55091873fb27..2aacd951d41c772044eeccbb0d90476059377ef9 100644 (file)
@@ -1623,7 +1623,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
                                    const struct mce *m)
 {
        struct i7core_pvt *pvt = mci->pvt_info;
-       char *type, *optype, *err, *msg;
+       char *type, *optype, *err, msg[80];
        enum hw_event_mc_err_type tp_event;
        unsigned long error = m->status & 0x1ff0000l;
        bool uncorrected_error = m->mcgstatus & 1ll << 61;
@@ -1701,10 +1701,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
                err = "unknown";
        }
 
-       msg = kasprintf(GFP_ATOMIC,
-               "addr=0x%08llx cpu=%d count=%d Err=%08llx:%08llx (%s: %s))\n",
-               (long long) m->addr, m->cpu, core_err_cnt,
-               (long long)m->status, (long long)m->misc, optype, err);
+       snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype);
 
        /*
         * Call the helper to output message
@@ -1718,8 +1715,6 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
                                     syndrome,
                                     channel, dimm, -1,
                                     err, msg, m);
-
-       kfree(msg);
 }
 
 /*
index 2f95a1b583dc905819287ab19c159ab8defd81b5..e834dfd034d66764c20b71a2aff021bcbfb23249 100644 (file)
@@ -788,7 +788,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
                                 u8 *socket,
                                 long *channel_mask,
                                 u8 *rank,
-                                char *area_type, char *msg)
+                                char **area_type, char *msg)
 {
        struct mem_ctl_info     *new_mci;
        struct sbridge_pvt *pvt = mci->pvt_info;
@@ -843,7 +843,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
                sprintf(msg, "Can't discover the memory socket");
                return -EINVAL;
        }
-       area_type = get_dram_attr(reg);
+       *area_type = get_dram_attr(reg);
        interleave_mode = INTERLEAVE_MODE(reg);
 
        pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
@@ -1342,7 +1342,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
        struct mem_ctl_info *new_mci;
        struct sbridge_pvt *pvt = mci->pvt_info;
        enum hw_event_mc_err_type tp_event;
-       char *type, *optype, msg[256], *recoverable_msg;
+       char *type, *optype, msg[256];
        bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
        bool overflow = GET_BITFIELD(m->status, 62, 62);
        bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1355,7 +1355,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
        long channel_mask, first_channel;
        u8  rank, socket;
        int rc, dimm;
-       char *area_type = "Unknown";
+       char *area_type = NULL;
 
        if (uncorrected_error) {
                if (ripv) {
@@ -1407,7 +1407,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
        }
 
        rc = get_memory_error_data(mci, m->addr, &socket,
-                                  &channel_mask, &rank, area_type, msg);
+                                  &channel_mask, &rank, &area_type, msg);
        if (rc < 0)
                goto err_parsing;
        new_mci = get_mci_for_node_id(socket);
@@ -1427,29 +1427,23 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
        else
                dimm = 2;
 
-       if (uncorrected_error && recoverable)
-               recoverable_msg = " recoverable";
-       else
-               recoverable_msg = "";
 
        /*
-        * FIXME: What should we do with "channel" information on mcelog?
-        * Probably, we can just discard it, as the channel information
-        * comes from the get_memory_error_data() address decoding
+        * FIXME: On some memory configurations (mirror, lockstep), the
+        * Memory Controller can't point the error to a single DIMM. The
+        * EDAC core should be handling the channel mask, in order to point
+        * to the group of dimm's where the error may be happening.
         */
        snprintf(msg, sizeof(msg),
-                       "%d error(s)%s: %s%s: cpu=%d Err=%04x:%04x addr = 0x%08llx socket=%d Channel=%ld(mask=%ld), rank=%d\n",
-                       core_err_cnt,
-                       overflow ? " OVERFLOW" : "",
-                       area_type,
-                       recoverable_msg,
-                       m->cpu,
-                       mscod, errcode,
-                       (long long) m->addr,
-                       socket,
-                       first_channel,          /* This is the real channel on SB */
-                       channel_mask,
-                       rank);
+                "count:%d%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
+                core_err_cnt,
+                overflow ? " OVERFLOW" : "",
+                (uncorrected_error && recoverable) ? " recoverable" : "",
+                area_type,
+                mscod, errcode,
+                socket,
+                channel_mask,
+                rank);
 
        debugf0("%s", msg);