drivers/edac/i7core_edac.c
1/* Intel 7 core Memory Controller kernel module (Nehalem)
2 *
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
5 *
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
8 *
9 * Red Hat Inc. http://www.redhat.com
10 *
11 * Forked and adapted from the i5400_edac driver
12 *
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
19 * also available at:
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
21 */
22
a0c36a1f
MCC
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
3b918c12 28#include <linux/delay.h>
a0c36a1f
MCC
29#include <linux/edac.h>
30#include <linux/mmzone.h>
d5381642 31#include <linux/edac_mce.h>
f4742949 32#include <linux/smp.h>
14d2c083 33#include <asm/processor.h>
a0c36a1f
MCC
34
35#include "edac_core.h"
36
f4742949
MCC
37/*
38 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
39 * registers start at bus 255, and are not reported by BIOS.
40 * We currently find devices with only 2 sockets. In order to support more
41 * sockets over QPI (Quick Path Interconnect), just increment this number.
42 */
43#define MAX_SOCKET_BUSES 2
44
45
a0c36a1f
MCC
46/*
47 * Alter this version for the module when modifications are made
48 */
49#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50#define EDAC_MOD_STR "i7core_edac"
51
a0c36a1f
MCC
52/*
53 * Debug macros
54 */
55#define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
57
58#define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
60
61/*
62 * i7core Memory Controller Registers
63 */
64
e9bd2e73
MCC
65 /* OFFSETS for Device 0 Function 0 */
66
67#define MC_CFG_CONTROL 0x90
68
a0c36a1f
MCC
69 /* OFFSETS for Device 3 Function 0 */
70
71#define MC_CONTROL 0x48
72#define MC_STATUS 0x4c
73#define MC_MAX_DOD 0x64
74
442305b1
MCC
75/*
76 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
77 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
78 */
79
80#define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
82
83#define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
86
b4e8f0b6
MCC
87/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
88#define MC_COR_ECC_CNT_0 0x80
89#define MC_COR_ECC_CNT_1 0x84
90#define MC_COR_ECC_CNT_2 0x88
91#define MC_COR_ECC_CNT_3 0x8c
92#define MC_COR_ECC_CNT_4 0x90
93#define MC_COR_ECC_CNT_5 0x94
94
95#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
97
98
a0c36a1f
MCC
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
100
0b2b7b7e
MCC
101#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
106
f122a892
MCC
107#define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
110
0b2b7b7e
MCC
111#define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
113
a0c36a1f 114#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
115#define MC_CHANNEL_ERROR_MASK 0xf8
116#define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
a0c36a1f 125
0b2b7b7e 126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 127
0b2b7b7e
MCC
128#define MC_DOD_CH_DIMM0 0x48
129#define MC_DOD_CH_DIMM1 0x4c
130#define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 143
f122a892
MCC
144#define MC_RANK_PRESENT 0x7c
145
0b2b7b7e
MCC
146#define MC_SAG_CH_0 0x80
147#define MC_SAG_CH_1 0x84
148#define MC_SAG_CH_2 0x88
149#define MC_SAG_CH_3 0x8c
150#define MC_SAG_CH_4 0x90
151#define MC_SAG_CH_5 0x94
152#define MC_SAG_CH_6 0x98
153#define MC_SAG_CH_7 0x9c
154
155#define MC_RIR_LIMIT_CH_0 0x40
156#define MC_RIR_LIMIT_CH_1 0x44
157#define MC_RIR_LIMIT_CH_2 0x48
158#define MC_RIR_LIMIT_CH_3 0x4C
159#define MC_RIR_LIMIT_CH_4 0x50
160#define MC_RIR_LIMIT_CH_5 0x54
161#define MC_RIR_LIMIT_CH_6 0x58
162#define MC_RIR_LIMIT_CH_7 0x5C
163#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
164
165#define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
168
a0c36a1f
MCC
169/*
170 * i7core structs
171 */
172
173#define NUM_CHANS 3
442305b1
MCC
174#define MAX_DIMMS 3 /* Max DIMMS per channel */
175#define MAX_MCR_FUNC 4
176#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
177
178struct i7core_info {
179 u32 mc_control;
180 u32 mc_status;
181 u32 max_dod;
f122a892 182 u32 ch_map;
a0c36a1f
MCC
183};
184
194a40fe
MCC
185
186struct i7core_inject {
187 int enable;
188
189 u32 section;
190 u32 type;
191 u32 eccmask;
192
193 /* Error address mask */
194 int channel, dimm, rank, bank, page, col;
195};
196
0b2b7b7e 197struct i7core_channel {
442305b1
MCC
198 u32 ranks;
199 u32 dimms;
0b2b7b7e
MCC
200};
201
8f331907 202struct pci_id_descr {
66607706
MCC
203 int dev;
204 int func;
205 int dev_id;
de06eeef 206 int optional;
8f331907
MCC
207};
208
f4742949
MCC
209struct i7core_dev {
210 struct list_head list;
211 u8 socket;
212 struct pci_dev **pdev;
de06eeef 213 int n_devs;
f4742949
MCC
214 struct mem_ctl_info *mci;
215};
216
a0c36a1f 217struct i7core_pvt {
f4742949
MCC
218 struct pci_dev *pci_noncore;
219 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
220 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
221
222 struct i7core_dev *i7core_dev;
67166af4 223
a0c36a1f 224 struct i7core_info info;
194a40fe 225 struct i7core_inject inject;
f4742949 226 struct i7core_channel channel[NUM_CHANS];
67166af4 227
f4742949 228 int channels; /* Number of active channels */
442305b1 229
f4742949
MCC
230 int ce_count_available;
231 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
232
233 /* ECC corrected errors counts per udimm */
f4742949
MCC
234 unsigned long udimm_ce_count[MAX_DIMMS];
235 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 236 /* ECC corrected errors counts per rdimm */
f4742949
MCC
237 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
238 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 239
f4742949 240 unsigned int is_registered;
14d2c083 241
d5381642
MCC
242 /* mcelog glue */
243 struct edac_mce edac_mce;
ca9c90ba
MCC
244
245 /* Fifo double buffers */
d5381642 246 struct mce mce_entry[MCE_LOG_LEN];
ca9c90ba
MCC
247 struct mce mce_outentry[MCE_LOG_LEN];
248
249 /* Fifo in/out counters */
250 unsigned mce_in, mce_out;
251
252 /* Count indicator to show errors not got */
253 unsigned mce_overrun;
a0c36a1f
MCC
254};
255
66607706
MCC
256/* Static vars */
257static LIST_HEAD(i7core_edac_list);
258static DEFINE_MUTEX(i7core_edac_lock);
a0c36a1f 259
8f331907
MCC
260#define PCI_DESCR(device, function, device_id) \
261 .dev = (device), \
262 .func = (function), \
263 .dev_id = (device_id)
264
de06eeef 265struct pci_id_descr pci_dev_descr_i7core[] = {
8f331907
MCC
266 /* Memory controller */
267 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
268 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
de06eeef
MCC
269 /* Exists only for RDIMM */
270 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
8f331907
MCC
271 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
272
273 /* Channel 0 */
274 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
275 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
276 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
277 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
278
279 /* Channel 1 */
280 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
281 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
282 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
283 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
284
285 /* Channel 2 */
286 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
287 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
288 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
289 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
290
291 /* Generic Non-core registers */
292 /*
293 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
294 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
295 * the probing code needs to test for the other address in case of
296 * failure of this one
297 */
fd382654 298 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
310cbb72 299
a0c36a1f 300};
8f331907 301
52a2e4fc
MCC
302struct pci_id_descr pci_dev_descr_lynnfield[] = {
303 { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) },
304 { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) },
305 { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) },
306
307 { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
308 { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
309 { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
310 { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) },
311
508fa179
MCC
312 { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
313 { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
314 { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
315 { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
52a2e4fc 316
f05da2f7
MCC
317 /*
318 * This PCI device has an alternate address on some
319 * processors, like the Core i7 860
320 */
52a2e4fc
MCC
321 { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
322};
323
8f331907
MCC
324/*
325 * pci_device_id table for which devices we are looking for
8f331907
MCC
326 */
327static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 328 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
f05da2f7 329 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
8f331907
MCC
330 {0,} /* 0 terminated list. */
331};
332
a0c36a1f
MCC
333static struct edac_pci_ctl_info *i7core_pci;
334
335/****************************************************************************
336 Ancillary status routines
337 ****************************************************************************/
338
339 /* MC_CONTROL bits */
ef708b53
MCC
340#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
341#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
342
343 /* MC_STATUS bits */
61053fde 344#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 345#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
346
347 /* MC_MAX_DOD read functions */
854d3349 348static inline int numdimms(u32 dimms)
a0c36a1f 349{
854d3349 350 return (dimms & 0x3) + 1;
a0c36a1f
MCC
351}
352
854d3349 353static inline int numrank(u32 rank)
a0c36a1f
MCC
354{
355 static int ranks[4] = { 1, 2, 4, -EINVAL };
356
854d3349 357 return ranks[rank & 0x3];
a0c36a1f
MCC
358}
359
854d3349 360static inline int numbank(u32 bank)
a0c36a1f
MCC
361{
362 static int banks[4] = { 4, 8, 16, -EINVAL };
363
854d3349 364 return banks[bank & 0x3];
a0c36a1f
MCC
365}
366
854d3349 367static inline int numrow(u32 row)
a0c36a1f
MCC
368{
369 static int rows[8] = {
370 1 << 12, 1 << 13, 1 << 14, 1 << 15,
371 1 << 16, -EINVAL, -EINVAL, -EINVAL,
372 };
373
854d3349 374 return rows[row & 0x7];
a0c36a1f
MCC
375}
376
854d3349 377static inline int numcol(u32 col)
a0c36a1f
MCC
378{
379 static int cols[8] = {
380 1 << 10, 1 << 11, 1 << 12, -EINVAL,
381 };
854d3349 382 return cols[col & 0x3];
a0c36a1f
MCC
383}
384
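/*
 * Illustrative decode of MC_MAX_DOD using the helpers above (made-up value;
 * the bit positions follow the shifts used later in get_dimm_config(), not a
 * datasheet quote): for max_dod = 0x491, numdimms(0x491) = 2,
 * numrank(0x491 >> 2) = 1, numbank(0x491 >> 4) = 8,
 * numrow(0x491 >> 6) = 1 << 14 rows and numcol(0x491 >> 9) = 1 << 12 columns.
 */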
f4742949 385static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
386{
387 struct i7core_dev *i7core_dev;
388
389 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
390 if (i7core_dev->socket == socket)
391 return i7core_dev;
392 }
393
394 return NULL;
395}
396
a0c36a1f
MCC
397/****************************************************************************
398 Memory check routines
399 ****************************************************************************/
67166af4
MCC
400static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
401 unsigned func)
ef708b53 402{
66607706 403 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 404 int i;
ef708b53 405
66607706
MCC
406 if (!i7core_dev)
407 return NULL;
408
de06eeef 409 for (i = 0; i < i7core_dev->n_devs; i++) {
66607706 410 if (!i7core_dev->pdev[i])
ef708b53
MCC
411 continue;
412
66607706
MCC
413 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
414 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
415 return i7core_dev->pdev[i];
ef708b53
MCC
416 }
417 }
418
eb94fc40
MCC
419 return NULL;
420}
421
ec6df24c
MCC
422/**
423 * i7core_get_active_channels() - gets the number of channels and csrows
424 * @socket: Quick Path Interconnect socket
425 * @channels: Number of channels that will be returned
426 * @csrows: Number of csrows found
427 *
428 * Since the EDAC core needs to know in advance the number of available
429 * channels and csrows, in order to allocate memory for them, this probe
430 * runs in two similar steps. The first step, implemented here, counts the
431 * csrows/channels present on one socket.
432 * This count is then used to properly size the mci components.
433 *
434 * Note that none of the currently available datasheets explain, or even
435 * mention, how csrows are seen by the memory controller. So we need to
436 * use a fake description for csrows.
437 * This driver therefore attributes one DIMM to one csrow.
438 */
67166af4
MCC
439static int i7core_get_active_channels(u8 socket, unsigned *channels,
440 unsigned *csrows)
eb94fc40
MCC
441{
442 struct pci_dev *pdev = NULL;
443 int i, j;
444 u32 status, control;
445
446 *channels = 0;
447 *csrows = 0;
448
67166af4 449 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 450 if (!pdev) {
67166af4
MCC
451 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
452 socket);
ef708b53 453 return -ENODEV;
b7c76151 454 }
ef708b53
MCC
455
456 /* Device 3 function 0 reads */
457 pci_read_config_dword(pdev, MC_STATUS, &status);
458 pci_read_config_dword(pdev, MC_CONTROL, &control);
459
460 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 461 u32 dimm_dod[3];
ef708b53
MCC
462 /* Check if the channel is active */
463 if (!(control & (1 << (8 + i))))
464 continue;
465
466 /* Check if the channel is disabled */
41fcb7fe 467 if (status & (1 << i))
ef708b53 468 continue;
ef708b53 469
67166af4 470 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 471 if (!pdev) {
67166af4
MCC
472 i7core_printk(KERN_ERR, "Couldn't find socket %d "
473 "fn %d.%d!!!\n",
474 socket, i + 4, 1);
eb94fc40
MCC
475 return -ENODEV;
476 }
477 /* Devices 4-6 function 1 */
478 pci_read_config_dword(pdev,
479 MC_DOD_CH_DIMM0, &dimm_dod[0]);
480 pci_read_config_dword(pdev,
481 MC_DOD_CH_DIMM1, &dimm_dod[1]);
482 pci_read_config_dword(pdev,
483 MC_DOD_CH_DIMM2, &dimm_dod[2]);
484
ef708b53 485 (*channels)++;
eb94fc40
MCC
486
487 for (j = 0; j < 3; j++) {
488 if (!DIMM_PRESENT(dimm_dod[j]))
489 continue;
490 (*csrows)++;
491 }
ef708b53
MCC
492 }
493
c77720b9 494 debugf0("Number of active channels on socket %d: %d\n",
67166af4 495 socket, *channels);
1c6fed80 496
ef708b53
MCC
497 return 0;
498}
499
f4742949 500static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
501{
502 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 503 struct csrow_info *csr;
854d3349 504 struct pci_dev *pdev;
ba6c5c62 505 int i, j;
5566cb7c 506 unsigned long last_page = 0;
1c6fed80 507 enum edac_type mode;
854d3349 508 enum mem_type mtype;
a0c36a1f 509
854d3349 510 /* Get data from the MC register, function 0 */
f4742949 511 pdev = pvt->pci_mcr[0];
7dd6953c 512 if (!pdev)
8f331907
MCC
513 return -ENODEV;
514
f122a892 515 /* Device 3 function 0 reads */
7dd6953c
MCC
516 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
517 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
518 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
519 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 520
17cb7b0c 521 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
4af91889 522 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 523 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 524
1c6fed80 525 if (ECC_ENABLED(pvt)) {
41fcb7fe 526 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
527 if (ECCx8(pvt))
528 mode = EDAC_S8ECD8ED;
529 else
530 mode = EDAC_S4ECD4ED;
531 } else {
a0c36a1f 532 debugf0("ECC disabled\n");
1c6fed80
MCC
533 mode = EDAC_NONE;
534 }
a0c36a1f
MCC
535
536 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
537 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
538 "x%x x 0x%x\n",
854d3349
MCC
539 numdimms(pvt->info.max_dod),
540 numrank(pvt->info.max_dod >> 2),
276b824c 541 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
542 numrow(pvt->info.max_dod >> 6),
543 numcol(pvt->info.max_dod >> 9));
a0c36a1f 544
0b2b7b7e 545 for (i = 0; i < NUM_CHANS; i++) {
854d3349 546 u32 data, dimm_dod[3], value[8];
0b2b7b7e 547
52a2e4fc
MCC
548 if (!pvt->pci_ch[i][0])
549 continue;
550
0b2b7b7e
MCC
551 if (!CH_ACTIVE(pvt, i)) {
552 debugf0("Channel %i is not active\n", i);
553 continue;
554 }
555 if (CH_DISABLED(pvt, i)) {
556 debugf0("Channel %i is disabled\n", i);
557 continue;
558 }
559
f122a892 560 /* Devices 4-6 function 0 */
f4742949 561 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
562 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
563
f4742949 564 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 565 4 : 2;
0b2b7b7e 566
854d3349
MCC
567 if (data & REGISTERED_DIMM)
568 mtype = MEM_RDDR3;
14d2c083 569 else
854d3349
MCC
570 mtype = MEM_DDR3;
571#if 0
0b2b7b7e
MCC
572 if (data & THREE_DIMMS_PRESENT)
573 pvt->channel[i].dimms = 3;
574 else if (data & SINGLE_QUAD_RANK_PRESENT)
575 pvt->channel[i].dimms = 1;
576 else
577 pvt->channel[i].dimms = 2;
854d3349
MCC
578#endif
579
580 /* Devices 4-6 function 1 */
f4742949 581 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 582 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 583 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 584 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 585 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 586 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 587
1c6fed80 588 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 589 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
590 i,
591 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
592 data,
f4742949 593 pvt->channel[i].ranks,
41fcb7fe 594 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
595
596 for (j = 0; j < 3; j++) {
597 u32 banks, ranks, rows, cols;
5566cb7c 598 u32 size, npages;
854d3349
MCC
599
600 if (!DIMM_PRESENT(dimm_dod[j]))
601 continue;
602
603 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
604 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
605 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
606 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
607
5566cb7c
MCC
608 /* DDR3 has 8 I/O banks */
609 size = (rows * cols * banks * ranks) >> (20 - 3);
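			/*
			 * Worked example with illustrative values: 1 << 14 rows,
			 * 1 << 10 columns, 8 banks and 2 ranks give
			 * (16384 * 1024 * 8 * 2) >> 17 = 2048 MB. The ">> (20 - 3)"
			 * divides by 2^20 to get MB and multiplies by 8, assuming
			 * 8 bytes per column address (64-bit wide rank).
			 */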
610
f4742949 611 pvt->channel[i].dimms++;
854d3349 612
17cb7b0c
MCC
613 debugf0("\tdimm %d %d Mb offset: %x, "
614 "bank: %d, rank: %d, row: %#x, col: %#x\n",
615 j, size,
854d3349
MCC
616 RANKOFFSET(dimm_dod[j]),
617 banks, ranks, rows, cols);
618
eb94fc40
MCC
619#if PAGE_SHIFT > 20
620 npages = size >> (PAGE_SHIFT - 20);
621#else
622 npages = size << (20 - PAGE_SHIFT);
623#endif
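			/*
			 * E.g. with 4 KiB pages (PAGE_SHIFT = 12), npages = size << 8,
			 * so the 2048 MB DIMM from the example above maps to
			 * 2048 * 256 = 524288 pages.
			 */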
5566cb7c 624
ba6c5c62 625 csr = &mci->csrows[*csrow];
5566cb7c
MCC
626 csr->first_page = last_page + 1;
627 last_page += npages;
628 csr->last_page = last_page;
629 csr->nr_pages = npages;
630
854d3349 631 csr->page_mask = 0;
eb94fc40 632 csr->grain = 8;
ba6c5c62 633 csr->csrow_idx = *csrow;
eb94fc40
MCC
634 csr->nr_channels = 1;
635
636 csr->channels[0].chan_idx = i;
637 csr->channels[0].ce_count = 0;
854d3349 638
f4742949 639 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 640
854d3349
MCC
641 switch (banks) {
642 case 4:
643 csr->dtype = DEV_X4;
644 break;
645 case 8:
646 csr->dtype = DEV_X8;
647 break;
648 case 16:
649 csr->dtype = DEV_X16;
650 break;
651 default:
652 csr->dtype = DEV_UNKNOWN;
653 }
654
655 csr->edac_mode = mode;
656 csr->mtype = mtype;
657
ba6c5c62 658 (*csrow)++;
854d3349 659 }
1c6fed80 660
854d3349
MCC
661 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
662 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
663 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
664 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
665 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
666 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
667 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
668 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 669 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 670 for (j = 0; j < 8; j++)
17cb7b0c 671 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
672 (value[j] >> 27) & 0x1,
673 (value[j] >> 24) & 0x7,
674 (value[j] & ((1 << 24) - 1)));
0b2b7b7e
MCC
675 }
676
a0c36a1f
MCC
677 return 0;
678}
679
194a40fe
MCC
680/****************************************************************************
681 Error insertion routines
682 ****************************************************************************/
683
684/* The i7core has independent error injection features per channel.
685 However, to keep the code simpler, we don't allow enabling error injection
686 on more than one channel.
687 Also, since a change to an inject parameter is only applied at enable time,
688 we disable error injection on every write to the sysfs nodes that
689 control the error injection.
690 */
8f331907 691static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
692{
693 struct i7core_pvt *pvt = mci->pvt_info;
694
695 pvt->inject.enable = 0;
696
f4742949 697 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
698 return -ENODEV;
699
f4742949 700 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 701 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
702
703 return 0;
194a40fe
MCC
704}
705
706/*
707 * i7core inject inject.section
708 *
709 * accept and store error injection inject.section value
710 * bit 0 - refers to the lower 32-byte half cacheline
711 * bit 1 - refers to the upper 32-byte half cacheline
712 */
713static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
714 const char *data, size_t count)
715{
716 struct i7core_pvt *pvt = mci->pvt_info;
717 unsigned long value;
718 int rc;
719
720 if (pvt->inject.enable)
41fcb7fe 721 disable_inject(mci);
194a40fe
MCC
722
723 rc = strict_strtoul(data, 10, &value);
724 if ((rc < 0) || (value > 3))
2068def5 725 return -EIO;
194a40fe
MCC
726
727 pvt->inject.section = (u32) value;
728 return count;
729}
730
731static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
732 char *data)
733{
734 struct i7core_pvt *pvt = mci->pvt_info;
735 return sprintf(data, "0x%08x\n", pvt->inject.section);
736}
737
738/*
739 * i7core inject.type
740 *
741 * accept and store error injection inject.type value
742 * bit 0 - repeat enable - Enable error repetition
743 * bit 1 - inject ECC error
744 * bit 2 - inject parity error
745 */
746static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
747 const char *data, size_t count)
748{
749 struct i7core_pvt *pvt = mci->pvt_info;
750 unsigned long value;
751 int rc;
752
753 if (pvt->inject.enable)
41fcb7fe 754 disable_inject(mci);
194a40fe
MCC
755
756 rc = strict_strtoul(data, 10, &value);
757 if ((rc < 0) || (value > 7))
2068def5 758 return -EIO;
194a40fe
MCC
759
760 pvt->inject.type = (u32) value;
761 return count;
762}
763
764static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
765 char *data)
766{
767 struct i7core_pvt *pvt = mci->pvt_info;
768 return sprintf(data, "0x%08x\n", pvt->inject.type);
769}
770
771/*
772 * i7core_inject_inject.eccmask_store
773 *
774 * The type of error (UE/CE) will depend on the inject.eccmask value:
775 * Any bits set to a 1 will flip the corresponding ECC bit
776 * Correctable errors can be injected by flipping 1 bit or the bits within
777 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
778 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
779 * uncorrectable error to be injected.
780 */
781static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
782 const char *data, size_t count)
783{
784 struct i7core_pvt *pvt = mci->pvt_info;
785 unsigned long value;
786 int rc;
787
788 if (pvt->inject.enable)
41fcb7fe 789 disable_inject(mci);
194a40fe
MCC
790
791 rc = strict_strtoul(data, 10, &value);
792 if (rc < 0)
2068def5 793 return -EIO;
194a40fe
MCC
794
795 pvt->inject.eccmask = (u32) value;
796 return count;
797}
798
799static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
800 char *data)
801{
802 struct i7core_pvt *pvt = mci->pvt_info;
803 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
804}
805
806/*
807 * i7core_addrmatch
808 *
809 * Accept and store the address matching criteria (channel, dimm, rank,
810 * bank, page, col) used to select where errors will be injected.
811 * Writing "any" (stored internally as -1) to one of these nodes makes
812 * the hardware ignore that field when matching the target address;
813 * otherwise the value is taken as the exact channel/dimm/rank/bank/page/
814 * column to match.
815 */
194a40fe 816
a5538e53
MCC
817#define DECLARE_ADDR_MATCH(param, limit) \
818static ssize_t i7core_inject_store_##param( \
819 struct mem_ctl_info *mci, \
820 const char *data, size_t count) \
821{ \
cc301b3a 822 struct i7core_pvt *pvt; \
a5538e53
MCC
823 long value; \
824 int rc; \
825 \
cc301b3a
MCC
826 debugf1("%s()\n", __func__); \
827 pvt = mci->pvt_info; \
828 \
a5538e53
MCC
829 if (pvt->inject.enable) \
830 disable_inject(mci); \
831 \
4f87fad1 832 if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
a5538e53
MCC
833 value = -1; \
834 else { \
835 rc = strict_strtoul(data, 10, &value); \
836 if ((rc < 0) || (value >= limit)) \
837 return -EIO; \
838 } \
839 \
840 pvt->inject.param = value; \
841 \
842 return count; \
843} \
844 \
845static ssize_t i7core_inject_show_##param( \
846 struct mem_ctl_info *mci, \
847 char *data) \
848{ \
cc301b3a
MCC
849 struct i7core_pvt *pvt; \
850 \
851 pvt = mci->pvt_info; \
852 debugf1("%s() pvt=%p\n", __func__, pvt); \
a5538e53
MCC
853 if (pvt->inject.param < 0) \
854 return sprintf(data, "any\n"); \
855 else \
856 return sprintf(data, "%d\n", pvt->inject.param);\
194a40fe
MCC
857}
858
a5538e53
MCC
859#define ATTR_ADDR_MATCH(param) \
860 { \
861 .attr = { \
862 .name = #param, \
863 .mode = (S_IRUGO | S_IWUSR) \
864 }, \
865 .show = i7core_inject_show_##param, \
866 .store = i7core_inject_store_##param, \
867 }
194a40fe 868
a5538e53
MCC
869DECLARE_ADDR_MATCH(channel, 3);
870DECLARE_ADDR_MATCH(dimm, 3);
871DECLARE_ADDR_MATCH(rank, 4);
872DECLARE_ADDR_MATCH(bank, 32);
873DECLARE_ADDR_MATCH(page, 0x10000);
874DECLARE_ADDR_MATCH(col, 0x4000);
194a40fe 875
276b824c
MCC
876static int write_and_test(struct pci_dev *dev, int where, u32 val)
877{
878 u32 read;
879 int count;
880
4157d9f5
MCC
881 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
882 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
883 where, val);
884
276b824c
MCC
885 for (count = 0; count < 10; count++) {
886 if (count)
b990538a 887 msleep(100);
276b824c
MCC
888 pci_write_config_dword(dev, where, val);
889 pci_read_config_dword(dev, where, &read);
890
891 if (read == val)
892 return 0;
893 }
894
4157d9f5
MCC
895 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
896 "write=%08x. Read=%08x\n",
897 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
898 where, val, read);
276b824c
MCC
899
900 return -EINVAL;
901}
902
194a40fe
MCC
903/*
904 * This routine prepares the Memory Controller for error injection.
905 * The error will be injected when some process tries to write to the
906 * memory that matches the given criteria.
907 * The criteria can be set in terms of a mask where dimm, rank, bank, page
908 * and col can be specified.
909 * A -1 value for any of the mask items makes the MCU ignore that
910 * matching criterion for error injection.
911 *
912 * Note that the error will only happen after a write operation to memory
913 * that matches the condition. If REPEAT_EN is not set in the inject mask,
914 * it will produce just one error. Otherwise, it will keep injecting errors
915 * until the inject mask is cleared.
916 *
917 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
918 * is reliable enough to check whether the MC is using all
919 * three channels. However, this is not clear in the datasheet.
920 */
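/*
 * Illustrative usage from userspace. The attribute names below come from the
 * sysfs nodes declared in this file; the mc0 path is an assumption based on
 * the usual EDAC sysfs layout and may differ on a given system:
 *
 *	cd /sys/devices/system/edac/mc/mc0
 *	echo 0 > inject_enable			# stop any previous injection
 *	echo 2 > inject_addrmatch/channel	# match only channel 2
 *	echo any > inject_addrmatch/dimm	# ignore the dimm field
 *	echo 2 > inject_section			# upper 32-byte half cacheline
 *	echo 3 > inject_type			# repeat enable + ECC error
 *	echo 1 > inject_eccmask			# flip one ECC bit (correctable)
 *	echo 1 > inject_enable			# arm the injection
 */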
921static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
922 const char *data, size_t count)
923{
924 struct i7core_pvt *pvt = mci->pvt_info;
925 u32 injectmask;
926 u64 mask = 0;
927 int rc;
928 long enable;
929
f4742949 930 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
931 return 0;
932
194a40fe
MCC
933 rc = strict_strtoul(data, 10, &enable);
934 if ((rc < 0))
935 return 0;
936
937 if (enable) {
938 pvt->inject.enable = 1;
939 } else {
940 disable_inject(mci);
941 return count;
942 }
943
944 /* Sets pvt->inject.dimm mask */
945 if (pvt->inject.dimm < 0)
486dd09f 946 mask |= 1LL << 41;
194a40fe 947 else {
f4742949 948 if (pvt->channel[pvt->inject.channel].dimms > 2)
486dd09f 949 mask |= (pvt->inject.dimm & 0x3LL) << 35;
194a40fe 950 else
486dd09f 951 mask |= (pvt->inject.dimm & 0x1LL) << 36;
194a40fe
MCC
952 }
953
954 /* Sets pvt->inject.rank mask */
955 if (pvt->inject.rank < 0)
486dd09f 956 mask |= 1LL << 40;
194a40fe 957 else {
f4742949 958 if (pvt->channel[pvt->inject.channel].dimms > 2)
486dd09f 959 mask |= (pvt->inject.rank & 0x1LL) << 34;
194a40fe 960 else
486dd09f 961 mask |= (pvt->inject.rank & 0x3LL) << 34;
194a40fe
MCC
962 }
963
964 /* Sets pvt->inject.bank mask */
965 if (pvt->inject.bank < 0)
486dd09f 966 mask |= 1LL << 39;
194a40fe 967 else
486dd09f 968 mask |= (pvt->inject.bank & 0x15LL) << 30;
194a40fe
MCC
969
970 /* Sets pvt->inject.page mask */
971 if (pvt->inject.page < 0)
486dd09f 972 mask |= 1LL << 38;
194a40fe 973 else
486dd09f 974 mask |= (pvt->inject.page & 0xffff) << 14;
194a40fe
MCC
975
976 /* Sets pvt->inject.column mask */
977 if (pvt->inject.col < 0)
486dd09f 978 mask |= 1LL << 37;
194a40fe 979 else
486dd09f 980 mask |= (pvt->inject.col & 0x3fff);
194a40fe 981
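	/*
	 * Worked example: inject.dimm = 1 on a channel with at most two DIMMs
	 * and every other criterion left as "any" (-1) gives
	 * mask = (1 << 36) | (1 << 40) | (1 << 39) | (1 << 38) | (1 << 37)
	 *      = 0x1f000000000.
	 */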
276b824c
MCC
982 /*
983 * bit 0: REPEAT_EN
984 * bits 1-2: MASK_HALF_CACHELINE
985 * bit 3: INJECT_ECC
986 * bit 4: INJECT_ADDR_PARITY
987 */
988
989 injectmask = (pvt->inject.type & 1) |
990 (pvt->inject.section & 0x3) << 1 |
991 (pvt->inject.type & 0x6) << (3 - 1);
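	/*
	 * Worked example: inject.type = 3 (repeat + ECC) and inject.section = 1
	 * (lower half cacheline) yield injectmask = 1 | (1 << 1) | (2 << 2) = 0xb.
	 */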
992
993 /* Unlock writes to registers - this register is write only */
f4742949 994 pci_write_config_dword(pvt->pci_noncore,
67166af4 995 MC_CFG_CONTROL, 0x2);
e9bd2e73 996
f4742949 997 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe 998 MC_CHANNEL_ADDR_MATCH, mask);
f4742949 999 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
7b029d03 1000 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
7b029d03 1001
f4742949 1002 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe
MCC
1003 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1004
f4742949 1005 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1006 MC_CHANNEL_ERROR_INJECT, injectmask);
276b824c 1007
194a40fe 1008 /*
276b824c
MCC
1009 * This is something undocumented, based on my tests
1010 * Without writing 8 to this register, errors aren't injected. Not sure
1011 * why.
194a40fe 1012 */
f4742949 1013 pci_write_config_dword(pvt->pci_noncore,
276b824c 1014 MC_CFG_CONTROL, 8);
194a40fe 1015
41fcb7fe
MCC
1016 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1017 " inject 0x%08x\n",
194a40fe
MCC
1018 mask, pvt->inject.eccmask, injectmask);
1019
7b029d03 1020
194a40fe
MCC
1021 return count;
1022}
1023
1024static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1025 char *data)
1026{
1027 struct i7core_pvt *pvt = mci->pvt_info;
7b029d03
MCC
1028 u32 injectmask;
1029
52a2e4fc
MCC
1030 if (!pvt->pci_ch[pvt->inject.channel][0])
1031 return 0;
1032
f4742949 1033 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1034 MC_CHANNEL_ERROR_INJECT, &injectmask);
7b029d03
MCC
1035
1036 debugf0("Inject error read: 0x%018x\n", injectmask);
1037
1038 if (injectmask & 0x0c)
1039 pvt->inject.enable = 1;
1040
194a40fe
MCC
1041 return sprintf(data, "%d\n", pvt->inject.enable);
1042}
1043
f338d736
MCC
1044#define DECLARE_COUNTER(param) \
1045static ssize_t i7core_show_counter_##param( \
1046 struct mem_ctl_info *mci, \
1047 char *data) \
1048{ \
1049 struct i7core_pvt *pvt = mci->pvt_info; \
1050 \
1051 debugf1("%s() \n", __func__); \
1052 if (!pvt->ce_count_available || (pvt->is_registered)) \
1053 return sprintf(data, "data unavailable\n"); \
1054 return sprintf(data, "%lu\n", \
1055 pvt->udimm_ce_count[param]); \
1056}
442305b1 1057
f338d736
MCC
1058#define ATTR_COUNTER(param) \
1059 { \
1060 .attr = { \
1061 .name = __stringify(udimm##param), \
1062 .mode = (S_IRUGO | S_IWUSR) \
1063 }, \
1064 .show = i7core_show_counter_##param \
d88b8507 1065 }
442305b1 1066
f338d736
MCC
1067DECLARE_COUNTER(0);
1068DECLARE_COUNTER(1);
1069DECLARE_COUNTER(2);
442305b1 1070
194a40fe
MCC
1071/*
1072 * Sysfs struct
1073 */
a5538e53
MCC
1074
1075
1076static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1077 ATTR_ADDR_MATCH(channel),
1078 ATTR_ADDR_MATCH(dimm),
1079 ATTR_ADDR_MATCH(rank),
1080 ATTR_ADDR_MATCH(bank),
1081 ATTR_ADDR_MATCH(page),
1082 ATTR_ADDR_MATCH(col),
1083 { .attr = { .name = NULL } }
1084};
1085
a5538e53
MCC
1086static struct mcidev_sysfs_group i7core_inject_addrmatch = {
1087 .name = "inject_addrmatch",
1088 .mcidev_attr = i7core_addrmatch_attrs,
1089};
1090
f338d736
MCC
1091static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1092 ATTR_COUNTER(0),
1093 ATTR_COUNTER(1),
1094 ATTR_COUNTER(2),
1095};
1096
1097static struct mcidev_sysfs_group i7core_udimm_counters = {
1098 .name = "all_channel_counts",
1099 .mcidev_attr = i7core_udimm_counters_attrs,
1100};
1101
a5538e53 1102static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
194a40fe
MCC
1103 {
1104 .attr = {
1105 .name = "inject_section",
1106 .mode = (S_IRUGO | S_IWUSR)
1107 },
1108 .show = i7core_inject_section_show,
1109 .store = i7core_inject_section_store,
1110 }, {
1111 .attr = {
1112 .name = "inject_type",
1113 .mode = (S_IRUGO | S_IWUSR)
1114 },
1115 .show = i7core_inject_type_show,
1116 .store = i7core_inject_type_store,
1117 }, {
1118 .attr = {
1119 .name = "inject_eccmask",
1120 .mode = (S_IRUGO | S_IWUSR)
1121 },
1122 .show = i7core_inject_eccmask_show,
1123 .store = i7core_inject_eccmask_store,
1124 }, {
a5538e53 1125 .grp = &i7core_inject_addrmatch,
194a40fe
MCC
1126 }, {
1127 .attr = {
1128 .name = "inject_enable",
1129 .mode = (S_IRUGO | S_IWUSR)
1130 },
1131 .show = i7core_inject_enable_show,
1132 .store = i7core_inject_enable_store,
1133 },
f338d736 1134 { .attr = { .name = NULL } }, /* Reserved for udimm counters */
42538680 1135 { .attr = { .name = NULL } }
194a40fe
MCC
1136};
1137
a0c36a1f
MCC
1138/****************************************************************************
1139 Device initialization routines: put/get, init/exit
1140 ****************************************************************************/
1141
1142/*
1143 * i7core_put_devices 'put' all the devices that we have
1144 * reserved via 'get'
1145 */
13d6e9b6 1146static void i7core_put_devices(struct i7core_dev *i7core_dev)
a0c36a1f 1147{
13d6e9b6 1148 int i;
a0c36a1f 1149
22e6bcbd 1150 debugf0(__FILE__ ": %s()\n", __func__);
de06eeef 1151 for (i = 0; i < i7core_dev->n_devs; i++) {
22e6bcbd
MCC
1152 struct pci_dev *pdev = i7core_dev->pdev[i];
1153 if (!pdev)
1154 continue;
1155 debugf0("Removing dev %02x:%02x.%d\n",
1156 pdev->bus->number,
1157 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1158 pci_dev_put(pdev);
1159 }
13d6e9b6 1160 kfree(i7core_dev->pdev);
22e6bcbd 1161 list_del(&i7core_dev->list);
13d6e9b6
MCC
1162 kfree(i7core_dev);
1163}
66607706 1164
13d6e9b6
MCC
1165static void i7core_put_all_devices(void)
1166{
42538680 1167 struct i7core_dev *i7core_dev, *tmp;
13d6e9b6 1168
42538680 1169 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
13d6e9b6 1170 i7core_put_devices(i7core_dev);
a0c36a1f
MCC
1171}
1172
71753e01 1173static void __init i7core_xeon_pci_fixup(int dev_id)
bc2d7245
KM
1174{
1175 struct pci_dev *pdev = NULL;
1176 int i;
1177 /*
1178 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
1179 * aren't announced by ACPI. So, we need to use legacy scan probing
1180 * to detect them.
1181 */
de06eeef 1182 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
bc2d7245 1183 if (unlikely(!pdev)) {
f4742949 1184 for (i = 0; i < MAX_SOCKET_BUSES; i++)
bc2d7245
KM
1185 pcibios_scan_specific_bus(255-i);
1186 }
1187}
1188
a0c36a1f
MCC
1189/*
1190 * i7core_get_devices Find and perform 'get' operation on the MCH's
1191 * device/functions we want to reference for this driver
1192 *
1193 * Need to 'get' device 16 func 1 and func 2
1194 */
de06eeef
MCC
1195int i7core_get_onedevice(struct pci_dev **prev, int devno,
1196 struct pci_id_descr *dev_descr, unsigned n_devs)
a0c36a1f 1197{
66607706
MCC
1198 struct i7core_dev *i7core_dev;
1199
8f331907 1200 struct pci_dev *pdev = NULL;
67166af4
MCC
1201 u8 bus = 0;
1202 u8 socket = 0;
a0c36a1f 1203
c77720b9 1204 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
de06eeef 1205 dev_descr->dev_id, *prev);
c77720b9 1206
c77720b9
MCC
1207 /*
1208 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1209 * are at PCI ID 8086:2c40, instead of 8086:2c41. So, we need
1210 * to probe for the alternate address in case of failure
1211 */
de06eeef 1212 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
c77720b9 1213 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
fd382654 1214 PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
d1fd4fb6 1215
ac1ecece 1216 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) {
f05da2f7
MCC
1217 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1218 PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1219 *prev);
ac1ecece
MCC
1220 if (!pdev)
1221 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1222 PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2,
1223 *prev);
1224 }
f05da2f7 1225
c77720b9
MCC
1226 if (!pdev) {
1227 if (*prev) {
1228 *prev = pdev;
1229 return 0;
d1fd4fb6
MCC
1230 }
1231
de06eeef 1232 if (dev_descr->optional)
c77720b9 1233 return 0;
310cbb72 1234
c77720b9
MCC
1235 i7core_printk(KERN_ERR,
1236 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1237 dev_descr->dev, dev_descr->func,
1238 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
67166af4 1239
c77720b9
MCC
1240 /* End of list, leave */
1241 return -ENODEV;
1242 }
1243 bus = pdev->bus->number;
67166af4 1244
c77720b9
MCC
1245 if (bus == 0x3f)
1246 socket = 0;
1247 else
1248 socket = 255 - bus;
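	/*
	 * The non-core devices sit on the highest PCI buses: bus 0xff (or 0x3f
	 * on some platforms) is socket 0, bus 0xfe is socket 1, and so on.
	 * E.g. a device found on bus 0xfe yields socket = 255 - 0xfe = 1.
	 */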
1249
66607706
MCC
1250 i7core_dev = get_i7core_dev(socket);
1251 if (!i7core_dev) {
1252 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1253 if (!i7core_dev)
1254 return -ENOMEM;
de06eeef 1255 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
66607706 1256 GFP_KERNEL);
2a6fae32
AB
1257 if (!i7core_dev->pdev) {
1258 kfree(i7core_dev);
66607706 1259 return -ENOMEM;
2a6fae32 1260 }
66607706 1261 i7core_dev->socket = socket;
de06eeef 1262 i7core_dev->n_devs = n_devs;
66607706 1263 list_add_tail(&i7core_dev->list, &i7core_edac_list);
c77720b9 1264 }
67166af4 1265
66607706 1266 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1267 i7core_printk(KERN_ERR,
1268 "Duplicated device for "
1269 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1270 bus, dev_descr->dev, dev_descr->func,
1271 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
c77720b9
MCC
1272 pci_dev_put(pdev);
1273 return -ENODEV;
1274 }
67166af4 1275
66607706 1276 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1277
1278 /* Sanity check */
de06eeef
MCC
1279 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1280 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
c77720b9
MCC
1281 i7core_printk(KERN_ERR,
1282 "Device PCI ID %04x:%04x "
1283 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
de06eeef 1284 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
c77720b9 1285 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
de06eeef 1286 bus, dev_descr->dev, dev_descr->func);
c77720b9
MCC
1287 return -ENODEV;
1288 }
ef708b53 1289
c77720b9
MCC
1290 /* Be sure that the device is enabled */
1291 if (unlikely(pci_enable_device(pdev) < 0)) {
1292 i7core_printk(KERN_ERR,
1293 "Couldn't enable "
1294 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1295 bus, dev_descr->dev, dev_descr->func,
1296 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
c77720b9
MCC
1297 return -ENODEV;
1298 }
ef708b53 1299
d4c27795 1300 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1301 socket, bus, dev_descr->dev,
1302 dev_descr->func,
1303 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
8f331907 1304
c77720b9 1305 *prev = pdev;
ef708b53 1306
c77720b9
MCC
1307 return 0;
1308}
a0c36a1f 1309
de06eeef 1310static int i7core_get_devices(struct pci_id_descr dev_descr[], unsigned n_devs)
c77720b9 1311{
de06eeef 1312 int i, rc;
c77720b9 1313 struct pci_dev *pdev = NULL;
ef708b53 1314
de06eeef 1315 for (i = 0; i < n_devs; i++) {
c77720b9
MCC
1316 pdev = NULL;
1317 do {
de06eeef
MCC
1318 rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
1319 n_devs);
1320 if (rc < 0) {
13d6e9b6 1321 i7core_put_all_devices();
c77720b9
MCC
1322 return -ENODEV;
1323 }
1324 } while (pdev);
1325 }
66607706 1326
ef708b53 1327 return 0;
ef708b53
MCC
1328}
1329
f4742949
MCC
1330static int mci_bind_devs(struct mem_ctl_info *mci,
1331 struct i7core_dev *i7core_dev)
ef708b53
MCC
1332{
1333 struct i7core_pvt *pvt = mci->pvt_info;
1334 struct pci_dev *pdev;
f4742949 1335 int i, func, slot;
ef708b53 1336
f4742949
MCC
1337 /* Associates i7core_dev and mci for future usage */
1338 pvt->i7core_dev = i7core_dev;
1339 i7core_dev->mci = mci;
66607706 1340
f4742949 1341 pvt->is_registered = 0;
de06eeef 1342 for (i = 0; i < i7core_dev->n_devs; i++) {
f4742949
MCC
1343 pdev = i7core_dev->pdev[i];
1344 if (!pdev)
66607706
MCC
1345 continue;
1346
f4742949
MCC
1347 func = PCI_FUNC(pdev->devfn);
1348 slot = PCI_SLOT(pdev->devfn);
1349 if (slot == 3) {
1350 if (unlikely(func > MAX_MCR_FUNC))
1351 goto error;
1352 pvt->pci_mcr[func] = pdev;
1353 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1354 if (unlikely(func > MAX_CHAN_FUNC))
ef708b53 1355 goto error;
f4742949
MCC
1356 pvt->pci_ch[slot - 4][func] = pdev;
1357 } else if (!slot && !func)
1358 pvt->pci_noncore = pdev;
1359 else
1360 goto error;
ef708b53 1361
f4742949
MCC
1362 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1363 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1364 pdev, i7core_dev->socket);
14d2c083 1365
f4742949
MCC
1366 if (PCI_SLOT(pdev->devfn) == 3 &&
1367 PCI_FUNC(pdev->devfn) == 2)
1368 pvt->is_registered = 1;
a0c36a1f 1369 }
e9bd2e73 1370
f338d736
MCC
1371 /*
1372 * Add extra nodes to count errors on udimm
1373 * For registered memory, this is not needed, since the counters
1374 * are already displayed at the standard locations
1375 */
1376 if (!pvt->is_registered)
1377 i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
1378 &i7core_udimm_counters;
1379
a0c36a1f 1380 return 0;
ef708b53
MCC
1381
1382error:
1383 i7core_printk(KERN_ERR, "Device %d, function %d "
1384 "is out of the expected range\n",
1385 slot, func);
1386 return -EINVAL;
a0c36a1f
MCC
1387}
1388
442305b1
MCC
1389/****************************************************************************
1390 Error check routines
1391 ****************************************************************************/
f4742949 1392static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
b4e8f0b6
MCC
1393 int chan, int dimm, int add)
1394{
1395 char *msg;
1396 struct i7core_pvt *pvt = mci->pvt_info;
f4742949 1397 int row = pvt->csrow_map[chan][dimm], i;
b4e8f0b6
MCC
1398
1399 for (i = 0; i < add; i++) {
1400 msg = kasprintf(GFP_KERNEL, "Corrected error "
f4742949
MCC
1401 "(Socket=%d channel=%d dimm=%d)",
1402 pvt->i7core_dev->socket, chan, dimm);
b4e8f0b6
MCC
1403
1404 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1405 kfree (msg);
1406 }
1407}
1408
1409static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
f4742949 1410 int chan, int new0, int new1, int new2)
b4e8f0b6
MCC
1411{
1412 struct i7core_pvt *pvt = mci->pvt_info;
1413 int add0 = 0, add1 = 0, add2 = 0;
1414 /* Updates CE counters if it is not the first time here */
f4742949 1415 if (pvt->ce_count_available) {
b4e8f0b6
MCC
1416 /* Updates CE counters */
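		/*
		 * The hardware counters are 15 bits wide (masked with 0x7fff),
		 * so a negative delta below means the counter wrapped and 0x7fff
		 * is added back. E.g. going from a last value of 0x7ffe to a new
		 * reading of 0x0003 yields 3 - 0x7ffe + 0x7fff = 4 new errors.
		 */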
1417
f4742949
MCC
1418 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1419 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1420 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
b4e8f0b6
MCC
1421
1422 if (add2 < 0)
1423 add2 += 0x7fff;
f4742949 1424 pvt->rdimm_ce_count[chan][2] += add2;
b4e8f0b6
MCC
1425
1426 if (add1 < 0)
1427 add1 += 0x7fff;
f4742949 1428 pvt->rdimm_ce_count[chan][1] += add1;
b4e8f0b6
MCC
1429
1430 if (add0 < 0)
1431 add0 += 0x7fff;
f4742949 1432 pvt->rdimm_ce_count[chan][0] += add0;
b4e8f0b6 1433 } else
f4742949 1434 pvt->ce_count_available = 1;
b4e8f0b6
MCC
1435
1436 /* Store the new values */
f4742949
MCC
1437 pvt->rdimm_last_ce_count[chan][2] = new2;
1438 pvt->rdimm_last_ce_count[chan][1] = new1;
1439 pvt->rdimm_last_ce_count[chan][0] = new0;
b4e8f0b6
MCC
1440
1441 /*updated the edac core */
1442 if (add0 != 0)
f4742949 1443 i7core_rdimm_update_csrow(mci, chan, 0, add0);
b4e8f0b6 1444 if (add1 != 0)
f4742949 1445 i7core_rdimm_update_csrow(mci, chan, 1, add1);
b4e8f0b6 1446 if (add2 != 0)
f4742949 1447 i7core_rdimm_update_csrow(mci, chan, 2, add2);
b4e8f0b6
MCC
1448
1449}
1450
f4742949 1451static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
b4e8f0b6
MCC
1452{
1453 struct i7core_pvt *pvt = mci->pvt_info;
1454 u32 rcv[3][2];
1455 int i, new0, new1, new2;
1456
1457 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
f4742949 1458 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
b4e8f0b6 1459 &rcv[0][0]);
f4742949 1460 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
b4e8f0b6 1461 &rcv[0][1]);
f4742949 1462 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
b4e8f0b6 1463 &rcv[1][0]);
f4742949 1464 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
b4e8f0b6 1465 &rcv[1][1]);
f4742949 1466 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
b4e8f0b6 1467 &rcv[2][0]);
f4742949 1468 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
b4e8f0b6
MCC
1469 &rcv[2][1]);
1470 for (i = 0 ; i < 3; i++) {
1471 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1472 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1473 /*if the channel has 3 dimms*/
f4742949 1474 if (pvt->channel[i].dimms > 2) {
b4e8f0b6
MCC
1475 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1476 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1477 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1478 } else {
1479 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1480 DIMM_BOT_COR_ERR(rcv[i][0]);
1481 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1482 DIMM_BOT_COR_ERR(rcv[i][1]);
1483 new2 = 0;
1484 }
1485
f4742949 1486 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
b4e8f0b6
MCC
1487 }
1488}
442305b1
MCC
1489
1490/* This function is based on the device 3 function 4 registers as described on:
1491 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1492 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1493 * also available at:
1494 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1495 */
f4742949 1496static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
442305b1
MCC
1497{
1498 struct i7core_pvt *pvt = mci->pvt_info;
1499 u32 rcv1, rcv0;
1500 int new0, new1, new2;
1501
f4742949 1502 if (!pvt->pci_mcr[4]) {
b990538a 1503 debugf0("%s MCR registers not found\n", __func__);
442305b1
MCC
1504 return;
1505 }
1506
b4e8f0b6 1507 /* Corrected test errors */
f4742949
MCC
1508 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1509 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
442305b1
MCC
1510
1511 /* Store the new values */
1512 new2 = DIMM2_COR_ERR(rcv1);
1513 new1 = DIMM1_COR_ERR(rcv0);
1514 new0 = DIMM0_COR_ERR(rcv0);
1515
442305b1 1516 /* Updates CE counters if it is not the first time here */
f4742949 1517 if (pvt->ce_count_available) {
442305b1
MCC
1518 /* Updates CE counters */
1519 int add0, add1, add2;
1520
f4742949
MCC
1521 add2 = new2 - pvt->udimm_last_ce_count[2];
1522 add1 = new1 - pvt->udimm_last_ce_count[1];
1523 add0 = new0 - pvt->udimm_last_ce_count[0];
442305b1
MCC
1524
1525 if (add2 < 0)
1526 add2 += 0x7fff;
f4742949 1527 pvt->udimm_ce_count[2] += add2;
442305b1
MCC
1528
1529 if (add1 < 0)
1530 add1 += 0x7fff;
f4742949 1531 pvt->udimm_ce_count[1] += add1;
442305b1
MCC
1532
1533 if (add0 < 0)
1534 add0 += 0x7fff;
f4742949 1535 pvt->udimm_ce_count[0] += add0;
b4e8f0b6
MCC
1536
1537 if (add0 | add1 | add2)
1538 i7core_printk(KERN_ERR, "New Corrected error(s): "
1539 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1540 add0, add1, add2);
442305b1 1541 } else
f4742949 1542 pvt->ce_count_available = 1;
442305b1
MCC
1543
1544 /* Store the new values */
f4742949
MCC
1545 pvt->udimm_last_ce_count[2] = new2;
1546 pvt->udimm_last_ce_count[1] = new1;
1547 pvt->udimm_last_ce_count[0] = new0;
442305b1
MCC
1548}
1549
8a2f118e
MCC
1550/*
1551 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and IA-32
1552 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1553 * Nehalem is defined as family 0x06, model 0x1a
1554 *
1555 * The MCA registers used here are the following ones:
8a2f118e 1556 * struct mce field MCA Register
f237fcf2
MCC
1557 * m->status MSR_IA32_MC8_STATUS
1558 * m->addr MSR_IA32_MC8_ADDR
1559 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1560 * In the case of Nehalem, the error information is packed into the .status
1561 * and .misc fields
1562 */
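/*
 * Illustrative decode, following the field extraction done in
 * i7core_mce_output_error() below (the register values are made up):
 * with m->status = 0x8c00004000010090 and m->misc = 0x00090000,
 * error = status & 0x1ff0000 has only bit 16 set ("read ECC error"),
 * optype = (status >> 4) & 0x07 = 1 ("read error"),
 * dimm = (misc >> 16) & 0x3 = 1 and channel = (misc >> 18) & 0x3 = 2.
 */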
d5381642
MCC
1563static void i7core_mce_output_error(struct mem_ctl_info *mci,
1564 struct mce *m)
1565{
b4e8f0b6 1566 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1567 char *type, *optype, *err, *msg;
8a2f118e 1568 unsigned long error = m->status & 0x1ff0000l;
a639539f 1569 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1570 u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1571 u32 dimm = (m->misc >> 16) & 0x3;
1572 u32 channel = (m->misc >> 18) & 0x3;
1573 u32 syndrome = m->misc >> 32;
1574 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1575 int csrow;
8a2f118e 1576
c5d34528
MCC
1577 if (m->mcgstatus & 1)
1578 type = "FATAL";
1579 else
1580 type = "NON_FATAL";
1581
a639539f 1582 switch (optypenum) {
b990538a
MCC
1583 case 0:
1584 optype = "generic undef request";
1585 break;
1586 case 1:
1587 optype = "read error";
1588 break;
1589 case 2:
1590 optype = "write error";
1591 break;
1592 case 3:
1593 optype = "addr/cmd error";
1594 break;
1595 case 4:
1596 optype = "scrubbing error";
1597 break;
1598 default:
1599 optype = "reserved";
1600 break;
a639539f
MCC
1601 }
1602
8a2f118e
MCC
1603 switch (errnum) {
1604 case 16:
1605 err = "read ECC error";
1606 break;
1607 case 17:
1608 err = "RAS ECC error";
1609 break;
1610 case 18:
1611 err = "write parity error";
1612 break;
1613 case 19:
1614 err = "redundacy loss";
1615 break;
1616 case 20:
1617 err = "reserved";
1618 break;
1619 case 21:
1620 err = "memory range error";
1621 break;
1622 case 22:
1623 err = "RTID out of range";
1624 break;
1625 case 23:
1626 err = "address parity error";
1627 break;
1628 case 24:
1629 err = "byte enable parity error";
1630 break;
1631 default:
1632 err = "unknown";
d5381642 1633 }
d5381642 1634
f237fcf2 1635 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1636 msg = kasprintf(GFP_ATOMIC,
f4742949 1637 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1638 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1639 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1640 syndrome, core_err_cnt, (long long)m->status,
1641 (long long)m->misc, optype, err);
8a2f118e
MCC
1642
1643 debugf0("%s", msg);
d5381642 1644
f4742949 1645 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1646
d5381642 1647 /* Call the helper to output message */
b4e8f0b6
MCC
1648 if (m->mcgstatus & 1)
1649 edac_mc_handle_fbd_ue(mci, csrow, 0,
1650 0 /* FIXME: should be channel here */, msg);
f4742949 1651 else if (!pvt->is_registered)
b4e8f0b6
MCC
1652 edac_mc_handle_fbd_ce(mci, csrow,
1653 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1654
1655 kfree(msg);
d5381642
MCC
1656}
1657
87d1d272
MCC
1658/*
1659 * i7core_check_error Retrieve and process errors reported by the
1660 * hardware. Called by the Core module.
1661 */
1662static void i7core_check_error(struct mem_ctl_info *mci)
1663{
d5381642
MCC
1664 struct i7core_pvt *pvt = mci->pvt_info;
1665 int i;
1666 unsigned count = 0;
ca9c90ba 1667 struct mce *m;
d5381642 1668
ca9c90ba
MCC
1669 /*
1670 * MCE first step: Copy all mce errors into a temporary buffer
1671 * We use double buffering here, to reduce the risk of
1672 * losing an error.
1673 */
1674 smp_rmb();
321ece4d
MCC
1675 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1676 % MCE_LOG_LEN;
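	/*
	 * E.g. with MCE_LOG_LEN = 32, mce_in = 30 and mce_out = 2 the producer
	 * has wrapped around, and count = (2 + 32 - 30) % 32 = 4 entries are
	 * pending.
	 */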
ca9c90ba 1677 if (!count)
8a311e17 1678 goto check_ce_error;
f4742949 1679
ca9c90ba 1680 m = pvt->mce_outentry;
321ece4d
MCC
1681 if (pvt->mce_in + count > MCE_LOG_LEN) {
1682 unsigned l = MCE_LOG_LEN - pvt->mce_in;
f4742949 1683
ca9c90ba
MCC
1684 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1685 smp_wmb();
1686 pvt->mce_in = 0;
1687 count -= l;
1688 m += l;
1689 }
1690 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1691 smp_wmb();
1692 pvt->mce_in += count;
1693
1694 smp_rmb();
1695 if (pvt->mce_overrun) {
1696 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1697 pvt->mce_overrun);
1698 smp_wmb();
1699 pvt->mce_overrun = 0;
1700 }
d5381642 1701
ca9c90ba
MCC
1702 /*
1703 * MCE second step: parse errors and display
1704 */
d5381642 1705 for (i = 0; i < count; i++)
ca9c90ba 1706 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
d5381642 1707
ca9c90ba
MCC
1708 /*
1709 * Now, let's increment CE error counts
1710 */
8a311e17 1711check_ce_error:
f4742949
MCC
1712 if (!pvt->is_registered)
1713 i7core_udimm_check_mc_ecc_err(mci);
1714 else
1715 i7core_rdimm_check_mc_ecc_err(mci);
87d1d272
MCC
1716}
1717
d5381642
MCC
1718/*
1719 * i7core_mce_check_error Replicates mcelog routine to get errors
1720 * This routine simply queues mcelog errors, and
1721 * return. The error itself should be handled later
1722 * by i7core_check_error.
6e103be1
MCC
1723 * WARNING: As this routine should be called at NMI time, extra care should
1724 * be taken to avoid deadlocks, and to be as fast as possible.
d5381642
MCC
1725 */
1726static int i7core_mce_check_error(void *priv, struct mce *mce)
1727{
c5d34528
MCC
1728 struct mem_ctl_info *mci = priv;
1729 struct i7core_pvt *pvt = mci->pvt_info;
d5381642 1730
8a2f118e
MCC
1731 /*
1732 * Just let mcelog handle it if the error is
1733 * outside the memory controller
1734 */
1735 if (((mce->status & 0xffff) >> 7) != 1)
1736 return 0;
1737
f237fcf2
MCC
1738 /* Bank 8 registers are the only ones that we know how to handle */
1739 if (mce->bank != 8)
1740 return 0;
1741
3b918c12 1742#ifdef CONFIG_SMP
f4742949 1743 /* Only handle errors from the right memory controller */
6e103be1 1744 if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
f4742949 1745 return 0;
3b918c12 1746#endif
f4742949 1747
ca9c90ba 1748 smp_rmb();
321ece4d 1749 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
ca9c90ba
MCC
1750 smp_wmb();
1751 pvt->mce_overrun++;
1752 return 0;
d5381642 1753 }
6e103be1
MCC
1754
 1755 /* Copy the memory error into the ring buffer */
1756 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
ca9c90ba 1757 smp_wmb();
321ece4d 1758 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
d5381642 1759
c5d34528
MCC
1760 /* Handle fatal errors immediately */
1761 if (mce->mcgstatus & 1)
1762 i7core_check_error(mci);
1763
d5381642 1764 /* Advise mcelog that the error was handled */
8a2f118e 1765 return 1;
d5381642
MCC
1766}
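/*
 * Illustrative sketch, not part of the driver logic: the filter above,
 * ((status & 0xffff) >> 7) == 1, accepts only the architectural
 * memory-controller compound error code, whose MCACOD layout is
 * 0000_0000_1MMM_CCCC. The helper below is a hypothetical decoder that
 * just makes that bit layout explicit.
 */
#if 0
static int mcacod_is_mem_ctrl(u64 status, unsigned *mem_op, unsigned *channel)
{
	u16 mcacod = status & 0xffff;

	if ((mcacod >> 7) != 1)		/* bits 15:7 must be 0b000000001 */
		return 0;
	*mem_op  = (mcacod >> 4) & 0x7;	/* MMM: memory transaction type */
	*channel = mcacod & 0xf;	/* CCCC: channel number */
	return 1;
}
#endif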
1767
f4742949
MCC
1768static int i7core_register_mci(struct i7core_dev *i7core_dev,
1769 int num_channels, int num_csrows)
a0c36a1f
MCC
1770{
1771 struct mem_ctl_info *mci;
1772 struct i7core_pvt *pvt;
ba6c5c62 1773 int csrow = 0;
f4742949 1774 int rc;
a0c36a1f 1775
a0c36a1f 1776 /* allocate a new MC control structure */
d4c27795
MCC
1777 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1778 i7core_dev->socket);
f4742949
MCC
1779 if (unlikely(!mci))
1780 return -ENOMEM;
a0c36a1f
MCC
1781
1782 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1783
f4742949
MCC
1784 /* record ptr to the generic device */
1785 mci->dev = &i7core_dev->pdev[0]->dev;
1786
a0c36a1f 1787 pvt = mci->pvt_info;
ef708b53 1788 memset(pvt, 0, sizeof(*pvt));
67166af4 1789
41fcb7fe
MCC
1790 /*
1791 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1792 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1793 * memory channels
1794 */
1795 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1796 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1797 mci->edac_cap = EDAC_FLAG_NONE;
1798 mci->mod_name = "i7core_edac.c";
1799 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1800 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1801 i7core_dev->socket);
1802 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1803 mci->ctl_page_to_phys = NULL;
a5538e53 1804 mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
87d1d272
MCC
 1805 /* Set the function pointer to the error checking routine */
1806 mci->edac_check = i7core_check_error;
8f331907 1807
ef708b53 1808 /* Store the pci devices in mci for faster access */
f4742949 1809 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1810 if (unlikely(rc < 0))
f4742949 1811 goto fail;
ef708b53
MCC
1812
1813 /* Get dimm basic config */
f4742949 1814 get_dimm_config(mci, &csrow);
ef708b53 1815
a0c36a1f 1816 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1817 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1818 debugf0("MC: " __FILE__
1819 ": %s(): failed edac_mc_add_mc()\n", __func__);
1820 /* FIXME: perhaps some code should go here that disables error
1821 * reporting if we just enabled it
1822 */
b7c76151
MCC
1823
1824 rc = -EINVAL;
f4742949 1825 goto fail;
a0c36a1f
MCC
1826 }
1827
 1828 /* allocate the generic PCI control info */
f4742949
MCC
1829 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1830 EDAC_MOD_STR);
41fcb7fe 1831 if (unlikely(!i7core_pci)) {
a0c36a1f
MCC
1832 printk(KERN_WARNING
1833 "%s(): Unable to create PCI control\n",
1834 __func__);
1835 printk(KERN_WARNING
1836 "%s(): PCI error report via EDAC not setup\n",
1837 __func__);
1838 }
1839
194a40fe 1840 /* Default error mask is any memory */
ef708b53 1841 pvt->inject.channel = 0;
194a40fe
MCC
1842 pvt->inject.dimm = -1;
1843 pvt->inject.rank = -1;
1844 pvt->inject.bank = -1;
1845 pvt->inject.page = -1;
1846 pvt->inject.col = -1;
1847
d5381642 1848 /* Register with edac_mce in order to receive memory errors */
c5d34528 1849 pvt->edac_mce.priv = mci;
d5381642 1850 pvt->edac_mce.check_error = i7core_mce_check_error;
d5381642
MCC
1851
1852 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1853 if (unlikely(rc < 0)) {
d5381642
MCC
1854 debugf0("MC: " __FILE__
1855 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1856 }
1857
1858fail:
d4d1ef45
TL
1859 if (rc < 0)
1860 edac_mc_free(mci);
f4742949
MCC
1861 return rc;
1862}
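/*
 * Illustrative sketch of the cleanup pattern used above: the fail: label
 * is reached on success as well, so the teardown must be guarded by the
 * return code and the MC structure is freed only when something failed.
 * All names below (struct thing, setup_thing, register_with_core) are
 * hypothetical stand-ins.
 */
#if 0
static int register_thing(void)
{
	struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);
	int rc;

	if (!t)
		return -ENOMEM;

	rc = setup_thing(t);
	if (rc < 0)
		goto fail;

	rc = register_with_core(t);	/* on success the core owns t */
fail:
	if (rc < 0)			/* free only on error, never on success */
		kfree(t);
	return rc;
}
#endif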
1863
1864/*
 1865 * i7core_probe Probe for ONE instance of the device to see if it
 1866 * is present.
 1867 * return:
 1868 * 0 if a device was found
 1869 * < 0 for an error code
1870 */
1871static int __devinit i7core_probe(struct pci_dev *pdev,
1872 const struct pci_device_id *id)
1873{
1874 int dev_idx = id->driver_data;
1875 int rc;
1876 struct i7core_dev *i7core_dev;
1877
1878 /*
d4c27795 1879 * All memory controllers are allocated at the first pass.
f4742949
MCC
1880 */
1881 if (unlikely(dev_idx >= 1))
1882 return -EINVAL;
1883
1884 /* get the pci devices we want to reserve for our use */
1885 mutex_lock(&i7core_edac_lock);
de06eeef 1886
f05da2f7 1887 if (pdev->device == PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0) {
52a2e4fc
MCC
1888 printk(KERN_INFO "i7core_edac: detected a "
1889 "Lynnfield processor\n");
1890 rc = i7core_get_devices(pci_dev_descr_lynnfield,
1891 ARRAY_SIZE(pci_dev_descr_lynnfield));
1892 } else {
1893 printk(KERN_INFO "i7core_edac: detected a "
1894 "Nehalem/Nehalem-EP processor\n");
1895 rc = i7core_get_devices(pci_dev_descr_i7core,
1896 ARRAY_SIZE(pci_dev_descr_i7core));
1897 }
1898
f4742949
MCC
1899 if (unlikely(rc < 0))
1900 goto fail0;
1901
1902 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1903 int channels;
1904 int csrows;
1905
1906 /* Check the number of active and not disabled channels */
1907 rc = i7core_get_active_channels(i7core_dev->socket,
1908 &channels, &csrows);
1909 if (unlikely(rc < 0))
1910 goto fail1;
1911
d4c27795
MCC
1912 rc = i7core_register_mci(i7core_dev, channels, csrows);
1913 if (unlikely(rc < 0))
1914 goto fail1;
d5381642
MCC
1915 }
1916
ef708b53 1917 i7core_printk(KERN_INFO, "Driver loaded.\n");
8f331907 1918
66607706 1919 mutex_unlock(&i7core_edac_lock);
a0c36a1f
MCC
1920 return 0;
1921
66607706 1922fail1:
13d6e9b6 1923 i7core_put_all_devices();
66607706
MCC
1924fail0:
1925 mutex_unlock(&i7core_edac_lock);
b7c76151 1926 return rc;
a0c36a1f
MCC
1927}
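/*
 * Illustrative sketch of the two-level unwind used above: each failure
 * point jumps to the label that undoes exactly what has been acquired so
 * far, and the lock is always dropped on the way out. The names
 * (big_lock, acquire_devices, register_controllers, release_devices) are
 * hypothetical stand-ins.
 */
#if 0
static DEFINE_MUTEX(big_lock);

static int probe_sketch(void)
{
	int rc;

	mutex_lock(&big_lock);

	rc = acquire_devices();
	if (rc < 0)
		goto fail0;		/* nothing to undo but the lock */

	rc = register_controllers();
	if (rc < 0)
		goto fail1;		/* must drop the devices too */

	mutex_unlock(&big_lock);
	return 0;

fail1:
	release_devices();
fail0:
	mutex_unlock(&big_lock);
	return rc;
}
#endif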
1928
1929/*
1930 * i7core_remove destructor for one instance of device
1931 *
1932 */
1933static void __devexit i7core_remove(struct pci_dev *pdev)
1934{
1935 struct mem_ctl_info *mci;
22e6bcbd 1936 struct i7core_dev *i7core_dev, *tmp;
a0c36a1f
MCC
1937
1938 debugf0(__FILE__ ": %s()\n", __func__);
1939
1940 if (i7core_pci)
1941 edac_pci_release_generic_ctl(i7core_pci);
1942
22e6bcbd
MCC
1943 /*
 1944 * There is a problem here: the pdev value used for removal will be
 1945 * wrong, since it points to the X58 device used to detect that the
 1946 * machine is a Nehalem or newer design. However, due to the way several
 1947 * PCI devices are grouped together to provide MC functionality, we
 1948 * need to use a different method for releasing them.
1949 */
87d1d272 1950
66607706 1951 mutex_lock(&i7core_edac_lock);
22e6bcbd
MCC
1952 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1953 mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
1954 if (mci) {
1955 struct i7core_pvt *pvt = mci->pvt_info;
1956
1957 i7core_dev = pvt->i7core_dev;
1958 edac_mce_unregister(&pvt->edac_mce);
1959 kfree(mci->ctl_name);
1960 edac_mc_free(mci);
1961 i7core_put_devices(i7core_dev);
1962 } else {
1963 i7core_printk(KERN_ERR,
1964 "Couldn't find mci for socket %d\n",
1965 i7core_dev->socket);
1966 }
1967 }
66607706 1968 mutex_unlock(&i7core_edac_lock);
a0c36a1f
MCC
1969}
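/*
 * Illustrative sketch, not part of the driver logic: the teardown above
 * walks a global list while releasing its entries, which is why
 * list_for_each_entry_safe() (which caches the next element in "tmp") is
 * used instead of the plain iterator. A minimal model with a
 * hypothetical item type:
 */
#if 0
struct item {
	struct list_head list;
	int payload;
};

static void drop_all(struct list_head *head)
{
	struct item *it, *tmp;

	list_for_each_entry_safe(it, tmp, head, list) {
		list_del(&it->list);	/* safe: "tmp" already points past "it" */
		kfree(it);
	}
}
#endif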
1970
a0c36a1f
MCC
1971MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
1972
1973/*
1974 * i7core_driver pci_driver structure for this module
1975 *
1976 */
1977static struct pci_driver i7core_driver = {
1978 .name = "i7core_edac",
1979 .probe = i7core_probe,
1980 .remove = __devexit_p(i7core_remove),
1981 .id_table = i7core_pci_tbl,
1982};
1983
1984/*
1985 * i7core_init Module entry function
1986 * Try to initialize this module for its devices
1987 */
1988static int __init i7core_init(void)
1989{
1990 int pci_rc;
1991
1992 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1993
1994 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1995 opstate_init();
1996
de06eeef 1997 i7core_xeon_pci_fixup(pci_dev_descr_i7core[0].dev_id);
bc2d7245 1998
a0c36a1f
MCC
1999 pci_rc = pci_register_driver(&i7core_driver);
2000
3ef288a9
MCC
2001 if (pci_rc >= 0)
2002 return 0;
2003
2004 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2005 pci_rc);
2006
2007 return pci_rc;
a0c36a1f
MCC
2008}
2009
2010/*
2011 * i7core_exit() Module exit function
2012 * Unregister the driver
2013 */
2014static void __exit i7core_exit(void)
2015{
2016 debugf2("MC: " __FILE__ ": %s()\n", __func__);
2017 pci_unregister_driver(&i7core_driver);
2018}
2019
2020module_init(i7core_init);
2021module_exit(i7core_exit);
2022
2023MODULE_LICENSE("GPL");
2024MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2025MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2026MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2027 I7CORE_REVISION);
2028
2029module_param(edac_op_state, int, 0444);
2030MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");