edac_core: Allow the creation of sysfs groups
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / drivers / edac / i7core_edac.c
CommitLineData
a0c36a1f
MCC
1/* Intel 7 core Memory Controller kernel module (Nehalem)
2 *
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
5 *
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
8 *
9 * Red Hat Inc. http://www.redhat.com
10 *
11 * Forked and adapted from the i5400_edac driver
12 *
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
19 * also available at:
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
21 */
22
a0c36a1f
MCC
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
28#include <linux/edac.h>
29#include <linux/mmzone.h>
d5381642
MCC
30#include <linux/edac_mce.h>
31#include <linux/spinlock.h>
f4742949 32#include <linux/smp.h>
14d2c083 33#include <asm/processor.h>
a0c36a1f
MCC
34
35#include "edac_core.h"
36
f4742949
MCC
37/*
38 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
39 * registers start at bus 255, and are not reported by BIOS.
40 * We currently find devices with only 2 sockets. In order to support more QPI
41 * Quick Path Interconnect, just increment this number.
42 */
43#define MAX_SOCKET_BUSES 2
44
45
a0c36a1f
MCC
46/*
47 * Alter this version for the module when modifications are made
48 */
49#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50#define EDAC_MOD_STR "i7core_edac"
51
a0c36a1f
MCC
52/*
53 * Debug macros
54 */
55#define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
57
58#define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
60
61/*
62 * i7core Memory Controller Registers
63 */
64
e9bd2e73
MCC
65 /* OFFSETS for Device 0 Function 0 */
66
67#define MC_CFG_CONTROL 0x90
68
a0c36a1f
MCC
69 /* OFFSETS for Device 3 Function 0 */
70
71#define MC_CONTROL 0x48
72#define MC_STATUS 0x4c
73#define MC_MAX_DOD 0x64
74
442305b1
MCC
75/*
76 * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
77 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
78 */
79
80#define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
82
83#define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
86
b4e8f0b6
MCC
87/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
88#define MC_COR_ECC_CNT_0 0x80
89#define MC_COR_ECC_CNT_1 0x84
90#define MC_COR_ECC_CNT_2 0x88
91#define MC_COR_ECC_CNT_3 0x8c
92#define MC_COR_ECC_CNT_4 0x90
93#define MC_COR_ECC_CNT_5 0x94
94
95#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
97
98
a0c36a1f
MCC
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
100
0b2b7b7e
MCC
101#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
106
f122a892
MCC
107#define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
110
0b2b7b7e
MCC
111#define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
113
a0c36a1f 114#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
115#define MC_CHANNEL_ERROR_MASK 0xf8
116#define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
a0c36a1f 125
0b2b7b7e 126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 127
0b2b7b7e
MCC
128#define MC_DOD_CH_DIMM0 0x48
129#define MC_DOD_CH_DIMM1 0x4c
130#define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 143
f122a892
MCC
144#define MC_RANK_PRESENT 0x7c
145
0b2b7b7e
MCC
146#define MC_SAG_CH_0 0x80
147#define MC_SAG_CH_1 0x84
148#define MC_SAG_CH_2 0x88
149#define MC_SAG_CH_3 0x8c
150#define MC_SAG_CH_4 0x90
151#define MC_SAG_CH_5 0x94
152#define MC_SAG_CH_6 0x98
153#define MC_SAG_CH_7 0x9c
154
155#define MC_RIR_LIMIT_CH_0 0x40
156#define MC_RIR_LIMIT_CH_1 0x44
157#define MC_RIR_LIMIT_CH_2 0x48
158#define MC_RIR_LIMIT_CH_3 0x4C
159#define MC_RIR_LIMIT_CH_4 0x50
160#define MC_RIR_LIMIT_CH_5 0x54
161#define MC_RIR_LIMIT_CH_6 0x58
162#define MC_RIR_LIMIT_CH_7 0x5C
163#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
164
165#define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
168
a0c36a1f
MCC
169/*
170 * i7core structs
171 */
172
173#define NUM_CHANS 3
442305b1
MCC
174#define MAX_DIMMS 3 /* Max DIMMS per channel */
175#define MAX_MCR_FUNC 4
176#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
177
178struct i7core_info {
179 u32 mc_control;
180 u32 mc_status;
181 u32 max_dod;
f122a892 182 u32 ch_map;
a0c36a1f
MCC
183};
184
194a40fe
MCC
185
186struct i7core_inject {
187 int enable;
188
189 u32 section;
190 u32 type;
191 u32 eccmask;
192
193 /* Error address mask */
194 int channel, dimm, rank, bank, page, col;
195};
196
0b2b7b7e 197struct i7core_channel {
442305b1
MCC
198 u32 ranks;
199 u32 dimms;
0b2b7b7e
MCC
200};
201
/* Describes one PCI function of the uncore that this driver must grab. */
struct pci_id_descr {
	int	dev;	/* PCI device (slot) number */
	int	func;	/* PCI function number */
	int	dev_id;	/* expected PCI device ID */
};
207
f4742949
MCC
208struct i7core_dev {
209 struct list_head list;
210 u8 socket;
211 struct pci_dev **pdev;
212 struct mem_ctl_info *mci;
213};
214
a0c36a1f 215struct i7core_pvt {
f4742949
MCC
216 struct pci_dev *pci_noncore;
217 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
218 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
219
220 struct i7core_dev *i7core_dev;
67166af4 221
a0c36a1f 222 struct i7core_info info;
194a40fe 223 struct i7core_inject inject;
f4742949 224 struct i7core_channel channel[NUM_CHANS];
67166af4 225
f4742949 226 int channels; /* Number of active channels */
442305b1 227
f4742949
MCC
228 int ce_count_available;
229 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
230
231 /* ECC corrected errors counts per udimm */
f4742949
MCC
232 unsigned long udimm_ce_count[MAX_DIMMS];
233 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 234 /* ECC corrected errors counts per rdimm */
f4742949
MCC
235 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
236 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 237
f4742949 238 unsigned int is_registered;
14d2c083 239
d5381642
MCC
240 /* mcelog glue */
241 struct edac_mce edac_mce;
242 struct mce mce_entry[MCE_LOG_LEN];
243 unsigned mce_count;
244 spinlock_t mce_lock;
a0c36a1f
MCC
245};
246
66607706
MCC
247/* Static vars */
248static LIST_HEAD(i7core_edac_list);
249static DEFINE_MUTEX(i7core_edac_lock);
a0c36a1f 250
8f331907
MCC
251#define PCI_DESCR(device, function, device_id) \
252 .dev = (device), \
253 .func = (function), \
254 .dev_id = (device_id)
255
66607706 256struct pci_id_descr pci_dev_descr[] = {
8f331907
MCC
257 /* Memory controller */
258 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
259 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
b990538a 260 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS) }, /* if RDIMM */
8f331907
MCC
261 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
262
263 /* Channel 0 */
264 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
265 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
266 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
267 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
268
269 /* Channel 1 */
270 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
271 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
272 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
273 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
274
275 /* Channel 2 */
276 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
277 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
278 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
279 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
280
281 /* Generic Non-core registers */
282 /*
283 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
284 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
285 * the probing code needs to test for the other address in case of
286 * failure of this one
287 */
288 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NOCORE) },
289
a0c36a1f 290};
66607706 291#define N_DEVS ARRAY_SIZE(pci_dev_descr)
8f331907
MCC
292
293/*
294 * pci_device_id table for which devices we are looking for
8f331907
MCC
295 */
296static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 297 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
8f331907
MCC
298 {0,} /* 0 terminated list. */
299};
300
a0c36a1f
MCC
301static struct edac_pci_ctl_info *i7core_pci;
302
303/****************************************************************************
304 Anciliary status routines
305 ****************************************************************************/
306
307 /* MC_CONTROL bits */
ef708b53
MCC
308#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
309#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
310
311 /* MC_STATUS bits */
61053fde 312#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 313#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
314
315 /* MC_MAX_DOD read functions */
/* MC_MAX_DOD: the max-DIMMs field stores (count - 1) in its low 2 bits. */
static inline int numdimms(u32 dimms)
{
	return 1 + (dimms & 0x3);
}
320
/* MC_MAX_DOD/MC_DOD: decode the 2-bit rank-count field; 3 is reserved. */
static inline int numrank(u32 rank)
{
	switch (rank & 0x3) {
	case 0:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	default:
		return -EINVAL;	/* reserved encoding */
	}
}
327
/* MC_MAX_DOD/MC_DOD: decode the 2-bit bank-count field; 3 is reserved. */
static inline int numbank(u32 bank)
{
	switch (bank & 0x3) {
	case 0:
		return 4;
	case 1:
		return 8;
	case 2:
		return 16;
	default:
		return -EINVAL;	/* reserved encoding */
	}
}
334
/* MC_MAX_DOD/MC_DOD: decode the 3-bit row-address field into a row count. */
static inline int numrow(u32 row)
{
	unsigned int idx = row & 0x7;

	/* encodings 0-4 mean 2^(12+idx) rows; 5-7 are reserved */
	if (idx > 4)
		return -EINVAL;
	return 1 << (12 + idx);
}
344
/*
 * MC_MAX_DOD/MC_DOD: decode the 2-bit column-address field into a column
 * count; encoding 3 is reserved.
 *
 * Fix: the table was declared "static int cols[8]" with only 4 initializers
 * while the index is masked with 0x3, so entries 4-7 were zero-filled and
 * unreachable. Size it to match the decoded range and make it const.
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};
	return cols[col & 0x3];
}
352
f4742949 353static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
354{
355 struct i7core_dev *i7core_dev;
356
357 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
358 if (i7core_dev->socket == socket)
359 return i7core_dev;
360 }
361
362 return NULL;
363}
364
a0c36a1f
MCC
365/****************************************************************************
366 Memory check routines
367 ****************************************************************************/
67166af4
MCC
368static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
369 unsigned func)
ef708b53 370{
66607706 371 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 372 int i;
ef708b53 373
66607706
MCC
374 if (!i7core_dev)
375 return NULL;
376
ef708b53 377 for (i = 0; i < N_DEVS; i++) {
66607706 378 if (!i7core_dev->pdev[i])
ef708b53
MCC
379 continue;
380
66607706
MCC
381 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
382 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
383 return i7core_dev->pdev[i];
ef708b53
MCC
384 }
385 }
386
eb94fc40
MCC
387 return NULL;
388}
389
ec6df24c
MCC
390/**
391 * i7core_get_active_channels() - gets the number of channels and csrows
392 * @socket: Quick Path Interconnect socket
393 * @channels: Number of channels that will be returned
394 * @csrows: Number of csrows found
395 *
396 * Since EDAC core needs to know in advance the number of available channels
397 * and csrows, in order to allocate memory for csrows/channels, it is needed
398 * to run two similar steps. At the first step, implemented on this function,
399 * it checks the number of csrows/channels present at one socket.
400 * this is used in order to properly allocate the size of mci components.
401 *
402 * It should be noticed that none of the current available datasheets explain
403 * or even mention how csrows are seen by the memory controller. So, we need
404 * to add a fake description for csrows.
405 * So, this driver is attributing one DIMM memory for one csrow.
406 */
67166af4
MCC
407static int i7core_get_active_channels(u8 socket, unsigned *channels,
408 unsigned *csrows)
eb94fc40
MCC
409{
410 struct pci_dev *pdev = NULL;
411 int i, j;
412 u32 status, control;
413
414 *channels = 0;
415 *csrows = 0;
416
67166af4 417 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 418 if (!pdev) {
67166af4
MCC
419 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
420 socket);
ef708b53 421 return -ENODEV;
b7c76151 422 }
ef708b53
MCC
423
424 /* Device 3 function 0 reads */
425 pci_read_config_dword(pdev, MC_STATUS, &status);
426 pci_read_config_dword(pdev, MC_CONTROL, &control);
427
428 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 429 u32 dimm_dod[3];
ef708b53
MCC
430 /* Check if the channel is active */
431 if (!(control & (1 << (8 + i))))
432 continue;
433
434 /* Check if the channel is disabled */
41fcb7fe 435 if (status & (1 << i))
ef708b53 436 continue;
ef708b53 437
67166af4 438 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 439 if (!pdev) {
67166af4
MCC
440 i7core_printk(KERN_ERR, "Couldn't find socket %d "
441 "fn %d.%d!!!\n",
442 socket, i + 4, 1);
eb94fc40
MCC
443 return -ENODEV;
444 }
445 /* Devices 4-6 function 1 */
446 pci_read_config_dword(pdev,
447 MC_DOD_CH_DIMM0, &dimm_dod[0]);
448 pci_read_config_dword(pdev,
449 MC_DOD_CH_DIMM1, &dimm_dod[1]);
450 pci_read_config_dword(pdev,
451 MC_DOD_CH_DIMM2, &dimm_dod[2]);
452
ef708b53 453 (*channels)++;
eb94fc40
MCC
454
455 for (j = 0; j < 3; j++) {
456 if (!DIMM_PRESENT(dimm_dod[j]))
457 continue;
458 (*csrows)++;
459 }
ef708b53
MCC
460 }
461
c77720b9 462 debugf0("Number of active channels on socket %d: %d\n",
67166af4 463 socket, *channels);
1c6fed80 464
ef708b53
MCC
465 return 0;
466}
467
f4742949 468static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
469{
470 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 471 struct csrow_info *csr;
854d3349 472 struct pci_dev *pdev;
ba6c5c62 473 int i, j;
5566cb7c 474 unsigned long last_page = 0;
1c6fed80 475 enum edac_type mode;
854d3349 476 enum mem_type mtype;
a0c36a1f 477
854d3349 478 /* Get data from the MC register, function 0 */
f4742949 479 pdev = pvt->pci_mcr[0];
7dd6953c 480 if (!pdev)
8f331907
MCC
481 return -ENODEV;
482
f122a892 483 /* Device 3 function 0 reads */
7dd6953c
MCC
484 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
485 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
486 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
487 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 488
17cb7b0c 489 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
4af91889 490 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 491 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 492
1c6fed80 493 if (ECC_ENABLED(pvt)) {
41fcb7fe 494 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
495 if (ECCx8(pvt))
496 mode = EDAC_S8ECD8ED;
497 else
498 mode = EDAC_S4ECD4ED;
499 } else {
a0c36a1f 500 debugf0("ECC disabled\n");
1c6fed80
MCC
501 mode = EDAC_NONE;
502 }
a0c36a1f
MCC
503
504 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
505 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
506 "x%x x 0x%x\n",
854d3349
MCC
507 numdimms(pvt->info.max_dod),
508 numrank(pvt->info.max_dod >> 2),
276b824c 509 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
510 numrow(pvt->info.max_dod >> 6),
511 numcol(pvt->info.max_dod >> 9));
a0c36a1f 512
0b2b7b7e 513 for (i = 0; i < NUM_CHANS; i++) {
854d3349 514 u32 data, dimm_dod[3], value[8];
0b2b7b7e
MCC
515
516 if (!CH_ACTIVE(pvt, i)) {
517 debugf0("Channel %i is not active\n", i);
518 continue;
519 }
520 if (CH_DISABLED(pvt, i)) {
521 debugf0("Channel %i is disabled\n", i);
522 continue;
523 }
524
f122a892 525 /* Devices 4-6 function 0 */
f4742949 526 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
527 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
528
f4742949 529 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 530 4 : 2;
0b2b7b7e 531
854d3349
MCC
532 if (data & REGISTERED_DIMM)
533 mtype = MEM_RDDR3;
14d2c083 534 else
854d3349
MCC
535 mtype = MEM_DDR3;
536#if 0
0b2b7b7e
MCC
537 if (data & THREE_DIMMS_PRESENT)
538 pvt->channel[i].dimms = 3;
539 else if (data & SINGLE_QUAD_RANK_PRESENT)
540 pvt->channel[i].dimms = 1;
541 else
542 pvt->channel[i].dimms = 2;
854d3349
MCC
543#endif
544
545 /* Devices 4-6 function 1 */
f4742949 546 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 547 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 548 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 549 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 550 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 551 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 552
1c6fed80 553 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 554 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
555 i,
556 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
557 data,
f4742949 558 pvt->channel[i].ranks,
41fcb7fe 559 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
560
561 for (j = 0; j < 3; j++) {
562 u32 banks, ranks, rows, cols;
5566cb7c 563 u32 size, npages;
854d3349
MCC
564
565 if (!DIMM_PRESENT(dimm_dod[j]))
566 continue;
567
568 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
569 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
570 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
571 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
572
5566cb7c
MCC
573 /* DDR3 has 8 I/O banks */
574 size = (rows * cols * banks * ranks) >> (20 - 3);
575
f4742949 576 pvt->channel[i].dimms++;
854d3349 577
17cb7b0c
MCC
578 debugf0("\tdimm %d %d Mb offset: %x, "
579 "bank: %d, rank: %d, row: %#x, col: %#x\n",
580 j, size,
854d3349
MCC
581 RANKOFFSET(dimm_dod[j]),
582 banks, ranks, rows, cols);
583
eb94fc40
MCC
584#if PAGE_SHIFT > 20
585 npages = size >> (PAGE_SHIFT - 20);
586#else
587 npages = size << (20 - PAGE_SHIFT);
588#endif
5566cb7c 589
ba6c5c62 590 csr = &mci->csrows[*csrow];
5566cb7c
MCC
591 csr->first_page = last_page + 1;
592 last_page += npages;
593 csr->last_page = last_page;
594 csr->nr_pages = npages;
595
854d3349 596 csr->page_mask = 0;
eb94fc40 597 csr->grain = 8;
ba6c5c62 598 csr->csrow_idx = *csrow;
eb94fc40
MCC
599 csr->nr_channels = 1;
600
601 csr->channels[0].chan_idx = i;
602 csr->channels[0].ce_count = 0;
854d3349 603
f4742949 604 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 605
854d3349
MCC
606 switch (banks) {
607 case 4:
608 csr->dtype = DEV_X4;
609 break;
610 case 8:
611 csr->dtype = DEV_X8;
612 break;
613 case 16:
614 csr->dtype = DEV_X16;
615 break;
616 default:
617 csr->dtype = DEV_UNKNOWN;
618 }
619
620 csr->edac_mode = mode;
621 csr->mtype = mtype;
622
ba6c5c62 623 (*csrow)++;
854d3349 624 }
1c6fed80 625
854d3349
MCC
626 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
627 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
628 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
629 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
630 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
631 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
632 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
633 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 634 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 635 for (j = 0; j < 8; j++)
17cb7b0c 636 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
637 (value[j] >> 27) & 0x1,
638 (value[j] >> 24) & 0x7,
639 (value[j] && ((1 << 24) - 1)));
0b2b7b7e
MCC
640 }
641
a0c36a1f
MCC
642 return 0;
643}
644
194a40fe
MCC
645/****************************************************************************
646 Error insertion routines
647 ****************************************************************************/
648
649/* The i7core has independent error injection features per channel.
650 However, to have a simpler code, we don't allow enabling error injection
651 on more than one channel.
652 Also, since a change at an inject parameter will be applied only at enable,
653 we're disabling error injection on all write calls to the sysfs nodes that
654 controls the error code injection.
655 */
8f331907 656static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
657{
658 struct i7core_pvt *pvt = mci->pvt_info;
659
660 pvt->inject.enable = 0;
661
f4742949 662 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
663 return -ENODEV;
664
f4742949 665 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 666 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
667
668 return 0;
194a40fe
MCC
669}
670
671/*
672 * i7core inject inject.section
673 *
674 * accept and store error injection inject.section value
675 * bit 0 - refers to the lower 32-byte half cacheline
676 * bit 1 - refers to the upper 32-byte half cacheline
677 */
678static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
679 const char *data, size_t count)
680{
681 struct i7core_pvt *pvt = mci->pvt_info;
682 unsigned long value;
683 int rc;
684
685 if (pvt->inject.enable)
41fcb7fe 686 disable_inject(mci);
194a40fe
MCC
687
688 rc = strict_strtoul(data, 10, &value);
689 if ((rc < 0) || (value > 3))
2068def5 690 return -EIO;
194a40fe
MCC
691
692 pvt->inject.section = (u32) value;
693 return count;
694}
695
696static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
697 char *data)
698{
699 struct i7core_pvt *pvt = mci->pvt_info;
700 return sprintf(data, "0x%08x\n", pvt->inject.section);
701}
702
703/*
704 * i7core inject.type
705 *
706 * accept and store error injection inject.section value
707 * bit 0 - repeat enable - Enable error repetition
708 * bit 1 - inject ECC error
709 * bit 2 - inject parity error
710 */
711static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
712 const char *data, size_t count)
713{
714 struct i7core_pvt *pvt = mci->pvt_info;
715 unsigned long value;
716 int rc;
717
718 if (pvt->inject.enable)
41fcb7fe 719 disable_inject(mci);
194a40fe
MCC
720
721 rc = strict_strtoul(data, 10, &value);
722 if ((rc < 0) || (value > 7))
2068def5 723 return -EIO;
194a40fe
MCC
724
725 pvt->inject.type = (u32) value;
726 return count;
727}
728
729static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
730 char *data)
731{
732 struct i7core_pvt *pvt = mci->pvt_info;
733 return sprintf(data, "0x%08x\n", pvt->inject.type);
734}
735
736/*
737 * i7core_inject_inject.eccmask_store
738 *
739 * The type of error (UE/CE) will depend on the inject.eccmask value:
740 * Any bits set to a 1 will flip the corresponding ECC bit
741 * Correctable errors can be injected by flipping 1 bit or the bits within
742 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
743 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
744 * uncorrectable error to be injected.
745 */
746static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
747 const char *data, size_t count)
748{
749 struct i7core_pvt *pvt = mci->pvt_info;
750 unsigned long value;
751 int rc;
752
753 if (pvt->inject.enable)
41fcb7fe 754 disable_inject(mci);
194a40fe
MCC
755
756 rc = strict_strtoul(data, 10, &value);
757 if (rc < 0)
2068def5 758 return -EIO;
194a40fe
MCC
759
760 pvt->inject.eccmask = (u32) value;
761 return count;
762}
763
764static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
765 char *data)
766{
767 struct i7core_pvt *pvt = mci->pvt_info;
768 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
769}
770
771/*
772 * i7core_addrmatch
773 *
774 * The type of error (UE/CE) will depend on the inject.eccmask value:
775 * Any bits set to a 1 will flip the corresponding ECC bit
776 * Correctable errors can be injected by flipping 1 bit or the bits within
777 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
778 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
779 * uncorrectable error to be injected.
780 */
781static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
782 const char *data, size_t count)
783{
784 struct i7core_pvt *pvt = mci->pvt_info;
785 char *cmd, *val;
786 long value;
787 int rc;
788
789 if (pvt->inject.enable)
41fcb7fe 790 disable_inject(mci);
194a40fe
MCC
791
792 do {
793 cmd = strsep((char **) &data, ":");
794 if (!cmd)
795 break;
796 val = strsep((char **) &data, " \n\t");
797 if (!val)
798 return cmd - data;
799
41fcb7fe 800 if (!strcasecmp(val, "any"))
194a40fe
MCC
801 value = -1;
802 else {
803 rc = strict_strtol(val, 10, &value);
804 if ((rc < 0) || (value < 0))
805 return cmd - data;
806 }
807
41fcb7fe 808 if (!strcasecmp(cmd, "channel")) {
194a40fe
MCC
809 if (value < 3)
810 pvt->inject.channel = value;
811 else
812 return cmd - data;
41fcb7fe 813 } else if (!strcasecmp(cmd, "dimm")) {
276b824c 814 if (value < 3)
194a40fe
MCC
815 pvt->inject.dimm = value;
816 else
817 return cmd - data;
41fcb7fe 818 } else if (!strcasecmp(cmd, "rank")) {
194a40fe
MCC
819 if (value < 4)
820 pvt->inject.rank = value;
821 else
822 return cmd - data;
41fcb7fe 823 } else if (!strcasecmp(cmd, "bank")) {
276b824c 824 if (value < 32)
194a40fe
MCC
825 pvt->inject.bank = value;
826 else
827 return cmd - data;
41fcb7fe 828 } else if (!strcasecmp(cmd, "page")) {
194a40fe
MCC
829 if (value <= 0xffff)
830 pvt->inject.page = value;
831 else
832 return cmd - data;
41fcb7fe
MCC
833 } else if (!strcasecmp(cmd, "col") ||
834 !strcasecmp(cmd, "column")) {
194a40fe
MCC
835 if (value <= 0x3fff)
836 pvt->inject.col = value;
837 else
838 return cmd - data;
839 }
840 } while (1);
841
842 return count;
843}
844
845static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
846 char *data)
847{
848 struct i7core_pvt *pvt = mci->pvt_info;
849 char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];
850
851 if (pvt->inject.channel < 0)
852 sprintf(channel, "any");
853 else
854 sprintf(channel, "%d", pvt->inject.channel);
855 if (pvt->inject.dimm < 0)
856 sprintf(dimm, "any");
857 else
858 sprintf(dimm, "%d", pvt->inject.dimm);
859 if (pvt->inject.bank < 0)
860 sprintf(bank, "any");
861 else
862 sprintf(bank, "%d", pvt->inject.bank);
863 if (pvt->inject.rank < 0)
864 sprintf(rank, "any");
865 else
866 sprintf(rank, "%d", pvt->inject.rank);
867 if (pvt->inject.page < 0)
868 sprintf(page, "any");
869 else
870 sprintf(page, "0x%04x", pvt->inject.page);
871 if (pvt->inject.col < 0)
872 sprintf(col, "any");
873 else
874 sprintf(col, "0x%04x", pvt->inject.col);
875
876 return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
877 "rank: %s\npage: %s\ncolumn: %s\n",
878 channel, dimm, bank, rank, page, col);
879}
880
276b824c
MCC
881static int write_and_test(struct pci_dev *dev, int where, u32 val)
882{
883 u32 read;
884 int count;
885
4157d9f5
MCC
886 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
887 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
888 where, val);
889
276b824c
MCC
890 for (count = 0; count < 10; count++) {
891 if (count)
b990538a 892 msleep(100);
276b824c
MCC
893 pci_write_config_dword(dev, where, val);
894 pci_read_config_dword(dev, where, &read);
895
896 if (read == val)
897 return 0;
898 }
899
4157d9f5
MCC
900 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
901 "write=%08x. Read=%08x\n",
902 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
903 where, val, read);
276b824c
MCC
904
905 return -EINVAL;
906}
907
194a40fe
MCC
908/*
909 * This routine prepares the Memory Controller for error injection.
910 * The error will be injected when some process tries to write to the
911 * memory that matches the given criteria.
912 * The criteria can be set in terms of a mask where dimm, rank, bank, page
913 * and col can be specified.
914 * A -1 value for any of the mask items will make the MCU to ignore
915 * that matching criteria for error injection.
916 *
917 * It should be noticed that the error will only happen after a write operation
918 * on a memory that matches the condition. if REPEAT_EN is not enabled at
919 * inject mask, then it will produce just one error. Otherwise, it will repeat
920 * until the injectmask would be cleaned.
921 *
922 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
923 * is reliable enough to check if the MC is using the
924 * three channels. However, this is not clear at the datasheet.
925 */
926static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
927 const char *data, size_t count)
928{
929 struct i7core_pvt *pvt = mci->pvt_info;
930 u32 injectmask;
931 u64 mask = 0;
932 int rc;
933 long enable;
934
f4742949 935 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
936 return 0;
937
194a40fe
MCC
938 rc = strict_strtoul(data, 10, &enable);
939 if ((rc < 0))
940 return 0;
941
942 if (enable) {
943 pvt->inject.enable = 1;
944 } else {
945 disable_inject(mci);
946 return count;
947 }
948
949 /* Sets pvt->inject.dimm mask */
950 if (pvt->inject.dimm < 0)
7b029d03 951 mask |= 1L << 41;
194a40fe 952 else {
f4742949 953 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 954 mask |= (pvt->inject.dimm & 0x3L) << 35;
194a40fe 955 else
7b029d03 956 mask |= (pvt->inject.dimm & 0x1L) << 36;
194a40fe
MCC
957 }
958
959 /* Sets pvt->inject.rank mask */
960 if (pvt->inject.rank < 0)
7b029d03 961 mask |= 1L << 40;
194a40fe 962 else {
f4742949 963 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 964 mask |= (pvt->inject.rank & 0x1L) << 34;
194a40fe 965 else
7b029d03 966 mask |= (pvt->inject.rank & 0x3L) << 34;
194a40fe
MCC
967 }
968
969 /* Sets pvt->inject.bank mask */
970 if (pvt->inject.bank < 0)
7b029d03 971 mask |= 1L << 39;
194a40fe 972 else
7b029d03 973 mask |= (pvt->inject.bank & 0x15L) << 30;
194a40fe
MCC
974
975 /* Sets pvt->inject.page mask */
976 if (pvt->inject.page < 0)
7b029d03 977 mask |= 1L << 38;
194a40fe 978 else
7b029d03 979 mask |= (pvt->inject.page & 0xffffL) << 14;
194a40fe
MCC
980
981 /* Sets pvt->inject.column mask */
982 if (pvt->inject.col < 0)
7b029d03 983 mask |= 1L << 37;
194a40fe 984 else
7b029d03 985 mask |= (pvt->inject.col & 0x3fffL);
194a40fe 986
276b824c
MCC
987 /*
988 * bit 0: REPEAT_EN
989 * bits 1-2: MASK_HALF_CACHELINE
990 * bit 3: INJECT_ECC
991 * bit 4: INJECT_ADDR_PARITY
992 */
993
994 injectmask = (pvt->inject.type & 1) |
995 (pvt->inject.section & 0x3) << 1 |
996 (pvt->inject.type & 0x6) << (3 - 1);
997
998 /* Unlock writes to registers - this register is write only */
f4742949 999 pci_write_config_dword(pvt->pci_noncore,
67166af4 1000 MC_CFG_CONTROL, 0x2);
e9bd2e73 1001
f4742949 1002 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe 1003 MC_CHANNEL_ADDR_MATCH, mask);
f4742949 1004 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
7b029d03 1005 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
7b029d03 1006
f4742949 1007 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe
MCC
1008 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1009
f4742949 1010 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1011 MC_CHANNEL_ERROR_INJECT, injectmask);
276b824c 1012
194a40fe 1013 /*
276b824c
MCC
1014 * This is something undocumented, based on my tests
1015 * Without writing 8 to this register, errors aren't injected. Not sure
1016 * why.
194a40fe 1017 */
f4742949 1018 pci_write_config_dword(pvt->pci_noncore,
276b824c 1019 MC_CFG_CONTROL, 8);
194a40fe 1020
41fcb7fe
MCC
1021 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1022 " inject 0x%08x\n",
194a40fe
MCC
1023 mask, pvt->inject.eccmask, injectmask);
1024
7b029d03 1025
194a40fe
MCC
1026 return count;
1027}
1028
/*
 * i7core_inject_enable_show - sysfs "inject_enable" read handler.
 *
 * Reads MC_CHANNEL_ERROR_INJECT back from function 0 of the currently
 * selected injection channel and latches pvt->inject.enable if any bit
 * in mask 0x0c is set (per the register-layout comment in the store
 * handler these fall in the MASK_HALF_CACHELINE/INJECT_ECC area --
 * TODO confirm against the datasheet).  Returns the number of bytes
 * written into @data.
 *
 * NOTE(review): unlike the store handler, this does not check that
 * pci_ch[channel][0] is non-NULL before dereferencing it -- confirm
 * the device is always present when this attribute is readable.
 */
static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%018x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}
1045
442305b1
MCC
1046static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
1047{
f4742949 1048 unsigned i, count, total = 0;
442305b1
MCC
1049 struct i7core_pvt *pvt = mci->pvt_info;
1050
f4742949
MCC
1051 if (!pvt->ce_count_available) {
1052 count = sprintf(data, "data unavailable\n");
1053 return 0;
67166af4 1054 }
d88b8507 1055 if (!pvt->is_registered) {
f4742949
MCC
1056 count = sprintf(data, "all channels "
1057 "UDIMM0: %lu UDIMM1: %lu UDIMM2: %lu\n",
1058 pvt->udimm_ce_count[0],
1059 pvt->udimm_ce_count[1],
1060 pvt->udimm_ce_count[2]);
d88b8507
MCC
1061 data += count;
1062 total += count;
1063 } else {
f4742949
MCC
1064 for (i = 0; i < NUM_CHANS; i++) {
1065 count = sprintf(data, "channel %d RDIMM0: %lu "
1066 "RDIMM1: %lu RDIMM2: %lu\n",
1067 i,
1068 pvt->rdimm_ce_count[i][0],
1069 pvt->rdimm_ce_count[i][1],
1070 pvt->rdimm_ce_count[i][2]);
d88b8507
MCC
1071 data += count;
1072 total += count;
1073 }
1074 }
442305b1 1075
67166af4 1076 return total;
442305b1
MCC
1077}
1078
194a40fe
MCC
1079/*
1080 * Sysfs struct
1081 */
/*
 * Sysfs struct
 *
 * Per-MC attribute table hooked up via mci->mc_driver_sysfs_attributes:
 * the error-injection control files plus the corrected-error counter
 * dump.  The table is terminated by the sentinel entry whose .name is
 * NULL.
 *
 * NOTE(review): "corrected_error_counts" is created with S_IWUSR but
 * has .store == NULL -- presumably writes are rejected by the core;
 * confirm, or drop the write bit.
 */
static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.attr = {
			.name = "inject_addrmatch",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show = i7core_inject_addrmatch_show,
		.store = i7core_inject_addrmatch_store,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.attr = {
			.name = "corrected_error_counts",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show = i7core_ce_regs_show,
		.store = NULL,
	},
	{ .attr = { .name = NULL } }	/* sentinel: end of table */
};
1128
a0c36a1f
MCC
1129/****************************************************************************
1130 Device initialization routines: put/get, init/exit
1131 ****************************************************************************/
1132
1133/*
1134 * i7core_put_devices 'put' all the devices that we have
1135 * reserved via 'get'
1136 */
13d6e9b6 1137static void i7core_put_devices(struct i7core_dev *i7core_dev)
a0c36a1f 1138{
13d6e9b6 1139 int i;
a0c36a1f 1140
22e6bcbd
MCC
1141 debugf0(__FILE__ ": %s()\n", __func__);
1142 for (i = 0; i < N_DEVS; i++) {
1143 struct pci_dev *pdev = i7core_dev->pdev[i];
1144 if (!pdev)
1145 continue;
1146 debugf0("Removing dev %02x:%02x.%d\n",
1147 pdev->bus->number,
1148 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1149 pci_dev_put(pdev);
1150 }
13d6e9b6 1151 kfree(i7core_dev->pdev);
22e6bcbd 1152 list_del(&i7core_dev->list);
13d6e9b6
MCC
1153 kfree(i7core_dev);
1154}
66607706 1155
13d6e9b6
MCC
1156static void i7core_put_all_devices(void)
1157{
42538680 1158 struct i7core_dev *i7core_dev, *tmp;
13d6e9b6 1159
42538680 1160 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
13d6e9b6 1161 i7core_put_devices(i7core_dev);
a0c36a1f
MCC
1162}
1163
bc2d7245
KM
1164static void i7core_xeon_pci_fixup(void)
1165{
1166 struct pci_dev *pdev = NULL;
1167 int i;
1168 /*
1169 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1170 * aren't announced by acpi. So, we need to use a legacy scan probing
1171 * to detect them
1172 */
1173 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1174 pci_dev_descr[0].dev_id, NULL);
bc2d7245 1175 if (unlikely(!pdev)) {
f4742949 1176 for (i = 0; i < MAX_SOCKET_BUSES; i++)
bc2d7245
KM
1177 pcibios_scan_specific_bus(255-i);
1178 }
1179}
1180
a0c36a1f
MCC
1181/*
1182 * i7core_get_devices Find and perform 'get' operation on the MCH's
1183 * device/functions we want to reference for this driver
1184 *
1185 * Need to 'get' device 16 func 1 and func 2
1186 */
c77720b9 1187int i7core_get_onedevice(struct pci_dev **prev, int devno)
a0c36a1f 1188{
66607706
MCC
1189 struct i7core_dev *i7core_dev;
1190
8f331907 1191 struct pci_dev *pdev = NULL;
67166af4
MCC
1192 u8 bus = 0;
1193 u8 socket = 0;
a0c36a1f 1194
c77720b9 1195 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1196 pci_dev_descr[devno].dev_id, *prev);
c77720b9 1197
c77720b9
MCC
1198 /*
1199 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1200 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1201 * to probe for the alternate address in case of failure
1202 */
66607706 1203 if (pci_dev_descr[devno].dev_id == PCI_DEVICE_ID_INTEL_I7_NOCORE && !pdev)
c77720b9
MCC
1204 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1205 PCI_DEVICE_ID_INTEL_I7_NOCORE_ALT, *prev);
d1fd4fb6 1206
c77720b9
MCC
1207 if (!pdev) {
1208 if (*prev) {
1209 *prev = pdev;
1210 return 0;
d1fd4fb6
MCC
1211 }
1212
310cbb72 1213 /*
c77720b9
MCC
1214 * Dev 3 function 2 only exists on chips with RDIMMs
1215 * so, it is ok to not found it
310cbb72 1216 */
66607706 1217 if ((pci_dev_descr[devno].dev == 3) && (pci_dev_descr[devno].func == 2)) {
c77720b9
MCC
1218 *prev = pdev;
1219 return 0;
1220 }
310cbb72 1221
c77720b9
MCC
1222 i7core_printk(KERN_ERR,
1223 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1224 pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1225 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
67166af4 1226
c77720b9
MCC
1227 /* End of list, leave */
1228 return -ENODEV;
1229 }
1230 bus = pdev->bus->number;
67166af4 1231
c77720b9
MCC
1232 if (bus == 0x3f)
1233 socket = 0;
1234 else
1235 socket = 255 - bus;
1236
66607706
MCC
1237 i7core_dev = get_i7core_dev(socket);
1238 if (!i7core_dev) {
1239 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1240 if (!i7core_dev)
1241 return -ENOMEM;
1242 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * N_DEVS,
1243 GFP_KERNEL);
1244 if (!i7core_dev->pdev)
1245 return -ENOMEM;
1246 i7core_dev->socket = socket;
1247 list_add_tail(&i7core_dev->list, &i7core_edac_list);
c77720b9 1248 }
67166af4 1249
66607706 1250 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1251 i7core_printk(KERN_ERR,
1252 "Duplicated device for "
1253 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1254 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1255 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1256 pci_dev_put(pdev);
1257 return -ENODEV;
1258 }
67166af4 1259
66607706 1260 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1261
1262 /* Sanity check */
66607706
MCC
1263 if (unlikely(PCI_SLOT(pdev->devfn) != pci_dev_descr[devno].dev ||
1264 PCI_FUNC(pdev->devfn) != pci_dev_descr[devno].func)) {
c77720b9
MCC
1265 i7core_printk(KERN_ERR,
1266 "Device PCI ID %04x:%04x "
1267 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
66607706 1268 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id,
c77720b9 1269 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
66607706 1270 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func);
c77720b9
MCC
1271 return -ENODEV;
1272 }
ef708b53 1273
c77720b9
MCC
1274 /* Be sure that the device is enabled */
1275 if (unlikely(pci_enable_device(pdev) < 0)) {
1276 i7core_printk(KERN_ERR,
1277 "Couldn't enable "
1278 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1279 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1280 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1281 return -ENODEV;
1282 }
ef708b53 1283
d4c27795
MCC
1284 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1285 socket, bus, pci_dev_descr[devno].dev,
1286 pci_dev_descr[devno].func,
1287 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
8f331907 1288
c77720b9 1289 *prev = pdev;
ef708b53 1290
c77720b9
MCC
1291 return 0;
1292}
a0c36a1f 1293
f4742949 1294static int i7core_get_devices(void)
c77720b9
MCC
1295{
1296 int i;
1297 struct pci_dev *pdev = NULL;
ef708b53 1298
c77720b9
MCC
1299 for (i = 0; i < N_DEVS; i++) {
1300 pdev = NULL;
1301 do {
1302 if (i7core_get_onedevice(&pdev, i) < 0) {
13d6e9b6 1303 i7core_put_all_devices();
c77720b9
MCC
1304 return -ENODEV;
1305 }
1306 } while (pdev);
1307 }
66607706 1308
ef708b53 1309 return 0;
ef708b53
MCC
1310}
1311
f4742949
MCC
/*
 * mci_bind_devs - file the socket's PCI devices into the pvt fast-access
 * tables.
 *
 * Dispatch by PCI slot: slot 3 -> pvt->pci_mcr[func] (MC registers),
 * slots 4..4+NUM_CHANS-1 -> pvt->pci_ch[channel][func] (per-channel
 * registers), slot 0 func 0 -> pvt->pci_noncore.  Anything else is out
 * of range and fails with -EINVAL.
 *
 * Also cross-links mci and i7core_dev, and sets pvt->is_registered when
 * dev 3 func 2 is present (that function only exists on RDIMM parts --
 * see the comment in i7core_get_onedevice()).
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < N_DEVS; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* dev 3.2 present => registered (RDIMM) configuration */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1361
442305b1
MCC
1362/****************************************************************************
1363 Error check routines
1364 ****************************************************************************/
f4742949 1365static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
b4e8f0b6
MCC
1366 int chan, int dimm, int add)
1367{
1368 char *msg;
1369 struct i7core_pvt *pvt = mci->pvt_info;
f4742949 1370 int row = pvt->csrow_map[chan][dimm], i;
b4e8f0b6
MCC
1371
1372 for (i = 0; i < add; i++) {
1373 msg = kasprintf(GFP_KERNEL, "Corrected error "
f4742949
MCC
1374 "(Socket=%d channel=%d dimm=%d)",
1375 pvt->i7core_dev->socket, chan, dimm);
b4e8f0b6
MCC
1376
1377 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1378 kfree (msg);
1379 }
1380}
1381
/*
 * i7core_rdimm_update_ce_count - fold freshly read per-dimm CE counters
 * into the running totals for one channel.
 *
 * @new0..@new2 are the raw counter values for dimms 0..2.  The deltas
 * against the last snapshot are accumulated; a negative delta means the
 * 15-bit hardware counter wrapped, so 0x7fff is added back (counter
 * width inferred from this correction -- the registers are 15 bits per
 * dimm).  On the very first call only the snapshot is primed.
 * Each nonzero delta is then forwarded to EDAC via
 * i7core_rdimm_update_csrow().
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 int chan, int new0, int new1, int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		/* negative delta => 15-bit counter wrapped since last read */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* updated the edac core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1423
/*
 * i7core_rdimm_check_mc_ecc_err - poll the RDIMM corrected-error
 * counter registers.
 *
 * Reads the six MC_COR_ECC_CNT registers from dev 3 func 2 (two 32-bit
 * registers per channel) and extracts the per-dimm counts.  The field
 * layout depends on how many dimms the channel carries: with 3 dimms
 * each register half holds one dimm's count; with 1-2 dimms the top and
 * bottom halves of each register are summed per dimm and dimm 2 is
 * always 0.  The extracted values are handed to
 * i7core_rdimm_update_ce_count() for delta accounting.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
			      &rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
			      &rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
			      &rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
			      &rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
			      &rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
			      &rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/* if the channel has 3 dimms */
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
				DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
				DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
442305b1
MCC
1462
1463/* This function is based on the device 3 function 4 registers as described on:
1464 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1465 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1466 * also available at:
1467 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1468 */
/*
 * i7core_udimm_check_mc_ecc_err - poll the UDIMM corrected-error
 * counters (dev 3 func 4 MC_TEST_ERR_RCV registers).
 *
 * Based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 *
 * Reads the raw per-dimm counts, accumulates the deltas against the
 * last snapshot (adding 0x7fff back on 15-bit counter wrap, like the
 * RDIMM path) and logs any new corrected errors.  On the first call
 * only the snapshot is primed.  Silently returns if func 4 is absent.
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Extract the raw per-dimm counts */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		/* negative delta => 15-bit counter wrapped */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
1522
8a2f118e
MCC
1523/*
1524 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1525 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1526 * Nehalem are defined as family 0x06, model 0x1a
1527 *
1528 * The MCA registers used here are the following ones:
8a2f118e 1529 * struct mce field MCA Register
f237fcf2
MCC
1530 * m->status MSR_IA32_MC8_STATUS
1531 * m->addr MSR_IA32_MC8_ADDR
1532 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1533 * In the case of Nehalem, the error information is masked at .status and .misc
1534 * fields
1535 */
d5381642
MCC
1536static void i7core_mce_output_error(struct mem_ctl_info *mci,
1537 struct mce *m)
1538{
b4e8f0b6 1539 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1540 char *type, *optype, *err, *msg;
8a2f118e 1541 unsigned long error = m->status & 0x1ff0000l;
a639539f 1542 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1543 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1544 u32 dimm = (m->misc >> 16) & 0x3;
1545 u32 channel = (m->misc >> 18) & 0x3;
1546 u32 syndrome = m->misc >> 32;
1547 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1548 int csrow;
8a2f118e 1549
c5d34528
MCC
1550 if (m->mcgstatus & 1)
1551 type = "FATAL";
1552 else
1553 type = "NON_FATAL";
1554
a639539f 1555 switch (optypenum) {
b990538a
MCC
1556 case 0:
1557 optype = "generic undef request";
1558 break;
1559 case 1:
1560 optype = "read error";
1561 break;
1562 case 2:
1563 optype = "write error";
1564 break;
1565 case 3:
1566 optype = "addr/cmd error";
1567 break;
1568 case 4:
1569 optype = "scrubbing error";
1570 break;
1571 default:
1572 optype = "reserved";
1573 break;
a639539f
MCC
1574 }
1575
8a2f118e
MCC
1576 switch (errnum) {
1577 case 16:
1578 err = "read ECC error";
1579 break;
1580 case 17:
1581 err = "RAS ECC error";
1582 break;
1583 case 18:
1584 err = "write parity error";
1585 break;
1586 case 19:
1587 err = "redundacy loss";
1588 break;
1589 case 20:
1590 err = "reserved";
1591 break;
1592 case 21:
1593 err = "memory range error";
1594 break;
1595 case 22:
1596 err = "RTID out of range";
1597 break;
1598 case 23:
1599 err = "address parity error";
1600 break;
1601 case 24:
1602 err = "byte enable parity error";
1603 break;
1604 default:
1605 err = "unknown";
d5381642 1606 }
d5381642 1607
f237fcf2 1608 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1609 msg = kasprintf(GFP_ATOMIC,
f4742949 1610 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1611 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1612 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1613 syndrome, core_err_cnt, (long long)m->status,
1614 (long long)m->misc, optype, err);
8a2f118e
MCC
1615
1616 debugf0("%s", msg);
d5381642 1617
f4742949 1618 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1619
d5381642 1620 /* Call the helper to output message */
b4e8f0b6
MCC
1621 if (m->mcgstatus & 1)
1622 edac_mc_handle_fbd_ue(mci, csrow, 0,
1623 0 /* FIXME: should be channel here */, msg);
f4742949 1624 else if (!pvt->is_registered)
b4e8f0b6
MCC
1625 edac_mc_handle_fbd_ce(mci, csrow,
1626 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1627
1628 kfree(msg);
d5381642
MCC
1629}
1630
87d1d272
MCC
/*
 * i7core_check_error	Retrieve and process errors reported by the
 *			hardware. Called by the Core module.
 *
 * Drains the MCE queue under pvt->mce_lock into a private copy so the
 * (potentially slow) decode runs outside the spinlock, then decodes
 * each entry and finally polls the corrected-error counter registers
 * for the current (UDIMM vs RDIMM) configuration.
 *
 * NOTE(review): if the GFP_ATOMIC kmalloc fails, the queued entries
 * are discarded (mce_count is reset regardless) -- presumably an
 * acceptable best-effort tradeoff; confirm.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m = NULL;
	unsigned long flags;

	/* Copy all mce errors into a temporary buffer */
	spin_lock_irqsave(&pvt->mce_lock, flags);
	if (pvt->mce_count) {
		m = kmalloc(sizeof(*m) * pvt->mce_count, GFP_ATOMIC);

		if (m) {
			count = pvt->mce_count;
			memcpy(m, &pvt->mce_entry, sizeof(*m) * count);
		}
		pvt->mce_count = 0;
	}

	spin_unlock_irqrestore(&pvt->mce_lock, flags);

	/* proccess mcelog errors */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &m[i]);

	kfree(m);

	/* check memory count errors */
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1669
d5381642
MCC
/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by i7core_check_error.
 *
 * Callback registered with edac_mce.  Returns 1 when the MCE was
 * consumed by this driver, 0 to let mcelog handle it.  Filters on:
 * memory-controller error signature in the low status bits, MCA bank 8
 * (the only bank this driver can decode -- see the layout comment
 * above i7core_mce_output_error), and the socket owning this mci.
 * Accepted entries are queued under mce_lock; fatal errors trigger an
 * immediate synchronous check.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long flags;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) {
		debugf0("mc%d: ignoring mce log for socket %d. "
			"Another mc should get it.\n",
			pvt->i7core_dev->socket,
			cpu_data(mce->cpu).phys_proc_id);
		return 0;
	}

	/* Queue the entry; drop silently if the ring is full */
	spin_lock_irqsave(&pvt->mce_lock, flags);
	if (pvt->mce_count < MCE_LOG_LEN) {
		memcpy(&pvt->mce_entry[pvt->mce_count], mce, sizeof(*mce));
		pvt->mce_count++;
	}
	spin_unlock_irqrestore(&pvt->mce_lock, flags);

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advice mcelog that the error were handled */
	return 1;
}
1716
f4742949
MCC
1717static int i7core_register_mci(struct i7core_dev *i7core_dev,
1718 int num_channels, int num_csrows)
a0c36a1f
MCC
1719{
1720 struct mem_ctl_info *mci;
1721 struct i7core_pvt *pvt;
ba6c5c62 1722 int csrow = 0;
f4742949 1723 int rc;
a0c36a1f 1724
a0c36a1f 1725 /* allocate a new MC control structure */
d4c27795
MCC
1726 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1727 i7core_dev->socket);
f4742949
MCC
1728 if (unlikely(!mci))
1729 return -ENOMEM;
a0c36a1f
MCC
1730
1731 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1732
f4742949
MCC
1733 /* record ptr to the generic device */
1734 mci->dev = &i7core_dev->pdev[0]->dev;
1735
a0c36a1f 1736 pvt = mci->pvt_info;
ef708b53 1737 memset(pvt, 0, sizeof(*pvt));
67166af4 1738
41fcb7fe
MCC
1739 /*
1740 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1741 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1742 * memory channels
1743 */
1744 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1745 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1746 mci->edac_cap = EDAC_FLAG_NONE;
1747 mci->mod_name = "i7core_edac.c";
1748 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1749 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1750 i7core_dev->socket);
1751 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1752 mci->ctl_page_to_phys = NULL;
194a40fe 1753 mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
87d1d272
MCC
1754 /* Set the function pointer to an actual operation function */
1755 mci->edac_check = i7core_check_error;
8f331907 1756
ef708b53 1757 /* Store pci devices at mci for faster access */
f4742949 1758 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1759 if (unlikely(rc < 0))
f4742949 1760 goto fail;
ef708b53
MCC
1761
1762 /* Get dimm basic config */
f4742949 1763 get_dimm_config(mci, &csrow);
ef708b53 1764
a0c36a1f 1765 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1766 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1767 debugf0("MC: " __FILE__
1768 ": %s(): failed edac_mc_add_mc()\n", __func__);
1769 /* FIXME: perhaps some code should go here that disables error
1770 * reporting if we just enabled it
1771 */
b7c76151
MCC
1772
1773 rc = -EINVAL;
f4742949 1774 goto fail;
a0c36a1f
MCC
1775 }
1776
1777 /* allocating generic PCI control info */
f4742949
MCC
1778 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1779 EDAC_MOD_STR);
41fcb7fe 1780 if (unlikely(!i7core_pci)) {
a0c36a1f
MCC
1781 printk(KERN_WARNING
1782 "%s(): Unable to create PCI control\n",
1783 __func__);
1784 printk(KERN_WARNING
1785 "%s(): PCI error report via EDAC not setup\n",
1786 __func__);
1787 }
1788
194a40fe 1789 /* Default error mask is any memory */
ef708b53 1790 pvt->inject.channel = 0;
194a40fe
MCC
1791 pvt->inject.dimm = -1;
1792 pvt->inject.rank = -1;
1793 pvt->inject.bank = -1;
1794 pvt->inject.page = -1;
1795 pvt->inject.col = -1;
1796
d5381642 1797 /* Registers on edac_mce in order to receive memory errors */
c5d34528 1798 pvt->edac_mce.priv = mci;
d5381642
MCC
1799 pvt->edac_mce.check_error = i7core_mce_check_error;
1800 spin_lock_init(&pvt->mce_lock);
1801
1802 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1803 if (unlikely(rc < 0)) {
d5381642
MCC
1804 debugf0("MC: " __FILE__
1805 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1806 }
1807
1808fail:
1809 edac_mc_free(mci);
1810 return rc;
1811}
1812
/*
 * i7core_probe	Probe for ONE instance of device to see if it is
 *		present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 *
 * All sockets are discovered and registered on the first matching PCI
 * device (dev_idx 0); later matches are rejected with -EINVAL.  The
 * whole sequence runs under i7core_edac_lock.  On any failure every
 * PCI reference taken so far is dropped via i7core_put_all_devices().
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int dev_idx = id->driver_data;
	int rc;
	struct i7core_dev *i7core_dev;

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(dev_idx >= 1))
		return -EINVAL;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);
	rc = i7core_get_devices();
	if (unlikely(rc < 0))
		goto fail0;

	/* Register one EDAC MC per discovered socket */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		int channels;
		int csrows;

		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
1865
/*
 * i7core_remove	destructor for one instance of device
 *
 * Releases the generic PCI control, then tears down every registered
 * MC instance: unregister from edac_mce, free the kasprintf()'d
 * ctl_name, free the mci and drop the socket's PCI references.
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;

	debugf0(__FILE__ ": %s()\n", __func__);

	if (i7core_pci)
		edac_pci_release_generic_ctl(i7core_pci);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
		if (mci) {
			struct i7core_pvt *pvt = mci->pvt_info;

			i7core_dev = pvt->i7core_dev;
			edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);
			edac_mc_free(mci);
			i7core_put_devices(i7core_dev);
		} else {
			i7core_printk(KERN_ERR,
				      "Couldn't find mci for socket %d\n",
				      i7core_dev->socket);
		}
	}
	mutex_unlock(&i7core_edac_lock);
}
1907
a0c36a1f
MCC
/* Export the PCI ID table so userspace tooling can autoload the module */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 * i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
1920
1921/*
1922 * i7core_init Module entry function
1923 * Try to initialize this module for its devices
1924 */
1925static int __init i7core_init(void)
1926{
1927 int pci_rc;
1928
1929 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1930
1931 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1932 opstate_init();
1933
bc2d7245
KM
1934 i7core_xeon_pci_fixup();
1935
a0c36a1f
MCC
1936 pci_rc = pci_register_driver(&i7core_driver);
1937
3ef288a9
MCC
1938 if (pci_rc >= 0)
1939 return 0;
1940
1941 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
1942 pci_rc);
1943
1944 return pci_rc;
a0c36a1f
MCC
1945}
1946
/*
 * i7core_exit()	Module exit function
 *			Unregister the driver
 *
 * pci_unregister_driver() invokes i7core_remove() for the bound
 * device, which performs the actual per-socket teardown.
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
}
1956
module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Read-only module parameter selecting the EDAC polling mode */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");