/* Intel i7 core/Nehalem Memory Controller kernel module
 *
 * This driver supports the memory controllers found on the Intel
 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
 * and Westmere-EP.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009-2010 by:
 *	 Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *	http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <asm/mce.h>
#include <asm/processor.h>
#include <asm/div64.h>

#include "edac_core.h"

/* Static vars */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);
static int probed;

static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by the BIOS.
 * We currently only find devices with up to 2 sockets. In order to support
 * more QPI (Quick Path Interconnect) buses, just increment this number.
 */
#define MAX_SOCKET_BUSES	2

/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0"
#define EDAC_MOD_STR       "i7core_edac"

/*
 * Debug macros
 */
#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)

/*
 * i7core Memory Controller Registers
 */

	/* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL	0x90
  #define MC_CFG_UNLOCK		0x02
  #define MC_CFG_LOCK		0x00

	/* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL	0x48
#define MC_STATUS	0x4c
#define MC_MAX_DOD	0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)		((r) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)		(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)		((r) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
#define MC_SSRCONTROL		0x48
  #define SSR_MODE_DISABLE	0x00
  #define SSR_MODE_ENABLE	0x01
  #define SSR_MODE_MASK		0x03

#define MC_SCRUB_CONTROL	0x4c
  #define STARTSCRUB		(1 << 24)
  #define SCRUBINTERVAL_MASK	0xffffff

#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

#define DIMM_TOP_COR_ERR(r)	(((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)	((r) & 0x7fff)

	/* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + (ch * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> (ch * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT	0x7c
  #define RANK_PRESENT_MASK	0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01

	/* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		((x & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK	((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)	(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK	((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)	(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK	((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)	(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK	3
  #define MC_DOD_NUMCOL(x)	((x) & MC_DOD_NUMCOL_MASK)

#define MC_RANK_PRESENT		0x7c

#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7

/*
 * i7core structs
 */

#define NUM_CHANS 3
#define MAX_DIMMS 3		/* Max DIMMS per channel */
#define MAX_MCR_FUNC  4
#define MAX_CHAN_FUNC 3

struct i7core_info {
	u32	mc_control;
	u32	mc_status;
	u32	max_dod;
	u32	ch_map;
};

struct i7core_inject {
	int	enable;

	u32	section;
	u32	type;
	u32	eccmask;

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
	bool	is_3dimms_present;
	bool	is_single_4rank;
	bool	has_4rank;
	u32	dimms;
};

struct pci_id_descr {
	int		dev;
	int		func;
	int		dev_id;
	int		optional;
};

struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;
};

struct i7core_dev {
	struct list_head	list;
	u8			socket;
	struct pci_dev		**pdev;
	int			n_devs;
	struct mem_ctl_info	*mci;
};

struct i7core_pvt {
	struct pci_dev	*pci_noncore;
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_dev *i7core_dev;

	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_CHANS];

	int		ce_count_available;

	/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
	/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	bool		is_registered, enable_scrub;

	/* Fifo double buffers */
	struct mce	mce_entry[MCE_LOG_LEN];
	struct mce	mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned	mce_in, mce_out;

	/* Count of errors that were lost because the fifo overran */
	unsigned	mce_overrun;

	/* DCLK Frequency used for computing scrub rate */
	int		dclk_freq;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};

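/*
 * A note on the mce fifo above: it is a single-producer, single-consumer
 * ring. i7core_mce_check_error(), called at NMI time, stores new events
 * at index mce_out, while the polling routine i7core_check_error() drains
 * entries from index mce_in into the mce_outentry[] shadow copy before
 * parsing them, which reduces the window in which a burst of new events
 * could overrun the ring.
 */
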
#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)

static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
		/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one.
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },

};

static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

	/*
	 * This is the PCI device that has an alternate address on some
	 * processors like Core i7 860.
	 */
	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};

static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
		/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

		/* Generic Non-core registers */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },

};

#define PCI_ID_TABLE_ENTRY(A) { .descr = A, .n_devs = ARRAY_SIZE(A) }
static const struct pci_id_table pci_dev_table[] = {
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
	{0,}			/* 0 terminated list. */
};

/*
 *	pci_device_id	table for which devices we are looking for
 */
static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
};

/****************************************************************************
			Ancillary status routines
 ****************************************************************************/

	/* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

	/* MC_STATUS bits */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << ch))

	/* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
	return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
	static int ranks[4] = { 1, 2, 4, -EINVAL };

	return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
	static int banks[4] = { 4, 8, 16, -EINVAL };

	return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
	static int rows[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
	static int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};
	return cols[col & 0x3];
}

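/*
 * Illustrative decode using the MC_DOD macros and the helpers above: a
 * MC_DOD_CH_DIMMx value of 0x2a8 (bits 9, 7, 5 and 3 set) has
 * DIMM_PRESENT = 1, MC_DOD_NUMBANK = 1, MC_DOD_NUMRANK = 1,
 * MC_DOD_NUMROW = 2 and MC_DOD_NUMCOL = 0, i.e. a present DIMM with
 * numbank(1) = 8 banks, numrank(1) = 2 ranks, numrow(2) = 1 << 14 rows
 * and numcol(0) = 1 << 10 columns.
 */
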
static struct i7core_dev *get_i7core_dev(u8 socket)
{
	struct i7core_dev *i7core_dev;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		if (i7core_dev->socket == socket)
			return i7core_dev;
	}

	return NULL;
}

static struct i7core_dev *alloc_i7core_dev(u8 socket,
					   const struct pci_id_table *table)
{
	struct i7core_dev *i7core_dev;

	i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
	if (!i7core_dev)
		return NULL;

	i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
				   GFP_KERNEL);
	if (!i7core_dev->pdev) {
		kfree(i7core_dev);
		return NULL;
	}

	i7core_dev->socket = socket;
	i7core_dev->n_devs = table->n_devs;
	list_add_tail(&i7core_dev->list, &i7core_edac_list);

	return i7core_dev;
}

static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
	list_del(&i7core_dev->list);
	kfree(i7core_dev->pdev);
	kfree(i7core_dev);
}

/****************************************************************************
			Memory check routines
 ****************************************************************************/

static int get_dimm_config(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, j;
	enum edac_type mode;
	enum mem_type mtype;
	struct dimm_info *dimm;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];
	if (!pdev)
		return -ENODEV;

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
		pvt->info.max_dod, pvt->info.ch_map);

	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
		if (ECCx8(pvt))
			mode = EDAC_S8ECD8ED;
		else
			mode = EDAC_S4ECD4ED;
	} else {
		debugf0("ECC disabled\n");
		mode = EDAC_NONE;
	}

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
		"x%x x 0x%x\n",
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4),
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));

	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		if (!pvt->pci_ch[i][0])
			continue;

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
			continue;
		}
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);
			continue;
		}

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].is_3dimms_present = true;

		if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].is_single_4rank = true;

		if (data & QUAD_RANK_PRESENT)
			pvt->channel[i].has_4rank = true;

		if (data & REGISTERED_DIMM)
			mtype = MEM_RDDR3;
		else
			mtype = MEM_DDR3;

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%s%s%s%cDIMMs\n",
			i,
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			data,
			pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
			pvt->channel[i].is_single_4rank ? "SINGLE_4R " : "",
			pvt->channel[i].has_4rank ? "HAS_4R " : "",
			(data & REGISTERED_DIMM) ? 'R' : 'U');

		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;
			u32 size, npages;

			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;

			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
					     i, j, 0);
			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			/* DDR3 has 8 I/O banks */
			size = (rows * cols * banks * ranks) >> (20 - 3);
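			/*
			 * Worked example for the computation above: 1 << 14
			 * rows, 1 << 10 cols, 8 banks and 2 ranks give 2^28
			 * addressable 64-bit (8-byte) words, hence the
			 * (20 - 3) shift: size = 2^28 >> 17 = 2048 MiB.
			 */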

			debugf0("\tdimm %d %d MiB offset: %x, "
				"bank: %d, rank: %d, row: %#x, col: %#x\n",
				j, size,
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

			npages = MiB_TO_PAGES(size);

			dimm->nr_pages = npages;

			switch (banks) {
			case 4:
				dimm->dtype = DEV_X4;
				break;
			case 8:
				dimm->dtype = DEV_X8;
				break;
			case 16:
				dimm->dtype = DEV_X16;
				break;
			default:
				dimm->dtype = DEV_UNKNOWN;
			}

			snprintf(dimm->label, sizeof(dimm->label),
				 "CPU#%uChannel#%u_DIMM#%u",
				 pvt->i7core_dev->socket, i, j);
			dimm->grain = 8;
			dimm->edac_mode = mode;
			dimm->mtype = mtype;
		}

		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			debugf1("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				(value[j] & ((1 << 24) - 1)));
	}

	return 0;
}

/****************************************************************************
			Error insertion routines
 ****************************************************************************/

/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error injection
   on more than one channel.
   Also, since a change at an inject parameter will be applied only at enable,
   we're disabling error injection on all write calls to the sysfs nodes that
   control the error code injection.
 */
static int disable_inject(const struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;

	pvt->inject.enable = 0;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return -ENODEV;

	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
				MC_CHANNEL_ERROR_INJECT, 0);

	return 0;
}

/*
 * i7core inject inject.section
 *
 *	accept and store error injection inject.section value
 *	bit 0 - refers to the lower 32-byte half cacheline
 *	bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 3))
		return -EIO;

	pvt->inject.section = (u32) value;
	return count;
}

static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.section);
}

/*
 * i7core inject.type
 *
 *	accept and store error injection inject.type value
 *	bit 0 - repeat enable - Enable error repetition
 *	bit 1 - inject ECC error
 *	bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
					const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 7))
		return -EIO;

	pvt->inject.type = (u32) value;
	return count;
}

static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
				       char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.type);
}

/*
 * i7core_inject_inject.eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if (rc < 0)
		return -EIO;

	pvt->inject.eccmask = (u32) value;
	return count;
}

static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}

/*
 * i7core_addrmatch
 *
 * The sysfs nodes declared below select which address fields (channel,
 * dimm, rank, bank, page and col) must match for an error to be injected;
 * writing "any" to a node removes that field from the matching criteria.
 */

#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
	struct mem_ctl_info *mci,				\
	const char *data, size_t count)				\
{								\
	struct i7core_pvt *pvt;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtoul(data, 10, &value);		\
		if ((rc < 0) || (value >= limit))		\
			return -EIO;				\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
	struct mem_ctl_info *mci,				\
	char *data)						\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}

#define ATTR_ADDR_MATCH(param)				\
	{						\
		.attr = {				\
			.name = #param,			\
			.mode = (S_IRUGO | S_IWUSR)	\
		},					\
		.show  = i7core_inject_show_##param,	\
		.store = i7core_inject_store_##param,	\
	}

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);

static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
{
	u32 read;
	int count;

	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val);

	for (count = 0; count < 10; count++) {
		if (count)
			msleep(100);
		pci_write_config_dword(dev, where, val);
		pci_read_config_dword(dev, where, &read);

		if (read == val)
			return 0;
	}

	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
		"write=%08x. Read=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val, read);

	return -EINVAL;
}

/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criteria for error injection.
 *
 * It should be noticed that the error will only happen after a write operation
 * on a memory that matches the condition. If REPEAT_EN is not enabled in the
 * inject mask, then it will produce just one error. Otherwise, it will repeat
 * until the injectmask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 *	  is reliable enough to check if the MC is using the
 *	  three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
					  const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int  rc;
	long enable;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	rc = strict_strtoul(data, 10, &enable);
	if ((rc < 0))
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		disable_inject(mci);
		return count;
	}

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1LL) << 36;
	}

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
		else
			mask |= (pvt->inject.rank & 0x3LL) << 34;
	}

	/* Sets pvt->inject.bank mask */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;
	else
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;
	else
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.col mask */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;
	else
		mask |= (pvt->inject.col & 0x3fff);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */

	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);
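	/*
	 * Worked example: inject.type = 5 (REPEAT_EN plus address parity)
	 * and inject.section = 3 (both cacheline halves) yield
	 * injectmask = 1 | (3 << 1) | (4 << 2) = 0x17.
	 */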

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests.
	 * Without writing 8 to this register, errors aren't injected;
	 * it is not clear why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);

	return count;
}

static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					 char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%018x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}

#define DECLARE_COUNTER(param)				\
static ssize_t i7core_show_counter_##param(		\
	struct mem_ctl_info *mci,			\
	char *data)					\
{							\
	struct i7core_pvt *pvt = mci->pvt_info;		\
							\
	debugf1("%s()\n", __func__);			\
	if (!pvt->ce_count_available || (pvt->is_registered)) \
		return sprintf(data, "data unavailable\n");   \
	return sprintf(data, "%lu\n",			\
			pvt->udimm_ce_count[param]);	\
}

#define ATTR_COUNTER(param)				\
	{						\
		.attr = {				\
			.name = __stringify(udimm##param),	\
			.mode = (S_IRUGO | S_IWUSR)	\
		},					\
		.show  = i7core_show_counter_##param	\
	}

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);

/*
 * Sysfs struct
 */

static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ } /* End of list */
};

static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};

static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	{ .attr = { .name = NULL } }
};

static const struct mcidev_sysfs_group i7core_udimm_counters = {
	.name  = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};

static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ }	/* End of list */
};

static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.grp = &i7core_udimm_counters,
	},
	{ }	/* End of list */
};

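/*
 * Example of using the error injection interface exposed by the attributes
 * above (sysfs paths are illustrative, assuming the first controller is mc0):
 *
 *	echo 2 > /sys/devices/system/edac/mc/mc0/inject_type
 *	echo 1 > /sys/devices/system/edac/mc/mc0/inject_eccmask
 *	echo 0 > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *	echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/page
 *	echo 1 > /sys/devices/system/edac/mc/mc0/inject_enable
 *
 * This would arm a single correctable ECC error (one flipped ECC bit) on
 * the next write that hits channel 0, with the page left unconstrained.
 */
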
/****************************************************************************
	Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *	i7core_put_all_devices	'put' all the devices that we have
 *				reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
	int i;

	debugf0(__FILE__ ": %s()\n", __func__);
	for (i = 0; i < i7core_dev->n_devs; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;
		debugf0("Removing dev %02x:%02x.%d\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
		pci_dev_put(pdev);
	}
}

static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		i7core_put_devices(i7core_dev);
		free_i7core_dev(i7core_dev);
	}
}

static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
{
	struct pci_dev *pdev = NULL;
	int i;

	/*
	 * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
	 * aren't announced by acpi. So, we need to use a legacy scan probing
	 * to detect them.
	 */
	while (table && table->descr) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
		if (unlikely(!pdev)) {
			for (i = 0; i < MAX_SOCKET_BUSES; i++)
				pcibios_scan_specific_bus(255-i);
		}
		pci_dev_put(pdev);
		table++;
	}
}

static unsigned i7core_pci_lastbus(void)
{
	int last_bus = 0, bus;
	struct pci_bus *b = NULL;

	while ((b = pci_find_next_bus(b)) != NULL) {
		bus = b->number;
		debugf0("Found bus %d\n", bus);
		if (bus > last_bus)
			last_bus = bus;
	}

	debugf0("Last bus %d\n", last_bus);

	return last_bus;
}

/*
 *	i7core_get_all_devices	Find and perform 'get' operation on the MCH's
 *			device/functions we want to reference for this driver
 *
 *			Need to 'get' device 16 func 1 and func 2
 */
static int i7core_get_onedevice(struct pci_dev **prev,
				const struct pci_id_table *table,
				const unsigned devno,
				const unsigned last_bus)
{
	struct i7core_dev *i7core_dev;
	const struct pci_id_descr *dev_descr = &table->descr[devno];

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core regs
	 * are at addr 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure.
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	socket = last_bus - bus;
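	/*
	 * The non-core buses are numbered downwards from the last PCI bus
	 * (e.g. 255 for the first socket, 254 for the second - see
	 * i7core_xeon_pci_fixup()), so the distance from the last bus
	 * gives the socket number.
	 */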

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = alloc_i7core_dev(socket, table);
		if (!i7core_dev) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	/*
	 * As stated on drivers/pci/search.c, the reference count for
	 * @from is always decremented if it is not %NULL. So, as we need
	 * to get all devices up to NULL, we need to do a get for the device.
	 */
	pci_dev_get(pdev);

	*prev = pdev;

	return 0;
}

static int i7core_get_all_devices(void)
{
	int i, rc, last_bus;
	struct pci_dev *pdev = NULL;
	const struct pci_id_table *table = pci_dev_table;

	last_bus = i7core_pci_lastbus();

	while (table && table->descr) {
		for (i = 0; i < table->n_devs; i++) {
			pdev = NULL;
			do {
				rc = i7core_get_onedevice(&pdev, table, i,
							  last_bus);
				if (rc < 0) {
					if (i == 0) {
						i = table->n_devs;
						break;
					}
					i7core_put_all_devices();
					return -ENODEV;
				}
			} while (pdev);
		}
		table++;
	}

	return 0;
}

static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;
	char *family;

	pvt->is_registered = false;
	pvt->enable_scrub = false;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func) {
			pvt->pci_noncore = pdev;

			/* Detect the processor family */
			switch (pdev->device) {
			case PCI_DEVICE_ID_INTEL_I7_NONCORE:
				family = "Xeon 35xx/ i7core";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
				family = "i7-800/i5-700";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
				family = "Xeon 34xx";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
				family = "Xeon 55xx";
				pvt->enable_scrub = true;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
				family = "Xeon 56xx / i7-900";
				pvt->enable_scrub = true;
				break;
			default:
				family = "unknown";
				pvt->enable_scrub = false;
			}
			debugf0("Detected a processor type %s\n", family);
		} else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = true;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}

/****************************************************************************
			Error check routines
 ****************************************************************************/
static void i7core_rdimm_update_errcount(struct mem_ctl_info *mci,
					 const int chan,
					 const int dimm,
					 const int add)
{
	int i;

	for (i = 0; i < add; i++) {
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
				     chan, dimm, -1, "error", "", NULL);
	}
}

static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 const int chan,
					 const int new0,
					 const int new1,
					 const int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
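	/*
	 * The hardware counters are 15 bits wide (the MC_COR_ECC_CNT
	 * fields are masked with 0x7fff), so a new reading smaller than
	 * the previous one means the counter wrapped; adding 0x7fff back
	 * below recovers the true delta.
	 */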
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* Update the edac core */
	if (add0 != 0)
		i7core_rdimm_update_errcount(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_errcount(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_errcount(mci, chan, 2, add2);
}

b4e8f0b6
MCC
1514{
1515 struct i7core_pvt *pvt = mci->pvt_info;
1516 u32 rcv[3][2];
1517 int i, new0, new1, new2;
1518
1519 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
f4742949 1520 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
b4e8f0b6 1521 &rcv[0][0]);
f4742949 1522 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
b4e8f0b6 1523 &rcv[0][1]);
f4742949 1524 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
b4e8f0b6 1525 &rcv[1][0]);
f4742949 1526 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
b4e8f0b6 1527 &rcv[1][1]);
f4742949 1528 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
b4e8f0b6 1529 &rcv[2][0]);
f4742949 1530 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
b4e8f0b6
MCC
1531 &rcv[2][1]);
1532 for (i = 0 ; i < 3; i++) {
1533 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1534 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1535 /*if the channel has 3 dimms*/
f4742949 1536 if (pvt->channel[i].dimms > 2) {
b4e8f0b6
MCC
1537 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1538 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1539 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1540 } else {
1541 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1542 DIMM_BOT_COR_ERR(rcv[i][0]);
1543 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1544 DIMM_BOT_COR_ERR(rcv[i][1]);
1545 new2 = 0;
1546 }
1547
f4742949 1548 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
b4e8f0b6
MCC
1549 }
1550}
442305b1
MCC
1551
/* This function is based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}

/*
 * According to tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
 * Architectures Software Developer's Manual Volume 3B.
 * Nehalem is defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field	MCA Register
 *     m->status	MSR_IA32_MC8_STATUS
 *     m->addr		MSR_IA32_MC8_ADDR
 *     m->misc		MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is encoded in the .status
 * and .misc fields.
 */
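/*
 * Illustrative decode of the .misc layout used below: m->misc = 0x00050000
 * (bits 16 and 18 set) gives dimm = (m->misc >> 16) & 0x3 = 1 and
 * channel = (m->misc >> 18) & 0x3 = 1; the ECC syndrome is in the upper
 * 32 bits.
 */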
static void i7core_mce_output_error(struct mem_ctl_info *mci,
				    const struct mce *m)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	char *type, *optype, *err, msg[80];
	enum hw_event_mc_err_type tp_event;
	unsigned long error = m->status & 0x1ff0000l;
	bool uncorrected_error = m->mcgstatus & 1ll << 61;
	bool ripv = m->mcgstatus & 1;
	u32 optypenum = (m->status >> 4) & 0x07;
	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
	u32 dimm = (m->misc >> 16) & 0x3;
	u32 channel = (m->misc >> 18) & 0x3;
	u32 syndrome = m->misc >> 32;
	u32 errnum = find_first_bit(&error, 32);

	if (uncorrected_error) {
		if (ripv) {
			type = "FATAL";
			tp_event = HW_EVENT_ERR_FATAL;
		} else {
			type = "NON_FATAL";
			tp_event = HW_EVENT_ERR_UNCORRECTED;
		}
	} else {
		type = "CORRECTED";
		tp_event = HW_EVENT_ERR_CORRECTED;
	}

	switch (optypenum) {
	case 0:
		optype = "generic undef request";
		break;
	case 1:
		optype = "read error";
		break;
	case 2:
		optype = "write error";
		break;
	case 3:
		optype = "addr/cmd error";
		break;
	case 4:
		optype = "scrubbing error";
		break;
	default:
		optype = "reserved";
		break;
	}

	switch (errnum) {
	case 16:
		err = "read ECC error";
		break;
	case 17:
		err = "RAS ECC error";
		break;
	case 18:
		err = "write parity error";
		break;
	case 19:
		err = "redundancy loss";
		break;
	case 20:
		err = "reserved";
		break;
	case 21:
		err = "memory range error";
		break;
	case 22:
		err = "RTID out of range";
		break;
	case 23:
		err = "address parity error";
		break;
	case 24:
		err = "byte enable parity error";
		break;
	default:
		err = "unknown";
	}

	snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype);

	/*
	 * Call the helper to output message
	 * FIXME: what to do if core_err_cnt > 1? Currently, it generates
	 * only one event
	 */
	if (uncorrected_error || !pvt->is_registered)
		edac_mc_handle_error(tp_event, mci,
				     m->addr >> PAGE_SHIFT,
				     m->addr & ~PAGE_MASK,
				     syndrome,
				     channel, dimm, -1,
				     err, msg, m);
}

/*
 * i7core_check_error	Retrieve and process errors reported by the
 *			hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer.
	 * We use double buffering here, to reduce the risk of
	 * losing an error.
	 */
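	/*
	 * Index arithmetic, worked through (illustrative; assumes
	 * MCE_LOG_LEN is 32, its historical value): with mce_in = 30 and
	 * mce_out = 2, count = (2 + 32 - 30) % 32 = 4, and the copy below
	 * wraps: 2 entries from slot 30, then 2 more from slot 0.
	 */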
	smp_rmb();
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}

/*
 * i7core_mce_check_error	Replicates the mcelog routine to get errors.
 *				This routine simply queues mcelog errors and
 *				returns; the error itself is handled later by
 *				i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 */
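/*
 * A note on the synchronization scheme (as read from the code, not from a
 * stated design document): the mce_entry ring is a lock-free
 * single-producer (this NMI-time handler, advancing mce_out) /
 * single-consumer (i7core_check_error, advancing mce_in) buffer; the
 * smp_wmb()/smp_rmb() pairs order the payload copies against the index
 * updates, so no lock is ever taken at NMI time.
 */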
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
				  void *data)
{
	struct mce *mce = (struct mce *)data;
	struct i7core_dev *i7_dev;
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;

	i7_dev = get_i7core_dev(mce->socketid);
	if (!i7_dev)
		return NOTIFY_BAD;

	mci = i7_dev->mci;
	pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return NOTIFY_DONE;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return NOTIFY_DONE;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (mce->socketid != pvt->i7core_dev->socket)
		return NOTIFY_DONE;
#endif

	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return NOTIFY_DONE;
	}

	/* Copy the memory error to the ring buffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the errors were handled */
	return NOTIFY_STOP;
}

static struct notifier_block i7_mce_dec = {
	.notifier_call = i7core_mce_check_error,
};

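/*
 * This mirrors the SMBIOS "Memory Device" (Type 17) structure layout,
 * which is what DMI_ENTRY_MEM_DEVICE denotes; decode_dclk() below only
 * consults the length, size, speed and conf_mem_clk_speed fields.
 */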
struct memdev_dmi_entry {
	u8 type;
	u8 length;
	u16 handle;
	u16 phys_mem_array_handle;
	u16 mem_err_info_handle;
	u16 total_width;
	u16 data_width;
	u16 size;
	u8 form;
	u8 device_set;
	u8 device_locator;
	u8 bank_locator;
	u8 memory_type;
	u16 type_detail;
	u16 speed;
	u8 manufacturer;
	u8 serial_number;
	u8 asset_tag;
	u8 part_number;
	u8 attributes;
	u32 extended_size;
	u16 conf_mem_clk_speed;
} __attribute__((__packed__));

/*
 * Decode the DRAM Clock Frequency; be paranoid: make sure that all
 * memory devices show the same speed, and if they don't then consider
 * all speeds to be invalid.
 */
static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
{
	int *dclk_freq = _dclk_freq;
	u16 dmi_mem_clk_speed;

	if (*dclk_freq == -1)
		return;

	if (dh->type == DMI_ENTRY_MEM_DEVICE) {
		struct memdev_dmi_entry *memdev_dmi_entry =
			(struct memdev_dmi_entry *)dh;
		unsigned long conf_mem_clk_speed_offset =
			(unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
			(unsigned long)&memdev_dmi_entry->type;
		unsigned long speed_offset =
			(unsigned long)&memdev_dmi_entry->speed -
			(unsigned long)&memdev_dmi_entry->type;

		/* Check that a DIMM is present */
		if (memdev_dmi_entry->size == 0)
			return;

		/*
		 * Pick the configured speed if it's available, otherwise
		 * pick the DIMM speed, or we don't have a speed.
		 */
		if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
			dmi_mem_clk_speed =
				memdev_dmi_entry->conf_mem_clk_speed;
		} else if (memdev_dmi_entry->length > speed_offset) {
			dmi_mem_clk_speed = memdev_dmi_entry->speed;
		} else {
			*dclk_freq = -1;
			return;
		}

		if (*dclk_freq == 0) {
			/* First pass, speed was 0 */
			if (dmi_mem_clk_speed > 0) {
				/* Set speed if a valid speed is read */
				*dclk_freq = dmi_mem_clk_speed;
			} else {
				/* Otherwise we don't have a valid speed */
				*dclk_freq = -1;
			}
		} else if (*dclk_freq > 0 &&
			   *dclk_freq != dmi_mem_clk_speed) {
			/*
			 * If we have a speed, check that all DIMMs are the
			 * same speed, otherwise set the speed as invalid.
			 */
			*dclk_freq = -1;
		}
	}
}

/*
 * The default DCLK frequency is used as a fallback if we
 * fail to find anything reliable in the DMI. The value
 * is taken straight from the datasheet.
 */
#define DEFAULT_DCLK_FREQ 800

static int get_dclk_freq(void)
{
	int dclk_freq = 0;

	dmi_walk(decode_dclk, (void *)&dclk_freq);

	if (dclk_freq < 1)
		return DEFAULT_DCLK_FREQ;

	return dclk_freq;
}

/*
 * set_sdram_scrub_rate		This routine sets the byte/sec bandwidth
 *				scrub rate in hardware, according to the
 *				SCRUBINTERVAL formula found in the datasheet.
 */
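/*
 * Worked example of the formula below (illustrative numbers, assuming the
 * DEFAULT_DCLK_FREQ of 800 MHz): requesting new_bw = 5120000 bytes/sec
 * yields
 *   scrub_interval = 800 * 64 * 1000000 / 5120000 = 10000
 * which is the value programmed into the SCRUBINTERVAL field.
 */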
static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	u32 dw_scrub;
	u32 dw_ssr;

	/* Get data from the MC register, function 2 */
	pdev = pvt->pci_mcr[2];
	if (!pdev)
		return -ENODEV;

	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);

	if (new_bw == 0) {
		/* Prepare to disable patrol scrub */
		dw_scrub &= ~STARTSCRUB;
		/* Stop the patrol scrub engine */
		write_and_test(pdev, MC_SCRUB_CONTROL,
			       dw_scrub & ~SCRUBINTERVAL_MASK);

		/* Get current status of scrub rate and set bit to disable */
		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
		dw_ssr &= ~SSR_MODE_MASK;
		dw_ssr |= SSR_MODE_DISABLE;
	} else {
		const int cache_line_size = 64;
		const u32 freq_dclk_mhz = pvt->dclk_freq;
		unsigned long long scrub_interval;
		/*
		 * Translate the desired scrub rate to a register value and
		 * program the corresponding register value.
		 */
		scrub_interval = (unsigned long long)freq_dclk_mhz *
			cache_line_size * 1000000;
		do_div(scrub_interval, new_bw);

		if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
			return -EINVAL;

		dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;

		/* Start the patrol scrub engine */
		pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
				       STARTSCRUB | dw_scrub);

		/* Get current status of scrub rate and set bit to enable */
		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
		dw_ssr &= ~SSR_MODE_MASK;
		dw_ssr |= SSR_MODE_ENABLE;
	}
	/* Disable or enable scrubbing */
	pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);

	return new_bw;
}

/*
 * get_sdram_scrub_rate		This routine converts the current scrub rate
 *				value into a byte/sec bandwidth, according to
 *				the SCRUBINTERVAL formula found in the
 *				datasheet.
 */
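/*
 * Continuing the example above (again assuming an 800 MHz DCLK): a
 * SCRUBINTERVAL value of 10000 reads back as
 *   scrub_rate = 800 * 1000000 * 64 / 10000 = 5120000 bytes/sec.
 */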
static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	const u32 cache_line_size = 64;
	const u32 freq_dclk_mhz = pvt->dclk_freq;
	unsigned long long scrub_rate;
	u32 scrubval;

	/* Get data from the MC register, function 2 */
	pdev = pvt->pci_mcr[2];
	if (!pdev)
		return -ENODEV;

	/* Get current scrub control data */
	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);

	/* Mask the highest 8 bits to 0 */
	scrubval &= SCRUBINTERVAL_MASK;
	if (!scrubval)
		return 0;

	/* Calculate scrub rate value into byte/sec bandwidth */
	scrub_rate = (unsigned long long)freq_dclk_mhz *
		1000000 * cache_line_size;
	do_div(scrub_rate, scrubval);
	return (int)scrub_rate;
}

static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 pci_lock;

	/* Unlock writes to pci registers */
	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
	pci_lock &= ~0x3;
	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
			       pci_lock | MC_CFG_UNLOCK);

	mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
	mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
}

static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 pci_lock;

	/* Lock writes to pci registers */
	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
	pci_lock &= ~0x3;
	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
			       pci_lock | MC_CFG_LOCK);
}

static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
{
	pvt->i7core_pci = edac_pci_create_generic_ctl(
						&pvt->i7core_dev->pdev[0]->dev,
						EDAC_MOD_STR);
	if (unlikely(!pvt->i7core_pci))
		i7core_printk(KERN_WARNING,
			      "Unable to setup PCI error report via EDAC\n");
}

static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
{
	if (likely(pvt->i7core_pci))
		edac_pci_release_generic_ctl(pvt->i7core_pci);
	else
		i7core_printk(KERN_ERR,
			      "Couldn't find mem_ctl_info for socket %d\n",
			      pvt->i7core_dev->socket);
	pvt->i7core_pci = NULL;
}

static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci = i7core_dev->mci;
	struct i7core_pvt *pvt;

	if (unlikely(!mci || !mci->pvt_info)) {
		debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
			__func__, &i7core_dev->pdev[0]->dev);

		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
		return;
	}

	pvt = mci->pvt_info;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	/* Disable scrubrate setting */
	if (pvt->enable_scrub)
		disable_sdram_scrub_setting(mci);

	mce_unregister_decode_chain(&i7_mce_dec);

	/* Disable EDAC polling */
	i7core_pci_ctl_release(pvt);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->dev);

	debugf1("%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
}

static int i7core_register_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int rc;
	struct edac_mc_layer layers[2];

	/* allocate a new MC control structure */

	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = NUM_CHANS;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = MAX_DIMMS;
	layers[1].is_virt_csrow = true;
	mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
			    sizeof(*pvt));
	if (unlikely(!mci))
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels
	 */
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))
		goto fail0;

	if (pvt->is_registered)
		mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
	else
		mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;

	/* Get dimm basic config */
	get_dimm_config(mci);
	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* Enable scrubrate setting */
	if (pvt->enable_scrub)
		enable_sdram_scrub_setting(mci);

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

		rc = -EINVAL;
		goto fail0;
	}

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	/* allocating generic PCI control info */
	i7core_pci_ctl_create(pvt);

	/* DCLK for scrub rate setting */
	pvt->dclk_freq = get_dclk_freq();

	mce_register_decode_chain(&i7_mce_dec);

	return 0;

fail0:
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
	return rc;
}

/*
 * i7core_probe	Probe for ONE instance of the device, to see if it is
 *		present.
 * return:
 *	0 for FOUND a device
 *	< 0 for error code
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int rc, count = 0;
	struct i7core_dev *i7core_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&i7core_edac_lock);
		return -ENODEV;
	}
	probed++;

	rc = i7core_get_all_devices();
	if (unlikely(rc < 0))
		goto fail0;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		count++;
		rc = i7core_register_mci(i7core_dev);
		if (unlikely(rc < 0))
			goto fail1;
	}

	/*
	 * Nehalem-EX uses a different memory controller. However, as the
	 * memory controller is not visible on some Nehalem/Nehalem-EP, we
	 * need to indirectly probe via an X58 PCI device. The same devices
	 * are found on (some) Nehalem-EX. So, on those machines, the
	 * probe routine needs to return -ENODEV, as the actual Memory
	 * Controller registers won't be detected.
	 */
	if (!count) {
		rc = -ENODEV;
		goto fail1;
	}

	i7core_printk(KERN_INFO,
		      "Driver loaded, %d memory controller(s) found.\n",
		      count);

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}

/*
 * i7core_remove	destructor for one instance of device
 *
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct i7core_dev *i7core_dev;

	debugf0(__FILE__ ": %s()\n", __func__);

	/*
	 * There is a problem here: the pdev value for removal will be wrong,
	 * since it will point to the X58 register used to detect that the
	 * machine is a Nehalem or upper design. However, due to the way
	 * several PCI devices are grouped together to provide MC
	 * functionality, we need to use a different method for releasing
	 * the devices
	 */
	mutex_lock(&i7core_edac_lock);

	if (unlikely(!probed)) {
		mutex_unlock(&i7core_edac_lock);
		return;
	}

	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	/* Release PCI resources */
	i7core_put_all_devices();

	probed--;

	mutex_unlock(&i7core_edac_lock);
}

MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 * i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name = "i7core_edac",
	.probe = i7core_probe,
	.remove = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};

/*
 * i7core_init		Module entry function
 *			Try to initialize this module for its devices
 */
static int __init i7core_init(void)
{
	int pci_rc;

	debugf2("MC: " __FILE__ ": %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	if (use_pci_fixup)
		i7core_xeon_pci_fixup(pci_dev_table);

	pci_rc = pci_register_driver(&i7core_driver);

	if (pci_rc >= 0)
		return 0;

	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
		      pci_rc);

	return pci_rc;
}

/*
 * i7core_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
}

module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");