i7core_edac: Be sure that the edac pci handler will be properly released
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / drivers / edac / i7core_edac.c
CommitLineData
52707f91
MCC
1/* Intel i7 core/Nehalem Memory Controller kernel module
2 *
3 * This driver supports the memory controllers found on the Intel
4 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6 * and Westmere-EP.
a0c36a1f
MCC
7 *
8 * This file may be distributed under the terms of the
9 * GNU General Public License version 2 only.
10 *
52707f91 11 * Copyright (c) 2009-2010 by:
a0c36a1f
MCC
12 * Mauro Carvalho Chehab <mchehab@redhat.com>
13 *
14 * Red Hat Inc. http://www.redhat.com
15 *
16 * Forked and adapted from the i5400_edac driver
17 *
18 * Based on the following public Intel datasheets:
19 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20 * Datasheet, Volume 2:
21 * http://download.intel.com/design/processor/datashts/320835.pdf
22 * Intel Xeon Processor 5500 Series Datasheet Volume 2
23 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24 * also available at:
25 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26 */
27
a0c36a1f
MCC
28#include <linux/module.h>
29#include <linux/init.h>
30#include <linux/pci.h>
31#include <linux/pci_ids.h>
32#include <linux/slab.h>
3b918c12 33#include <linux/delay.h>
a0c36a1f
MCC
34#include <linux/edac.h>
35#include <linux/mmzone.h>
d5381642 36#include <linux/edac_mce.h>
f4742949 37#include <linux/smp.h>
14d2c083 38#include <asm/processor.h>
a0c36a1f
MCC
39
40#include "edac_core.h"
41
f4742949
MCC
42/*
43 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
44 * registers start at bus 255, and are not reported by BIOS.
45 * We currently find devices with only 2 sockets. In order to support more QPI
46 * Quick Path Interconnect, just increment this number.
47 */
48#define MAX_SOCKET_BUSES 2
49
50
a0c36a1f
MCC
51/*
52 * Alter this version for the module when modifications are made
53 */
54#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
55#define EDAC_MOD_STR "i7core_edac"
56
a0c36a1f
MCC
57/*
58 * Debug macros
59 */
60#define i7core_printk(level, fmt, arg...) \
61 edac_printk(level, "i7core", fmt, ##arg)
62
63#define i7core_mc_printk(mci, level, fmt, arg...) \
64 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
65
66/*
67 * i7core Memory Controller Registers
68 */
69
e9bd2e73
MCC
70 /* OFFSETS for Device 0 Function 0 */
71
72#define MC_CFG_CONTROL 0x90
73
a0c36a1f
MCC
74 /* OFFSETS for Device 3 Function 0 */
75
76#define MC_CONTROL 0x48
77#define MC_STATUS 0x4c
78#define MC_MAX_DOD 0x64
79
442305b1
MCC
80/*
81 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
82 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
83 */
84
85#define MC_TEST_ERR_RCV1 0x60
86 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
87
88#define MC_TEST_ERR_RCV0 0x64
89 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
90 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
91
b4e8f0b6
MCC
92/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
93#define MC_COR_ECC_CNT_0 0x80
94#define MC_COR_ECC_CNT_1 0x84
95#define MC_COR_ECC_CNT_2 0x88
96#define MC_COR_ECC_CNT_3 0x8c
97#define MC_COR_ECC_CNT_4 0x90
98#define MC_COR_ECC_CNT_5 0x94
99
100#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
101#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
102
103
a0c36a1f
MCC
104 /* OFFSETS for Devices 4,5 and 6 Function 0 */
105
0b2b7b7e
MCC
106#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
107 #define THREE_DIMMS_PRESENT (1 << 24)
108 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
109 #define QUAD_RANK_PRESENT (1 << 22)
110 #define REGISTERED_DIMM (1 << 15)
111
f122a892
MCC
112#define MC_CHANNEL_MAPPER 0x60
113 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
114 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
115
0b2b7b7e
MCC
116#define MC_CHANNEL_RANK_PRESENT 0x7c
117 #define RANK_PRESENT_MASK 0xffff
118
a0c36a1f 119#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
120#define MC_CHANNEL_ERROR_MASK 0xf8
121#define MC_CHANNEL_ERROR_INJECT 0xfc
122 #define INJECT_ADDR_PARITY 0x10
123 #define INJECT_ECC 0x08
124 #define MASK_CACHELINE 0x06
125 #define MASK_FULL_CACHELINE 0x06
126 #define MASK_MSB32_CACHELINE 0x04
127 #define MASK_LSB32_CACHELINE 0x02
128 #define NO_MASK_CACHELINE 0x00
129 #define REPEAT_EN 0x01
a0c36a1f 130
0b2b7b7e 131 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 132
0b2b7b7e
MCC
133#define MC_DOD_CH_DIMM0 0x48
134#define MC_DOD_CH_DIMM1 0x4c
135#define MC_DOD_CH_DIMM2 0x50
136 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
137 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
138 #define DIMM_PRESENT_MASK (1 << 9)
139 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
140 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
141 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
142 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
143 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 144 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 145 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
146 #define MC_DOD_NUMCOL_MASK 3
147 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 148
f122a892
MCC
149#define MC_RANK_PRESENT 0x7c
150
0b2b7b7e
MCC
151#define MC_SAG_CH_0 0x80
152#define MC_SAG_CH_1 0x84
153#define MC_SAG_CH_2 0x88
154#define MC_SAG_CH_3 0x8c
155#define MC_SAG_CH_4 0x90
156#define MC_SAG_CH_5 0x94
157#define MC_SAG_CH_6 0x98
158#define MC_SAG_CH_7 0x9c
159
160#define MC_RIR_LIMIT_CH_0 0x40
161#define MC_RIR_LIMIT_CH_1 0x44
162#define MC_RIR_LIMIT_CH_2 0x48
163#define MC_RIR_LIMIT_CH_3 0x4C
164#define MC_RIR_LIMIT_CH_4 0x50
165#define MC_RIR_LIMIT_CH_5 0x54
166#define MC_RIR_LIMIT_CH_6 0x58
167#define MC_RIR_LIMIT_CH_7 0x5C
168#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
169
170#define MC_RIR_WAY_CH 0x80
171 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
172 #define MC_RIR_WAY_RANK_MASK 0x7
173
a0c36a1f
MCC
174/*
175 * i7core structs
176 */
177
178#define NUM_CHANS 3
442305b1
MCC
179#define MAX_DIMMS 3 /* Max DIMMS per channel */
180#define MAX_MCR_FUNC 4
181#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
182
183struct i7core_info {
184 u32 mc_control;
185 u32 mc_status;
186 u32 max_dod;
f122a892 187 u32 ch_map;
a0c36a1f
MCC
188};
189
194a40fe
MCC
190
191struct i7core_inject {
192 int enable;
193
194 u32 section;
195 u32 type;
196 u32 eccmask;
197
198 /* Error address mask */
199 int channel, dimm, rank, bank, page, col;
200};
201
0b2b7b7e 202struct i7core_channel {
442305b1
MCC
203 u32 ranks;
204 u32 dimms;
0b2b7b7e
MCC
205};
206
8f331907 207struct pci_id_descr {
66607706
MCC
208 int dev;
209 int func;
210 int dev_id;
de06eeef 211 int optional;
8f331907
MCC
212};
213
bd9e19ca
VM
214struct pci_id_table {
215 struct pci_id_descr *descr;
216 int n_devs;
217};
218
f4742949
MCC
219struct i7core_dev {
220 struct list_head list;
221 u8 socket;
222 struct pci_dev **pdev;
de06eeef 223 int n_devs;
f4742949
MCC
224 struct mem_ctl_info *mci;
225};
226
a0c36a1f 227struct i7core_pvt {
f4742949
MCC
228 struct pci_dev *pci_noncore;
229 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
230 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
231
232 struct i7core_dev *i7core_dev;
67166af4 233
a0c36a1f 234 struct i7core_info info;
194a40fe 235 struct i7core_inject inject;
f4742949 236 struct i7core_channel channel[NUM_CHANS];
67166af4 237
f4742949 238 int channels; /* Number of active channels */
442305b1 239
f4742949
MCC
240 int ce_count_available;
241 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
242
243 /* ECC corrected errors counts per udimm */
f4742949
MCC
244 unsigned long udimm_ce_count[MAX_DIMMS];
245 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 246 /* ECC corrected errors counts per rdimm */
f4742949
MCC
247 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
248 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 249
f4742949 250 unsigned int is_registered;
14d2c083 251
d5381642
MCC
252 /* mcelog glue */
253 struct edac_mce edac_mce;
ca9c90ba
MCC
254
255 /* Fifo double buffers */
d5381642 256 struct mce mce_entry[MCE_LOG_LEN];
ca9c90ba
MCC
257 struct mce mce_outentry[MCE_LOG_LEN];
258
259 /* Fifo in/out counters */
260 unsigned mce_in, mce_out;
261
262 /* Count indicator to show errors not got */
263 unsigned mce_overrun;
939747bd
MCC
264
265 /* Struct to control EDAC polling */
266 struct edac_pci_ctl_info *i7core_pci;
a0c36a1f
MCC
267};
268
66607706
MCC
269/* Static vars */
270static LIST_HEAD(i7core_edac_list);
271static DEFINE_MUTEX(i7core_edac_lock);
a0c36a1f 272
8f331907
MCC
273#define PCI_DESCR(device, function, device_id) \
274 .dev = (device), \
275 .func = (function), \
276 .dev_id = (device_id)
277
bd9e19ca 278struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
8f331907
MCC
279 /* Memory controller */
280 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
281 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
de06eeef
MCC
282 /* Exists only for RDIMM */
283 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
8f331907
MCC
284 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
285
286 /* Channel 0 */
287 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
288 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
289 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
290 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
291
292 /* Channel 1 */
293 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
294 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
295 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
296 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
297
298 /* Channel 2 */
299 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
300 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
301 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
302 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
303
304 /* Generic Non-core registers */
305 /*
306 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
307 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
308 * the probing code needs to test for the other address in case of
309 * failure of this one
310 */
fd382654 311 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
310cbb72 312
a0c36a1f 313};
8f331907 314
52a2e4fc
MCC
315struct pci_id_descr pci_dev_descr_lynnfield[] = {
316 { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) },
317 { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) },
318 { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) },
319
320 { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
321 { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
322 { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
323 { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) },
324
508fa179
MCC
325 { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
326 { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
327 { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
328 { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
52a2e4fc 329
f05da2f7
MCC
330 /*
331 * This is the PCI device has an alternate address on some
332 * processors like Core i7 860
333 */
52a2e4fc
MCC
334 { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
335};
336
bd9e19ca
VM
337struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
338 /* Memory controller */
339 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2) },
340 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2) },
341 /* Exists only for RDIMM */
342 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
343 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
344
345 /* Channel 0 */
346 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
347 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
348 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
349 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2) },
350
351 /* Channel 1 */
352 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
353 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
354 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
355 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2) },
356
357 /* Channel 2 */
358 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
359 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
360 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
361 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
362
363 /* Generic Non-core registers */
364 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },
365
366};
367
368#define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
369struct pci_id_table pci_dev_table[] = {
370 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
371 PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
372 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
373};
374
8f331907
MCC
375/*
376 * pci_device_id table for which devices we are looking for
8f331907
MCC
377 */
378static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 379 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
f05da2f7 380 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
8f331907
MCC
381 {0,} /* 0 terminated list. */
382};
383
a0c36a1f
MCC
384/****************************************************************************
385 Ancillary status routines
386 ****************************************************************************/
387
388 /* MC_CONTROL bits */
ef708b53
MCC
389#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
390#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
391
392 /* MC_STATUS bits */
61053fde 393#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 394#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
395
396 /* MC_MAX_DOD read functions */
/* MC_MAX_DOD: bits [1:0] encode the maximum DIMM count minus one. */
static inline int numdimms(u32 dimms)
{
	return 1 + (dimms & 0x3);
}
401
/*
 * MC_MAX_DOD rank field decode: 0 = single, 1 = dual, 2 = quad rank.
 * Encoding 3 is reserved and maps to -EINVAL.
 */
static inline int numrank(u32 rank)
{
	static const int rank_map[4] = { 1, 2, 4, -EINVAL };

	return rank_map[rank & 0x3];
}
408
/*
 * MC_MAX_DOD bank field decode: 4, 8 or 16 banks; encoding 3 is
 * reserved and maps to -EINVAL.
 */
static inline int numbank(u32 bank)
{
	static const int bank_map[4] = { 4, 8, 16, -EINVAL };

	return bank_map[bank & 0x3];
}
415
/*
 * MC_MAX_DOD row field decode: number of row addresses, 2^12 .. 2^16.
 * Encodings 5-7 are reserved and map to -EINVAL.
 */
static inline int numrow(u32 row)
{
	static const int row_map[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return row_map[row & 0x7];
}
425
/*
 * MC_MAX_DOD column field decode: number of column addresses,
 * 2^10 .. 2^12; encoding 3 is reserved and maps to -EINVAL.
 *
 * The index is masked with 0x3, so only four entries can ever be
 * reached; the table was previously oversized ([8]) with four
 * unreachable zero entries.
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};
	return cols[col & 0x3];
}
433
f4742949 434static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
435{
436 struct i7core_dev *i7core_dev;
437
438 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
439 if (i7core_dev->socket == socket)
440 return i7core_dev;
441 }
442
443 return NULL;
444}
445
a0c36a1f
MCC
446/****************************************************************************
447 Memory check routines
448 ****************************************************************************/
67166af4
MCC
449static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
450 unsigned func)
ef708b53 451{
66607706 452 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 453 int i;
ef708b53 454
66607706
MCC
455 if (!i7core_dev)
456 return NULL;
457
de06eeef 458 for (i = 0; i < i7core_dev->n_devs; i++) {
66607706 459 if (!i7core_dev->pdev[i])
ef708b53
MCC
460 continue;
461
66607706
MCC
462 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
463 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
464 return i7core_dev->pdev[i];
ef708b53
MCC
465 }
466 }
467
eb94fc40
MCC
468 return NULL;
469}
470
ec6df24c
MCC
471/**
472 * i7core_get_active_channels() - gets the number of channels and csrows
473 * @socket: Quick Path Interconnect socket
474 * @channels: Number of channels that will be returned
475 * @csrows: Number of csrows found
476 *
477 * Since EDAC core needs to know in advance the number of available channels
478 * and csrows, in order to allocate memory for csrows/channels, it is needed
479 * to run two similar steps. At the first step, implemented on this function,
480 * it checks the number of csrows/channels present at one socket.
481 * this is used in order to properly allocate the size of mci components.
482 *
483 * It should be noticed that none of the current available datasheets explain
484 * or even mention how csrows are seen by the memory controller. So, we need
485 * to add a fake description for csrows.
486 * So, this driver is attributing one DIMM memory for one csrow.
487 */
67166af4
MCC
488static int i7core_get_active_channels(u8 socket, unsigned *channels,
489 unsigned *csrows)
eb94fc40
MCC
490{
491 struct pci_dev *pdev = NULL;
492 int i, j;
493 u32 status, control;
494
495 *channels = 0;
496 *csrows = 0;
497
67166af4 498 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 499 if (!pdev) {
67166af4
MCC
500 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
501 socket);
ef708b53 502 return -ENODEV;
b7c76151 503 }
ef708b53
MCC
504
505 /* Device 3 function 0 reads */
506 pci_read_config_dword(pdev, MC_STATUS, &status);
507 pci_read_config_dword(pdev, MC_CONTROL, &control);
508
509 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 510 u32 dimm_dod[3];
ef708b53
MCC
511 /* Check if the channel is active */
512 if (!(control & (1 << (8 + i))))
513 continue;
514
515 /* Check if the channel is disabled */
41fcb7fe 516 if (status & (1 << i))
ef708b53 517 continue;
ef708b53 518
67166af4 519 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 520 if (!pdev) {
67166af4
MCC
521 i7core_printk(KERN_ERR, "Couldn't find socket %d "
522 "fn %d.%d!!!\n",
523 socket, i + 4, 1);
eb94fc40
MCC
524 return -ENODEV;
525 }
526 /* Devices 4-6 function 1 */
527 pci_read_config_dword(pdev,
528 MC_DOD_CH_DIMM0, &dimm_dod[0]);
529 pci_read_config_dword(pdev,
530 MC_DOD_CH_DIMM1, &dimm_dod[1]);
531 pci_read_config_dword(pdev,
532 MC_DOD_CH_DIMM2, &dimm_dod[2]);
533
ef708b53 534 (*channels)++;
eb94fc40
MCC
535
536 for (j = 0; j < 3; j++) {
537 if (!DIMM_PRESENT(dimm_dod[j]))
538 continue;
539 (*csrows)++;
540 }
ef708b53
MCC
541 }
542
c77720b9 543 debugf0("Number of active channels on socket %d: %d\n",
67166af4 544 socket, *channels);
1c6fed80 545
ef708b53
MCC
546 return 0;
547}
548
f4742949 549static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
550{
551 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 552 struct csrow_info *csr;
854d3349 553 struct pci_dev *pdev;
ba6c5c62 554 int i, j;
5566cb7c 555 unsigned long last_page = 0;
1c6fed80 556 enum edac_type mode;
854d3349 557 enum mem_type mtype;
a0c36a1f 558
854d3349 559 /* Get data from the MC register, function 0 */
f4742949 560 pdev = pvt->pci_mcr[0];
7dd6953c 561 if (!pdev)
8f331907
MCC
562 return -ENODEV;
563
f122a892 564 /* Device 3 function 0 reads */
7dd6953c
MCC
565 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
566 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
567 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
568 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 569
17cb7b0c 570 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
4af91889 571 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 572 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 573
1c6fed80 574 if (ECC_ENABLED(pvt)) {
41fcb7fe 575 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
576 if (ECCx8(pvt))
577 mode = EDAC_S8ECD8ED;
578 else
579 mode = EDAC_S4ECD4ED;
580 } else {
a0c36a1f 581 debugf0("ECC disabled\n");
1c6fed80
MCC
582 mode = EDAC_NONE;
583 }
a0c36a1f
MCC
584
585 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
586 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
587 "x%x x 0x%x\n",
854d3349
MCC
588 numdimms(pvt->info.max_dod),
589 numrank(pvt->info.max_dod >> 2),
276b824c 590 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
591 numrow(pvt->info.max_dod >> 6),
592 numcol(pvt->info.max_dod >> 9));
a0c36a1f 593
0b2b7b7e 594 for (i = 0; i < NUM_CHANS; i++) {
854d3349 595 u32 data, dimm_dod[3], value[8];
0b2b7b7e 596
52a2e4fc
MCC
597 if (!pvt->pci_ch[i][0])
598 continue;
599
0b2b7b7e
MCC
600 if (!CH_ACTIVE(pvt, i)) {
601 debugf0("Channel %i is not active\n", i);
602 continue;
603 }
604 if (CH_DISABLED(pvt, i)) {
605 debugf0("Channel %i is disabled\n", i);
606 continue;
607 }
608
f122a892 609 /* Devices 4-6 function 0 */
f4742949 610 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
611 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
612
f4742949 613 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 614 4 : 2;
0b2b7b7e 615
854d3349
MCC
616 if (data & REGISTERED_DIMM)
617 mtype = MEM_RDDR3;
14d2c083 618 else
854d3349
MCC
619 mtype = MEM_DDR3;
620#if 0
0b2b7b7e
MCC
621 if (data & THREE_DIMMS_PRESENT)
622 pvt->channel[i].dimms = 3;
623 else if (data & SINGLE_QUAD_RANK_PRESENT)
624 pvt->channel[i].dimms = 1;
625 else
626 pvt->channel[i].dimms = 2;
854d3349
MCC
627#endif
628
629 /* Devices 4-6 function 1 */
f4742949 630 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 631 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 632 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 633 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 634 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 635 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 636
1c6fed80 637 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 638 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
639 i,
640 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
641 data,
f4742949 642 pvt->channel[i].ranks,
41fcb7fe 643 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
644
645 for (j = 0; j < 3; j++) {
646 u32 banks, ranks, rows, cols;
5566cb7c 647 u32 size, npages;
854d3349
MCC
648
649 if (!DIMM_PRESENT(dimm_dod[j]))
650 continue;
651
652 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
653 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
654 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
655 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
656
5566cb7c
MCC
657 /* DDR3 has 8 I/O banks */
658 size = (rows * cols * banks * ranks) >> (20 - 3);
659
f4742949 660 pvt->channel[i].dimms++;
854d3349 661
17cb7b0c
MCC
662 debugf0("\tdimm %d %d Mb offset: %x, "
663 "bank: %d, rank: %d, row: %#x, col: %#x\n",
664 j, size,
854d3349
MCC
665 RANKOFFSET(dimm_dod[j]),
666 banks, ranks, rows, cols);
667
eb94fc40
MCC
668#if PAGE_SHIFT > 20
669 npages = size >> (PAGE_SHIFT - 20);
670#else
671 npages = size << (20 - PAGE_SHIFT);
672#endif
5566cb7c 673
ba6c5c62 674 csr = &mci->csrows[*csrow];
5566cb7c
MCC
675 csr->first_page = last_page + 1;
676 last_page += npages;
677 csr->last_page = last_page;
678 csr->nr_pages = npages;
679
854d3349 680 csr->page_mask = 0;
eb94fc40 681 csr->grain = 8;
ba6c5c62 682 csr->csrow_idx = *csrow;
eb94fc40
MCC
683 csr->nr_channels = 1;
684
685 csr->channels[0].chan_idx = i;
686 csr->channels[0].ce_count = 0;
854d3349 687
f4742949 688 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 689
854d3349
MCC
690 switch (banks) {
691 case 4:
692 csr->dtype = DEV_X4;
693 break;
694 case 8:
695 csr->dtype = DEV_X8;
696 break;
697 case 16:
698 csr->dtype = DEV_X16;
699 break;
700 default:
701 csr->dtype = DEV_UNKNOWN;
702 }
703
704 csr->edac_mode = mode;
705 csr->mtype = mtype;
706
ba6c5c62 707 (*csrow)++;
854d3349 708 }
1c6fed80 709
854d3349
MCC
710 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
711 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
712 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
713 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
714 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
715 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
716 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
717 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 718 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 719 for (j = 0; j < 8; j++)
17cb7b0c 720 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
721 (value[j] >> 27) & 0x1,
722 (value[j] >> 24) & 0x7,
723 (value[j] && ((1 << 24) - 1)));
0b2b7b7e
MCC
724 }
725
a0c36a1f
MCC
726 return 0;
727}
728
194a40fe
MCC
729/****************************************************************************
730 Error insertion routines
731 ****************************************************************************/
732
733/* The i7core has independent error injection features per channel.
734 However, to have a simpler code, we don't allow enabling error injection
735 on more than one channel.
736 Also, since a change at an inject parameter will be applied only at enable,
737 we're disabling error injection on all write calls to the sysfs nodes that
738 controls the error code injection.
739 */
8f331907 740static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
741{
742 struct i7core_pvt *pvt = mci->pvt_info;
743
744 pvt->inject.enable = 0;
745
f4742949 746 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
747 return -ENODEV;
748
f4742949 749 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 750 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
751
752 return 0;
194a40fe
MCC
753}
754
755/*
756 * i7core inject inject.section
757 *
758 * accept and store error injection inject.section value
759 * bit 0 - refers to the lower 32-byte half cacheline
760 * bit 1 - refers to the upper 32-byte half cacheline
761 */
762static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
763 const char *data, size_t count)
764{
765 struct i7core_pvt *pvt = mci->pvt_info;
766 unsigned long value;
767 int rc;
768
769 if (pvt->inject.enable)
41fcb7fe 770 disable_inject(mci);
194a40fe
MCC
771
772 rc = strict_strtoul(data, 10, &value);
773 if ((rc < 0) || (value > 3))
2068def5 774 return -EIO;
194a40fe
MCC
775
776 pvt->inject.section = (u32) value;
777 return count;
778}
779
780static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
781 char *data)
782{
783 struct i7core_pvt *pvt = mci->pvt_info;
784 return sprintf(data, "0x%08x\n", pvt->inject.section);
785}
786
787/*
788 * i7core inject.type
789 *
790 * accept and store error injection inject.section value
791 * bit 0 - repeat enable - Enable error repetition
792 * bit 1 - inject ECC error
793 * bit 2 - inject parity error
794 */
795static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
796 const char *data, size_t count)
797{
798 struct i7core_pvt *pvt = mci->pvt_info;
799 unsigned long value;
800 int rc;
801
802 if (pvt->inject.enable)
41fcb7fe 803 disable_inject(mci);
194a40fe
MCC
804
805 rc = strict_strtoul(data, 10, &value);
806 if ((rc < 0) || (value > 7))
2068def5 807 return -EIO;
194a40fe
MCC
808
809 pvt->inject.type = (u32) value;
810 return count;
811}
812
813static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
814 char *data)
815{
816 struct i7core_pvt *pvt = mci->pvt_info;
817 return sprintf(data, "0x%08x\n", pvt->inject.type);
818}
819
820/*
821 * i7core_inject_inject.eccmask_store
822 *
823 * The type of error (UE/CE) will depend on the inject.eccmask value:
824 * Any bits set to a 1 will flip the corresponding ECC bit
825 * Correctable errors can be injected by flipping 1 bit or the bits within
826 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
827 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
828 * uncorrectable error to be injected.
829 */
830static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
831 const char *data, size_t count)
832{
833 struct i7core_pvt *pvt = mci->pvt_info;
834 unsigned long value;
835 int rc;
836
837 if (pvt->inject.enable)
41fcb7fe 838 disable_inject(mci);
194a40fe
MCC
839
840 rc = strict_strtoul(data, 10, &value);
841 if (rc < 0)
2068def5 842 return -EIO;
194a40fe
MCC
843
844 pvt->inject.eccmask = (u32) value;
845 return count;
846}
847
848static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
849 char *data)
850{
851 struct i7core_pvt *pvt = mci->pvt_info;
852 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
853}
854
855/*
856 * i7core_addrmatch
857 *
858 * The type of error (UE/CE) will depend on the inject.eccmask value:
859 * Any bits set to a 1 will flip the corresponding ECC bit
860 * Correctable errors can be injected by flipping 1 bit or the bits within
861 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
862 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
863 * uncorrectable error to be injected.
864 */
194a40fe 865
a5538e53
MCC
866#define DECLARE_ADDR_MATCH(param, limit) \
867static ssize_t i7core_inject_store_##param( \
868 struct mem_ctl_info *mci, \
869 const char *data, size_t count) \
870{ \
cc301b3a 871 struct i7core_pvt *pvt; \
a5538e53
MCC
872 long value; \
873 int rc; \
874 \
cc301b3a
MCC
875 debugf1("%s()\n", __func__); \
876 pvt = mci->pvt_info; \
877 \
a5538e53
MCC
878 if (pvt->inject.enable) \
879 disable_inject(mci); \
880 \
4f87fad1 881 if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
a5538e53
MCC
882 value = -1; \
883 else { \
884 rc = strict_strtoul(data, 10, &value); \
885 if ((rc < 0) || (value >= limit)) \
886 return -EIO; \
887 } \
888 \
889 pvt->inject.param = value; \
890 \
891 return count; \
892} \
893 \
894static ssize_t i7core_inject_show_##param( \
895 struct mem_ctl_info *mci, \
896 char *data) \
897{ \
cc301b3a
MCC
898 struct i7core_pvt *pvt; \
899 \
900 pvt = mci->pvt_info; \
901 debugf1("%s() pvt=%p\n", __func__, pvt); \
a5538e53
MCC
902 if (pvt->inject.param < 0) \
903 return sprintf(data, "any\n"); \
904 else \
905 return sprintf(data, "%d\n", pvt->inject.param);\
194a40fe
MCC
906}
907
a5538e53
MCC
908#define ATTR_ADDR_MATCH(param) \
909 { \
910 .attr = { \
911 .name = #param, \
912 .mode = (S_IRUGO | S_IWUSR) \
913 }, \
914 .show = i7core_inject_show_##param, \
915 .store = i7core_inject_store_##param, \
916 }
194a40fe 917
a5538e53
MCC
918DECLARE_ADDR_MATCH(channel, 3);
919DECLARE_ADDR_MATCH(dimm, 3);
920DECLARE_ADDR_MATCH(rank, 4);
921DECLARE_ADDR_MATCH(bank, 32);
922DECLARE_ADDR_MATCH(page, 0x10000);
923DECLARE_ADDR_MATCH(col, 0x4000);
194a40fe 924
276b824c
MCC
925static int write_and_test(struct pci_dev *dev, int where, u32 val)
926{
927 u32 read;
928 int count;
929
4157d9f5
MCC
930 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
931 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
932 where, val);
933
276b824c
MCC
934 for (count = 0; count < 10; count++) {
935 if (count)
b990538a 936 msleep(100);
276b824c
MCC
937 pci_write_config_dword(dev, where, val);
938 pci_read_config_dword(dev, where, &read);
939
940 if (read == val)
941 return 0;
942 }
943
4157d9f5
MCC
944 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
945 "write=%08x. Read=%08x\n",
946 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
947 where, val, read);
276b824c
MCC
948
949 return -EINVAL;
950}
951
/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU to ignore
 * that matching criteria for error injection.
 *
 * It should be noticed that the error will only happen after a write operation
 * on a memory that matches the condition. if REPEAT_EN is not enabled at
 * inject mask, then it will produce just one error. Otherwise, it will repeat
 * until the injectmask would be cleaned.
 *
 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
 *    is reliable enough to check if the MC is using the
 *    three channels. However, this is not clear at the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
				       const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int rc;
	long enable;

	/* No device for the selected channel: silently ignore the write */
	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	rc = strict_strtoul(data, 10, &enable);
	if ((rc < 0))
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		/* "0" disables injection immediately and we are done */
		disable_inject(mci);
		return count;
	}

	/*
	 * Build the 64-bit MC_CHANNEL_ADDR_MATCH mask.  For each criterion,
	 * a negative value sets the corresponding "ignore" bit (bits 37-41);
	 * otherwise the value is placed in its match field.  The field
	 * layout for dimm/rank depends on whether the channel has 3 DIMMs.
	 */

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1LL) << 36;
	}

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
		else
			mask |= (pvt->inject.rank & 0x3LL) << 34;
	}

	/*
	 * Sets pvt->inject.bank mask
	 * NOTE(review): 0x15 looks suspicious — bank accepts values < 32
	 * (see DECLARE_ADDR_MATCH(bank, 32)), which would need 0x1f.
	 * Verify against the MC_CHANNEL_ADDR_MATCH register layout.
	 */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;
	else
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;
	else
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;
	else
		mask |= (pvt->inject.col & 0x3fff);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */

	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	/* Program the match mask (low and high dwords), ECC mask and
	 * injection control, verifying each write with a read-back. */
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);


	return count;
}
1072
/*
 * i7core_inject_enable_show() - sysfs "inject_enable" read handler.
 *
 * Reads back MC_CHANNEL_ERROR_INJECT from the selected channel device and
 * reports whether injection is currently armed.  Bits 2-3 (INJECT_ECC /
 * INJECT_ADDR_PARITY) being set re-synchronizes pvt->inject.enable.
 */
static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	/* No device for the selected channel: nothing to report */
	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%018x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}
1092
/*
 * DECLARE_COUNTER() - generates a sysfs show handler for the per-DIMM
 * corrected-error counter udimm_ce_count[param].  The counters are only
 * meaningful for unregistered (UDIMM) memory and after at least one
 * counter poll, hence the "data unavailable" fallback.
 */
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	debugf1("%s() \n", __func__);				\
	if (!pvt->ce_count_available || (pvt->is_registered))	\
		return sprintf(data, "data unavailable\n");	\
	return sprintf(data, "%lu\n",				\
			pvt->udimm_ce_count[param]);		\
}

/*
 * ATTR_COUNTER() - read-only sysfs attribute entry named "udimm<param>"
 * backed by the show handler generated above.
 */
#define ATTR_COUNTER(param)					\
	{							\
		.attr = {					\
			.name = __stringify(udimm##param),	\
			.mode = (S_IRUGO | S_IWUSR)		\
		},						\
		.show = i7core_show_counter_##param		\
	}

/* One counter per DIMM slot (0-2) on a channel */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
194a40fe
MCC
/*
 * Sysfs struct
 */

/* Attributes exposed under the "inject_addrmatch" sysfs group; the
 * NULL-named entry terminates the array. */
static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ .attr = { .name = NULL } }
};

static struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};

/* Per-DIMM corrected-error counters, exposed only for UDIMM setups (the
 * group is attached at runtime by mci_bind_devs()). */
static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	{ .attr = { .name = NULL } }
};

static struct mcidev_sysfs_group i7core_udimm_counters = {
	.name = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};
1151
/*
 * Top-level sysfs attributes for the memory controller.  The second-to-last
 * slot is intentionally left empty: mci_bind_devs() fills its .grp with
 * &i7core_udimm_counters when unregistered (UDIMM) memory is detected.
 * The final NULL-named entry terminates the array.
 */
static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
	{ .attr = { .name = NULL } }
};
1187
a0c36a1f
MCC
/****************************************************************************
	Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 * i7core_put_devices	'put' all the devices that we have
 *			reserved via 'get'
 *
 * Drops the reference on every PCI device held in i7core_dev->pdev[],
 * then unlinks the descriptor from i7core_edac_list and frees it.
 * After this call, i7core_dev is no longer valid.
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
	int i;

	debugf0(__FILE__ ": %s()\n", __func__);
	for (i = 0; i < i7core_dev->n_devs; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;
		debugf0("Removing dev %02x:%02x.%d\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
		pci_dev_put(pdev);
	}
	kfree(i7core_dev->pdev);
	list_del(&i7core_dev->list);
	kfree(i7core_dev);
}
/*
 * i7core_put_all_devices() - release every socket descriptor on
 * i7core_edac_list.  Uses the _safe iterator because i7core_put_devices()
 * deletes the current entry from the list.
 */
static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
		i7core_put_devices(i7core_dev);
}
1222
/*
 * i7core_xeon_pci_fixup() - force discovery of the hidden non-core buses.
 *
 * Walks the pci_id_table and, whenever the first device of an entry is not
 * already visible, triggers a legacy scan of buses 255 down to
 * 255 - MAX_SOCKET_BUSES + 1 where the non-core registers live.
 */
static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
{
	struct pci_dev *pdev = NULL;
	int i;
	/*
	 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
	 * aren't announced by acpi. So, we need to use a legacy scan probing
	 * to detect them
	 */
	while (table && table->descr) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
		if (unlikely(!pdev)) {
			for (i = 0; i < MAX_SOCKET_BUSES; i++)
				pcibios_scan_specific_bus(255-i);
		}
		/* drop the probe reference; pci_dev_put(NULL) is a no-op */
		pci_dev_put(pdev);
		table++;
	}
}
1242
/*
 * i7core_pci_lastbus() - return the highest PCI bus number in the system.
 *
 * The non-core devices sit on the last buses; the socket number is later
 * derived as (last_bus - device_bus) in i7core_get_onedevice().
 */
static unsigned i7core_pci_lastbus(void)
{
	int last_bus = 0, bus;
	struct pci_bus *b = NULL;

	while ((b = pci_find_next_bus(b)) != NULL) {
		bus = b->number;
		debugf0("Found bus %d\n", bus);
		if (bus > last_bus)
			last_bus = bus;
	}

	debugf0("Last bus %d\n", last_bus);

	return last_bus;
}
1259
/*
 * i7core_get_devices	Find and perform 'get' operation on the MCH's
 *			device/functions we want to reference for this driver
 *
 * Need to 'get' device 16 func 1 and func 2
 */

/*
 * i7core_get_onedevice() - grab one PCI device described by dev_descr and
 * file it in the per-socket i7core_dev descriptor (allocating the
 * descriptor on first use).
 *
 * @prev:     iteration cursor for pci_get_device(); updated on success so
 *            repeated calls enumerate multiple instances (one per socket).
 * @devno:    index into i7core_dev->pdev[] for this descriptor.
 * @n_devs:   total number of devices per socket (sizes the pdev array).
 * @last_bus: highest PCI bus number; socket = last_bus - device bus.
 *
 * Returns 0 on success or benign absence (optional device, end of
 * enumeration), -ENODEV on a hard error, -ENOMEM on allocation failure.
 * On success the pdev reference is owned by i7core_dev->pdev[devno] and
 * released later by i7core_put_devices().
 */
int i7core_get_onedevice(struct pci_dev **prev, int devno,
			 struct pci_id_descr *dev_descr, unsigned n_devs,
			 unsigned last_bus)
{
	struct i7core_dev *i7core_dev;

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		/* A previous instance existed: normal end of enumeration */
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		/* devno 0 missing: this platform flavor isn't present */
		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	socket = last_bus - bus;

	/* Allocate the per-socket descriptor on first device of a socket */
	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
		if (!i7core_dev)
			return -ENOMEM;
		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
					   GFP_KERNEL);
		if (!i7core_dev->pdev) {
			kfree(i7core_dev);
			return -ENOMEM;
		}
		i7core_dev->socket = socket;
		i7core_dev->n_devs = n_devs;
		list_add_tail(&i7core_dev->list, &i7core_edac_list);
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	*prev = pdev;

	return 0;
}
a0c36a1f 1376
bd9e19ca 1377static int i7core_get_devices(struct pci_id_table *table)
c77720b9 1378{
bda14289 1379 int i, rc, last_bus;
c77720b9 1380 struct pci_dev *pdev = NULL;
bd9e19ca
VM
1381 struct pci_id_descr *dev_descr;
1382
bda14289
MCC
1383 last_bus = i7core_pci_lastbus();
1384
bd9e19ca
VM
1385 while (table && table->descr) {
1386 dev_descr = table->descr;
1387 for (i = 0; i < table->n_devs; i++) {
1388 pdev = NULL;
1389 do {
bda14289
MCC
1390 rc = i7core_get_onedevice(&pdev, i,
1391 &dev_descr[i],
1392 table->n_devs,
1393 last_bus);
bd9e19ca
VM
1394 if (rc < 0) {
1395 if (i == 0) {
1396 i = table->n_devs;
1397 break;
1398 }
1399 i7core_put_all_devices();
1400 return -ENODEV;
1401 }
1402 } while (pdev);
1403 }
1404 table++;
c77720b9 1405 }
66607706 1406
ef708b53 1407 return 0;
bd9e19ca 1408 return 0;
ef708b53
MCC
1409}
1410
/*
 * mci_bind_devs() - map the previously 'got' PCI devices of one socket into
 * the fast-access pointers in struct i7core_pvt.
 *
 * Layout: slot 3 funcs -> pvt->pci_mcr[], slots 4..4+NUM_CHANS-1 ->
 * pvt->pci_ch[channel][func], slot 0 func 0 -> pvt->pci_noncore.
 * Slot 3 func 2 being present means registered (RDIMM) memory, which also
 * selects the RDIMM error-counting path elsewhere.
 *
 * Returns 0 on success, -EINVAL if a device falls outside the expected
 * slot/function ranges.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* dev 3 func 2 only exists on registered (RDIMM) memory */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	/*
	 * Add extra nodes to count errors on udimm
	 * For registered memory, this is not needed, since the counters
	 * are already displayed at the standard locations
	 */
	if (!pvt->is_registered)
		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
			&i7core_udimm_counters;

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1469
/****************************************************************************
  Error check routines
 ****************************************************************************/

/*
 * i7core_rdimm_update_csrow() - report @add newly-detected corrected errors
 * for (channel, dimm) to the EDAC core, one edac_mc_handle_fbd_ce() call
 * per error, using the csrow mapping built at DIMM discovery time.
 */
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      int chan, int dimm, int add)
{
	char *msg;
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;

	for (i = 0; i < add; i++) {
		msg = kasprintf(GFP_KERNEL, "Corrected error "
				"(Socket=%d channel=%d dimm=%d)",
				pvt->i7core_dev->socket, chan, dimm);

		edac_mc_handle_fbd_ce(mci, row, 0, msg);
		kfree (msg);
	}
}
1489
/*
 * i7core_rdimm_update_ce_count() - fold freshly-read per-DIMM corrected
 * error counters (new0..new2) into the running RDIMM totals for @chan.
 *
 * The hardware counters are 15 bits wide, so a negative delta means the
 * counter wrapped and 0x7fff is added back.  On the very first call only
 * the baseline is recorded (ce_count_available gates this).  Any positive
 * delta is forwarded to the EDAC core via i7core_rdimm_update_csrow().
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 int chan, int new0, int new1, int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		/* 15-bit hardware counters: compensate for wrap-around */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/*updated the edac core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1531
/*
 * i7core_rdimm_check_mc_ecc_err() - poll the six MC_COR_ECC_CNT registers
 * (dev 3 func 2) and update the per-channel RDIMM corrected error counts.
 *
 * Each register packs two DIMM counters (DIMM_TOP/DIMM_BOT); how the six
 * counters map onto the three channels depends on whether a channel holds
 * more than two DIMMs.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
								&rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
								&rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
								&rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
								&rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
								&rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
								&rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/*if the channel has 3 dimms*/
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
					DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
					DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
442305b1
MCC
1570
1571/* This function is based on the device 3 function 4 registers as described on:
1572 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1573 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1574 * also available at:
1575 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1576 */
f4742949 1577static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
442305b1
MCC
1578{
1579 struct i7core_pvt *pvt = mci->pvt_info;
1580 u32 rcv1, rcv0;
1581 int new0, new1, new2;
1582
f4742949 1583 if (!pvt->pci_mcr[4]) {
b990538a 1584 debugf0("%s MCR registers not found\n", __func__);
442305b1
MCC
1585 return;
1586 }
1587
b4e8f0b6 1588 /* Corrected test errors */
f4742949
MCC
1589 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1590 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
442305b1
MCC
1591
1592 /* Store the new values */
1593 new2 = DIMM2_COR_ERR(rcv1);
1594 new1 = DIMM1_COR_ERR(rcv0);
1595 new0 = DIMM0_COR_ERR(rcv0);
1596
442305b1 1597 /* Updates CE counters if it is not the first time here */
f4742949 1598 if (pvt->ce_count_available) {
442305b1
MCC
1599 /* Updates CE counters */
1600 int add0, add1, add2;
1601
f4742949
MCC
1602 add2 = new2 - pvt->udimm_last_ce_count[2];
1603 add1 = new1 - pvt->udimm_last_ce_count[1];
1604 add0 = new0 - pvt->udimm_last_ce_count[0];
442305b1
MCC
1605
1606 if (add2 < 0)
1607 add2 += 0x7fff;
f4742949 1608 pvt->udimm_ce_count[2] += add2;
442305b1
MCC
1609
1610 if (add1 < 0)
1611 add1 += 0x7fff;
f4742949 1612 pvt->udimm_ce_count[1] += add1;
442305b1
MCC
1613
1614 if (add0 < 0)
1615 add0 += 0x7fff;
f4742949 1616 pvt->udimm_ce_count[0] += add0;
b4e8f0b6
MCC
1617
1618 if (add0 | add1 | add2)
1619 i7core_printk(KERN_ERR, "New Corrected error(s): "
1620 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1621 add0, add1, add2);
442305b1 1622 } else
f4742949 1623 pvt->ce_count_available = 1;
442305b1
MCC
1624
1625 /* Store the new values */
f4742949
MCC
1626 pvt->udimm_last_ce_count[2] = new2;
1627 pvt->udimm_last_ce_count[1] = new1;
1628 pvt->udimm_last_ce_count[0] = new0;
442305b1
MCC
1629}
1630
8a2f118e
MCC
1631/*
1632 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1633 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1634 * Nehalem are defined as family 0x06, model 0x1a
1635 *
1636 * The MCA registers used here are the following ones:
8a2f118e 1637 * struct mce field MCA Register
f237fcf2
MCC
1638 * m->status MSR_IA32_MC8_STATUS
1639 * m->addr MSR_IA32_MC8_ADDR
1640 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1641 * In the case of Nehalem, the error information is masked at .status and .misc
1642 * fields
1643 */
d5381642
MCC
1644static void i7core_mce_output_error(struct mem_ctl_info *mci,
1645 struct mce *m)
1646{
b4e8f0b6 1647 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1648 char *type, *optype, *err, *msg;
8a2f118e 1649 unsigned long error = m->status & 0x1ff0000l;
a639539f 1650 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1651 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1652 u32 dimm = (m->misc >> 16) & 0x3;
1653 u32 channel = (m->misc >> 18) & 0x3;
1654 u32 syndrome = m->misc >> 32;
1655 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1656 int csrow;
8a2f118e 1657
c5d34528
MCC
1658 if (m->mcgstatus & 1)
1659 type = "FATAL";
1660 else
1661 type = "NON_FATAL";
1662
a639539f 1663 switch (optypenum) {
b990538a
MCC
1664 case 0:
1665 optype = "generic undef request";
1666 break;
1667 case 1:
1668 optype = "read error";
1669 break;
1670 case 2:
1671 optype = "write error";
1672 break;
1673 case 3:
1674 optype = "addr/cmd error";
1675 break;
1676 case 4:
1677 optype = "scrubbing error";
1678 break;
1679 default:
1680 optype = "reserved";
1681 break;
a639539f
MCC
1682 }
1683
8a2f118e
MCC
1684 switch (errnum) {
1685 case 16:
1686 err = "read ECC error";
1687 break;
1688 case 17:
1689 err = "RAS ECC error";
1690 break;
1691 case 18:
1692 err = "write parity error";
1693 break;
1694 case 19:
1695 err = "redundacy loss";
1696 break;
1697 case 20:
1698 err = "reserved";
1699 break;
1700 case 21:
1701 err = "memory range error";
1702 break;
1703 case 22:
1704 err = "RTID out of range";
1705 break;
1706 case 23:
1707 err = "address parity error";
1708 break;
1709 case 24:
1710 err = "byte enable parity error";
1711 break;
1712 default:
1713 err = "unknown";
d5381642 1714 }
d5381642 1715
f237fcf2 1716 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1717 msg = kasprintf(GFP_ATOMIC,
f4742949 1718 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1719 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1720 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1721 syndrome, core_err_cnt, (long long)m->status,
1722 (long long)m->misc, optype, err);
8a2f118e
MCC
1723
1724 debugf0("%s", msg);
d5381642 1725
f4742949 1726 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1727
d5381642 1728 /* Call the helper to output message */
b4e8f0b6
MCC
1729 if (m->mcgstatus & 1)
1730 edac_mc_handle_fbd_ue(mci, csrow, 0,
1731 0 /* FIXME: should be channel here */, msg);
f4742949 1732 else if (!pvt->is_registered)
b4e8f0b6
MCC
1733 edac_mc_handle_fbd_ce(mci, csrow,
1734 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1735
1736 kfree(msg);
d5381642
MCC
1737}
1738
/*
 * i7core_check_error	Retrieve and process errors reported by the
 *			hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * loosing an error.
	 */
	smp_rmb();
	/* entries queued by the NMI-time producer, modulo ring size */
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	/* Ring wraps: copy the tail segment first, then restart at 0 */
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1798
/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
#endif

	smp_rmb();
	/* Ring full: drop the record but account for it */
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advice mcelog that the error were handled */
	return 1;
}
1848
f4742949
MCC
1849static int i7core_register_mci(struct i7core_dev *i7core_dev,
1850 int num_channels, int num_csrows)
a0c36a1f
MCC
1851{
1852 struct mem_ctl_info *mci;
1853 struct i7core_pvt *pvt;
ba6c5c62 1854 int csrow = 0;
f4742949 1855 int rc;
a0c36a1f 1856
a0c36a1f 1857 /* allocate a new MC control structure */
d4c27795
MCC
1858 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1859 i7core_dev->socket);
f4742949
MCC
1860 if (unlikely(!mci))
1861 return -ENOMEM;
a0c36a1f
MCC
1862
1863 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1864
f4742949
MCC
1865 /* record ptr to the generic device */
1866 mci->dev = &i7core_dev->pdev[0]->dev;
1867
a0c36a1f 1868 pvt = mci->pvt_info;
ef708b53 1869 memset(pvt, 0, sizeof(*pvt));
67166af4 1870
41fcb7fe
MCC
1871 /*
1872 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1873 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1874 * memory channels
1875 */
1876 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1877 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1878 mci->edac_cap = EDAC_FLAG_NONE;
1879 mci->mod_name = "i7core_edac.c";
1880 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1881 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1882 i7core_dev->socket);
1883 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1884 mci->ctl_page_to_phys = NULL;
a5538e53 1885 mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
87d1d272
MCC
1886 /* Set the function pointer to an actual operation function */
1887 mci->edac_check = i7core_check_error;
8f331907 1888
ef708b53 1889 /* Store pci devices at mci for faster access */
f4742949 1890 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1891 if (unlikely(rc < 0))
f4742949 1892 goto fail;
ef708b53
MCC
1893
1894 /* Get dimm basic config */
f4742949 1895 get_dimm_config(mci, &csrow);
ef708b53 1896
a0c36a1f 1897 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1898 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1899 debugf0("MC: " __FILE__
1900 ": %s(): failed edac_mc_add_mc()\n", __func__);
1901 /* FIXME: perhaps some code should go here that disables error
1902 * reporting if we just enabled it
1903 */
b7c76151
MCC
1904
1905 rc = -EINVAL;
f4742949 1906 goto fail;
a0c36a1f
MCC
1907 }
1908
1909 /* allocating generic PCI control info */
939747bd 1910 pvt->i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
f4742949 1911 EDAC_MOD_STR);
939747bd 1912 if (unlikely(!pvt->i7core_pci)) {
a0c36a1f
MCC
1913 printk(KERN_WARNING
1914 "%s(): Unable to create PCI control\n",
1915 __func__);
1916 printk(KERN_WARNING
1917 "%s(): PCI error report via EDAC not setup\n",
1918 __func__);
1919 }
1920
194a40fe 1921 /* Default error mask is any memory */
ef708b53 1922 pvt->inject.channel = 0;
194a40fe
MCC
1923 pvt->inject.dimm = -1;
1924 pvt->inject.rank = -1;
1925 pvt->inject.bank = -1;
1926 pvt->inject.page = -1;
1927 pvt->inject.col = -1;
1928
d5381642 1929 /* Registers on edac_mce in order to receive memory errors */
c5d34528 1930 pvt->edac_mce.priv = mci;
d5381642 1931 pvt->edac_mce.check_error = i7core_mce_check_error;
d5381642
MCC
1932
1933 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1934 if (unlikely(rc < 0)) {
d5381642
MCC
1935 debugf0("MC: " __FILE__
1936 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1937 }
1938
1939fail:
d4d1ef45
TL
1940 if (rc < 0)
1941 edac_mc_free(mci);
f4742949
MCC
1942 return rc;
1943}
1944
1945/*
1946 * i7core_probe Probe for ONE instance of device to see if it is
1947 * present.
1948 * return:
1949 * 0 for FOUND a device
1950 * < 0 for error code
1951 */
2d95d815
MCC
1952
/*
 * Guard so that the whole set of memory controllers is only registered
 * once, by the first i7core_probe() call (all MCs are allocated on that
 * first pass).  Protected by i7core_edac_lock.
 *
 * Kernel style (checkpatch): statics live in .bss, do not initialise to 0.
 */
static int probed;
1954
f4742949
MCC
1955static int __devinit i7core_probe(struct pci_dev *pdev,
1956 const struct pci_device_id *id)
1957{
f4742949
MCC
1958 int rc;
1959 struct i7core_dev *i7core_dev;
1960
2d95d815
MCC
1961 /* get the pci devices we want to reserve for our use */
1962 mutex_lock(&i7core_edac_lock);
1963
f4742949 1964 /*
d4c27795 1965 * All memory controllers are allocated at the first pass.
f4742949 1966 */
2d95d815
MCC
1967 if (unlikely(probed >= 1)) {
1968 mutex_unlock(&i7core_edac_lock);
f4742949 1969 return -EINVAL;
2d95d815
MCC
1970 }
1971 probed++;
de06eeef 1972
bd9e19ca 1973 rc = i7core_get_devices(pci_dev_table);
f4742949
MCC
1974 if (unlikely(rc < 0))
1975 goto fail0;
1976
1977 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1978 int channels;
1979 int csrows;
1980
1981 /* Check the number of active and not disabled channels */
1982 rc = i7core_get_active_channels(i7core_dev->socket,
1983 &channels, &csrows);
1984 if (unlikely(rc < 0))
1985 goto fail1;
1986
d4c27795
MCC
1987 rc = i7core_register_mci(i7core_dev, channels, csrows);
1988 if (unlikely(rc < 0))
1989 goto fail1;
d5381642
MCC
1990 }
1991
ef708b53 1992 i7core_printk(KERN_INFO, "Driver loaded.\n");
8f331907 1993
66607706 1994 mutex_unlock(&i7core_edac_lock);
a0c36a1f
MCC
1995 return 0;
1996
66607706 1997fail1:
13d6e9b6 1998 i7core_put_all_devices();
66607706
MCC
1999fail0:
2000 mutex_unlock(&i7core_edac_lock);
b7c76151 2001 return rc;
a0c36a1f
MCC
2002}
2003
2004/*
2005 * i7core_remove destructor for one instance of device
2006 *
2007 */
2008static void __devexit i7core_remove(struct pci_dev *pdev)
2009{
2010 struct mem_ctl_info *mci;
22e6bcbd 2011 struct i7core_dev *i7core_dev, *tmp;
939747bd 2012 struct i7core_pvt *pvt;
a0c36a1f
MCC
2013
2014 debugf0(__FILE__ ": %s()\n", __func__);
2015
22e6bcbd
MCC
2016 /*
2017 * we have a trouble here: pdev value for removal will be wrong, since
2018 * it will point to the X58 register used to detect that the machine
2019 * is a Nehalem or upper design. However, due to the way several PCI
2020 * devices are grouped together to provide MC functionality, we need
2021 * to use a different method for releasing the devices
2022 */
87d1d272 2023
66607706 2024 mutex_lock(&i7core_edac_lock);
22e6bcbd 2025 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
939747bd
MCC
2026 mci = find_mci_by_dev(&i7core_dev->pdev[0]->dev);
2027 if (unlikely(!mci || !mci->pvt_info)) {
2028 i7core_printk(KERN_ERR,
2029 "Couldn't find mci hanler\n");
2030 } else {
2031 pvt = mci->pvt_info;
22e6bcbd 2032 i7core_dev = pvt->i7core_dev;
939747bd
MCC
2033
2034 if (likely(pvt->i7core_pci))
2035 edac_pci_release_generic_ctl(pvt->i7core_pci);
2036 else
2037 i7core_printk(KERN_ERR,
2038 "Couldn't find mem_ctl_info for socket %d\n",
2039 i7core_dev->socket);
2040 pvt->i7core_pci = NULL;
2041
2042 edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2043
22e6bcbd
MCC
2044 edac_mce_unregister(&pvt->edac_mce);
2045 kfree(mci->ctl_name);
2046 edac_mc_free(mci);
2047 i7core_put_devices(i7core_dev);
22e6bcbd
MCC
2048 }
2049 }
2d95d815
MCC
2050 probed--;
2051
66607706 2052 mutex_unlock(&i7core_edac_lock);
a0c36a1f
MCC
2053}
2054
a0c36a1f
MCC
/* Export the PCI id table so udev/modprobe can autoload this driver */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2056
2057/*
2058 * i7core_driver pci_driver structure for this module
2059 *
2060 */
2061static struct pci_driver i7core_driver = {
2062 .name = "i7core_edac",
2063 .probe = i7core_probe,
2064 .remove = __devexit_p(i7core_remove),
2065 .id_table = i7core_pci_tbl,
2066};
2067
2068/*
2069 * i7core_init Module entry function
2070 * Try to initialize this module for its devices
2071 */
2072static int __init i7core_init(void)
2073{
2074 int pci_rc;
2075
2076 debugf2("MC: " __FILE__ ": %s()\n", __func__);
2077
2078 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2079 opstate_init();
2080
bd9e19ca 2081 i7core_xeon_pci_fixup(pci_dev_table);
bc2d7245 2082
a0c36a1f
MCC
2083 pci_rc = pci_register_driver(&i7core_driver);
2084
3ef288a9
MCC
2085 if (pci_rc >= 0)
2086 return 0;
2087
2088 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2089 pci_rc);
2090
2091 return pci_rc;
a0c36a1f
MCC
2092}
2093
2094/*
2095 * i7core_exit() Module exit function
2096 * Unregister the driver
2097 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* i7core_remove() runs for every bound device as part of this call */
	pci_unregister_driver(&i7core_driver);
}
2103
module_init(i7core_init);
module_exit(i7core_exit);

/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Select polling vs. NMI error reporting at module load (read-only sysfs) */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");