/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, avoiding two drivers (e.g.
 * apei/ghes and i7core_edac) being used at the same time.
 */
static const void *edac_mc_owner;

unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				 unsigned len)
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
		n = snprintf(p, len, "%s %d ",
			     edac_layer_name[mci->layers[i].type],
			     dimm->location[i]);
		p += n;
		len -= n;
		count += n;
		if (!len)
			break;
	}

	return count;
}

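/*
 * Example (illustrative): for a controller described by two layers,
 * EDAC_MC_LAYER_CHIP_SELECT and EDAC_MC_LAYER_CHANNEL, a DIMM at
 * location {2, 0} is rendered into 'buf' as:
 *
 *	"csrow 2 channel 0 "
 *
 * using the layer names from edac_layer_name[], defined further below.
 */
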
#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
	char location[80];

	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
		 dimm->mci->csbased ? "rank" : "dimm",
		 number, location, dimm->csrow, dimm->cschannel);
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p: pointer to a pointer with the memory offset to be used. At
 *	return, this will be incremented to point to the next offset
 * @size: Size of the data structure to be reserved
 * @n_elems: Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed so that successive calls keep advancing to the
 * proper offsets when allocating a struct along with its embedded structs,
 * as edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	/* check the alignment of the accumulated offset being returned */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

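/*
 * Minimal usage sketch for edac_align_ptr() (illustrative; this mirrors
 * what edac_mc_alloc() below actually does): run a first pass with a
 * NULL cursor to compute aligned offsets and the total size, allocate
 * once, then rebase each offset inside the real allocation:
 *
 *	void *ptr = NULL, *pvt;
 *	struct mem_ctl_info *mci;
 *	unsigned size;
 *
 *	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
 *	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 *	size = ((unsigned long)pvt) + sz_pvt;
 *
 *	mci = kzalloc(size, GFP_KERNEL);
 *	pvt = sz_pvt ? ((char *)mci) + ((unsigned long)pvt) : NULL;
 */
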
static void _edac_mc_free(struct mem_ctl_info *mci)
{
	int i, chn, row;
	struct csrow_info *csr;
	const unsigned int tot_dimms = mci->tot_dimms;
	const unsigned int tot_channels = mci->num_cschannel;
	const unsigned int tot_csrows = mci->nr_csrows;

	if (mci->dimms) {
		for (i = 0; i < tot_dimms; i++)
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}
	if (mci->csrows) {
		for (row = 0; row < tot_csrows; row++) {
			csr = mci->csrows[row];
			if (csr) {
				if (csr->channels) {
					for (chn = 0; chn < tot_channels; chn++)
						kfree(csr->channels[chn]);
					kfree(csr->channels);
				}
				kfree(csr);
			}
		}
		kfree(mci->csrows);
	}
	kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:	Memory controller number
 * @n_layers:	Number of MC hierarchy layers
 * @layers:	Describes each layer as seen by the Memory Controller
 * @sz_pvt:	size of private storage needed
 *
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries.  Currently, this function will allocate multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks would require changes
 * in the drivers.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csr;
	struct rank_info *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, row, chn, n, len, off;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		edac_dbg(4, "errcount layer %d size %d\n", i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->csbased = per_rank;

	/*
	 * Allocate and fill the csrow/channels structs
	 */
	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
	if (!mci->csrows)
		goto error;
	for (row = 0; row < tot_csrows; row++) {
		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			goto error;
		mci->csrows[row] = csr;
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
					GFP_KERNEL);
		if (!csr->channels)
			goto error;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				goto error;
			csr->channels[chn] = chan;
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Allocate and fill the dimm structs
	 */
	mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
	if (!mci->dimms)
		goto error;

	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (i = 0; i < tot_dimms; i++) {
		chan = mci->csrows[row]->channels[chn];
		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
		if (off < 0 || off >= tot_dimms) {
			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
			goto error;
		}

		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			goto error;
		mci->dimms[off] = dimm;
		dimm->mci = mci;

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		if (layers[0].is_virt_csrow) {
			chn++;
			if (chn == tot_channels) {
				chn = 0;
				row++;
			}
		} else {
			row++;
			if (row == tot_csrows) {
				row = 0;
				chn++;
			}
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;

	return mci;

error:
	_edac_mc_free(mci);

	return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

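/*
 * Illustrative caller (hypothetical driver code, not part of this file):
 * describe a controller with two layers - csrows and channels - and
 * reserve driver-private storage. 'nr_csrows', 'nr_channels' and
 * 'struct my_pvt' are the driver's own:
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = nr_csrows;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = nr_channels;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */
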
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	/* If we're not yet registered with sysfs, free only what was
	 * allocated in edac_mc_alloc().
	 */
	if (!device_is_registered(&mci->dev)) {
		_edac_mc_free(mci);
		return;
	}

	/* the mci instance is freed here, when the sysfs object is dropped */
	edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	edac_dbg(3, "\n");

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->pdev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			   msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	edac_dbg(0, "\n");

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		edac_dbg(0, "not canceled, flush the queue\n");

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
	int handlers = atomic_dec_return(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);

	return handlers;
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

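/*
 * Illustrative lookup (hypothetical caller): the list walk above is only
 * safe while mem_ctls_mutex is held, and the returned pointer is only
 * valid for as long as the lock is kept:
 *
 *	mutex_lock(&mem_ctls_mutex);
 *	mci = edac_mc_find(0);
 *	if (mci)
 *		...use mci...
 *	mutex_unlock(&mem_ctls_mutex);
 */
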
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	int ret = -EINVAL;
	edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			if (mci->dimms[i]->nr_pages)
				edac_mc_dump_dimm(mci->dimms[i], i);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	edac_mc_owner = mci->mod_name;

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

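/*
 * Typical registration flow around edac_mc_add_mc() (illustrative,
 * hypothetical driver code; 'my_edac_check' is the driver's own poll
 * routine): fill the identification fields, optionally install a check
 * callback to get OP_RUNNING_POLL, then register:
 *
 *	mci->pdev = &pdev->dev;
 *	mci->mod_name = "my_edac_driver";
 *	mci->ctl_name = "my_controller";
 *	mci->edac_check = my_edac_check;
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */
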
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	edac_dbg(0, "\n");

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	if (!del_mc_from_global_list(mci))
		edac_mc_owner = NULL;
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info **csrows = mci->csrows;
	int row, i, j, n;

	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j]->dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS],
			      const u16 count)
{
	int i, index = 0;

	mci->ce_mc += count;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count += count;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index] += count;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

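/*
 * Worked example (illustrative): with two layers sized {4, 2}
 * (say, 4 csrows x 2 channels) and pos = {2, 1}, the loop above does:
 *
 *	i = 0:	index = 0 + 2 = 2;	ce_per_layer[0][2] += count;
 *		index = 2 * 2 = 4;
 *	i = 1:	index = 4 + 1 = 5;	ce_per_layer[1][5] += count;
 *
 * i.e. layer 0 counts per csrow, and layer 1 counts per (csrow, channel)
 * pair, flattened row-major.
 */
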
static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS],
			      const u16 count)
{
	int i, index = 0;

	mci->ue_mc += count;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count += count;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index] += count;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_ce_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  long grain)
{
	unsigned long remapped_page;
	char *msg_aux = "";

	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI
		 * devices are mapped.  This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
				    offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	char *msg_aux = "";

	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s%son %s (%s%s - %s)\n",
			      msg, msg_aux, label, location, detail, other_detail);
		else
			panic("UE %s%son %s (%s%s)\n",
			      msg, msg_aux, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

/**
 * edac_raw_mc_handle_error - reports a memory event to userspace without doing
 *			      anything to discover the error location
 *
 * @type:	severity of the error (CE/UE/Fatal)
 * @mci:	a struct mem_ctl_info pointer
 * @e:		error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from BIOS,
 * like in the case of the APEI GHES driver.
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
			      struct mem_ctl_info *mci,
			      struct edac_raw_error_desc *e)
{
	char detail[80];
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
			e->page_frame_number, e->offset_in_page,
			e->grain, e->syndrome);
		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report,
			      e->page_frame_number, e->offset_in_page, e->grain);
	} else {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld",
			e->page_frame_number, e->offset_in_page, e->grain);

		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
 * @error_count:	Number of errors of the same type
 * @page_frame_number:	mem page where the error occurred
 * @offset_in_page:	offset of the error inside the page
 * @syndrome:		ECC syndrome
 * @top_layer:		Memory layer[0] position
 * @mid_layer:		Memory layer[1] position
 * @low_layer:		Memory layer[2] position
 * @msg:		Message meaningful to the end users that
 *			explains the event
 * @other_detail:	Technical details about the event that
 *			may help hardware manufacturers and
 *			EDAC developers to analyse the event
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const u16 error_count,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail)
{
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i, n_labels = 0;
	u8 grain_bits;
	struct edac_raw_error_desc *e = &mci->error_desc;

	edac_dbg(3, "MC%d\n", mci->mc_idx);

	/* Fills the error report buffer */
	memset(e, 0, sizeof(*e));
	e->error_count = error_count;
	e->top_layer = top_layer;
	e->mid_layer = mid_layer;
	e->low_layer = low_layer;
	e->page_frame_number = page_frame_number;
	e->offset_in_page = offset_in_page;
	e->syndrome = syndrome;
	e->msg = msg;
	e->other_detail = other_detail;

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			e->enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	p = e->label;
	*p = '\0';

	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = mci->dimms[i];

		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > e->grain)
			e->grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (e->enable_per_layer_report && dimm->nr_pages) {
			if (n_labels >= EDAC_MAX_LABELS) {
				e->enable_per_layer_report = false;
				break;
			}
			n_labels++;
			if (p != e->label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			edac_dbg(4, "%s csrows map: (%d,%d)\n",
				 mci->csbased ? "rank" : "dimm",
				 dimm->csrow, dimm->cschannel);
			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!e->enable_per_layer_report) {
		strcpy(e->label, "any memory");
	} else {
		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
		if (p == e->label)
			strcpy(e->label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row]->ce_count += error_count;
				if (chan >= 0)
					mci->csrows[row]->channels[chan]->ce_count += error_count;
			}
		} else
			if (row >= 0)
				mci->csrows[row]->ue_count += error_count;
	}

	/* Fill the RAM location data */
	p = e->location;

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}
	if (p > e->location)
		*(p - 1) = '\0';

	/* Report the error via the trace interface */
	grain_bits = fls_long(e->grain) + 1;
	trace_mc_event(type, e->msg, e->label, e->error_count,
		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
		       grain_bits, e->syndrome, e->other_detail);

	edac_raw_mc_handle_error(type, mci, e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
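
/*
 * Illustrative caller (hypothetical driver code): report one corrected
 * error at a known csrow/channel position, with no third layer:
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *			     page_frame_number, offset_in_page, syndrome,
 *			     csrow, channel, -1,
 *			     "read error", "");
 */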