x86, mce: remove mce_init unused argument
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / arch / x86 / kernel / cpu / mcheck / mce.c
CommitLineData
1da177e4
LT
1/*
2 * Machine check handler.
e9eee03e 3 *
1da177e4 4 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
d88203d1
TG
5 * Rest from unknown author(s).
6 * 2004 Andi Kleen. Rewrote most of it.
b79109c3
AK
7 * Copyright 2008 Intel Corporation
8 * Author: Andi Kleen
1da177e4 9 */
e9eee03e
IM
10#include <linux/thread_info.h>
11#include <linux/capability.h>
12#include <linux/miscdevice.h>
13#include <linux/ratelimit.h>
14#include <linux/kallsyms.h>
15#include <linux/rcupdate.h>
38c4c97c 16#include <linux/smp_lock.h>
e9eee03e
IM
17#include <linux/kobject.h>
18#include <linux/kdebug.h>
19#include <linux/kernel.h>
20#include <linux/percpu.h>
1da177e4 21#include <linux/string.h>
1da177e4 22#include <linux/sysdev.h>
8c566ef5 23#include <linux/ctype.h>
e9eee03e 24#include <linux/sched.h>
0d7482e3 25#include <linux/sysfs.h>
e9eee03e
IM
26#include <linux/types.h>
27#include <linux/init.h>
28#include <linux/kmod.h>
29#include <linux/poll.h>
30#include <linux/cpu.h>
31#include <linux/fs.h>
32
d88203d1 33#include <asm/processor.h>
1da177e4 34#include <asm/uaccess.h>
e02e68d3 35#include <asm/idle.h>
e9eee03e
IM
36#include <asm/mce.h>
37#include <asm/msr.h>
38#include <asm/smp.h>
1da177e4 39
711c2e48
IM
40#include "mce.h"
41
/*
 * Handle unconfigured int18 (should never happen): the default machine
 * check vector installed before a real handler is selected at boot.
 */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
	       smp_processor_id());
}

/* Call the installed machine check handler for this CPU setup. */
void (*machine_check_vector)(struct pt_regs *, long error_code) =
						unexpected_machine_check;

/* Set non-zero by "mce=off" or when bank allocation fails; gates all MCE setup. */
int mce_disabled;
#ifdef CONFIG_X86_NEW_MCE

#define MISC_MCELOG_MINOR	227

/* Count of CPUs currently inside do_machine_check(); read by NMI code. */
atomic_t mce_entry;

/*
 * Tolerant levels:
 *   0: always panic on uncorrected errors, log corrected errors
 *   1: panic or SIGBUS on uncorrected errors, log corrected errors
 *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
 *   3: never panic or SIGBUS, log all errors (for testing only)
 */
static int			tolerant = 1;
static int			banks;		/* number of MCA banks on this system */
static u64			*bank;		/* per-bank MCi_CTL values, kmalloc'ed in mce_cap_init() */
static unsigned long		notify_user;	/* bit 0 set when new events are logged */
static int			rip_msr;	/* MSR holding an accurate RIP, if supported */
static int			mce_bootlog = -1; /* log pre-boot MCEs; -1 = vendor default */

/* Program run via call_usermodehelper when an event is logged (set via sysfs). */
static char			trigger[128];
static char			*trigger_argv[2] = { trigger, NULL };

/* Banks whose CTL register must not be written (see mce_cpu_quirks). */
static unsigned long		dont_init_banks;

static DECLARE_WAIT_QUEUE_HEAD(mce_wait);

/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};

/* True when bank i's CTL register must be left untouched during init. */
static inline int skip_bank_init(int i)
{
	return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
}
91
/* Do initial initialization of a struct mce: zero it, stamp CPU and TSC. */
void mce_setup(struct mce *m)
{
	memset(m, 0, sizeof(struct mce));
	m->cpu = smp_processor_id();
	rdtscll(m->tsc);
}

/* Per-CPU fake MCE used by mce-inject for software error injection. */
DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);
102
/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

static struct mce_log mcelog = {
	MCE_LOG_SIGNATURE,
	MCE_LOG_LEN,
};

/*
 * Append one record to the global mcelog ring. Safe to call from NMI
 * context: a slot is claimed with cmpxchg on mcelog.next, and the
 * wmb()/finished handshake lets readers detect half-written entries.
 */
void mce_log(struct mce *mce)
{
	unsigned next, entry;

	mce->finished = 0;
	wmb();
	for (;;) {
		entry = rcu_dereference(mcelog.next);
		for (;;) {
			/*
			 * When the buffer fills up discard new entries.
			 * Assume that the earlier errors are the more
			 * interesting ones:
			 */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
				return;
			}
			/* Old left over entry. Skip: */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		/* Claim the slot; retry if another CPU raced us to it. */
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	/* Publish only after the payload is fully visible. */
	mcelog.entry[entry].finished = 1;
	wmb();

	/* Flag that userspace should be woken (done later, outside NMI). */
	set_bit(0, &notify_user);
}
151
/* Dump one MCE record to the console in the format mcelog expects. */
static void print_mce(struct mce *m)
{
	printk(KERN_EMERG "\n"
	       KERN_EMERG "HARDWARE ERROR\n"
	       KERN_EMERG
	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
	       m->cpu, m->mcgstatus, m->bank, m->status);
	if (m->ip) {
		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
		       m->cs, m->ip);
		/* Only kernel addresses can be resolved to symbols here. */
		if (m->cs == __KERNEL_CS)
			print_symbol("{%s}", m->ip);
		printk("\n");
	}
	printk(KERN_EMERG "TSC %llx ", m->tsc);
	if (m->addr)
		printk("ADDR %llx ", m->addr);
	if (m->misc)
		printk("MISC %llx ", m->misc);
	printk("\n");
	printk(KERN_EMERG "This is not a software problem!\n");
	printk(KERN_EMERG "Run through mcelog --ascii to decode "
	       "and contact your hardware vendor\n");
}
/*
 * Panic after dumping all MCE records newer than @start (TSC), plus the
 * @backup record if it was not already in the log. Called with machine
 * checks in a fatal state; printing races are acceptable.
 */
static void mce_panic(char *msg, struct mce *backup, u64 start)
{
	int i;

	bust_spinlocks(1);
	console_verbose();
	for (i = 0; i < MCE_LOG_LEN; i++) {
		u64 tsc = mcelog.entry[i].tsc;

		/* Skip entries recorded before this exception started. */
		if ((s64)(tsc - start) < 0)
			continue;
		print_mce(&mcelog.entry[i]);
		/* Don't print the backup twice if it's already logged. */
		if (backup && mcelog.entry[i].tsc == backup->tsc)
			backup = NULL;
	}
	if (backup)
		print_mce(backup);
	panic(msg);
}
1da177e4 197
/* Support code for software error injection */

/*
 * Map an MCA MSR number to the offset of the corresponding field in
 * struct mce, so injected values can be read back instead of hardware.
 * Returns -1 for MSRs that have no struct mce field.
 */
static int msr_to_offset(u32 msr)
{
	unsigned bank = __get_cpu_var(injectm.bank);
	if (msr == rip_msr)
		return offsetof(struct mce, ip);
	if (msr == MSR_IA32_MC0_STATUS + bank*4)
		return offsetof(struct mce, status);
	if (msr == MSR_IA32_MC0_ADDR + bank*4)
		return offsetof(struct mce, addr);
	if (msr == MSR_IA32_MC0_MISC + bank*4)
		return offsetof(struct mce, misc);
	if (msr == MSR_IA32_MCG_STATUS)
		return offsetof(struct mce, mcgstatus);
	return -1;
}
215
/* MSR access wrappers used for error injection */

/*
 * Read an MCA MSR. When this CPU has a finished injected event pending,
 * return the injected field value instead of touching hardware.
 */
static u64 mce_rdmsrl(u32 msr)
{
	u64 v;
	if (__get_cpu_var(injectm).finished) {
		int offset = msr_to_offset(msr);
		if (offset < 0)
			return 0;
		return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
	}
	rdmsrl(msr, v);
	return v;
}

/*
 * Write an MCA MSR; redirected into the injected struct mce when an
 * injection is in progress on this CPU.
 */
static void mce_wrmsrl(u32 msr, u64 v)
{
	if (__get_cpu_var(injectm).finished) {
		int offset = msr_to_offset(msr);
		if (offset >= 0)
			*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
		return;
	}
	wrmsrl(msr, v);
}
88ccbedd 241int mce_available(struct cpuinfo_x86 *c)
1da177e4 242{
04b2b1a4 243 if (mce_disabled)
5b4408fd 244 return 0;
3d1712c9 245 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
1da177e4
LT
246}
247
/*
 * Record the faulting instruction pointer into @m. Prefer the exact RIP
 * MSR when the CPU has one; otherwise fall back to the trapped pt_regs,
 * which is only usable if MCG_STATUS reports a valid restart IP.
 */
static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
{
	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
		m->ip = regs->ip;
		m->cs = regs->cs;
	} else {
		m->ip = 0;
		m->cs = 0;
	}
	if (rip_msr) {
		/* Assume the RIP in the MSR is exact. Is this true? */
		m->mcgstatus |= MCG_STATUS_EIPV;
		m->ip = mce_rdmsrl(rip_msr);
		m->cs = 0;
	}
}
264
/*
 * Poll for corrected events or events that happened before reset.
 * Those are just logged through /dev/mcelog.
 *
 * This is executed in standard interrupt context.
 *
 * @flags: MCP_UC to also accept uncorrected events, MCP_TIMESTAMP to keep
 *         the TSC stamp, MCP_DONTLOG to clear without logging.
 * @b:     bitmask of banks to inspect.
 */
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
	struct mce m;
	int i;

	mce_setup(&m);

	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
	for (i = 0; i < banks; i++) {
		if (!bank[i] || !test_bit(i, *b))
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		barrier();
		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
		if (!(m.status & MCI_STATUS_VAL))
			continue;

		/*
		 * Uncorrected events are handled by the exception handler
		 * when it is enabled. But when the exception is disabled log
		 * everything.
		 *
		 * TBD do the same check for MCI_STATUS_EN here?
		 */
		if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
			continue;

		if (m.status & MCI_STATUS_MISCV)
			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
		if (m.status & MCI_STATUS_ADDRV)
			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);

		if (!(flags & MCP_TIMESTAMP))
			m.tsc = 0;
		/*
		 * Don't get the IP here because it's unlikely to
		 * have anything to do with the actual error location.
		 */
		if (!(flags & MCP_DONTLOG)) {
			mce_log(&m);
			add_taint(TAINT_MACHINE_CHECK);
		}

		/*
		 * Clear state for this bank.
		 */
		mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}

	/*
	 * Don't clear MCG_STATUS here because it's only defined for
	 * exceptions.
	 */

	sync_core();
}
EXPORT_SYMBOL_GPL(machine_check_poll);
/*
 * The actual machine check handler. This only handles real
 * exceptions when something got corrupted coming in through int 18.
 *
 * This is executed in NMI context not subject to normal locking rules. This
 * implies that most kernel services cannot be safely used. Don't even
 * think about putting a printk in there!
 */
void do_machine_check(struct pt_regs *regs, long error_code)
{
	struct mce m, panicm;
	int panicm_found = 0;
	u64 mcestart = 0;
	int i;
	/*
	 * If no_way_out gets set, there is no safe way to recover from this
	 * MCE.  If tolerant is cranked up, we'll try anyway.
	 */
	int no_way_out = 0;
	/*
	 * If kill_it gets set, there might be a way to recover from this
	 * error.
	 */
	int kill_it = 0;
	/* Banks with a logged event; their STATUS is cleared at the end. */
	DECLARE_BITMAP(toclear, MAX_NR_BANKS);

	atomic_inc(&mce_entry);

	if (notify_die(DIE_NMI, "machine check", regs, error_code,
			   18, SIGKILL) == NOTIFY_STOP)
		goto out2;
	if (!banks)
		goto out2;

	mce_setup(&m);

	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);

	/* if the restart IP is not valid, we're done for */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		no_way_out = 1;

	rdtscll(mcestart);
	barrier();

	for (i = 0; i < banks; i++) {
		__clear_bit(i, toclear);
		if (!bank[i])
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;

		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
		if ((m.status & MCI_STATUS_VAL) == 0)
			continue;

		/*
		 * Non uncorrected errors are handled by machine_check_poll
		 * Leave them alone.
		 */
		if ((m.status & MCI_STATUS_UC) == 0)
			continue;

		/*
		 * Set taint even when machine check was not enabled.
		 */
		add_taint(TAINT_MACHINE_CHECK);

		__set_bit(i, toclear);

		if (m.status & MCI_STATUS_EN) {
			/* if PCC was set, there's no way out */
			no_way_out |= !!(m.status & MCI_STATUS_PCC);
			/*
			 * If this error was uncorrectable and there was
			 * an overflow, we're in trouble.  If no overflow,
			 * we might get away with just killing a task.
			 */
			if (m.status & MCI_STATUS_UC) {
				if (tolerant < 1 || m.status & MCI_STATUS_OVER)
					no_way_out = 1;
				kill_it = 1;
			}
		} else {
			/*
			 * Machine check event was not enabled. Clear, but
			 * ignore.
			 */
			continue;
		}

		if (m.status & MCI_STATUS_MISCV)
			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
		if (m.status & MCI_STATUS_ADDRV)
			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);

		mce_get_rip(&m, regs);
		mce_log(&m);

		/*
		 * Did this bank cause the exception?
		 *
		 * Assume that the bank with uncorrectable errors did it,
		 * and that there is only a single one:
		 */
		if ((m.status & MCI_STATUS_UC) &&
					(m.status & MCI_STATUS_EN)) {
			panicm = m;
			panicm_found = 1;
		}
	}

	/*
	 * If we didn't find an uncorrectable error, pick
	 * the last one (shouldn't happen, just being safe).
	 */
	if (!panicm_found)
		panicm = m;

	/*
	 * If we have decided that we just CAN'T continue, and the user
	 * has not set tolerant to an insane level, give up and die.
	 */
	if (no_way_out && tolerant < 3)
		mce_panic("Machine check", &panicm, mcestart);

	/*
	 * If the error seems to be unrecoverable, something should be
	 * done.  Try to kill as little as possible.  If we can kill just
	 * one task, do that.  If the user has set the tolerance very
	 * high, don't try to do anything at all.
	 */
	if (kill_it && tolerant < 3) {
		int user_space = 0;

		/*
		 * If the EIPV bit is set, it means the saved IP is the
		 * instruction which caused the MCE.
		 */
		if (m.mcgstatus & MCG_STATUS_EIPV)
			user_space = panicm.ip && (panicm.cs & 3);

		/*
		 * If we know that the error was in user space, send a
		 * SIGBUS.  Otherwise, panic if tolerance is low.
		 *
		 * force_sig() takes an awful lot of locks and has a slight
		 * risk of deadlocking.
		 */
		if (user_space) {
			force_sig(SIGBUS, current);
		} else if (panic_on_oops || tolerant < 2) {
			mce_panic("Uncorrected machine check",
				&panicm, mcestart);
		}
	}

	/* notify userspace ASAP */
	set_thread_flag(TIF_MCE_NOTIFY);

	/* the last thing we do is clear state */
	for (i = 0; i < banks; i++) {
		if (test_bit(i, toclear))
			mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}
	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out2:
	atomic_dec(&mce_entry);
	sync_core();
}
EXPORT_SYMBOL_GPL(do_machine_check);
1da177e4 507
#ifdef CONFIG_X86_MCE_INTEL
/***
 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
 * @status: Event status information
 *
 * This function should be called by the thermal interrupt after the
 * event has been processed and the decision was made to log the event
 * further.
 *
 * The status parameter will be saved to the 'status' field of 'struct mce'
 * and historically has been the register value of the
 * MSR_IA32_THERMAL_STATUS (Intel) msr.
 */
void mce_log_therm_throt_event(__u64 status)
{
	struct mce m;

	mce_setup(&m);
	m.bank = MCE_THERMAL_BANK;
	m.status = status;
	mce_log(&m);
}
#endif /* CONFIG_X86_MCE_INTEL */
532
/*
 * Periodic polling timer for "silent" machine check errors.  If the
 * poller finds an MCE, poll 2x faster.  When the poller finds no more
 * errors, poll 2x slower (up to check_interval seconds).
 */
static int check_interval = 5 * 60; /* 5 minutes */

static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);

/* Per-CPU timer callback; @data is the owning CPU number. */
static void mcheck_timer(unsigned long data)
{
	struct timer_list *t = &per_cpu(mce_timer, data);
	int *n;

	WARN_ON(smp_processor_id() != data);

	if (mce_available(&current_cpu_data)) {
		machine_check_poll(MCP_TIMESTAMP,
				&__get_cpu_var(mce_poll_banks));
	}

	/*
	 * Alert userspace if needed.  If we logged an MCE, reduce the
	 * polling interval, otherwise increase the polling interval.
	 */
	n = &__get_cpu_var(next_interval);
	if (mce_notify_user()) {
		*n = max(*n/2, HZ/100);
	} else {
		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
	}

	/* Re-arm ourselves with the adapted interval. */
	t->expires = jiffies + *n;
	add_timer(t);
}
569
/* Run the user-configured trigger program from process context. */
static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
576
/*
 * Notify the user(s) about new machine check events.
 * Can be called from interrupt context, but not from machine check/NMI
 * context.
 *
 * Returns 1 if new events were pending and consumers were woken.
 */
int mce_notify_user(void)
{
	/* Not more than two messages every minute */
	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);

	clear_thread_flag(TIF_MCE_NOTIFY);

	if (test_and_clear_bit(0, &notify_user)) {
		wake_up_interruptible(&mce_wait);

		/*
		 * There is no risk of missing notifications because
		 * work_pending is always cleared before the function is
		 * executed.
		 */
		if (trigger[0] && !work_pending(&mce_trigger_work))
			schedule_work(&mce_trigger_work);

		if (__ratelimit(&ratelimit))
			printk(KERN_INFO "Machine check events logged\n");

		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(mce_notify_user);
8a336b0a 608
/*
 * Initialize Machine Checks for a CPU.
 */

/*
 * Read MCG_CAP, size the global bank array (capped at MAX_NR_BANKS) and
 * detect the accurate-RIP MSR. Returns 0 or -ENOMEM.
 */
static int mce_cap_init(void)
{
	unsigned b;
	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);

	b = cap & MCG_BANKCNT_MASK;
	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);

	if (b > MAX_NR_BANKS) {
		printk(KERN_WARNING
		       "MCE: Using only %u machine check banks out of %u\n",
			MAX_NR_BANKS, b);
		b = MAX_NR_BANKS;
	}

	/* Don't support asymmetric configurations today */
	WARN_ON(banks != 0 && b != banks);
	banks = b;
	if (!bank) {
		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
		if (!bank)
			return -ENOMEM;
		/* Default: enable all error reporting bits in every bank. */
		memset(bank, 0xff, banks * sizeof(u64));
	}

	/* Use accurate RIP reporting if available. */
	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
		rip_msr = MSR_IA32_MCG_EIP;

	return 0;
}
645
/*
 * Enable machine checks on the current CPU: drain leftover events,
 * set CR4.MCE, and program the per-bank control/status registers.
 */
static void mce_init(void)
{
	mce_banks_t all_banks;
	u64 cap;
	int i;

	/*
	 * Log the machine checks left over from the previous reset.
	 */
	bitmap_fill(all_banks, MAX_NR_BANKS);
	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);

	set_in_cr4(X86_CR4_MCE);

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	if (cap & MCG_CTL_P)
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

	for (i = 0; i < banks; i++) {
		/* Some banks must never have their CTL written (quirks). */
		if (skip_bank_init(i))
			continue;
		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}
}
671
/* Add per CPU specific workarounds here */
static void mce_cpu_quirks(struct cpuinfo_x86 *c)
{
	/* This should be disabled by the BIOS, but isn't always */
	if (c->x86_vendor == X86_VENDOR_AMD) {
		if (c->x86 == 15 && banks > 4) {
			/*
			 * disable GART TBL walk error reporting, which
			 * trips off incorrectly with the IOMMU & 3ware
			 * & Cerberus:
			 */
			clear_bit(10, (unsigned long *)&bank[4]);
		}
		if (c->x86 <= 17 && mce_bootlog < 0) {
			/*
			 * Lots of broken BIOS around that don't clear them
			 * by default and leave crap in there. Don't log:
			 */
			mce_bootlog = 0;
		}
		/*
		 * Various K7s with broken bank 0 around. Always disable
		 * by default.
		 */
		if (c->x86 == 6)
			bank[0] = 0;
	}

	if (c->x86_vendor == X86_VENDOR_INTEL) {
		/*
		 * SDM documents that on family 6 bank 0 should not be written
		 * because it aliases to another special BIOS controlled
		 * register.
		 * But it's not aliased anymore on model 0x1a+
		 * Don't ignore bank 0 completely because there could be a
		 * valid event later, merely don't write CTL0.
		 */

		if (c->x86 == 6 && c->x86_model < 0x1A)
			__set_bit(0, &dont_init_banks);
	}
}
1da177e4 714
4efc0670
AK
715static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
716{
717 if (c->x86 != 5)
718 return;
719 switch (c->x86_vendor) {
720 case X86_VENDOR_INTEL:
721 if (mce_p5_enabled())
722 intel_p5_mcheck_init(c);
723 break;
724 case X86_VENDOR_CENTAUR:
725 winchip_mcheck_init(c);
726 break;
727 }
728}
729
cc3ca220 730static void mce_cpu_features(struct cpuinfo_x86 *c)
1da177e4
LT
731{
732 switch (c->x86_vendor) {
733 case X86_VENDOR_INTEL:
734 mce_intel_feature_init(c);
735 break;
89b831ef
JS
736 case X86_VENDOR_AMD:
737 mce_amd_feature_init(c);
738 break;
1da177e4
LT
739 default:
740 break;
741 }
742}
743
/* Arm this CPU's polling timer; a zero check_interval disables polling. */
static void mce_init_timer(void)
{
	struct timer_list *t = &__get_cpu_var(mce_timer);
	int *n = &__get_cpu_var(next_interval);

	*n = check_interval * HZ;
	if (!*n)
		return;
	setup_timer(t, mcheck_timer, smp_processor_id());
	/* round_jiffies batches wakeups across CPUs to save power. */
	t->expires = round_jiffies(jiffies + *n);
	add_timer(t);
}
756
/*
 * Called for each booted CPU to set up machine checks.
 * Must be called with preempt off:
 */
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
	if (mce_disabled)
		return;

	/* Family 5 CPUs get vendor specific handling and nothing else. */
	mce_ancient_init(c);

	if (!mce_available(c))
		return;

	if (mce_cap_init() < 0) {
		/* Allocation failed: permanently disable MCE. */
		mce_disabled = 1;
		return;
	}
	mce_cpu_quirks(c);

	machine_check_vector = do_machine_check;

	mce_init();
	mce_cpu_features(c);
	mce_init_timer();
}
783
/*
 * Character device to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_state_lock);
static int		open_count;		/* #times opened */
static int		open_exclu;		/* already open exclusive? */

/*
 * Open /dev/mcelog. O_EXCL gives the caller exclusive access; any
 * concurrent open (or an O_EXCL open while others hold it) gets -EBUSY.
 */
static int mce_open(struct inode *inode, struct file *file)
{
	lock_kernel();
	spin_lock(&mce_state_lock);

	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_state_lock);
		unlock_kernel();

		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		open_exclu = 1;
	open_count++;

	spin_unlock(&mce_state_lock);
	unlock_kernel();

	/* The log has no seek semantics. */
	return nonseekable_open(inode, file);
}

/* Release /dev/mcelog: drop the open count and any exclusive claim. */
static int mce_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_state_lock);

	open_count--;
	open_exclu = 0;

	spin_unlock(&mce_state_lock);

	return 0;
}
825
/* IPI callback: record each CPU's current TSC into the shared array. */
static void collect_tscs(void *data)
{
	unsigned long *cpu_tsc = (unsigned long *)data;

	rdtscll(cpu_tsc[smp_processor_id()]);
}

/* Serializes readers of /dev/mcelog. */
static DEFINE_MUTEX(mce_read_mutex);
/*
 * Read and clear the MCE log. Only full-buffer reads are supported.
 * Entries are drained in two passes: the bulk up to mcelog.next, then a
 * sweep for entries that raced in while we were copying.
 */
static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
			loff_t *off)
{
	char __user *buf = ubuf;
	unsigned long *cpu_tsc;
	unsigned prev, next;
	int i, err;

	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
	if (!cpu_tsc)
		return -ENOMEM;

	mutex_lock(&mce_read_mutex);
	next = rcu_dereference(mcelog.next);

	/* Only supports full reads right now */
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
		mutex_unlock(&mce_read_mutex);
		kfree(cpu_tsc);

		return -EINVAL;
	}

	err = 0;
	prev = 0;
	do {
		for (i = prev; i < next; i++) {
			unsigned long start = jiffies;

			/*
			 * A writer may still be filling this slot; wait
			 * briefly, then give up and zero the stuck entry.
			 */
			while (!mcelog.entry[i].finished) {
				if (time_after_eq(jiffies, start + 2)) {
					memset(mcelog.entry + i, 0,
					       sizeof(struct mce));
					goto timeout;
				}
				cpu_relax();
			}
			smp_rmb();
			err |= copy_to_user(buf, mcelog.entry + i,
					    sizeof(struct mce));
			buf += sizeof(struct mce);
timeout:
			;
		}

		memset(mcelog.entry + prev, 0,
		       (next - prev) * sizeof(struct mce));
		prev = next;
		/* Reset the ring; loop again if writers appended meanwhile. */
		next = cmpxchg(&mcelog.next, prev, 0);
	} while (next != prev);

	synchronize_sched();

	/*
	 * Collect entries that were still getting written before the
	 * synchronize.
	 */
	on_each_cpu(collect_tscs, cpu_tsc, 1);

	for (i = next; i < MCE_LOG_LEN; i++) {
		if (mcelog.entry[i].finished &&
		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
			err |= copy_to_user(buf, mcelog.entry+i,
					    sizeof(struct mce));
			smp_rmb();
			buf += sizeof(struct mce);
			memset(&mcelog.entry[i], 0, sizeof(struct mce));
		}
	}
	mutex_unlock(&mce_read_mutex);
	kfree(cpu_tsc);

	return err ? -EFAULT : buf - ubuf;
}
909
/* poll(2) on /dev/mcelog: readable whenever unread events exist. */
static unsigned int mce_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_wait, wait);
	if (rcu_dereference(mcelog.next))
		return POLLIN | POLLRDNORM;
	return 0;
}
917
/*
 * ioctl(2) on /dev/mcelog: query record/log sizes and atomically fetch
 * and clear the overflow flags. Requires CAP_SYS_ADMIN.
 */
static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		/* Read-and-clear must be atomic vs. concurrent loggers. */
		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}
943
/* Modified in mce-inject.c, so not static or const */
struct file_operations mce_chrdev_ops = {
	.open			= mce_open,
	.release		= mce_release,
	.read			= mce_read,
	.poll			= mce_poll,
	.unlocked_ioctl		= mce_ioctl,
};
EXPORT_SYMBOL_GPL(mce_chrdev_ops);

/* /dev/mcelog misc device (fixed minor for mcelog(8) compatibility). */
static struct miscdevice mce_log_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};
959
/*
 * mce=off disables machine check
 * mce=TOLERANCELEVEL (number, see above)
 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
 * mce=nobootlog Don't log MCEs from before booting.
 */
static int __init mcheck_enable(char *str)
{
	/* Bare "mce" (no argument) enables the old P5 handler. */
	if (*str == 0)
		enable_p5_mce();
	if (*str == '=')
		str++;
	if (!strcmp(str, "off"))
		mce_disabled = 1;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		mce_bootlog = (str[0] == 'b');
	else if (isdigit(str[0]))
		get_option(&str, &tolerant);
	else {
		printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
		       str);
		return 0;
	}
	return 1;
}
__setup("mce", mcheck_enable);
1da177e4 986
/*
 * Sysfs support
 */

/*
 * Disable machine checks on suspend and shutdown. We can't really handle
 * them later.
 */
static int mce_disable(void)
{
	int i;

	for (i = 0; i < banks; i++) {
		/* Quirk-protected banks are never written. */
		if (!skip_bank_init(i))
			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
	}
	return 0;
}

static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
	return mce_disable();
}

static int mce_shutdown(struct sys_device *dev)
{
	return mce_disable();
}
1015
/*
 * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
 * Only one CPU is active at this time, the others get re-added later using
 * CPU hotplug:
 */
static int mce_resume(struct sys_device *dev)
{
	mce_init();
	mce_cpu_features(&current_cpu_data);

	return 0;
}
1028
/* Per-CPU helper: stop the poll timer, reprogram banks, re-arm the timer. */
static void mce_cpu_restart(void *data)
{
	del_timer_sync(&__get_cpu_var(mce_timer));
	if (mce_available(&current_cpu_data))
		mce_init();
	mce_init_timer();
}

/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
	on_each_cpu(mce_cpu_restart, NULL, 1);
}
1042
/* sysdev class providing /sys/devices/system/machinecheck and PM hooks. */
static struct sysdev_class mce_sysclass = {
	.suspend	= mce_suspend,
	.shutdown	= mce_shutdown,
	.resume		= mce_resume,
	.name		= "machinecheck",
};

DEFINE_PER_CPU(struct sys_device, mce_dev);

/* Hook for the AMD threshold driver to track CPU hotplug events. */
__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
/* Dynamically allocated per-bank sysfs attributes (bankN files). */
static struct sysdev_attribute *bank_attrs;

/* Show a bank's CTL value; the bank index is recovered from the attr offset. */
static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
			 char *buf)
{
	u64 b = bank[attr - bank_attrs];

	return sprintf(buf, "%llx\n", b);
}

/* Store a new CTL value for a bank and reprogram all CPUs. */
static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
			const char *buf, size_t siz)
{
	char *end;
	u64 new = simple_strtoull(buf, &end, 0);

	if (end == buf)
		return -EINVAL;

	bank[attr - bank_attrs] = new;
	mce_restart();

	return end-buf;
}
a98f0dd3 1079
/* Show the configured trigger program path, newline terminated. */
static ssize_t
show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
{
	strcpy(buf, trigger);
	strcat(buf, "\n");
	return strlen(trigger) + 1;
}
1087
4a0b2b4d 1088static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
e9eee03e 1089 const char *buf, size_t siz)
a98f0dd3
AK
1090{
1091 char *p;
1092 int len;
e9eee03e 1093
a98f0dd3
AK
1094 strncpy(trigger, buf, sizeof(trigger));
1095 trigger[sizeof(trigger)-1] = 0;
1096 len = strlen(trigger);
1097 p = strchr(trigger, '\n');
e9eee03e
IM
1098
1099 if (*p)
1100 *p = 0;
1101
a98f0dd3
AK
1102 return len;
1103}
1104
b56f642d
AK
/*
 * sysfs store for integer attributes whose new value must take effect
 * on all CPUs: write the int, then reinitialize MCE everywhere.
 */
static ssize_t store_int_with_restart(struct sys_device *s,
				      struct sysdev_attribute *attr,
				      const char *buf, size_t size)
{
	ssize_t ret = sysdev_store_int(s, attr, buf, size);
	mce_restart();
	return ret;
}
1113
static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);

/* check_interval writes additionally restart the polling machinery. */
static struct sysdev_ext_attribute attr_check_interval = {
	_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
		     store_int_with_restart),
	&check_interval
};

/* NULL-terminated list of attributes created for every CPU's sysdev. */
static struct sysdev_attribute *mce_attrs[] = {
	&attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
	NULL
};
1da177e4 1127
/* CPUs whose mce sysdev was fully created; guards mce_remove_device(). */
static cpumask_var_t mce_dev_initialized;
e9eee03e 1130/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
91c6d400 1131static __cpuinit int mce_create_device(unsigned int cpu)
1da177e4
LT
1132{
1133 int err;
73ca5358 1134 int i;
92cb7612 1135
90367556 1136 if (!mce_available(&boot_cpu_data))
91c6d400
AK
1137 return -EIO;
1138
cb491fca
IM
1139 memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
1140 per_cpu(mce_dev, cpu).id = cpu;
1141 per_cpu(mce_dev, cpu).cls = &mce_sysclass;
91c6d400 1142
cb491fca 1143 err = sysdev_register(&per_cpu(mce_dev, cpu));
d435d862
AM
1144 if (err)
1145 return err;
1146
cb491fca
IM
1147 for (i = 0; mce_attrs[i]; i++) {
1148 err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
d435d862
AM
1149 if (err)
1150 goto error;
1151 }
0d7482e3 1152 for (i = 0; i < banks; i++) {
cb491fca 1153 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
0d7482e3
AK
1154 &bank_attrs[i]);
1155 if (err)
1156 goto error2;
1157 }
cb491fca 1158 cpumask_set_cpu(cpu, mce_dev_initialized);
91c6d400 1159
d435d862 1160 return 0;
0d7482e3 1161error2:
cb491fca
IM
1162 while (--i >= 0)
1163 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
d435d862 1164error:
cb491fca
IM
1165 while (--i >= 0)
1166 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1167
1168 sysdev_unregister(&per_cpu(mce_dev, cpu));
d435d862 1169
91c6d400
AK
1170 return err;
1171}
1172
2d9cd6c2 1173static __cpuinit void mce_remove_device(unsigned int cpu)
91c6d400 1174{
73ca5358
SL
1175 int i;
1176
cb491fca 1177 if (!cpumask_test_cpu(cpu, mce_dev_initialized))
bae19fe0
AH
1178 return;
1179
cb491fca
IM
1180 for (i = 0; mce_attrs[i]; i++)
1181 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1182
0d7482e3 1183 for (i = 0; i < banks; i++)
cb491fca
IM
1184 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
1185
1186 sysdev_unregister(&per_cpu(mce_dev, cpu));
1187 cpumask_clear_cpu(cpu, mce_dev_initialized);
91c6d400 1188}
91c6d400 1189
d6b75584 1190/* Make sure there are no machine checks on offlined CPUs. */
ec5b3d32 1191static void mce_disable_cpu(void *h)
d6b75584 1192{
88ccbedd 1193 unsigned long action = *(unsigned long *)h;
cb491fca 1194 int i;
d6b75584
AK
1195
1196 if (!mce_available(&current_cpu_data))
1197 return;
88ccbedd
AK
1198 if (!(action & CPU_TASKS_FROZEN))
1199 cmci_clear();
06b7a7a5
AK
1200 for (i = 0; i < banks; i++) {
1201 if (!skip_bank_init(i))
1202 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
1203 }
d6b75584
AK
1204}
1205
ec5b3d32 1206static void mce_reenable_cpu(void *h)
d6b75584 1207{
88ccbedd 1208 unsigned long action = *(unsigned long *)h;
e9eee03e 1209 int i;
d6b75584
AK
1210
1211 if (!mce_available(&current_cpu_data))
1212 return;
e9eee03e 1213
88ccbedd
AK
1214 if (!(action & CPU_TASKS_FROZEN))
1215 cmci_reenable();
06b7a7a5
AK
1216 for (i = 0; i < banks; i++) {
1217 if (!skip_bank_init(i))
1218 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
1219 }
d6b75584
AK
1220}
1221
91c6d400 1222/* Get notified when a cpu comes on/off. Be hotplug friendly. */
e9eee03e
IM
1223static int __cpuinit
1224mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
91c6d400
AK
1225{
1226 unsigned int cpu = (unsigned long)hcpu;
52d168e2 1227 struct timer_list *t = &per_cpu(mce_timer, cpu);
91c6d400
AK
1228
1229 switch (action) {
bae19fe0
AH
1230 case CPU_ONLINE:
1231 case CPU_ONLINE_FROZEN:
1232 mce_create_device(cpu);
8735728e
RW
1233 if (threshold_cpu_callback)
1234 threshold_cpu_callback(action, cpu);
91c6d400 1235 break;
91c6d400 1236 case CPU_DEAD:
8bb78442 1237 case CPU_DEAD_FROZEN:
8735728e
RW
1238 if (threshold_cpu_callback)
1239 threshold_cpu_callback(action, cpu);
91c6d400
AK
1240 mce_remove_device(cpu);
1241 break;
52d168e2
AK
1242 case CPU_DOWN_PREPARE:
1243 case CPU_DOWN_PREPARE_FROZEN:
1244 del_timer_sync(t);
88ccbedd 1245 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
52d168e2
AK
1246 break;
1247 case CPU_DOWN_FAILED:
1248 case CPU_DOWN_FAILED_FROZEN:
6298c512
AK
1249 t->expires = round_jiffies(jiffies +
1250 __get_cpu_var(next_interval));
52d168e2 1251 add_timer_on(t, cpu);
88ccbedd
AK
1252 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1253 break;
1254 case CPU_POST_DEAD:
1255 /* intentionally ignoring frozen here */
1256 cmci_rediscover(cpu);
52d168e2 1257 break;
91c6d400 1258 }
bae19fe0 1259 return NOTIFY_OK;
91c6d400
AK
1260}
1261
/* Deliver CPU hotplug events to mce_cpu_callback(). */
static struct notifier_block mce_cpu_notifier __cpuinitdata = {
	.notifier_call = mce_cpu_callback,
};
1265
0d7482e3
AK
1266static __init int mce_init_banks(void)
1267{
1268 int i;
1269
1270 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1271 GFP_KERNEL);
1272 if (!bank_attrs)
1273 return -ENOMEM;
1274
1275 for (i = 0; i < banks; i++) {
1276 struct sysdev_attribute *a = &bank_attrs[i];
e9eee03e
IM
1277
1278 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
0d7482e3
AK
1279 if (!a->attr.name)
1280 goto nomem;
e9eee03e
IM
1281
1282 a->attr.mode = 0644;
1283 a->show = show_bank;
1284 a->store = set_bank;
0d7482e3
AK
1285 }
1286 return 0;
1287
1288nomem:
1289 while (--i >= 0)
1290 kfree(bank_attrs[i].attr.name);
1291 kfree(bank_attrs);
1292 bank_attrs = NULL;
e9eee03e 1293
0d7482e3
AK
1294 return -ENOMEM;
1295}
1296
91c6d400
AK
1297static __init int mce_init_device(void)
1298{
1299 int err;
1300 int i = 0;
1301
1da177e4
LT
1302 if (!mce_available(&boot_cpu_data))
1303 return -EIO;
0d7482e3 1304
cb491fca 1305 alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
996867d0 1306
0d7482e3
AK
1307 err = mce_init_banks();
1308 if (err)
1309 return err;
1310
1da177e4 1311 err = sysdev_class_register(&mce_sysclass);
d435d862
AM
1312 if (err)
1313 return err;
91c6d400
AK
1314
1315 for_each_online_cpu(i) {
d435d862
AM
1316 err = mce_create_device(i);
1317 if (err)
1318 return err;
91c6d400
AK
1319 }
1320
be6b5a35 1321 register_hotcpu_notifier(&mce_cpu_notifier);
1da177e4 1322 misc_register(&mce_log_device);
e9eee03e 1323
1da177e4 1324 return err;
1da177e4 1325}
91c6d400 1326
1da177e4 1327device_initcall(mce_init_device);
a988d334 1328
#else /* CONFIG_X86_OLD_MCE: */

/* Bank count, presumably filled in by the vendor init routines called
   from mcheck_init() below — exported for non-fatal.o. */
int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
a988d334
IM
1334/* This has to be run for each processor */
1335void mcheck_init(struct cpuinfo_x86 *c)
1336{
1337 if (mce_disabled == 1)
1338 return;
1339
1340 switch (c->x86_vendor) {
1341 case X86_VENDOR_AMD:
1342 amd_mcheck_init(c);
1343 break;
1344
1345 case X86_VENDOR_INTEL:
1346 if (c->x86 == 5)
1347 intel_p5_mcheck_init(c);
1348 if (c->x86 == 6)
1349 intel_p6_mcheck_init(c);
1350 if (c->x86 == 15)
1351 intel_p4_mcheck_init(c);
1352 break;
1353
1354 case X86_VENDOR_CENTAUR:
1355 if (c->x86 == 5)
1356 winchip_mcheck_init(c);
1357 break;
1358
1359 default:
1360 break;
1361 }
b659294b 1362 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
a988d334
IM
1363}
1364
a988d334
IM
/*
 * "mce" boot parameter for the old machine check code: forces
 * mce_disabled to -1 (distinct from the "disabled" value 1 tested
 * in mcheck_init() above).
 */
static int __init mcheck_enable(char *str)
{
	mce_disabled = -1;
	return 1;
}

__setup("mce", mcheck_enable);
1372
d7c3c9a6
AK
#endif /* CONFIG_X86_OLD_MCE */

/*
 * Old style boot options parsing. Only for compatibility.
 */
static int __init mcheck_disable(char *str)
{
	/* "nomce": switch machine check handling off entirely. */
	mce_disabled = 1;
	return 1;
}
__setup("nomce", mcheck_disable);