From 39f152ffbfedb42b57b6e0c896eeae51dbe83b7a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Nov 2016 18:44:45 +0100 Subject: [PATCH] x86/mcheck: Reorganize the hotplug callbacks Initially I wanted to remove mcheck_cpu_init() from identify_cpu() and let it become an independent early hotplug callback. The main problem here was that the init on the boot CPU may happen too late (device_initcall_sync(mcheck_init_device)) and nobody wanted to risk receiving and MCE event at boot time leading to a shutdown (if the MCE feature is not yet enabled). Here is attempt two: the timming stays as-is but the ordering of the functions is changed: - mcheck_cpu_init() (which is run from identify_cpu()) will setup the timer struct but won't fire the timer. This is moved to CPU_ONLINE since its cleanup part is in CPU_DOWN_PREPARE. So if it is okay to stop the timer early in the shutdown phase, it should be okay to start it late in the bring up phase. - CPU_DOWN_PREPARE disables the MCE feature flags for !INTEL CPUs in mce_disable_cpu(). If a failure occures it would be re-enabled on all vendor CPUs (including Intel where it was not disabled during shutdown). To keep this working I am moving it to CPU_ONLINE. smp_call_function_single() is dropped beause the notifier runs nowdays on the target CPU. - CPU_ONLINE is invoking mce_device_create() + mce_threshold_create_device() but its cleanup part is in CPU_DEAD (mce_threshold_remove_device() and mce_device_remove()). In order to keep this symmetrical I am moving the clean up from CPU_DEAD to CPU_DOWN_PREPARE. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Borislav Petkov Cc: Tony Luck Cc: rt@linutronix.de Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20161110174447.11848-6-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 36 ++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 052b5e05c3c4..a524faa51400 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1745,6 +1745,14 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) add_timer_on(t, cpu); } +static void __mcheck_cpu_setup_timer(void) +{ + struct timer_list *t = this_cpu_ptr(&mce_timer); + unsigned int cpu = smp_processor_id(); + + setup_pinned_timer(t, mce_timer_fn, cpu); +} + static void __mcheck_cpu_init_timer(void) { struct timer_list *t = this_cpu_ptr(&mce_timer); @@ -1796,7 +1804,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_clear_banks(); - __mcheck_cpu_init_timer(); + __mcheck_cpu_setup_timer(); } /* @@ -2470,28 +2478,25 @@ static void mce_device_remove(unsigned int cpu) } /* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) +static void mce_disable_cpu(void) { - unsigned long action = *(unsigned long *)h; - if (!mce_available(raw_cpu_ptr(&cpu_info))) return; - if (!(action & CPU_TASKS_FROZEN)) + if (!cpuhp_tasks_frozen) cmci_clear(); vendor_disable_error_reporting(); } -static void mce_reenable_cpu(void *h) +static void mce_reenable_cpu(void) { - unsigned long action = *(unsigned long *)h; int i; if (!mce_available(raw_cpu_ptr(&cpu_info))) return; - if (!(action & CPU_TASKS_FROZEN)) + if (!cpuhp_tasks_frozen) cmci_reenable(); for (i = 0; i < mca_cfg.banks; i++) { struct mce_bank *b = &mce_banks[i]; @@ -2510,6 +2515,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: + case CPU_DOWN_FAILED: mce_device_create(cpu); @@ -2517,11 +2523,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) mce_device_remove(cpu); return NOTIFY_BAD; } - + mce_reenable_cpu(); + mce_start_timer(cpu, t); break; case CPU_DEAD: - mce_threshold_remove_device(cpu); - mce_device_remove(cpu); mce_intel_hcpu_update(cpu); /* intentionally ignoring frozen here */ @@ -2529,12 +2534,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) cmci_rediscover(); break; case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, mce_disable_cpu, &action, 1); + mce_disable_cpu(); del_timer_sync(t); - break; - case CPU_DOWN_FAILED: - smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - mce_start_timer(cpu, t); + + mce_threshold_remove_device(cpu); + mce_device_remove(cpu); break; } -- 2.20.1