From a7fcf28d431ef70afaa91496e64e16dc51dccec4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Fri, 6 Mar 2015 17:50:19 -0800
Subject: [PATCH] x86/asm/entry: Replace this_cpu_sp0() with current_top_of_stack() and fix it on x86_32

I broke 32-bit kernels.  The implementation of sp0 was correct as far
as I can tell, but sp0 was much weirder on x86_32 than I realized.
It has the following issues:

 - Init's sp0 is inconsistent with everything else's: non-init tasks
   are offset by 8 bytes.  (I have no idea why, and the comment is
   unhelpful.)

 - vm86 does crazy things to sp0.

Fix it up by replacing this_cpu_sp0() with current_top_of_stack()
and using a new percpu variable to track the top of the stack on
x86_32.

Signed-off-by: Andy Lutomirski
Cc: Borislav Petkov
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Oleg Nesterov
Cc: Thomas Gleixner
Fixes: 75182b1632a8 ("x86/asm/entry: Switch all C consumers of kernel_stack to this_cpu_sp0()")
Link: http://lkml.kernel.org/r/d09dbe270883433776e0cbee3c7079433349e96d.1425692936.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/processor.h   | 11 ++++++++++-
 arch/x86/include/asm/thread_info.h |  4 +---
 arch/x86/kernel/cpu/common.c       | 13 +++++++++++--
 arch/x86/kernel/process_32.c       | 11 +++++++----
 arch/x86/kernel/smpboot.c          |  2 ++
 arch/x86/kernel/traps.c            |  4 ++--
 6 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f5e3ec63767d..48a61c1c626e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -284,6 +284,10 @@ struct tss_struct {
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
+
 /*
  * Save the original ist values for checking stack pointers during debugging
  */
@@ -564,9 +568,14 @@ static inline void native_swapgs(void)
 #endif
 }
 
-static inline unsigned long this_cpu_sp0(void)
+static inline unsigned long current_top_of_stack(void)
 {
+#ifdef CONFIG_X86_64
 	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+	/* sp0 on x86_32 is special in and around vm86 mode. */
+	return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
 }
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a2fa1899494e..7740edd56fed 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -158,9 +158,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 
 static inline struct thread_info *current_thread_info(void)
 {
-	struct thread_info *ti;
-	ti = (void *)(this_cpu_sp0() - THREAD_SIZE);
-	return ti;
+	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
 }
 
 static inline unsigned long current_stack_pointer(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5d0f0cc7ea26..76348334b934 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1130,8 +1130,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot.  Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot.  Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
@@ -1226,6 +1226,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack.  Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+	(unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0405cab6634d..1b9963faf4eb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -306,13 +306,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	arch_end_context_switch(next_p);
 
 	/*
-	 * Reload esp0. This changes current_thread_info().
+	 * Reload esp0, kernel_stack, and current_top_of_stack.  This changes
+	 * current_thread_info().
 	 */
 	load_sp0(tss, next);
-
 	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE - KERNEL_STACK_OFFSET);
+	this_cpu_write(cpu_current_top_of_stack,
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE);
 
 	/*
 	 * Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index febc6aabc72e..759388c538cf 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -806,6 +806,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
+	per_cpu(cpu_current_top_of_stack, cpu) =
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
 	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fa290586ed37..081252c44cde 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -174,8 +174,8 @@ void ist_begin_non_atomic(struct pt_regs *regs)
 	 * will catch asm bugs and any attempt to use ist_preempt_enable
 	 * from double_fault.
 	 */
-	BUG_ON((unsigned long)(this_cpu_sp0() - current_stack_pointer()) >=
-	       THREAD_SIZE);
+	BUG_ON((unsigned long)(current_top_of_stack() -
+			       current_stack_pointer()) >= THREAD_SIZE);
 
 	preempt_count_sub(HARDIRQ_OFFSET);
 }
-- 
2.20.1