x86/asm/entry: Replace this_cpu_sp0() with current_top_of_stack() and fix it on x86_32
authorAndy Lutomirski <luto@amacapital.net>
Sat, 7 Mar 2015 01:50:19 +0000 (17:50 -0800)
committerIngo Molnar <mingo@kernel.org>
Sat, 7 Mar 2015 08:34:03 +0000 (09:34 +0100)
I broke 32-bit kernels.  The implementation of sp0 was correct
as far as I can tell, but sp0 was much weirder on x86_32 than I
realized.  It has the following issues:

 - Init's sp0 is inconsistent with everything else's: non-init tasks
   are offset by 8 bytes.  (I have no idea why, and the comment is unhelpful.)

 - vm86 does crazy things to sp0.

Fix it up by replacing this_cpu_sp0() with
current_top_of_stack() and using a new percpu variable to track
the top of the stack on x86_32.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 75182b1632a8 ("x86/asm/entry: Switch all C consumers of kernel_stack to this_cpu_sp0()")
Link: http://lkml.kernel.org/r/d09dbe270883433776e0cbee3c7079433349e96d.1425692936.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/processor.h
arch/x86/include/asm/thread_info.h
arch/x86/kernel/cpu/common.c
arch/x86/kernel/process_32.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/traps.c

index f5e3ec63767d8b4575586c7f535d1a2d1fef9cc6..48a61c1c626ea99a53561a311cf87553b3a29f4d 100644 (file)
@@ -284,6 +284,10 @@ struct tss_struct {
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
+
 /*
  * Save the original ist values for checking stack pointers during debugging
  */
@@ -564,9 +568,14 @@ static inline void native_swapgs(void)
 #endif
 }
 
-static inline unsigned long this_cpu_sp0(void)
+static inline unsigned long current_top_of_stack(void)
 {
+#ifdef CONFIG_X86_64
        return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+       /* sp0 on x86_32 is special in and around vm86 mode. */
+       return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
 }
 
 #ifdef CONFIG_PARAVIRT
index a2fa1899494eb2b3e0cc62554843aa43b4b44c8d..7740edd56fed7d2cf3016fa91b79948e94848c99 100644 (file)
@@ -158,9 +158,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 
 static inline struct thread_info *current_thread_info(void)
 {
-       struct thread_info *ti;
-       ti = (void *)(this_cpu_sp0() - THREAD_SIZE);
-       return ti;
+       return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
 }
 
 static inline unsigned long current_stack_pointer(void)
index 5d0f0cc7ea26645e18bf5ff1a68d42233c005c44..76348334b934722d353003c53782a0832fcd1bed 100644 (file)
@@ -1130,8 +1130,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
                     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot.  Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot.  Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
        &init_task;
@@ -1226,6 +1226,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack.  Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+       (unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
index 0405cab6634d0b83ea8966528fdbadd8a6e37381..1b9963faf4ebc223d56f33d68a031cbbc07166d6 100644 (file)
@@ -306,13 +306,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        arch_end_context_switch(next_p);
 
        /*
-        * Reload esp0.  This changes current_thread_info().
+        * Reload esp0, kernel_stack, and current_top_of_stack.  This changes
+        * current_thread_info().
         */
        load_sp0(tss, next);
-
        this_cpu_write(kernel_stack,
-                 (unsigned long)task_stack_page(next_p) +
-                 THREAD_SIZE - KERNEL_STACK_OFFSET);
+                      (unsigned long)task_stack_page(next_p) +
+                      THREAD_SIZE - KERNEL_STACK_OFFSET);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+                      THREAD_SIZE);
 
        /*
         * Restore %gs if needed (which is common)
index febc6aabc72e049443f68c167622d50cd8344f16..759388c538cf7960df92b84d6bd9d78ff15e73ca 100644 (file)
@@ -806,6 +806,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 #ifdef CONFIG_X86_32
        /* Stack for startup_32 can be just as for start_secondary onwards */
        irq_ctx_init(cpu);
+       per_cpu(cpu_current_top_of_stack, cpu) =
+               (unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
        clear_tsk_thread_flag(idle, TIF_FORK);
        initial_gs = per_cpu_offset(cpu);
index fa290586ed3721fa526eeee22903ff30994622e2..081252c44cde4195fa5c27e754212ed2db10b74d 100644 (file)
@@ -174,8 +174,8 @@ void ist_begin_non_atomic(struct pt_regs *regs)
         * will catch asm bugs and any attempt to use ist_preempt_enable
         * from double_fault.
         */
-       BUG_ON((unsigned long)(this_cpu_sp0() - current_stack_pointer()) >=
-              THREAD_SIZE);
+       BUG_ON((unsigned long)(current_top_of_stack() -
+                              current_stack_pointer()) >= THREAD_SIZE);
 
        preempt_count_sub(HARDIRQ_OFFSET);
 }