x86/process: Optimize TIF checks in __switch_to_xtra()
author Kyle Huey <me@kylehuey.com>
Tue, 14 Feb 2017 08:11:02 +0000 (00:11 -0800)
committer Thomas Gleixner <tglx@linutronix.de>
Sat, 11 Mar 2017 11:45:17 +0000 (12:45 +0100)
Help the compiler to avoid reevaluating the thread flags for each checked
bit by reordering the bit checks and providing an explicit xor for
evaluation.

With default defconfigs for each arch,

x86_64: arch/x86/kernel/process.o
   text    data     bss     dec     hex
   3056    8577      16   11649    2d81 Before
   3024    8577      16   11617    2d61 After

i386: arch/x86/kernel/process.o
   text    data     bss     dec     hex
   2957    8673       8   11638    2d76 Before
   2925    8673       8   11606    2d56 After

Originally-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kyle Huey <khuey@kylehuey.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Link: http://lkml.kernel.org/r/20170214081104.9244-2-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/x86/kernel/process.c

index f675915617110fa4cae6c74efc35ba8ccd12eb46..ea9ea2582dab6cedb867739e3bc44f1f70a019bc 100644 (file)
@@ -182,54 +182,61 @@ int set_tsc_mode(unsigned int val)
        return 0;
 }
 
+static inline void switch_to_bitmap(struct tss_struct *tss,
+                                   struct thread_struct *prev,
+                                   struct thread_struct *next,
+                                   unsigned long tifp, unsigned long tifn)
+{
+       if (tifn & _TIF_IO_BITMAP) {
+               /*
+                * Copy the relevant range of the IO bitmap.
+                * Normally this is 128 bytes or less:
+                */
+               memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+                      max(prev->io_bitmap_max, next->io_bitmap_max));
+               /*
+                * Make sure that the TSS limit is correct for the CPU
+                * to notice the IO bitmap.
+                */
+               refresh_tss_limit();
+       } else if (tifp & _TIF_IO_BITMAP) {
+               /*
+                * Clear any possible leftover bits:
+                */
+               memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+       }
+}
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                      struct tss_struct *tss)
 {
        struct thread_struct *prev, *next;
+       unsigned long tifp, tifn;
 
        prev = &prev_p->thread;
        next = &next_p->thread;
 
-       if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
-           test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
+       tifn = READ_ONCE(task_thread_info(next_p)->flags);
+       tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+       switch_to_bitmap(tss, prev, next, tifp, tifn);
+
+       propagate_user_return_notify(prev_p, next_p);
+
+       if ((tifp ^ tifn) & _TIF_BLOCKSTEP) {
                unsigned long debugctl = get_debugctlmsr();
 
                debugctl &= ~DEBUGCTLMSR_BTF;
-               if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
+               if (tifn & _TIF_BLOCKSTEP)
                        debugctl |= DEBUGCTLMSR_BTF;
-
                update_debugctlmsr(debugctl);
        }
 
-       if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-           test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-               /* prev and next are different */
-               if (test_tsk_thread_flag(next_p, TIF_NOTSC))
+       if ((tifp ^ tifn) & _TIF_NOTSC) {
+               if (tifn & _TIF_NOTSC)
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }
-
-       if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
-               /*
-                * Copy the relevant range of the IO bitmap.
-                * Normally this is 128 bytes or less:
-                */
-               memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-                      max(prev->io_bitmap_max, next->io_bitmap_max));
-
-               /*
-                * Make sure that the TSS limit is correct for the CPU
-                * to notice the IO bitmap.
-                */
-               refresh_tss_limit();
-       } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
-               /*
-                * Clear any possible leftover bits:
-                */
-               memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
-       }
-       propagate_user_return_notify(prev_p, next_p);
 }
 
 /*