arm64: add support for kernel mode NEON in any context without preemption
author Jeongtae Park <jtp.park@samsung.com>
Thu, 10 Nov 2016 06:39:58 +0000 (15:39 +0900)
committer Eunyoung Lee <ey470.lee@samsung.com>
Tue, 19 Jun 2018 08:43:19 +0000 (17:43 +0900)
Change-Id: I09a5d88cc304ca1dba9fabf3f4c29aacebd88771
Signed-off-by: Jeongtae Park <jtp.park@samsung.com>
Signed-off-by: Wooki Min <wooki.min@samsung.com>
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/processor.h
arch/arm64/kernel/fpsimd.c

index 410c48163c6a1f3783752a116cbdec669bd66f09..89dd572e39cd8db326936d025823c48e4e4b4bf0 100644 (file)
@@ -41,6 +41,22 @@ struct fpsimd_state {
        unsigned int cpu;
 };
 
+struct fpsimd_kernel_state {
+       __uint128_t vregs[32];
+       u32 fpsr;
+       u32 fpcr;
+       /*
+        * Nesting depth of kernel-mode FP/SIMD use by the owning task.
+        * While it is non-zero, fpsimd_thread_switch() saves the live
+        * registers into the fields above when the task is switched out
+        * and reloads them from there when it is switched back in.
+        *
+        * fpsimd_get()/fpsimd_put() raise and lower the depth, and
+        * fpsimd_set_task_using()/fpsimd_clr_task_using() force it to
+        * 1 or 0, so such a task may run NEON code while preemptible.
+        */
+       atomic_t depth;
+};
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -71,6 +87,12 @@ extern void fpsimd_flush_task_state(struct task_struct *target);
 extern void __efi_fpsimd_begin(void);
 extern void __efi_fpsimd_end(void);
 
+void fpsimd_set_task_using(struct task_struct *t);
+void fpsimd_clr_task_using(struct task_struct *t);
+
+void fpsimd_get(void);
+void fpsimd_put(void);
+
 #endif
 
 #endif
index fda6f5812281c2d6f549283d4277ab04e4cb153d..907ccfd0a4ffdab5f9abf5584e160979f93a83ff 100644 (file)
@@ -108,6 +108,7 @@ struct thread_struct {
        unsigned long           tp2_value;
 #endif
        struct fpsimd_state     fpsimd_state;
+       struct fpsimd_kernel_state fpsimd_kernel_state;
        unsigned long           fault_address;  /* fault info */
        unsigned long           fault_code;     /* ESR_EL1 value */
        struct debug_info       debug;          /* debugging */
index 5d547deb6996c0091c64f14de18b5b8a75c88ae4..8903c01777d4d227f120a650a50e2f0928701810 100644 (file)
@@ -137,15 +137,31 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
 
 void fpsimd_thread_switch(struct task_struct *next)
 {
+       struct fpsimd_state *cur_st = &current->thread.fpsimd_state;
+       struct fpsimd_kernel_state *cur_kst
+                       = &current->thread.fpsimd_kernel_state;
+       struct fpsimd_state *nxt_st = &next->thread.fpsimd_state;
+       struct fpsimd_kernel_state *nxt_kst
+                       = &next->thread.fpsimd_kernel_state;
+
        if (!system_supports_fpsimd())
                return;
+
        /*
         * Save the current FPSIMD state to memory, but only if whatever is in
         * the registers is in fact the most recent userland FPSIMD state of
         * 'current'.
         */
        if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
-               fpsimd_save_state(&current->thread.fpsimd_state);
+               fpsimd_save_state(cur_st);
+
+       if (atomic_read(&cur_kst->depth))
+               fpsimd_save_state((struct fpsimd_state *)cur_kst);
+
+       if (atomic_read(&nxt_kst->depth)) {
+               fpsimd_load_state((struct fpsimd_state *)nxt_kst);
+               this_cpu_write(fpsimd_last_state, (struct fpsimd_state *)nxt_kst);
+       }
 
        if (next->mm) {
                /*
@@ -155,10 +171,8 @@ void fpsimd_thread_switch(struct task_struct *next)
                 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
                 * upon the next return to userland.
                 */
-               struct fpsimd_state *st = &next->thread.fpsimd_state;
-
-               if (__this_cpu_read(fpsimd_last_state) == st
-                   && st->cpu == smp_processor_id())
+               if (__this_cpu_read(fpsimd_last_state) == nxt_st
+                   && nxt_st->cpu == smp_processor_id())
                        clear_ti_thread_flag(task_thread_info(next),
                                             TIF_FOREIGN_FPSTATE);
                else
@@ -252,6 +266,42 @@ void fpsimd_flush_task_state(struct task_struct *t)
        t->thread.fpsimd_state.cpu = NR_CPUS;
 }
 
+void fpsimd_set_task_using(struct task_struct *t)
+{      /* Force t's kernel FP/SIMD depth to exactly 1 (a store, not ++). */
+       atomic_set(&t->thread.fpsimd_kernel_state.depth, 1);
+}
+
+void fpsimd_clr_task_using(struct task_struct *t)
+{      /* Reset t's kernel FP/SIMD depth to 0, whatever it was before. */
+       atomic_set(&t->thread.fpsimd_kernel_state.depth, 0);
+}
+
+void fpsimd_get(void)
+{      /* Enter a nestable kernel FP/SIMD section; undone by fpsimd_put(). */
+       if (in_interrupt())     /* depth is left untouched in this context */
+               return;
+
+       if (atomic_inc_return(&current->thread.fpsimd_kernel_state.depth) == 1) {
+               preempt_disable();      /* only the 0 -> 1 transition gets here */
+               if (current->mm &&      /* task has an mm, i.e. user state */
+                   !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
+                       fpsimd_save_state(&current->thread.fpsimd_state);
+                       fpsimd_flush_task_state(current);
+               }
+               this_cpu_write(fpsimd_last_state, NULL); /* matches no task now */
+               preempt_enable();
+       }
+}
+
+void fpsimd_put(void)
+{      /* Leave a kernel FP/SIMD section: drops current's depth by one. */
+       if (in_interrupt())     /* same early exit as fpsimd_get() */
+               return;
+
+       BUG_ON(atomic_dec_return(       /* NB: the decrement happens in here */
+               &current->thread.fpsimd_kernel_state.depth) < 0); /* underflow */
+}
+
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 DEFINE_PER_CPU(bool, kernel_neon_busy);
@@ -382,13 +432,22 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
 {
        switch (cmd) {
        case CPU_PM_ENTER:
-               if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
+               if ((current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
+                    || atomic_read(&current->thread.fpsimd_kernel_state.depth)) {
                        fpsimd_save_state(&current->thread.fpsimd_state);
+               }
                this_cpu_write(fpsimd_last_state, NULL);
                break;
        case CPU_PM_EXIT:
                if (current->mm)
                        set_thread_flag(TIF_FOREIGN_FPSTATE);
+
+               if (atomic_read(&current->thread.fpsimd_kernel_state.depth)) {
+                       fpsimd_load_state(&current->thread.fpsimd_state);
+                       this_cpu_write(fpsimd_last_state,
+                                       &current->thread.fpsimd_state);
+                       current->thread.fpsimd_state.cpu = smp_processor_id();
+               }
                break;
        case CPU_PM_ENTER_FAILED:
        default: