From 0212bfdb9e2bfd8d17ab7e3f1c6426b00675fb1b Mon Sep 17 00:00:00 2001
From: Jeongtae Park
Date: Fri, 1 Jun 2018 09:03:33 +0900
Subject: [PATCH] Revert "arm64: neon: Remove support for nested or hardirq kernel-mode NEON"

This reverts commit cb84d11e1625aa3a081d898ca2640bf3a9ca0e96.

Change-Id: I710273beb3ebd06ba3ee770a80716f95ec0592c0
---
 arch/arm64/include/asm/fpsimd.h       |  14 ++++
 arch/arm64/include/asm/fpsimdmacros.h |  56 +++++++++++++
 arch/arm64/include/asm/neon.h         |   4 +-
 arch/arm64/include/asm/simd.h         |  35 +-------
 arch/arm64/kernel/entry-fpsimd.S      |  24 ++++++
 arch/arm64/kernel/fpsimd.c            | 116 ++++++++------------------
 6 files changed, 135 insertions(+), 114 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 410c48163c6a..5155f21e15e3 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -41,6 +41,16 @@ struct fpsimd_state {
 	unsigned int cpu;
 };
 
+/*
+ * Struct for stacking the bottom 'n' FP/SIMD registers.
+ */
+struct fpsimd_partial_state {
+	u32		fpsr;
+	u32		fpcr;
+	u32		num_regs;
+	__uint128_t	vregs[32];
+};
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -67,6 +77,10 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+				      u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
 /* For use by EFI runtime services calls only */
 extern void __efi_fpsimd_begin(void);
 extern void __efi_fpsimd_end(void);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 0f5fdd388b0d..a2daf1293028 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -75,3 +75,59 @@
 	ldr	w\tmpnr, [\state, #16 * 2 + 4]
 	fpsimd_restore_fpcr x\tmpnr, \state
 .endm
+
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+	mrs	x\tmpnr1, fpsr
+	str	w\numnr, [\state, #8]
+	mrs	x\tmpnr2, fpcr
+	stp	w\tmpnr1, w\tmpnr2, [\state]
+	adr	x\tmpnr1, 0f
+	add	\state, \state, x\numnr, lsl #4
+	sub	x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
+	br	x\tmpnr1
+	stp	q30, q31, [\state, #-16 * 30 - 16]
+	stp	q28, q29, [\state, #-16 * 28 - 16]
+	stp	q26, q27, [\state, #-16 * 26 - 16]
+	stp	q24, q25, [\state, #-16 * 24 - 16]
+	stp	q22, q23, [\state, #-16 * 22 - 16]
+	stp	q20, q21, [\state, #-16 * 20 - 16]
+	stp	q18, q19, [\state, #-16 * 18 - 16]
+	stp	q16, q17, [\state, #-16 * 16 - 16]
+	stp	q14, q15, [\state, #-16 * 14 - 16]
+	stp	q12, q13, [\state, #-16 * 12 - 16]
+	stp	q10, q11, [\state, #-16 * 10 - 16]
+	stp	q8, q9, [\state, #-16 * 8 - 16]
+	stp	q6, q7, [\state, #-16 * 6 - 16]
+	stp	q4, q5, [\state, #-16 * 4 - 16]
+	stp	q2, q3, [\state, #-16 * 2 - 16]
+	stp	q0, q1, [\state, #-16 * 0 - 16]
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+	ldp	w\tmpnr1, w\tmpnr2, [\state]
+	msr	fpsr, x\tmpnr1
+	fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
+	adr	x\tmpnr1, 0f
+	ldr	w\tmpnr2, [\state, #8]
+	add	\state, \state, x\tmpnr2, lsl #4
+	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
+	br	x\tmpnr1
+	ldp	q30, q31, [\state, #-16 * 30 - 16]
+	ldp	q28, q29, [\state, #-16 * 28 - 16]
+	ldp	q26, q27, [\state, #-16 * 26 - 16]
+	ldp	q24, q25, [\state, #-16 * 24 - 16]
+	ldp	q22, q23, [\state, #-16 * 22 - 16]
+	ldp	q20, q21, [\state, #-16 * 20 - 16]
+	ldp	q18, q19, [\state, #-16 * 18 - 16]
+	ldp	q16, q17, [\state, #-16 * 16 - 16]
+	ldp	q14, q15, [\state, #-16 * 14 - 16]
+	ldp	q12, q13, [\state, #-16 * 12 - 16]
+	ldp	q10, q11, [\state, #-16 * 10 - 16]
+	ldp	q8, q9, [\state, #-16 * 8 - 16]
+	ldp	q6, q7, [\state, #-16 * 6 - 16]
+	ldp	q4, q5, [\state, #-16 * 4 - 16]
+	ldp	q2, q3, [\state, #-16 * 2 - 16]
+	ldp	q0, q1, [\state, #-16 * 0 - 16]
+0:
+.endm
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index fb9d137256a6..5368bd04fe7b 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -16,7 +16,9 @@
 
 #define cpu_has_neon()		system_supports_fpsimd()
 
-void kernel_neon_begin(void);
+#define kernel_neon_begin()	kernel_neon_begin_partial(32)
+
+void kernel_neon_begin_partial(u32 num_regs);
 void kernel_neon_end(void);
 
 #endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index fa8b3fe932e6..96959b52afae 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -9,48 +9,15 @@
 #ifndef __ASM_SIMD_H
 #define __ASM_SIMD_H
 
-#include <linux/compiler.h>
-#include <linux/irqflags.h>
-#include <linux/percpu.h>
-#include <linux/preempt.h>
 #include <linux/types.h>
 
-#ifdef CONFIG_KERNEL_MODE_NEON
-
-DECLARE_PER_CPU(bool, kernel_neon_busy);
-
 /*
  * may_use_simd - whether it is allowable at this time to issue SIMD
  * instructions or access the SIMD register file
- *
- * Callers must not assume that the result remains true beyond the next
- * preempt_enable() or return from softirq context.
 */
 static __must_check inline bool may_use_simd(void)
 {
-	/*
-	 * The raw_cpu_read() is racy if called with preemption enabled.
-	 * This is not a bug: kernel_neon_busy is only set when
-	 * preemption is disabled, so we cannot migrate to another CPU
-	 * while it is set, nor can we migrate to a CPU where it is set.
-	 * So, if we find it clear on some CPU then we're guaranteed to
-	 * find it clear on any CPU we could migrate to.
-	 *
-	 * If we are in between kernel_neon_begin()...kernel_neon_end(),
-	 * the flag will be set, but preemption is also disabled, so we
-	 * can't migrate to another CPU and spuriously see it become
-	 * false.
-	 */
-	return !in_irq() && !irqs_disabled() && !in_nmi() &&
-	       !raw_cpu_read(kernel_neon_busy);
+	return true;
 }
 
-#else /* ! CONFIG_KERNEL_MODE_NEON */
-
-static __must_check inline bool may_use_simd(void) {
-	return false;
-}
-
-#endif /* ! CONFIG_KERNEL_MODE_NEON */
-
 #endif
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6dbfa6..c44a82f146b1 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
 	fpsimd_restore x0, 8
 	ret
 ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_save_partial_state)
+	fpsimd_save_partial x0, 1, 8, 9
+	ret
+ENDPROC(fpsimd_save_partial_state)
+
+/*
+ * Load the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+	fpsimd_restore_partial x0, 8, 9
+	ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5d547deb6996..082756890137 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -17,18 +17,18 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include 
 #include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
+#include 
 #include 
 
 #define FPEXC_IOF	(1 << 0)
@@ -65,13 +65,6 @@
  * CPU currently contain the most recent userland FPSIMD state of the current
 * task.
 *
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with local_bh_disable() unless softirqs are already masked.
- *
 * For a certain task, the sequence may look something like this:
 * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
 *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
@@ -171,14 +164,9 @@ void fpsimd_flush_thread(void)
 {
 	if (!system_supports_fpsimd())
 		return;
-
-	local_bh_disable();
-
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
 	fpsimd_flush_task_state(current);
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
-
-	local_bh_enable();
 }
 
 /*
@@ -189,13 +177,10 @@ void fpsimd_preserve_current_state(void)
 {
 	if (!system_supports_fpsimd())
 		return;
-
-	local_bh_disable();
-
+	preempt_disable();
 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_save_state(&current->thread.fpsimd_state);
-
-	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -207,9 +192,7 @@ void fpsimd_restore_current_state(void)
 {
 	if (!system_supports_fpsimd())
 		return;
-
-	local_bh_disable();
-
+	preempt_disable();
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		struct fpsimd_state *st = &current->thread.fpsimd_state;
 
@@ -217,8 +200,7 @@ void fpsimd_restore_current_state(void)
 		__this_cpu_write(fpsimd_last_state, st);
 		st->cpu = smp_processor_id();
 	}
-
-	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -230,9 +212,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 {
 	if (!system_supports_fpsimd())
 		return;
-
-	local_bh_disable();
-
+	preempt_disable();
 	fpsimd_load_state(state);
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		struct fpsimd_state *st = &current->thread.fpsimd_state;
@@ -240,8 +220,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 		__this_cpu_write(fpsimd_last_state, st);
 		st->cpu = smp_processor_id();
 	}
-
-	local_bh_enable();
+	preempt_enable();
 }
 
 /*
@@ -254,70 +233,49 @@ void fpsimd_flush_task_state(struct task_struct *t)
 
 #ifdef CONFIG_KERNEL_MODE_NEON
 
-DEFINE_PER_CPU(bool, kernel_neon_busy);
-EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
 
 /*
  * Kernel-side NEON support functions
 */
-
-/*
- * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
- * context
- *
- * Must not be called unless may_use_simd() returns true.
- * Task context in the FPSIMD registers is saved back to memory as necessary.
- *
- * A matching call to kernel_neon_end() must be made before returning from the
- * calling context.
- *
- * The caller may freely use the FPSIMD registers until kernel_neon_end() is
- * called.
- */
-void kernel_neon_begin(void)
+void kernel_neon_begin_partial(u32 num_regs)
 {
 	if (WARN_ON(!system_supports_fpsimd()))
 		return;
+	if (in_interrupt()) {
+		struct fpsimd_partial_state *s = this_cpu_ptr(
+			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
-	BUG_ON(!may_use_simd());
-
-	local_bh_disable();
-
-	__this_cpu_write(kernel_neon_busy, true);
-
-	/* Save unsaved task fpsimd state, if any: */
-	if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
-		fpsimd_save_state(&current->thread.fpsimd_state);
-
-	/* Invalidate any task state remaining in the fpsimd regs: */
-	__this_cpu_write(fpsimd_last_state, NULL);
-
-	preempt_disable();
-
-	local_bh_enable();
+		BUG_ON(num_regs > 32);
+		fpsimd_save_partial_state(s, roundup(num_regs, 2));
+	} else {
+		/*
+		 * Save the userland FPSIMD state if we have one and if we
+		 * haven't done so already. Clear fpsimd_last_state to indicate
+		 * that there is no longer userland FPSIMD state in the
+		 * registers.
+		 */
+		preempt_disable();
+		if (current->mm &&
+		    !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+			fpsimd_save_state(&current->thread.fpsimd_state);
+		this_cpu_write(fpsimd_last_state, NULL);
+	}
 }
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL(kernel_neon_begin_partial);
 
-/*
- * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
- *
- * Must be called from a context in which kernel_neon_begin() was previously
- * called, with no call to kernel_neon_end() in the meantime.
- *
- * The caller must not use the FPSIMD registers after this function is called,
- * unless kernel_neon_begin() is called again in the meantime.
- */
 void kernel_neon_end(void)
 {
-	bool busy;
-
 	if (!system_supports_fpsimd())
 		return;
-
-	busy = __this_cpu_xchg(kernel_neon_busy, false);
-	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
-
-	preempt_enable();
+	if (in_interrupt()) {
+		struct fpsimd_partial_state *s = this_cpu_ptr(
+			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+		fpsimd_load_partial_state(s);
+	} else {
+		preempt_enable();
+	}
}
 EXPORT_SYMBOL(kernel_neon_end);
-- 
2.20.1
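
Reviewer note, not part of the patch: with this revert applied, kernel code
brackets NEON sections as in the minimal sketch below. do_neon_xor() is a
hypothetical NEON routine used only for illustration; the begin/end calls are
the API this patch restores. kernel_neon_begin() now expands to
kernel_neon_begin_partial(32), and a caller that clobbers only the bottom few
q-registers can pass a smaller count to reduce save/restore cost (the count is
rounded up to an even number of registers by the roundup(num_regs, 2) above).

    #include <asm/neon.h>

    /* Hypothetical NEON helper, assumed to clobber only q0-q7. */
    void do_neon_xor(void *dst, const void *src, unsigned long len);

    static void xor_block_neon(void *dst, const void *src, unsigned long len)
    {
            kernel_neon_begin_partial(8);   /* save only the bottom 8 regs */
            do_neon_xor(dst, src, len);
            kernel_neon_end();              /* give FPSIMD regs back */
    }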
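A second hedged sketch of what the revert functionally restores: NEON use in
interrupt context. When kernel_neon_begin_partial() is called from hardirq or
softirq context, it spills the interrupted FPSIMD state into the per-CPU
hardirq_fpsimdstate/softirq_fpsimdstate buffers defined above, allowing one
level of nesting from each interrupt class on top of task context (and the
reverted may_use_simd() is unconditionally true). my_irq_handler() and its
NEON body are hypothetical, for illustration only:

    #include <linux/interrupt.h>
    #include <asm/neon.h>

    static irqreturn_t my_irq_handler(int irq, void *dev_id)
    {
            /* Interrupted NEON state is parked in hardirq_fpsimdstate. */
            kernel_neon_begin_partial(2);
            /* ... short NEON sequence touching only q0/q1 ... */
            kernel_neon_end();
            return IRQ_HANDLED;
    }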