Revert "arm64: neon: Remove support for nested or hardirq kernel-mode NEON"
author    Jeongtae Park <jtp.park@samsung.com>
          Fri, 1 Jun 2018 00:03:33 +0000 (09:03 +0900)
committer Eunyoung Lee <ey470.lee@samsung.com>
          Tue, 19 Jun 2018 08:43:22 +0000 (17:43 +0900)
This reverts commit cb84d11e1625aa3a081d898ca2640bf3a9ca0e96 ("arm64: neon:
Remove support for nested or hardirq kernel-mode NEON"), restoring
kernel_neon_begin_partial() and the per-CPU partial-register save areas so
that kernel-mode NEON may again be used from hardirq and softirq context.

Change-Id: I710273beb3ebd06ba3ee770a80716f95ec0592c0

arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/fpsimdmacros.h
arch/arm64/include/asm/neon.h
arch/arm64/include/asm/simd.h
arch/arm64/kernel/entry-fpsimd.S
arch/arm64/kernel/fpsimd.c

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 410c48163c6a1f3783752a116cbdec669bd66f09..5155f21e15e3657cb331cbaca50912cc711ef467 100644
@@ -41,6 +41,16 @@ struct fpsimd_state {
        unsigned int cpu;
 };
 
+/*
+ * Struct for stacking the bottom 'n' FP/SIMD registers.
+ */
+struct fpsimd_partial_state {
+       u32             fpsr;
+       u32             fpcr;
+       u32             num_regs;
+       __uint128_t     vregs[32];
+};
+
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -67,6 +77,10 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+                                     u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
 /* For use by EFI runtime services calls only */
 extern void __efi_fpsimd_begin(void);
 extern void __efi_fpsimd_end(void);
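
For scale: a fpsimd_partial_state sized for a full 32-register save occupies
16 bytes of header (12 bytes of fields padded for the 16-byte alignment of
vregs) plus 512 bytes of register storage, and fpsimd.c below keeps one
hardirq and one softirq instance per CPU. A compile-time check sketching that
arithmetic (illustrative, not part of this commit):

    _Static_assert(sizeof(struct fpsimd_partial_state) == 16 + 32 * 16,
                   "16-byte header (12 bytes of fields plus alignment "
                   "padding) followed by 32 x 128-bit vregs");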
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 0f5fdd388b0dfb0ee17d01e4357033866d9caa1b..a2daf12930284183122bd14da5a0c2b441c91034 100644
        ldr     w\tmpnr, [\state, #16 * 2 + 4]
        fpsimd_restore_fpcr x\tmpnr, \state
 .endm
+
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+       mrs     x\tmpnr1, fpsr
+       str     w\numnr, [\state, #8]
+       mrs     x\tmpnr2, fpcr
+       stp     w\tmpnr1, w\tmpnr2, [\state]
+       adr     x\tmpnr1, 0f
+       add     \state, \state, x\numnr, lsl #4
+       sub     x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
+       br      x\tmpnr1
+       stp     q30, q31, [\state, #-16 * 30 - 16]
+       stp     q28, q29, [\state, #-16 * 28 - 16]
+       stp     q26, q27, [\state, #-16 * 26 - 16]
+       stp     q24, q25, [\state, #-16 * 24 - 16]
+       stp     q22, q23, [\state, #-16 * 22 - 16]
+       stp     q20, q21, [\state, #-16 * 20 - 16]
+       stp     q18, q19, [\state, #-16 * 18 - 16]
+       stp     q16, q17, [\state, #-16 * 16 - 16]
+       stp     q14, q15, [\state, #-16 * 14 - 16]
+       stp     q12, q13, [\state, #-16 * 12 - 16]
+       stp     q10, q11, [\state, #-16 * 10 - 16]
+       stp     q8, q9, [\state, #-16 * 8 - 16]
+       stp     q6, q7, [\state, #-16 * 6 - 16]
+       stp     q4, q5, [\state, #-16 * 4 - 16]
+       stp     q2, q3, [\state, #-16 * 2 - 16]
+       stp     q0, q1, [\state, #-16 * 0 - 16]
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+       ldp     w\tmpnr1, w\tmpnr2, [\state]
+       msr     fpsr, x\tmpnr1
+       fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
+       adr     x\tmpnr1, 0f
+       ldr     w\tmpnr2, [\state, #8]
+       add     \state, \state, x\tmpnr2, lsl #4
+       sub     x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
+       br      x\tmpnr1
+       ldp     q30, q31, [\state, #-16 * 30 - 16]
+       ldp     q28, q29, [\state, #-16 * 28 - 16]
+       ldp     q26, q27, [\state, #-16 * 26 - 16]
+       ldp     q24, q25, [\state, #-16 * 24 - 16]
+       ldp     q22, q23, [\state, #-16 * 22 - 16]
+       ldp     q20, q21, [\state, #-16 * 20 - 16]
+       ldp     q18, q19, [\state, #-16 * 18 - 16]
+       ldp     q16, q17, [\state, #-16 * 16 - 16]
+       ldp     q14, q15, [\state, #-16 * 14 - 16]
+       ldp     q12, q13, [\state, #-16 * 12 - 16]
+       ldp     q10, q11, [\state, #-16 * 10 - 16]
+       ldp     q8, q9, [\state, #-16 * 8 - 16]
+       ldp     q6, q7, [\state, #-16 * 6 - 16]
+       ldp     q4, q5, [\state, #-16 * 4 - 16]
+       ldp     q2, q3, [\state, #-16 * 2 - 16]
+       ldp     q0, q1, [\state, #-16 * 0 - 16]
+0:
+.endm
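
The adr/sub/br sequence in both macros is a computed branch into the table of
stp/ldp instructions that follows it: each stp or ldp is a single 4-byte A64
instruction transferring one pair of Q registers, so landing num_regs * 2
bytes before label 0 executes exactly num_regs / 2 of them (the C caller
rounds num_regs up to an even value). Pre-advancing \state by num_regs * 16
makes the fixed negative offsets fall inside vregs for any register count.
The control flow is the classic Duff's-device fall-through; a hypothetical C
analog (simplified offsets, illustration only, not kernel code):

    /*
     * C sketch of the computed branch in fpsimd_save_partial: the
     * switch falls through so that only the last n/2 pair-stores
     * execute, mirroring "br 0f - n * 2" into the stp table.
     */
    typedef __uint128_t u128;

    static void save_partial_sketch(u128 *dst, const u128 *q, unsigned int n)
    {
        switch (n) {    /* n already rounded up to an even count */
        case 32: dst[30] = q[30]; dst[31] = q[31]; /* fallthrough */
        case 30: dst[28] = q[28]; dst[29] = q[29]; /* fallthrough */
        case 28: dst[26] = q[26]; dst[27] = q[27]; /* fallthrough */
        case 26: dst[24] = q[24]; dst[25] = q[25]; /* fallthrough */
        case 24: dst[22] = q[22]; dst[23] = q[23]; /* fallthrough */
        case 22: dst[20] = q[20]; dst[21] = q[21]; /* fallthrough */
        case 20: dst[18] = q[18]; dst[19] = q[19]; /* fallthrough */
        case 18: dst[16] = q[16]; dst[17] = q[17]; /* fallthrough */
        case 16: dst[14] = q[14]; dst[15] = q[15]; /* fallthrough */
        case 14: dst[12] = q[12]; dst[13] = q[13]; /* fallthrough */
        case 12: dst[10] = q[10]; dst[11] = q[11]; /* fallthrough */
        case 10: dst[8]  = q[8];  dst[9]  = q[9];  /* fallthrough */
        case 8:  dst[6]  = q[6];  dst[7]  = q[7];  /* fallthrough */
        case 6:  dst[4]  = q[4];  dst[5]  = q[5];  /* fallthrough */
        case 4:  dst[2]  = q[2];  dst[3]  = q[3];  /* fallthrough */
        case 2:  dst[0]  = q[0];  dst[1]  = q[1];
        }
    }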
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index fb9d137256a6986b6681d8ef20367b9461e6d84d..5368bd04fe7b73945f57afd7b4c3d6cd691fecc6 100644
@@ -16,7 +16,9 @@
 
 #define cpu_has_neon()         system_supports_fpsimd()
 
-void kernel_neon_begin(void);
+#define kernel_neon_begin()    kernel_neon_begin_partial(32)
+
+void kernel_neon_begin_partial(u32 num_regs);
 void kernel_neon_end(void);
 
 #endif /* ! __ASM_NEON_H */
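
kernel_neon_begin() is once again a full 32-register request, while a caller
that clobbers only a few registers can ask for a cheaper partial save. A
minimal, hypothetical usage sketch (function name and body are illustrative):

    #include <linux/types.h>
    #include <asm/neon.h>

    /* Hypothetical routine that uses only q0-q3. */
    static void toy_neon_copy(void *dst, const void *src, size_t len)
    {
        kernel_neon_begin_partial(4);   /* preserve just q0-q3 */
        /* ... NEON inline asm touching q0-q3 only ... */
        kernel_neon_end();
    }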
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index fa8b3fe932e6f568841017215524bc0b894cbf28..96959b52afae40d3aeaa8878384b939dd4aa2aab 100644
@@ -9,48 +9,15 @@
 #ifndef __ASM_SIMD_H
 #define __ASM_SIMD_H
 
-#include <linux/compiler.h>
-#include <linux/irqflags.h>
-#include <linux/percpu.h>
-#include <linux/preempt.h>
 #include <linux/types.h>
 
-#ifdef CONFIG_KERNEL_MODE_NEON
-
-DECLARE_PER_CPU(bool, kernel_neon_busy);
-
 /*
  * may_use_simd - whether it is allowable at this time to issue SIMD
  *                instructions or access the SIMD register file
- *
- * Callers must not assume that the result remains true beyond the next
- * preempt_enable() or return from softirq context.
  */
 static __must_check inline bool may_use_simd(void)
 {
-       /*
-        * The raw_cpu_read() is racy if called with preemption enabled.
-        * This is not a bug: kernel_neon_busy is only set when
-        * preemption is disabled, so we cannot migrate to another CPU
-        * while it is set, nor can we migrate to a CPU where it is set.
-        * So, if we find it clear on some CPU then we're guaranteed to
-        * find it clear on any CPU we could migrate to.
-        *
-        * If we are in between kernel_neon_begin()...kernel_neon_end(),
-        * the flag will be set, but preemption is also disabled, so we
-        * can't migrate to another CPU and spuriously see it become
-        * false.
-        */
-       return !in_irq() && !irqs_disabled() && !in_nmi() &&
-               !raw_cpu_read(kernel_neon_busy);
+       return true;
 }
 
-#else /* ! CONFIG_KERNEL_MODE_NEON */
-
-static __must_check inline bool may_use_simd(void) {
-       return false;
-}
-
-#endif /* ! CONFIG_KERNEL_MODE_NEON */
-
 #endif
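
With nested and hardirq kernel-mode NEON supported again, may_use_simd()
degenerates to an unconditional true on arm64; generic code still calls it
before choosing between a SIMD path and a scalar fallback. A hedged sketch of
that common guard pattern (helper bodies elided, names illustrative):

    #include <linux/types.h>
    #include <asm/neon.h>
    #include <asm/simd.h>

    static void xor_block(u8 *dst, const u8 *src, size_t len)
    {
        if (may_use_simd()) {
            kernel_neon_begin();
            /* NEON implementation (elided) */
            kernel_neon_end();
        } else {
            /* portable scalar fallback (elided) */
        }
    }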
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6dbfa6dd81be8a6042e5366262a8146801..c44a82f146b16664850ddadbfdc4699784c4e468 100644
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
        fpsimd_restore x0, 8
        ret
 ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_save_partial_state)
+       fpsimd_save_partial x0, 1, 8, 9
+       ret
+ENDPROC(fpsimd_save_partial_state)
+
+/*
+ * Load the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+       fpsimd_restore_partial x0, 8, 9
+       ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5d547deb6996c0091c64f14de18b5b8a75c88ae4..08275689013700c7a6810d3c5d7aeb65c5e19518 100644
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/bottom_half.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/percpu.h>
 #include <linux/preempt.h>
 #include <linux/sched/signal.h>
 #include <linux/signal.h>
+#include <linux/hardirq.h>
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
+#include <asm/neon.h>
 #include <asm/simd.h>
 
 #define FPEXC_IOF      (1 << 0)
  * CPU currently contain the most recent userland FPSIMD state of the current
  * task.
  *
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with local_bh_disable() unless softirqs are already masked.
- *
  * For a certain task, the sequence may look something like this:
  * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
  *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
@@ -171,14 +164,9 @@ void fpsimd_flush_thread(void)
 {
        if (!system_supports_fpsimd())
                return;
-
-       local_bh_disable();
-
        memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
        fpsimd_flush_task_state(current);
        set_thread_flag(TIF_FOREIGN_FPSTATE);
-
-       local_bh_enable();
 }
 
 /*
@@ -189,13 +177,10 @@ void fpsimd_preserve_current_state(void)
 {
        if (!system_supports_fpsimd())
                return;
-
-       local_bh_disable();
-
+       preempt_disable();
        if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
                fpsimd_save_state(&current->thread.fpsimd_state);
-
-       local_bh_enable();
+       preempt_enable();
 }
 
 /*
@@ -207,9 +192,7 @@ void fpsimd_restore_current_state(void)
 {
        if (!system_supports_fpsimd())
                return;
-
-       local_bh_disable();
-
+       preempt_disable();
        if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
                struct fpsimd_state *st = &current->thread.fpsimd_state;
 
@@ -217,8 +200,7 @@ void fpsimd_restore_current_state(void)
                __this_cpu_write(fpsimd_last_state, st);
                st->cpu = smp_processor_id();
        }
-
-       local_bh_enable();
+       preempt_enable();
 }
 
 /*
@@ -230,9 +212,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 {
        if (!system_supports_fpsimd())
                return;
-
-       local_bh_disable();
-
+       preempt_disable();
        fpsimd_load_state(state);
        if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
                struct fpsimd_state *st = &current->thread.fpsimd_state;
@@ -240,8 +220,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
                __this_cpu_write(fpsimd_last_state, st);
                st->cpu = smp_processor_id();
        }
-
-       local_bh_enable();
+       preempt_enable();
 }
 
 /*
@@ -254,70 +233,49 @@ void fpsimd_flush_task_state(struct task_struct *t)
 
 #ifdef CONFIG_KERNEL_MODE_NEON
 
-DEFINE_PER_CPU(bool, kernel_neon_busy);
-EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
 
 /*
  * Kernel-side NEON support functions
  */
-
-/*
- * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
- * context
- *
- * Must not be called unless may_use_simd() returns true.
- * Task context in the FPSIMD registers is saved back to memory as necessary.
- *
- * A matching call to kernel_neon_end() must be made before returning from the
- * calling context.
- *
- * The caller may freely use the FPSIMD registers until kernel_neon_end() is
- * called.
- */
-void kernel_neon_begin(void)
+void kernel_neon_begin_partial(u32 num_regs)
 {
        if (WARN_ON(!system_supports_fpsimd()))
                return;
+       if (in_interrupt()) {
+               struct fpsimd_partial_state *s = this_cpu_ptr(
+                       in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
 
-       BUG_ON(!may_use_simd());
-
-       local_bh_disable();
-
-       __this_cpu_write(kernel_neon_busy, true);
-
-       /* Save unsaved task fpsimd state, if any: */
-       if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
-               fpsimd_save_state(&current->thread.fpsimd_state);
-
-       /* Invalidate any task state remaining in the fpsimd regs: */
-       __this_cpu_write(fpsimd_last_state, NULL);
-
-       preempt_disable();
-
-       local_bh_enable();
+               BUG_ON(num_regs > 32);
+               fpsimd_save_partial_state(s, roundup(num_regs, 2));
+       } else {
+               /*
+                * Save the userland FPSIMD state if we have one and if we
+                * haven't done so already. Clear fpsimd_last_state to indicate
+                * that there is no longer userland FPSIMD state in the
+                * registers.
+                */
+               preempt_disable();
+               if (current->mm &&
+                   !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+                       fpsimd_save_state(&current->thread.fpsimd_state);
+               this_cpu_write(fpsimd_last_state, NULL);
+       }
 }
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL(kernel_neon_begin_partial);
 
-/*
- * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
- *
- * Must be called from a context in which kernel_neon_begin() was previously
- * called, with no call to kernel_neon_end() in the meantime.
- *
- * The caller must not use the FPSIMD registers after this function is called,
- * unless kernel_neon_begin() is called again in the meantime.
- */
 void kernel_neon_end(void)
 {
-       bool busy;
-
        if (!system_supports_fpsimd())
                return;
-
-       busy = __this_cpu_xchg(kernel_neon_busy, false);
-       WARN_ON(!busy); /* No matching kernel_neon_begin()? */
-
-       preempt_enable();
+       if (in_interrupt()) {
+               struct fpsimd_partial_state *s = this_cpu_ptr(
+                       in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+               fpsimd_load_partial_state(s);
+       } else {
+               preempt_enable();
+       }
 }
 EXPORT_SYMBOL(kernel_neon_end);
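
Taken together, the revert restores the old nesting model: NEON code in task
context may be interrupted by a softirq or hardirq that itself uses NEON,
because begin/end in interrupt context spill into the per-CPU
softirq_fpsimdstate/hardirq_fpsimdstate buffers instead of touching the
task's state. A hypothetical illustration of the nesting this permits
(handler and names are illustrative, not from this commit):

    #include <linux/interrupt.h>
    #include <asm/neon.h>

    /* Task context: full save of the task's FPSIMD state. */
    static void task_path(void)
    {
        kernel_neon_begin();            /* kernel_neon_begin_partial(32) */
        /* ... NEON work; an interrupt may arrive here ... */
        kernel_neon_end();
    }

    /* Hardirq context: permitted again; spills q0-q1 per CPU. */
    static irqreturn_t toy_irq_handler(int irq, void *dev)
    {
        kernel_neon_begin_partial(2);   /* save only what we clobber */
        /* ... brief NEON fixup ... */
        kernel_neon_end();
        return IRQ_HANDLED;
    }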