arm64: add support for kernel mode NEON in interrupt context
author Ard Biesheuvel <ard.biesheuvel@linaro.org>
Mon, 24 Feb 2014 14:26:29 +0000 (15:26 +0100)
committer Ard Biesheuvel <ard.biesheuvel@linaro.org>
Thu, 8 May 2014 09:31:57 +0000 (11:31 +0200)
This patch modifies kernel_neon_begin() and kernel_neon_end() so that
they may be called from any context. To address the case where only
a couple of registers are needed, kernel_neon_begin_partial(u32) is
introduced, which takes as its parameter the number 'n' of bottom NEON
q-registers required. To mark the end of such a partial section, the
regular kernel_neon_end() should be used.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/fpsimdmacros.h
arch/arm64/include/asm/neon.h
arch/arm64/kernel/entry-fpsimd.S
arch/arm64/kernel/fpsimd.c

index 7a900142dbc89436e7c639f82ee5244ee64769fb..50f559f574fe53b4d25995bd465c7b90f0a4b8be 100644 (file)
@@ -41,6 +41,17 @@ struct fpsimd_state {
        unsigned int cpu;
 };
 
+/*
+ * Save area for stacking FPSR, FPCR and the bottom 'n' FP/SIMD registers.
+ */
+struct fpsimd_partial_state {
+       u32             fpsr;           /* FPSR value at save time (offset 0) */
+       u32             fpcr;           /* FPCR value at save time (offset 4) */
+       u32             num_regs;       /* register count given to the save routine (offset 8) */
+       __uint128_t     vregs[32];      /* room for up to 32 128-bit registers; slot order is set by the save/restore macros */
+};
+
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
 #define VFP_FPSCR_STAT_MASK    0xf800009f
@@ -66,6 +77,10 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+                                     u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
 #endif
 
 #endif
index bbec599c96bd61df88740e7e1295d81540189edc..768414d55e642f461788c0a031e1749c83cfac86 100644 (file)
        ldr     w\tmpnr, [\state, #16 * 2 + 4]
        msr     fpcr, x\tmpnr
 .endm
+
+.altmacro
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+       mrs     x\tmpnr1, fpsr                  /* tmp1 = FPSR */
+       str     w\numnr, [\state, #8]           /* record the register count at offset 8 */
+       mrs     x\tmpnr2, fpcr                  /* tmp2 = FPCR */
+       stp     w\tmpnr1, w\tmpnr2, [\state]    /* FPSR at offset 0, FPCR at offset 4 */
+       adr     x\tmpnr1, 0f                    /* tmp1 = address just past the stp sequence */
+       add     \state, \state, x\numnr, lsl #4 /* state += 16 * count (state is clobbered); NOTE(review): reads the full 64-bit count register, confirm its upper bits are zero */
+       sub     x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 /* back up 2 bytes per register, i.e. one 4-byte stp per pair; count is expected to be even */
+       br      x\tmpnr1                        /* execute only the last count/2 stp instructions */
+       .irp    qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+       .irp    qb, %(qa + 1)
+       stp     q\qa, q\qb, [\state, # -16 * \qa - 16] /* each pair is addressed below the advanced state pointer */
+       .endr
+       .endr
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+       ldp     w\tmpnr1, w\tmpnr2, [\state]    /* tmp1 = saved FPSR (offset 0), tmp2 = saved FPCR (offset 4) */
+       msr     fpsr, x\tmpnr1
+       msr     fpcr, x\tmpnr2
+       adr     x\tmpnr1, 0f                    /* tmp1 = address just past the ldp sequence */
+       ldr     w\tmpnr2, [\state, #8]          /* tmp2 = register count stored at offset 8 */
+       add     \state, \state, x\tmpnr2, lsl #4 /* state += 16 * count (state is clobbered) */
+       sub     x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1 /* back up 2 bytes per register, i.e. one 4-byte ldp per pair */
+       br      x\tmpnr1                        /* execute only the last count/2 ldp instructions */
+       .irp    qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+       .irp    qb, %(qa + 1)
+       ldp     q\qa, q\qb, [\state, # -16 * \qa - 16] /* same addressing as the stp ladder in fpsimd_save_partial */
+       .endr
+       .endr
+0:
+.endm
index b0cc58a9778025f28ac9af461119e8ffb899665f..13ce4cc18e268840036f0e7cfaa6f56fae52afa5 100644 (file)
@@ -8,7 +8,11 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/types.h>
+
 #define cpu_has_neon()         (1)
 
-void kernel_neon_begin(void);
+#define kernel_neon_begin()    kernel_neon_begin_partial(32)   /* full variant: requests all 32 registers */
+
+void kernel_neon_begin_partial(u32 num_regs);  /* num_regs: number of bottom NEON q-registers the caller needs */
 void kernel_neon_end(void);
index 6a27cd6dbfa6dd81be8a6042e5366262a8146801..d358ccacfc00275bd8ef8462e8839ac9f12161f8 100644 (file)
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
        fpsimd_restore x0, 8
        ret
 ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_save_partial_state)
+       fpsimd_save_partial x0, 1, 8, 9         /* state in x0, register count in x1/w1; x8 and x9 are scratch */
+       ret
+ENDPROC(fpsimd_save_partial_state)
+
+/*
+ * Load the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+       fpsimd_restore_partial x0, 8, 9         /* state in x0; x8 and x9 are scratch, x0 is modified by the macro */
+       ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
index 5ae89303c3ab16d9e963922fe217e6a14fe2b18b..ad8aebb1cdef7d289d609015bc22d51498cddc08 100644 (file)
@@ -218,29 +218,45 @@ void fpsimd_flush_task_state(struct task_struct *t)
 
 #ifdef CONFIG_KERNEL_MODE_NEON
 
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);       /* per-CPU save area chosen when in_irq() is true */
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);       /* per-CPU save area for the remaining in_interrupt() cases */
+
 /*
  * Kernel-side NEON support functions
  */
-void kernel_neon_begin(void)
+void kernel_neon_begin_partial(u32 num_regs)
 {
-       /* Avoid using the NEON in interrupt context */
-       BUG_ON(in_interrupt());
-       preempt_disable();
+       if (in_interrupt()) {   /* interrupt context: save the bottom registers to a per-CPU buffer */
+               struct fpsimd_partial_state *s = this_cpu_ptr(
+                       in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);        /* separate buffers for the in_irq() and non-in_irq() cases */
 
-       /*
-        * Save the userland FPSIMD state if we have one and if we haven't done
-        * so already. Clear fpsimd_last_state to indicate that there is no
-        * longer userland FPSIMD state in the registers.
-        */
-       if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
-               fpsimd_save_state(&current->thread.fpsimd_state);
-       this_cpu_write(fpsimd_last_state, NULL);
+               BUG_ON(num_regs > 32);  /* the save area has only 32 register slots */
+               fpsimd_save_partial_state(s, roundup(num_regs, 2));     /* the save routine stores registers in pairs, so pass an even count */
+       } else {        /* not in interrupt context: num_regs is not used on this path */
+               /*
+                * Save the userland FPSIMD state if we have one and if we
+                * haven't done so already. Clear fpsimd_last_state to indicate
+                * that there is no longer userland FPSIMD state in the
+                * registers.
+                */
+               preempt_disable();      /* re-enabled by the matching kernel_neon_end() */
+               if (current->mm &&
+                   !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+                       fpsimd_save_state(&current->thread.fpsimd_state);
+               this_cpu_write(fpsimd_last_state, NULL);
+       }
 }
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL(kernel_neon_begin_partial);
 
 void kernel_neon_end(void)
 {
-       preempt_enable();
+       if (in_interrupt()) {   /* mirrors the interrupt branch of kernel_neon_begin_partial() */
+               struct fpsimd_partial_state *s = this_cpu_ptr(
+                       in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);        /* same buffer selection as at begin time */
+               fpsimd_load_partial_state(s);   /* reload FPSR, FPCR and the registers saved at begin time */
+       } else {
+               preempt_enable();       /* pairs with preempt_disable() in kernel_neon_begin_partial() */
+       }
 }
 EXPORT_SYMBOL(kernel_neon_end);