unsigned int cpu;
};
+/*
+ * Struct for stacking the bottom 'n' FP/SIMD registers. The layout must
+ * match the fpsimd_save_partial/fpsimd_restore_partial macros, which
+ * store fpsr/fpcr at offset 0, num_regs at offset 8 and the register
+ * contents from offset 16 upwards.
+ */
+struct fpsimd_partial_state {
+ u32 fpsr;
+ u32 fpcr;
+ u32 num_regs;
+ __uint128_t vregs[32];
+};
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+ u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
ldr w\tmpnr, [\state, #16 * 2 + 4]
fpsimd_restore_fpcr x\tmpnr, \state
.endm
+
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+ mrs x\tmpnr1, fpsr
+ str w\numnr, [\state, #8]
+ mrs x\tmpnr2, fpcr
+ stp w\tmpnr1, w\tmpnr2, [\state]
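+	/*
+	 * Branch into the ladder of stp instructions below. Each stp is
+	 * 4 bytes and stores one register pair, so jumping to
+	 * '0f - num_regs * 2' bytes executes the final num_regs / 2
+	 * stores, saving q0..q(num_regs - 1). num_regs must be even.
+	 */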
+ adr x\tmpnr1, 0f
+ add \state, \state, x\numnr, lsl #4
+ sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
+ br x\tmpnr1
+ stp q30, q31, [\state, #-16 * 30 - 16]
+ stp q28, q29, [\state, #-16 * 28 - 16]
+ stp q26, q27, [\state, #-16 * 26 - 16]
+ stp q24, q25, [\state, #-16 * 24 - 16]
+ stp q22, q23, [\state, #-16 * 22 - 16]
+ stp q20, q21, [\state, #-16 * 20 - 16]
+ stp q18, q19, [\state, #-16 * 18 - 16]
+ stp q16, q17, [\state, #-16 * 16 - 16]
+ stp q14, q15, [\state, #-16 * 14 - 16]
+ stp q12, q13, [\state, #-16 * 12 - 16]
+ stp q10, q11, [\state, #-16 * 10 - 16]
+ stp q8, q9, [\state, #-16 * 8 - 16]
+ stp q6, q7, [\state, #-16 * 6 - 16]
+ stp q4, q5, [\state, #-16 * 4 - 16]
+ stp q2, q3, [\state, #-16 * 2 - 16]
+ stp q0, q1, [\state, #-16 * 0 - 16]
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+ ldp w\tmpnr1, w\tmpnr2, [\state]
+ msr fpsr, x\tmpnr1
+ fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
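+	/*
+	 * Read num_regs back from the saved state and branch into the
+	 * ldp ladder, mirroring fpsimd_save_partial above.
+	 */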
+ adr x\tmpnr1, 0f
+ ldr w\tmpnr2, [\state, #8]
+ add \state, \state, x\tmpnr2, lsl #4
+ sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
+ br x\tmpnr1
+ ldp q30, q31, [\state, #-16 * 30 - 16]
+ ldp q28, q29, [\state, #-16 * 28 - 16]
+ ldp q26, q27, [\state, #-16 * 26 - 16]
+ ldp q24, q25, [\state, #-16 * 24 - 16]
+ ldp q22, q23, [\state, #-16 * 22 - 16]
+ ldp q20, q21, [\state, #-16 * 20 - 16]
+ ldp q18, q19, [\state, #-16 * 18 - 16]
+ ldp q16, q17, [\state, #-16 * 16 - 16]
+ ldp q14, q15, [\state, #-16 * 14 - 16]
+ ldp q12, q13, [\state, #-16 * 12 - 16]
+ ldp q10, q11, [\state, #-16 * 10 - 16]
+ ldp q8, q9, [\state, #-16 * 8 - 16]
+ ldp q6, q7, [\state, #-16 * 6 - 16]
+ ldp q4, q5, [\state, #-16 * 4 - 16]
+ ldp q2, q3, [\state, #-16 * 2 - 16]
+ ldp q0, q1, [\state, #-16 * 0 - 16]
+0:
+.endm
#define cpu_has_neon() system_supports_fpsimd()
-void kernel_neon_begin(void);
+#define kernel_neon_begin() kernel_neon_begin_partial(32)
+
+void kernel_neon_begin_partial(u32 num_regs);
void kernel_neon_end(void);
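+
+/*
+ * Illustrative sketch only: callers pass the number of q registers they
+ * clobber, so a routine touching just q0-q3 might do
+ *
+ *	kernel_neon_begin_partial(4);
+ *	...NEON code using q0-q3...
+ *	kernel_neon_end();
+ *
+ * A smaller count keeps the partial save/restore in interrupt context cheap.
+ */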
#endif /* ! __ASM_NEON_H */
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
-#include <linux/compiler.h>
-#include <linux/irqflags.h>
-#include <linux/percpu.h>
-#include <linux/preempt.h>
#include <linux/types.h>
-#ifdef CONFIG_KERNEL_MODE_NEON
-
-DECLARE_PER_CPU(bool, kernel_neon_busy);
-
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
* instructions or access the SIMD register file
- *
- * Callers must not assume that the result remains true beyond the next
- * preempt_enable() or return from softirq context.
*/
static __must_check inline bool may_use_simd(void)
{
- /*
- * The raw_cpu_read() is racy if called with preemption enabled.
- * This is not a bug: kernel_neon_busy is only set when
- * preemption is disabled, so we cannot migrate to another CPU
- * while it is set, nor can we migrate to a CPU where it is set.
- * So, if we find it clear on some CPU then we're guaranteed to
- * find it clear on any CPU we could migrate to.
- *
- * If we are in between kernel_neon_begin()...kernel_neon_end(),
- * the flag will be set, but preemption is also disabled, so we
- * can't migrate to another CPU and spuriously see it become
- * false.
- */
- return !in_irq() && !irqs_disabled() && !in_nmi() &&
- !raw_cpu_read(kernel_neon_busy);
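+	/*
+	 * kernel_neon_begin_partial() saves and restores the register
+	 * state itself in task, softirq and hardirq context alike, so
+	 * there is nothing to check here.
+	 */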
+ return true;
}
-#else /* ! CONFIG_KERNEL_MODE_NEON */
-
-static __must_check inline bool may_use_simd(void) {
- return false;
-}
-
-#endif /* ! CONFIG_KERNEL_MODE_NEON */
-
#endif
fpsimd_restore x0, 8
ret
ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
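+ * w1 - number of registers to save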
+ */
+ENTRY(fpsimd_save_partial_state)
+ fpsimd_save_partial x0, 1, 8, 9
+ ret
+ENDPROC(fpsimd_save_partial_state)
+
+/*
+ * Load the bottom n FP registers, where n is read back from the saved
+ * num_regs field.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+ fpsimd_restore_partial x0, 8, 9
+ ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <linux/bottom_half.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
#include <linux/init.h>
-#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/sched/signal.h>
#include <linux/signal.h>
+#include <linux/hardirq.h>
#include <asm/fpsimd.h>
#include <asm/cputype.h>
+#include <asm/neon.h>
#include <asm/simd.h>
#define FPEXC_IOF (1 << 0)
* CPU currently contain the most recent userland FPSIMD state of the current
* task.
*
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with local_bh_disable() unless softirqs are already masked.
- *
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_state.cpu field
* contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
{
if (!system_supports_fpsimd())
return;
-
- local_bh_disable();
-
	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current);
set_thread_flag(TIF_FOREIGN_FPSTATE);
-
- local_bh_enable();
}
/*
{
if (!system_supports_fpsimd())
return;
-
- local_bh_disable();
-
+ preempt_disable();
if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
		fpsimd_save_state(&current->thread.fpsimd_state);
-
- local_bh_enable();
+ preempt_enable();
}
/*
{
if (!system_supports_fpsimd())
return;
-
- local_bh_disable();
-
+ preempt_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
		struct fpsimd_state *st = &current->thread.fpsimd_state;
__this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
-
- local_bh_enable();
+ preempt_enable();
}
/*
{
if (!system_supports_fpsimd())
return;
-
- local_bh_disable();
-
+ preempt_disable();
fpsimd_load_state(state);
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
		struct fpsimd_state *st = &current->thread.fpsimd_state;
__this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
-
- local_bh_enable();
+ preempt_enable();
}
/*
#ifdef CONFIG_KERNEL_MODE_NEON
-DEFINE_PER_CPU(bool, kernel_neon_busy);
-EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
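+/*
+ * Per-CPU buffers for stacking the bottom NEON registers while kernel-mode
+ * NEON runs in hardirq or softirq context, one buffer per level.
+ */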
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
/*
* Kernel-side NEON support functions
*/
-
-/*
- * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
- * context
- *
- * Must not be called unless may_use_simd() returns true.
- * Task context in the FPSIMD registers is saved back to memory as necessary.
- *
- * A matching call to kernel_neon_end() must be made before returning from the
- * calling context.
- *
- * The caller may freely use the FPSIMD registers until kernel_neon_end() is
- * called.
- */
-void kernel_neon_begin(void)
+void kernel_neon_begin_partial(u32 num_regs)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
-	BUG_ON(!may_use_simd());
-
-	local_bh_disable();
-
-	__this_cpu_write(kernel_neon_busy, true);
-
-	/* Save unsaved task fpsimd state, if any: */
-	if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
-		fpsimd_save_state(&current->thread.fpsimd_state);
-
-	/* Invalidate any task state remaining in the fpsimd regs: */
-	__this_cpu_write(fpsimd_last_state, NULL);
-
-	preempt_disable();
-
-	local_bh_enable();
+	if (in_interrupt()) {
+		struct fpsimd_partial_state *s = this_cpu_ptr(
+			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+
+		BUG_ON(num_regs > 32);
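+		/*
+		 * Round num_regs up to an even count: the save/restore
+		 * macros branch into a ladder of stp/ldp instructions
+		 * that each transfer a register pair.
+		 */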
+ fpsimd_save_partial_state(s, roundup(num_regs, 2));
+ } else {
+ /*
+ * Save the userland FPSIMD state if we have one and if we
+ * haven't done so already. Clear fpsimd_last_state to indicate
+ * that there is no longer userland FPSIMD state in the
+ * registers.
+ */
+ preempt_disable();
+ if (current->mm &&
+ !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+			fpsimd_save_state(&current->thread.fpsimd_state);
+ this_cpu_write(fpsimd_last_state, NULL);
+ }
}
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL(kernel_neon_begin_partial);
-/*
- * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
- *
- * Must be called from a context in which kernel_neon_begin() was previously
- * called, with no call to kernel_neon_end() in the meantime.
- *
- * The caller must not use the FPSIMD registers after this function is called,
- * unless kernel_neon_begin() is called again in the meantime.
- */
void kernel_neon_end(void)
{
- bool busy;
-
if (!system_supports_fpsimd())
return;
-
- busy = __this_cpu_xchg(kernel_neon_busy, false);
- WARN_ON(!busy); /* No matching kernel_neon_begin()? */
-
- preempt_enable();
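+	/*
+	 * In interrupt context, reload the register state stashed by the
+	 * matching kernel_neon_begin_partial(). In task context the
+	 * userland state is reloaded lazily via TIF_FOREIGN_FPSTATE, so
+	 * only re-enable preemption here.
+	 */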
+ if (in_interrupt()) {
+ struct fpsimd_partial_state *s = this_cpu_ptr(
+ in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+ fpsimd_load_partial_state(s);
+ } else {
+ preempt_enable();
+ }
}
EXPORT_SYMBOL(kernel_neon_end);