*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/cache.h>
.text
.endif
.endm
- .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
+ .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
__select \out0, \in0, 0
__select t0, \in1, 1
__load \out0, \out0, 0, \sz, \op
__load t0, t0, 3, \sz, \op
__load \t4, \t4, 3, \sz, \op
+ .ifnb \oldcpsr
+ /*
+ * This is the final round and we're done with all data-dependent table
+ * lookups, so we can safely re-enable interrupts.
+ */
+ restore_irqs \oldcpsr
+ .endif
+
eor \out1, \out1, t1, ror #24
eor \out0, \out0, t2, ror #16
ldm rk!, {t1, t2}
eor \out1, \out1, t2
.endm
- .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
- __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
.endm
- .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
- __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
.endm
.macro __rev, out, in
.macro do_crypt, round, ttab, ltab, bsz
push {r3-r11, lr}
+ // Load keys first, to reduce latency in case they're not cached yet.
+ ldm rk!, {r8-r11}
+
ldr r4, [in]
ldr r5, [in, #4]
ldr r6, [in, #8]
ldr r7, [in, #12]
- ldm rk!, {r8-r11}
-
#ifdef CONFIG_CPU_BIG_ENDIAN
__rev r4, r4
__rev r5, r5
eor r7, r7, r11
__adrl ttab, \ttab
+ /*
+ * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+ * L1 cache, assuming cacheline size >= 32. This is a hardening measure
+ * intended to make cache-timing attacks more difficult. They may not
+ * be fully prevented, however; see the paper
+ * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+ * ("Cache-timing attacks on AES") for a discussion of the many
+ * difficulties involved in writing truly constant-time AES software.
+ */
+ save_and_disable_irqs t0
+ .set i, 0
+ .rept 1024 / 128
+ ldr r8, [ttab, #i + 0]
+ ldr r9, [ttab, #i + 32]
+ ldr r10, [ttab, #i + 64]
+ ldr r11, [ttab, #i + 96]
+ .set i, i + 128
+ .endr
+ push {t0} // oldcpsr
tst rounds, #2
bne 1f
\round r4, r5, r6, r7, r8, r9, r10, r11
b 0b
-2: __adrl ttab, \ltab
- \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
+2: .ifb \ltab
+ add ttab, ttab, #1
+ .else
+ __adrl ttab, \ltab
+ // Prefetch inverse S-box for final round; see explanation above
+ .set i, 0
+ .rept 256 / 64
+ ldr t0, [ttab, #i + 0]
+ ldr t1, [ttab, #i + 32]
+ .set i, i + 64
+ .endr
+ .endif
+
+ pop {rounds} // oldcpsr
+ \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
#ifdef CONFIG_CPU_BIG_ENDIAN
__rev r4, r4
.endm
ENTRY(__aes_arm_encrypt)
- do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+ do_crypt fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)
.align 5
static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };
-__visible const u32 crypto_ft_tab[4][256] = {
+/* cacheline-aligned to facilitate prefetching into cache */
+__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = {
{
0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
}
};
-__visible const u32 crypto_fl_tab[4][256] = {
+__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = {
{
0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
}
};
-__visible const u32 crypto_it_tab[4][256] = {
+__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = {
{
0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
}
};
-__visible const u32 crypto_il_tab[4][256] = {
+__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = {
{
0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
0x00000030, 0x00000036, 0x000000a5, 0x00000038,