1 #define __ARM_ARCH__ __LINUX_ARM_ARCH__
3 #include <linux/linkage.h>
9 # AES assembly implementation for ARMv8 AArch32
10 # - aes_v8_cbc_encrypt
11 # - aes_v8_cbc_decrypt
// Constant table: three rows, each one 32-bit word replicated four times
// (16 bytes per row, i.e. one q register's worth of data per row).
// NOTE(review): the label for this table and the code that reads it are not
// visible in this view. The values 0x01 and 0x1b, plus the byte pattern
// 0x0c0f0e0d, look like AES key-schedule constants (round constants and a
// byte-permutation index pattern) - TODO confirm against the user of the table.
15 .long 0x00000001,0x00000001,0x00000001,0x00000001
16 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
17 .long 0x0000001b,0x0000001b,0x0000001b,0x0000001b
19 # void aes_v8_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
20 # int rounds, int blocks, u8 iv[], int first);
// aes_v8_cbc_encrypt: CBC-mode encrypt entry point (see the C prototype in
// the comment immediately above ENTRY).
//
// NOTE(review): only part of this routine is visible in this view. Labels,
// the AES round instructions, branches and the return are on lines that are
// not shown, so the comments below describe only what the visible lines do.
//
// Visible register usage:
//   r0     - output pointer; each vst1.8 below writes 16 bytes through it and
//            post-increments it
//   r1     - input pointer; each vld1.8 {d0-d1} reads 16 bytes through it and
//            post-increments it
//   r2     - pointer the round keys are loaded from (rk in the prototype,
//            assuming AAPCS argument order); every load written with "!"
//            advances it by 16 bytes
//   r4-r6  - loaded from the caller's stack: blocks, iv, first
//   q4-q7  - saved on entry and restored by the vpop on each visible exit path
22 ENTRY(aes_v8_cbc_encrypt)
// q4-q7 alias d8-d15, which the AAPCS requires a callee to preserve. The key
// loads below overwrite d10-d15 (and d8-d9 in the longest sequence), so the
// registers are saved first. This push occupies 64 bytes of stack.
24 vpush {q4, q5, q6, q7}
// Stack-passed arguments. Offsets are relative to sp after the 64-byte vpush;
// the remaining 12 bytes (76 - 64) presumably come from a core-register push
// on a line not visible here - TODO confirm.
25 ldr r4, [sp, #76] // blocks
26 ldr r5, [sp, #80] // iv
27 ldr r6, [sp, #84] // first
// Key-load sequence 1: eight 16-byte round keys into q5-q12 (d10-d25).
// The final load has no writeback, so r2 is left pointing at that last key.
// NOTE(review): presumably the 128-bit key path. AES-128 uses 11 round keys,
// so the other three would have to be loaded on lines not visible here -
// TODO confirm.
42 vld1.8 {d10-d11}, [r2]!
43 vld1.8 {d12-d13}, [r2]!
44 vld1.8 {d14-d15}, [r2]!
45 vld1.8 {d16-d17}, [r2]!
46 vld1.8 {d18-d19}, [r2]!
47 vld1.8 {d20-d21}, [r2]!
48 vld1.8 {d22-d23}, [r2]!
49 vld1.8 {d24-d25}, [r2]
// NOTE(review): the instructions these two comments describe are not visible
// in this view. "next round" presumably means the next 16-byte block of the
// CBC chain rather than an AES round - confirm.
51 // load input 16 bytes, and eor with iv
76 // store output 16 bytes, and continue next round
// Key-load sequence 2: ten 16-byte round keys into q5-q14 (d10-d29).
// Again the final load omits the writeback, leaving r2 on the last key.
// NOTE(review): presumably the 192-bit key path - TODO confirm.
90 vld1.8 {d10-d11}, [r2]!
91 vld1.8 {d12-d13}, [r2]!
92 vld1.8 {d14-d15}, [r2]!
93 vld1.8 {d16-d17}, [r2]!
94 vld1.8 {d18-d19}, [r2]!
95 vld1.8 {d20-d21}, [r2]!
96 vld1.8 {d22-d23}, [r2]!
97 vld1.8 {d24-d25}, [r2]!
98 vld1.8 {d26-d27}, [r2]!
99 vld1.8 {d28-d29}, [r2]
101 // load input 16 bytes, and eor with iv
// q0 <- 16 bytes from [r1]; r1 advances by 16.
102 vld1.8 {d0-d1}, [r1]!
130 // store output 16 bytes, and continue next round
// 16 bytes from q1 -> [r0]; r0 advances by 16.
131 vst1.8 {d2-d3}, [r0]!
// Exit path: restore the q4-q7 saved on entry (the return instruction itself
// is not visible in this view).
135 vpop {q4, q5, q6, q7}
// Key-load sequence 3: thirteen 16-byte round keys into q2-q14 (d4-d29).
// Every load here uses writeback, so r2 ends 208 bytes past its starting
// value; a further key is loaded into q15 from [r2] later on.
// NOTE(review): presumably the 256-bit key path - TODO confirm.
141 vld1.8 {d4-d5}, [r2]!
142 vld1.8 {d6-d7}, [r2]!
143 vld1.8 {d8-d9}, [r2]!
144 vld1.8 {d10-d11}, [r2]!
145 vld1.8 {d12-d13}, [r2]!
146 vld1.8 {d14-d15}, [r2]!
147 vld1.8 {d16-d17}, [r2]!
148 vld1.8 {d18-d19}, [r2]!
149 vld1.8 {d20-d21}, [r2]!
150 vld1.8 {d22-d23}, [r2]!
151 vld1.8 {d24-d25}, [r2]!
152 vld1.8 {d26-d27}, [r2]!
153 vld1.8 {d28-d29}, [r2]!
156 // load input 16 bytes, and eor with iv
// q0 <- 16 bytes from [r1]; r1 advances by 16.
157 vld1.8 {d0-d1}, [r1]!
// q15 <- 16 bytes from [r2], no writeback, so r2 is unchanged by this load
// and the same address is read if this line executes again.
190 vld1.8 {d30-d31}, [r2]
193 // store output 16 bytes, and continue next round
// 16 bytes from q1 -> [r0]; r0 advances by 16.
194 vst1.8 {d2-d3}, [r0]!
// Exit path: restore the q4-q7 saved on entry.
198 vpop {q4, q5, q6, q7}
201 ENDPROC(aes_v8_cbc_encrypt)
204 # void aes_v8_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
205 # int rounds, int blocks, u8 iv[], int first);
// aes_v8_cbc_decrypt: CBC-mode decrypt entry point (see the C prototype in
// the comment immediately above ENTRY).
//
// NOTE(review): only part of this routine is visible in this view. Most
// labels, the AES round instructions, the flag-setting instructions and the
// return are on lines that are not shown, so the comments below describe
// only what the visible lines do.
//
// Visible structure: three code paths, tagged 128 / 192 / 256 by their local
// labels (.Lcbcdec128_*, .Lcbcdec192_*, .Lcbcdec256_*). Each path has
//   - a body that loads 32 input bytes (two 16-byte blocks) into q0/q1,
//     stores q0 and q1 to the output and branches to its *_2XLoop label, and
//   - a *_FinalRound tail that loads, and later stores, one 16-byte block.
//
// Visible register usage:
//   r0     - output pointer, post-incremented by every vst1.8
//   r1     - input pointer, post-incremented by every input vld1.8
//   r2     - pointer the round keys are loaded from (rk in the prototype,
//            assuming AAPCS argument order), advanced 16 bytes per key load
//   r4-r6  - loaded from the caller's stack: blocks, iv, first
//   q4-q7  - saved on entry and restored by the vpop on every visible exit path
207 ENTRY(aes_v8_cbc_decrypt)
// q4-q7 alias d8-d15, which the AAPCS requires a callee to preserve. The key
// loads below overwrite d10-d15, so they are saved first (64 bytes of stack).
209 vpush {q4, q5, q6, q7}
// Stack-passed arguments. Offsets are relative to sp after the 64-byte vpush;
// the remaining 12 bytes (76 - 64) presumably come from a core-register push
// on a line not visible here - TODO confirm.
210 ldr r4, [sp, #76] // blocks
211 ldr r5, [sp, #80] // iv
212 ldr r6, [sp, #84] // first
// 128 path: eleven 16-byte round keys into q5-q15 (d10-d31). Eleven is the
// AES-128 round-key count, so the whole schedule stays in registers here.
// Every load uses writeback, so r2 ends 176 bytes past its starting value.
224 vld1.8 {d10-d11}, [r2]!
225 vld1.8 {d12-d13}, [r2]!
226 vld1.8 {d14-d15}, [r2]!
227 vld1.8 {d16-d17}, [r2]!
228 vld1.8 {d18-d19}, [r2]!
229 vld1.8 {d20-d21}, [r2]!
230 vld1.8 {d22-d23}, [r2]!
231 vld1.8 {d24-d25}, [r2]!
232 vld1.8 {d26-d27}, [r2]!
233 vld1.8 {d28-d29}, [r2]!
234 vld1.8 {d30-d31}, [r2]!
239 // Load input 32 bytes
// q0, q1 <- two consecutive 16-byte blocks from [r1]; r1 advances by 32.
240 vld1.8 {d0-d3}, [r1]!
// Write both result blocks (q0 then q1); r0 advances by 16 per store.
288 vst1.8 {d0-d1}, [r0]!
289 vst1.8 {d2-d3}, [r0]!
// Branch to the two-block loop while the N flag is clear. The instruction
// that sets the flags is on a line not visible here.
291 bpl .Lcbcdec128_2XLoop
// Z clear -> one block is still to be handled by the tail; otherwise fall
// through, restore q4-q7 and leave (return not visible in this view).
294 bne .Lcbcdec128_FinalRound
295 vpop {q4, q5, q6, q7}
// Single-block tail of the 128 path.
298 .Lcbcdec128_FinalRound:
299 // load input 16 bytes
300 vld1.8 {d0-d1}, [r1]!
// Store the single result block from q0, then restore q4-q7.
323 vst1.8 {d0-d1}, [r0]!
324 vpop {q4, q5, q6, q7}
// 192 path: ten 16-byte round keys into q5-q14 (d10-d29). AES-192 uses 13
// round keys; the rest are streamed through q15 by the reloads further down.
330 vld1.8 {d10-d11}, [r2]!
331 vld1.8 {d12-d13}, [r2]!
332 vld1.8 {d14-d15}, [r2]!
333 vld1.8 {d16-d17}, [r2]!
334 vld1.8 {d18-d19}, [r2]!
335 vld1.8 {d20-d21}, [r2]!
336 vld1.8 {d22-d23}, [r2]!
337 vld1.8 {d24-d25}, [r2]!
338 vld1.8 {d26-d27}, [r2]!
339 vld1.8 {d28-d29}, [r2]!
345 // Load input 32 bytes
// q0, q1 <- two consecutive 16-byte blocks from [r1]; r1 advances by 32.
346 vld1.8 {d0-d3}, [r1]!
// Three successive reloads of q15, each advancing r2 by 16 bytes.
// NOTE(review): because each reload moves r2 forward, r2 presumably has to
// be rewound on a line not visible here before the loop repeats - confirm.
391 vld1.8 {d30-d31}, [r2]!
396 vld1.8 {d30-d31}, [r2]!
399 vld1.8 {d30-d31}, [r2]!
// Write both result blocks (q0 then q1); r0 advances by 16 per store.
406 vst1.8 {d0-d1}, [r0]!
407 vst1.8 {d2-d3}, [r0]!
// Branch to the two-block loop while N is clear (flag setter not visible).
409 bpl .Lcbcdec192_2XLoop
// Z clear -> handle one remaining block in the tail; otherwise restore q4-q7.
412 bne .Lcbcdec192_FinalRound
413 vpop {q4, q5, q6, q7}
// Single-block tail of the 192 path.
416 .Lcbcdec192_FinalRound:
417 // load input 16 bytes
418 vld1.8 {d0-d1}, [r1]!
// NOTE(review): two q15 reloads are visible here against three in the
// two-block body above. The intervening lines are not visible in this view,
// so confirm that the tail applies every remaining round key.
444 vld1.8 {d30-d31}, [r2]!
446 vld1.8 {d30-d31}, [r2]!
// Store the single result block from q0, then restore q4-q7.
449 vst1.8 {d0-d1}, [r0]!
450 vpop {q4, q5, q6, q7}
// 256 path: ten 16-byte round keys into q5-q14 (d10-d29). AES-256 uses 15
// round keys; the rest are streamed through q15 by the reloads further down.
456 vld1.8 {d10-d11}, [r2]!
457 vld1.8 {d12-d13}, [r2]!
458 vld1.8 {d14-d15}, [r2]!
459 vld1.8 {d16-d17}, [r2]!
460 vld1.8 {d18-d19}, [r2]!
461 vld1.8 {d20-d21}, [r2]!
462 vld1.8 {d22-d23}, [r2]!
463 vld1.8 {d24-d25}, [r2]!
464 vld1.8 {d26-d27}, [r2]!
465 vld1.8 {d28-d29}, [r2]!
471 // Load input 32 bytes
// q0, q1 <- two consecutive 16-byte blocks from [r1]; r1 advances by 32.
472 vld1.8 {d0-d3}, [r1]!
// Five successive reloads of q15, each advancing r2 by 16 bytes.
// NOTE(review): as in the 192 path, r2 presumably has to be rewound on a
// line not visible here before the loop repeats - confirm.
517 vld1.8 {d30-d31}, [r2]!
522 vld1.8 {d30-d31}, [r2]!
527 vld1.8 {d30-d31}, [r2]!
532 vld1.8 {d30-d31}, [r2]!
535 vld1.8 {d30-d31}, [r2]!
// Write both result blocks (q0 then q1); r0 advances by 16 per store.
542 vst1.8 {d0-d1}, [r0]!
543 vst1.8 {d2-d3}, [r0]!
// Branch to the two-block loop while N is clear (flag setter not visible).
545 bpl .Lcbcdec256_2XLoop
// Z clear -> handle one remaining block in the tail; otherwise restore q4-q7.
548 bne .Lcbcdec256_FinalRound
549 vpop {q4, q5, q6, q7}
// Single-block tail of the 256 path.
552 .Lcbcdec256_FinalRound:
553 // load input 16 bytes
554 vld1.8 {d0-d1}, [r1]!
// NOTE(review): four q15 reloads are visible here against five in the
// two-block body above. The intervening lines are not visible in this view,
// so confirm that the tail applies every remaining round key.
580 vld1.8 {d30-d31}, [r2]!
583 vld1.8 {d30-d31}, [r2]!
586 vld1.8 {d30-d31}, [r2]!
588 vld1.8 {d30-d31}, [r2]!
// Store the single result block from q0, then restore q4-q7.
591 vst1.8 {d0-d1}, [r0]!
592 vpop {q4, q5, q6, q7}
596 ENDPROC(aes_v8_cbc_decrypt)