Merge branch 'for-rmk' of git://git.kernel.org/pub/scm/linux/kernel/git/kgene/linux...
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / arch / x86 / crypto / aesni-intel_asm.S
1 /*
2 * Implement AES algorithm in Intel AES-NI instructions.
3 *
4 * The white paper of AES-NI instructions can be downloaded from:
5 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
6 *
7 * Copyright (C) 2008, Intel Corp.
8 * Author: Huang Ying <ying.huang@intel.com>
9 * Vinodh Gopal <vinodh.gopal@intel.com>
10 * Kahraman Akdemir
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 */
17
18 #include <linux/linkage.h>
19 #include <asm/inst.h>
20
21 .text
22
/* XMM registers that hold cipher state blocks; STATE is the single-block alias */
#define STATE1		%xmm0
#define STATE2		%xmm4
#define STATE3		%xmm5
#define STATE4		%xmm6
#define STATE		STATE1

/* XMM registers that hold input blocks; IN is the single-block alias */
#define IN1		%xmm1
#define IN2		%xmm7
#define IN3		%xmm8
#define IN4		%xmm9
#define IN		IN1

/* current round key, chaining value / counter block, counter-mode helpers */
#define KEY		%xmm2
#define IV		%xmm3
#define BSWAP_MASK	%xmm10
#define CTR		%xmm11
#define INC		%xmm12

/* argument registers of the exported functions (1st .. 5th argument) */
#define KEYP		%rdi
#define OUTP		%rsi
#define INP		%rdx
#define LEN		%rcx
#define IVP		%r8

/* scratch: key length read from the context, and two temporaries */
#define KLEN		%r9d
#define T1		%r10
#define TKEYP		T1
#define T2		%r11
#define TCTR_LOW	T2
49
/*
 * _key_expansion_128 / _key_expansion_256a: internal helper of aesni_set_key
 * input:
 *	%xmm0:	round key to derive the next one from
 *	%xmm1:	AESKEYGENASSIST result (only dword 3 is used)
 *	%xmm4:	dword 0 must be zero
 *	%rcx:	address to store the new round key at (16-byte aligned)
 * output:
 *	%xmm0:	new round key, also written to (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1
 *	%xmm4 (dword 0 is still zero on return)
 */
_key_expansion_128:
_key_expansion_256a:
	/* two shuffle/xor steps turn xmm0 into the running xor of its dwords */
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0xff, %xmm1, %xmm1	# broadcast dword 3 of the assist value
	pxor	%xmm1, %xmm0
	movaps	%xmm0, (%rcx)
	add	$0x10, %rcx
	ret
61
/*
 * _key_expansion_192a: internal helper of aesni_set_key (192-bit keys)
 * input:
 *	%xmm0:	first four dwords of the previous key material
 *	%xmm2:	remaining two dwords of the previous key material (dwords 0-1)
 *	%xmm1:	AESKEYGENASSIST result (only dword 1 is used)
 *	%xmm4:	dword 0 must be zero
 *	%rcx:	address to store the output at (16-byte aligned)
 * output:
 *	%xmm0, %xmm2: updated key material
 *	(%rcx):   old %xmm2 dwords 0-1 followed by new %xmm0 dwords 0-1
 *	16(%rcx): new %xmm0 dwords 2-3 followed by new %xmm2 dwords 0-1
 *	%rcx:	advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm5, %xmm6
 *	%xmm4 (dword 0 is still zero on return)
 */
_key_expansion_192a:
	/* xmm0 = running xor of its dwords, then xor the broadcast assist word */
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0x55, %xmm1, %xmm1	# broadcast dword 1 of the assist value
	pxor	%xmm1, %xmm0

	/* update the two trailing dwords kept in xmm2 */
	movaps	%xmm2, %xmm6		# keep the old xmm2 for the first store
	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3	# broadcast dword 3 of the new xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	/* repack the six-dword groups into two 16-byte round keys */
	movaps	%xmm0, %xmm1
	shufps	$0x44, %xmm0, %xmm6
	movaps	%xmm6, (%rcx)
	shufps	$0x4e, %xmm2, %xmm1
	movaps	%xmm1, 16(%rcx)
	add	$0x20, %rcx
	ret
84
/*
 * _key_expansion_192b: internal helper of aesni_set_key (192-bit keys)
 * input:
 *	%xmm0:	first four dwords of the previous key material
 *	%xmm2:	remaining two dwords of the previous key material (dwords 0-1)
 *	%xmm1:	AESKEYGENASSIST result (only dword 1 is used)
 *	%xmm4:	dword 0 must be zero
 *	%rcx:	address to store the output at (16-byte aligned)
 * output:
 *	%xmm0, %xmm2: updated key material
 *	(%rcx):	new %xmm0
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm3, %xmm5
 *	%xmm4 (dword 0 is still zero on return)
 */
_key_expansion_192b:
	/* xmm0 = running xor of its dwords, then xor the broadcast assist word */
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pshufd	$0x55, %xmm1, %xmm1	# broadcast dword 1 of the assist value
	pxor	%xmm1, %xmm0

	/* update the two trailing dwords kept in xmm2 */
	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3	# broadcast dword 3 of the new xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, (%rcx)
	add	$0x10, %rcx
	ret
102
/*
 * _key_expansion_256b: internal helper of aesni_set_key (256-bit keys)
 * input:
 *	%xmm2:	round key to derive the next one from
 *	%xmm1:	AESKEYGENASSIST result (only dword 2 is used)
 *	%xmm4:	dword 0 must be zero
 *	%rcx:	address to store the new round key at (16-byte aligned)
 * output:
 *	%xmm2:	new round key, also written to (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1
 *	%xmm4 (dword 0 is still zero on return)
 */
_key_expansion_256b:
	/* two shuffle/xor steps turn xmm2 into the running xor of its dwords */
	shufps	$0x10, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0x8c, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	pshufd	$0xaa, %xmm1, %xmm1	# broadcast dword 2 of the assist value
	pxor	%xmm1, %xmm2
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx
	ret
113
/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *		     unsigned int key_len)
 *
 * Expands in_key into the encryption key schedule at offset 0 of ctx,
 * derives the decryption key schedule at offset 240 of ctx, and records
 * key_len at offset 480 of ctx.  key_len selects the variant: below 24
 * takes the 128-bit path, exactly 24 the 192-bit path, anything else the
 * 256-bit path (only the low byte of key_len is compared).
 * Always returns 0.
 *
 * %rcx is the write cursor into the encryption schedule; every
 * _key_expansion_* helper stores at (%rcx) and advances it.
 */
ENTRY(aesni_set_key)
	movups	(%rsi), %xmm0		# user key (first 16 bytes)
	movaps	%xmm0, (%rdi)		# round key 0 is the raw key
	lea	0x10(%rdi), %rcx	# cursor: next round key slot
	movl	%edx, 480(%rdi)		# remember the key length
	pxor	%xmm4, %xmm4		# helpers need dword 0 of xmm4 to be zero
	cmp	$24, %dl
	jb	.Lenc_key128
	je	.Lenc_key192

	/* 256-bit key: the second half of the key is round key 1 */
	movups	0x10(%rsi), %xmm2	# other user key
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call	_key_expansion_256a
	AESKEYGENASSIST 0x1 %xmm0 %xmm1
	call	_key_expansion_256b
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call	_key_expansion_256a
	AESKEYGENASSIST 0x2 %xmm0 %xmm1
	call	_key_expansion_256b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call	_key_expansion_256a
	AESKEYGENASSIST 0x4 %xmm0 %xmm1
	call	_key_expansion_256b
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call	_key_expansion_256a
	AESKEYGENASSIST 0x8 %xmm0 %xmm1
	call	_key_expansion_256b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call	_key_expansion_256a
	AESKEYGENASSIST 0x10 %xmm0 %xmm1
	call	_key_expansion_256b
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call	_key_expansion_256a
	AESKEYGENASSIST 0x20 %xmm0 %xmm1
	call	_key_expansion_256b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call	_key_expansion_256a
	jmp	.Ldec_key

.Lenc_key192:
	/* 192-bit key: the trailing 8 key bytes live in the low half of xmm2 */
	movq	0x10(%rsi), %xmm2	# other user key
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call	_key_expansion_192a
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call	_key_expansion_192b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call	_key_expansion_192a
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call	_key_expansion_192b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call	_key_expansion_192a
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call	_key_expansion_192b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call	_key_expansion_192a
	AESKEYGENASSIST 0x80 %xmm2 %xmm1	# round 8
	call	_key_expansion_192b
	jmp	.Ldec_key

.Lenc_key128:
	AESKEYGENASSIST 0x1 %xmm0 %xmm1		# round 1
	call	_key_expansion_128
	AESKEYGENASSIST 0x2 %xmm0 %xmm1		# round 2
	call	_key_expansion_128
	AESKEYGENASSIST 0x4 %xmm0 %xmm1		# round 3
	call	_key_expansion_128
	AESKEYGENASSIST 0x8 %xmm0 %xmm1		# round 4
	call	_key_expansion_128
	AESKEYGENASSIST 0x10 %xmm0 %xmm1	# round 5
	call	_key_expansion_128
	AESKEYGENASSIST 0x20 %xmm0 %xmm1	# round 6
	call	_key_expansion_128
	AESKEYGENASSIST 0x40 %xmm0 %xmm1	# round 7
	call	_key_expansion_128
	AESKEYGENASSIST 0x80 %xmm0 %xmm1	# round 8
	call	_key_expansion_128
	AESKEYGENASSIST 0x1b %xmm0 %xmm1	# round 9
	call	_key_expansion_128
	AESKEYGENASSIST 0x36 %xmm0 %xmm1	# round 10
	call	_key_expansion_128

.Ldec_key:
	/*
	 * Build the decryption schedule 240 bytes above the encryption one:
	 * the round keys appear in reverse order, the first and last are
	 * copied unchanged and every key in between goes through AESIMC.
	 */
	sub	$0x10, %rcx		# rcx = last encryption round key
	movaps	(%rdi), %xmm0
	movaps	(%rcx), %xmm1
	movaps	%xmm0, 240(%rcx)	# first enc key becomes last dec key
	movaps	%xmm1, 240(%rdi)	# last enc key becomes first dec key
	add	$0x10, %rdi		# rdi walks the enc keys upwards
	lea	240-16(%rcx), %rsi	# rsi walks the dec slots downwards
	.align 4
.Ldec_key_loop:
	movaps	(%rdi), %xmm0
	AESIMC %xmm0 %xmm1
	movaps	%xmm1, (%rsi)
	add	$0x10, %rdi
	sub	$0x10, %rsi
	cmp	%rcx, %rdi
	jb	.Ldec_key_loop
	xor	%eax, %eax		# return 0 (32-bit write clears all of rax)
	ret
216
/*
 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src into dst.  Neither pointer
 * needs to be aligned (unaligned moves are used for both).
 */
ENTRY(aesni_enc)
	movups	(INP), STATE		# input block
	movl	480(KEYP), KLEN		# key length
	call	_aesni_enc1
	movups	STATE, (OUTP)		# output block
	ret
226
/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * TKEYP is biased so that the round keys shared by all key sizes sit at
 * the same displacements (-0x20 .. 0x70); longer keys enter earlier and
 * run two (192-bit) or four (256-bit) extra rounds before falling through.
 */
_aesni_enc1:
	movaps	(KEYP), KEY		# round key 0
	pxor	KEY, STATE		# round 0
	lea	0x30(KEYP), TKEYP	# bias for 128-bit keys
	cmp	$24, KLEN
	jb	.Lenc128
	lea	0x20(TKEYP), TKEYP	# lea leaves the cmp flags intact
	je	.Lenc192
	add	$0x20, TKEYP		# 256-bit key
	movaps	-0x60(TKEYP), KEY
	AESENC KEY STATE
	movaps	-0x50(TKEYP), KEY
	AESENC KEY STATE
	.align 4
.Lenc192:
	movaps	-0x40(TKEYP), KEY
	AESENC KEY STATE
	movaps	-0x30(TKEYP), KEY
	AESENC KEY STATE
	.align 4
.Lenc128:
	movaps	-0x20(TKEYP), KEY
	AESENC KEY STATE
	movaps	-0x10(TKEYP), KEY
	AESENC KEY STATE
	movaps	(TKEYP), KEY
	AESENC KEY STATE
	movaps	0x10(TKEYP), KEY
	AESENC KEY STATE
	movaps	0x20(TKEYP), KEY
	AESENC KEY STATE
	movaps	0x30(TKEYP), KEY
	AESENC KEY STATE
	movaps	0x40(TKEYP), KEY
	AESENC KEY STATE
	movaps	0x50(TKEYP), KEY
	AESENC KEY STATE
	movaps	0x60(TKEYP), KEY
	AESENC KEY STATE
	movaps	0x70(TKEYP), KEY
	AESENCLAST KEY STATE		# last round
	ret
282
/*
 * _aesni_enc4:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * Four-block variant of _aesni_enc1: each round key is loaded once and
 * applied to all four states.  The two inner entry labels are not aligned.
 */
_aesni_enc4:
	movaps	(KEYP), KEY		# round key 0
	pxor	KEY, STATE1		# round 0
	pxor	KEY, STATE2
	pxor	KEY, STATE3
	pxor	KEY, STATE4
	lea	0x30(KEYP), TKEYP	# bias for 128-bit keys
	cmp	$24, KLEN
	jb	.L4enc128
	lea	0x20(TKEYP), TKEYP	# lea leaves the cmp flags intact
	je	.L4enc192
	add	$0x20, TKEYP		# 256-bit key
	movaps	-0x60(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	-0x50(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
.L4enc192:
	movaps	-0x40(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	-0x30(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
.L4enc128:
	movaps	-0x20(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	-0x10(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	0x10(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	0x20(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	0x30(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	0x40(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	0x50(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	0x60(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps	0x70(TKEYP), KEY
	AESENCLAST KEY STATE1		# last round
	AESENCLAST KEY STATE2
	AESENCLAST KEY STATE3
	AESENCLAST KEY STATE4
	ret
389
/*
 * void aesni_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Decrypts the single 16-byte block at src into dst, using the key
 * schedule stored 240 bytes into ctx.
 */
ENTRY(aesni_dec)
	movl	480(KEYP), KLEN		# key length (read before KEYP moves)
	add	$240, KEYP		# switch to the decryption schedule
	movups	(INP), STATE		# input block
	call	_aesni_dec1
	movups	STATE, (OUTP)		# output block
	ret
400
/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		start of the key schedule to use (the callers in
 *			this file pass the key struct pointer plus 240)
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * TKEYP is biased so that the round keys shared by all key sizes sit at
 * the same displacements (-0x20 .. 0x70); longer keys enter earlier and
 * run two (192-bit) or four (256-bit) extra rounds before falling through.
 */
_aesni_dec1:
	movaps	(KEYP), KEY		# round key 0
	pxor	KEY, STATE		# round 0
	lea	0x30(KEYP), TKEYP	# bias for 128-bit keys
	cmp	$24, KLEN
	jb	.Ldec128
	lea	0x20(TKEYP), TKEYP	# lea leaves the cmp flags intact
	je	.Ldec192
	add	$0x20, TKEYP		# 256-bit key
	movaps	-0x60(TKEYP), KEY
	AESDEC KEY STATE
	movaps	-0x50(TKEYP), KEY
	AESDEC KEY STATE
	.align 4
.Ldec192:
	movaps	-0x40(TKEYP), KEY
	AESDEC KEY STATE
	movaps	-0x30(TKEYP), KEY
	AESDEC KEY STATE
	.align 4
.Ldec128:
	movaps	-0x20(TKEYP), KEY
	AESDEC KEY STATE
	movaps	-0x10(TKEYP), KEY
	AESDEC KEY STATE
	movaps	(TKEYP), KEY
	AESDEC KEY STATE
	movaps	0x10(TKEYP), KEY
	AESDEC KEY STATE
	movaps	0x20(TKEYP), KEY
	AESDEC KEY STATE
	movaps	0x30(TKEYP), KEY
	AESDEC KEY STATE
	movaps	0x40(TKEYP), KEY
	AESDEC KEY STATE
	movaps	0x50(TKEYP), KEY
	AESDEC KEY STATE
	movaps	0x60(TKEYP), KEY
	AESDEC KEY STATE
	movaps	0x70(TKEYP), KEY
	AESDECLAST KEY STATE		# last round
	ret
456
/*
 * _aesni_dec4:		internal ABI
 * input:
 *	KEYP:		start of the key schedule to use (the callers in
 *			this file pass the key struct pointer plus 240)
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * Four-block variant of _aesni_dec1: each round key is loaded once and
 * applied to all four states.
 */
_aesni_dec4:
	movaps	(KEYP), KEY		# round key 0
	pxor	KEY, STATE1		# round 0
	pxor	KEY, STATE2
	pxor	KEY, STATE3
	pxor	KEY, STATE4
	lea	0x30(KEYP), TKEYP	# bias for 128-bit keys
	cmp	$24, KLEN
	jb	.L4dec128
	lea	0x20(TKEYP), TKEYP	# lea leaves the cmp flags intact
	je	.L4dec192
	add	$0x20, TKEYP		# 256-bit key
	movaps	-0x60(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	-0x50(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	.align 4
.L4dec192:
	movaps	-0x40(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	-0x30(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	.align 4
.L4dec128:
	movaps	-0x20(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	-0x10(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	0x10(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	0x20(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	0x30(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	0x40(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	0x50(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	0x60(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps	0x70(TKEYP), KEY
	AESDECLAST KEY STATE1		# last round
	AESDECLAST KEY STATE2
	AESDECLAST KEY STATE3
	AESDECLAST KEY STATE4
	ret
563
/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypts every complete 16-byte block of src into dst, four blocks
 * at a time while at least 64 bytes remain and one block at a time after
 * that.  A trailing partial block (len % 16 bytes) is left untouched.
 * Returns immediately when len < 16.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb / jae).
 */
ENTRY(aesni_ecb_enc)
	cmp	$16, LEN
	jb	.Lecb_enc_ret		# nothing to do, includes len == 0
	movl	480(KEYP), KLEN		# key length
	cmp	$64, LEN
	jb	.Lecb_enc_loop1
	.align 4
.Lecb_enc_loop4:
	movups	(INP), STATE1
	movups	0x10(INP), STATE2
	movups	0x20(INP), STATE3
	movups	0x30(INP), STATE4
	call	_aesni_enc4
	movups	STATE1, (OUTP)
	movups	STATE2, 0x10(OUTP)
	movups	STATE3, 0x20(OUTP)
	movups	STATE4, 0x30(OUTP)
	sub	$64, LEN
	add	$64, INP
	add	$64, OUTP
	cmp	$64, LEN
	jae	.Lecb_enc_loop4		# unsigned: at least 64 bytes left
	cmp	$16, LEN
	jb	.Lecb_enc_ret
	.align 4
.Lecb_enc_loop1:
	movups	(INP), STATE1
	call	_aesni_enc1
	movups	STATE1, (OUTP)
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lecb_enc_loop1		# unsigned: at least 16 bytes left
.Lecb_enc_ret:
	ret
606
/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypts every complete 16-byte block of src into dst, four blocks
 * at a time while at least 64 bytes remain and one block at a time after
 * that.  A trailing partial block (len % 16 bytes) is left untouched.
 * Returns immediately when len < 16.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb / jae).
 */
ENTRY(aesni_ecb_dec)
	cmp	$16, LEN
	jb	.Lecb_dec_ret		# nothing to do, includes len == 0
	movl	480(KEYP), KLEN		# key length (read before KEYP moves)
	add	$240, KEYP		# switch to the decryption schedule
	cmp	$64, LEN
	jb	.Lecb_dec_loop1
	.align 4
.Lecb_dec_loop4:
	movups	(INP), STATE1
	movups	0x10(INP), STATE2
	movups	0x20(INP), STATE3
	movups	0x30(INP), STATE4
	call	_aesni_dec4
	movups	STATE1, (OUTP)
	movups	STATE2, 0x10(OUTP)
	movups	STATE3, 0x20(OUTP)
	movups	STATE4, 0x30(OUTP)
	sub	$64, LEN
	add	$64, INP
	add	$64, OUTP
	cmp	$64, LEN
	jae	.Lecb_dec_loop4		# unsigned: at least 64 bytes left
	cmp	$16, LEN
	jb	.Lecb_dec_ret
	.align 4
.Lecb_dec_loop1:
	movups	(INP), STATE1
	call	_aesni_dec1
	movups	STATE1, (OUTP)
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lecb_dec_loop1		# unsigned: at least 16 bytes left
.Lecb_dec_ret:
	ret
650
/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts every complete 16-byte block of src into dst.  The chaining
 * value is read from iv and stays in STATE between blocks; the last
 * ciphertext block is written back to iv.  When len < 16 the function
 * returns without touching dst or iv.
 *
 * len is unsigned, so the loop test uses the unsigned condition code.
 */
ENTRY(aesni_cbc_enc)
	cmp	$16, LEN
	jb	.Lcbc_enc_ret
	movl	480(KEYP), KLEN		# key length
	movups	(IVP), STATE		# load iv as initial state
	.align 4
.Lcbc_enc_loop:
	movups	(INP), IN		# load input
	pxor	IN, STATE		# chain: plaintext xor previous ciphertext
	call	_aesni_enc1
	movups	STATE, (OUTP)		# store output
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lcbc_enc_loop		# unsigned: at least 16 bytes left
	movups	STATE, (IVP)		# hand the chaining value back
.Lcbc_enc_ret:
	ret
674
/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts every complete 16-byte block of src into dst, four blocks
 * at a time while at least 64 bytes remain and one block at a time after
 * that.  The ciphertext blocks are kept in IN1..IN4 across the decryption
 * call so they can serve as chaining values; the last ciphertext block
 * processed is written back to iv.  When len < 16 the function returns
 * without touching dst or iv.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb / jae).
 */
ENTRY(aesni_cbc_dec)
	cmp	$16, LEN
	jb	.Lcbc_dec_just_ret
	movl	480(KEYP), KLEN		# key length (read before KEYP moves)
	add	$240, KEYP		# switch to the decryption schedule
	movups	(IVP), IV
	cmp	$64, LEN
	jb	.Lcbc_dec_loop1
	.align 4
.Lcbc_dec_loop4:
	movups	(INP), IN1
	movaps	IN1, STATE1
	movups	0x10(INP), IN2
	movaps	IN2, STATE2
	movups	0x20(INP), IN3
	movaps	IN3, STATE3
	movups	0x30(INP), IN4
	movaps	IN4, STATE4
	call	_aesni_dec4
	pxor	IV, STATE1		# each block is xored with the
	pxor	IN1, STATE2		# ciphertext block that preceded it
	pxor	IN2, STATE3
	pxor	IN3, STATE4
	movaps	IN4, IV			# chaining value for the next round
	movups	STATE1, (OUTP)
	movups	STATE2, 0x10(OUTP)
	movups	STATE3, 0x20(OUTP)
	movups	STATE4, 0x30(OUTP)
	sub	$64, LEN
	add	$64, INP
	add	$64, OUTP
	cmp	$64, LEN
	jae	.Lcbc_dec_loop4		# unsigned: at least 64 bytes left
	cmp	$16, LEN
	jb	.Lcbc_dec_ret
	.align 4
.Lcbc_dec_loop1:
	movups	(INP), IN
	movaps	IN, STATE
	call	_aesni_dec1
	pxor	IV, STATE
	movups	STATE, (OUTP)
	movaps	IN, IV			# chaining value for the next block
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lcbc_dec_loop1		# unsigned: at least 16 bytes left
.Lcbc_dec_ret:
	movups	IV, (IVP)		# hand the chaining value back
.Lcbc_dec_just_ret:
	ret
731
/*
 * Byte-shuffle control used with PSHUFB_XMM: selects source bytes 15..0,
 * i.e. reverses the byte order of a 16-byte register.  Kept 16-byte
 * aligned because it is loaded with movaps.
 */
.align 16
.Lbswap_mask:
	.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
	.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
735
/*
 * _aesni_inc_init:	internal ABI
 *	prepare the registers that _aesni_inc works on
 * input:
 *	IV
 * output:
 *	CTR:		== IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 *	INC:		== 1, in little endian
 *	BSWAP_MASK	== endian swapping mask
 */
_aesni_inc_init:
	/* INC = 1, staged through TCTR_LOW before that register gets its value */
	mov	$1, TCTR_LOW
	MOVQ_R64_XMM TCTR_LOW INC
	/* CTR = byte-reversed copy of IV */
	movaps	.Lbswap_mask, BSWAP_MASK
	movaps	IV, CTR
	PSHUFB_XMM BSWAP_MASK CTR
	/* shadow the low counter qword in a general-purpose register */
	MOVQ_R64_XMM CTR TCTR_LOW
	ret
755
/*
 * _aesni_inc:		internal ABI
 *	Increase IV by 1, IV is in big endian
 * input:
 *	IV
 *	CTR:		== IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 *	INC:		== 1, in little endian
 *	BSWAP_MASK	== endian swapping mask
 * output:
 *	IV:		increased by 1
 * changed:
 *	CTR:		== output IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 *
 * paddq adds the two qwords independently, so a wrap of the low qword is
 * detected on the TCTR_LOW shadow copy and carried into the high qword by
 * temporarily moving INC into the upper half.
 */
_aesni_inc:
	add	$1, TCTR_LOW		# sets CF when the low qword wraps
	paddq	INC, CTR		# SSE add, leaves CF untouched
	jnc	.Linc_low
	pslldq	$8, INC			# INC = 1 in the high qword
	paddq	INC, CTR		# propagate the carry
	psrldq	$8, INC			# restore INC = 1 in the low qword
.Linc_low:
	movaps	CTR, IV
	PSHUFB_XMM BSWAP_MASK IV	# back to big endian
	ret
782
/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * Counter-mode processing of every complete 16-byte block of src into
 * dst: the counter block is encrypted and xored with the input, then the
 * counter is incremented by _aesni_inc.  Four blocks are handled per
 * iteration while at least 64 bytes remain, one block at a time after
 * that.  The counter value following the last block used is written back
 * to iv.  When len < 16 the function returns without touching dst or iv.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb / jae).
 */
ENTRY(aesni_ctr_enc)
	cmp	$16, LEN
	jb	.Lctr_enc_just_ret
	movl	480(KEYP), KLEN		# key length
	movups	(IVP), IV
	call	_aesni_inc_init
	cmp	$64, LEN
	jb	.Lctr_enc_loop1
	.align 4
.Lctr_enc_loop4:
	/* capture four consecutive counter blocks, loading input alongside */
	movaps	IV, STATE1
	call	_aesni_inc
	movups	(INP), IN1
	movaps	IV, STATE2
	call	_aesni_inc
	movups	0x10(INP), IN2
	movaps	IV, STATE3
	call	_aesni_inc
	movups	0x20(INP), IN3
	movaps	IV, STATE4
	call	_aesni_inc
	movups	0x30(INP), IN4
	call	_aesni_enc4
	pxor	IN1, STATE1
	movups	STATE1, (OUTP)
	pxor	IN2, STATE2
	movups	STATE2, 0x10(OUTP)
	pxor	IN3, STATE3
	movups	STATE3, 0x20(OUTP)
	pxor	IN4, STATE4
	movups	STATE4, 0x30(OUTP)
	sub	$64, LEN
	add	$64, INP
	add	$64, OUTP
	cmp	$64, LEN
	jae	.Lctr_enc_loop4		# unsigned: at least 64 bytes left
	cmp	$16, LEN
	jb	.Lctr_enc_ret
	.align 4
.Lctr_enc_loop1:
	movaps	IV, STATE
	call	_aesni_inc
	movups	(INP), IN
	call	_aesni_enc1
	pxor	IN, STATE
	movups	STATE, (OUTP)
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lctr_enc_loop1		# unsigned: at least 16 bytes left
.Lctr_enc_ret:
	movups	IV, (IVP)		# hand the next counter value back
.Lctr_enc_just_ret:
	ret