#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in a pure AltiVec/VMX
# way [data aligned programmatically, which in turn guarantees
# exception-free execution], but that turned out to hamper performance
# when vcipher instructions are interleaved. It's reckoned that the
# eventual misalignment penalties at page boundaries are on average
# lower than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

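######################################################################
# For reference, the entry points generated below are listed here; the
# C prototypes are an assumption inferred from the usual OpenSSL AES
# conventions and from the argument registers each routine uses (the
# XTS prototype is spelled out in its own section header further down):
#
#	int  aes_p8_set_encrypt_key(const unsigned char *userKey,
#				const int bits, AES_KEY *key);
#	int  aes_p8_set_decrypt_key(const unsigned char *userKey,
#				const int bits, AES_KEY *key);
#	void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
#				const AES_KEY *key);
#	void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
#				const AES_KEY *key);
#	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#				size_t length, const AES_KEY *key,
#				unsigned char *ivec, const int enc);
#	void aes_p8_ctr32_encrypt_blocks(const unsigned char *in,
#				unsigned char *out, size_t blocks,
#				const AES_KEY *key,
#				const unsigned char ivec[16]);
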
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

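# Invocation sketch (an assumption based on the flavour matching above:
# any string matching /64/ selects the 64-bit ABI and a trailing "le"
# selects little-endian, so OpenSSL-style names like "linux32",
# "linux64", "linux64le" all work; the kernel build passes its own
# flavour name through the same pipe):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s
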
$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
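# Notation note (a summary of ppc-xlate.pl conventions, see that script
# for the authoritative rules): "le?"/"be?" prefixes emit an instruction
# only in little-/big-endian builds, a bare "?" marks instructions whose
# operands the translator may adjust for endianness, and the "?rev"/
# "?asis" tags above tell it whether to reverse the .long words on
# little-endian.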
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
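	# bcl 20,31,\$+4 is the canonical "branch-and-link to the next
	# instruction": it captures the current PC in LR without
	# polluting return-address prediction. rcon sits 0x48 bytes
	# (four 16-byte table rows plus two instructions) behind the
	# address just captured, hence the -0x48 below.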
	mflr	$ptr	 #vvvvv "distance between . and rcon"
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

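	# Unaligned-load idiom used throughout: lvx rounds the address
	# down, so two loads fetch the quadwords covering the 16 input
	# bytes, and vperm with the permute vector from lvsr on the
	# negated address splices them together (on little-endian the
	# xor with 0x0f..0f additionally byte-swaps the permute).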
	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

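	# Each Loop128 iteration derives one round key: vperm with the
	# 0x0d0e0f0c mask rotates the last key word and splats it into
	# all four words, vcipherlast then performs SubBytes and xors in
	# the round constant (ShiftRows is a no-op because all four
	# words are identical), and the vsldoi/vxor ladder folds the
	# previous round key in 32-bit steps.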
.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

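	# The decryption schedule is the encryption schedule with the
	# round keys in reverse order; the loop below swaps them 16
	# bytes at a time, walking $inp and $out toward each other.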
Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	le?lvsl		$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	vsldoi		$one,$rndkey0,$one,1
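	# Build the 128-bit constant 1 for the counter increment:
	# vspltisb set every byte of $one to 0x01, and the vsldoi above
	# shifts in 15 zero bytes from $rndkey0 so only the least
	# significant byte survives.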

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	vmr		$dat,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	vperm		$dat,$dat,$inptail,$inpperm
	li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	mtctr		$rounds
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_ctr32_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_ctr32_enc_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	vadduqm		$two,$one,$one
	subi		$inp,$inp,15		# undo "caller"
	$SHL		$len,$len,4

	vadduqm		$out1,$ivec,$one	# counter values ...
	vadduqm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li		$idx,8
	vadduqm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	le?lvsl		$inpperm,0,$idx
	vadduqm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vadduqm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	vadduqm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	vxor		$out7,$out7,$rndkey0

	mtctr		$rounds
	b		Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_ctr32_enc8x

	subic		r11,$len,256		# $len-256, borrow $key_
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24

	subfe		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25

	and		r0,r0,r11
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26
	vcipher		$out6,$out6,v26
	vcipher		$out7,$out7,v26
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	subic		$len,$len,129		# $len-=129
	vcipher		$out0,$out0,v27
	addi		$len,$len,1		# $len-=128 really
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27
	vcipher		$out6,$out6,v27
	vcipher		$out7,$out7,v27
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vcipher		$out0,$out0,v28
	lvx_u		$in0,$x00,$inp		# load input
	vcipher		$out1,$out1,v28
	lvx_u		$in1,$x10,$inp
	vcipher		$out2,$out2,v28
	lvx_u		$in2,$x20,$inp
	vcipher		$out3,$out3,v28
	lvx_u		$in3,$x30,$inp
	vcipher		$out4,$out4,v28
	lvx_u		$in4,$x40,$inp
	vcipher		$out5,$out5,v28
	lvx_u		$in5,$x50,$inp
	vcipher		$out6,$out6,v28
	lvx_u		$in6,$x60,$inp
	vcipher		$out7,$out7,v28
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	vcipher		$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher		$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	subfe.		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v30
	vxor		$in0,$in0,v31		# xor with last round key
	vcipher		$out1,$out1,v30
	vxor		$in1,$in1,v31
	vcipher		$out2,$out2,v30
	vxor		$in2,$in2,v31
	vcipher		$out3,$out3,v30
	vxor		$in3,$in3,v31
	vcipher		$out4,$out4,v30
	vxor		$in4,$in4,v31
	vcipher		$out5,$out5,v30
	vxor		$in5,$in5,v31
	vcipher		$out6,$out6,v30
	vxor		$in6,$in6,v31
	vcipher		$out7,$out7,v30
	vxor		$in7,$in7,v31

	bne		Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm		$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor		$out7,$out7,$rndkey0
	mtctr		$rounds

	vcipher		$out0,$out0,v24
	stvx_u		$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out1,$out1,v24
	stvx_u		$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out2,$out2,v24
	stvx_u		$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out3,$out3,v24
	stvx_u		$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out4,$out4,v24
	stvx_u		$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out5,$out5,v24
	stvx_u		$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher		$out6,$out6,v24
	stvx_u		$in6,$x60,$out
	vcipher		$out7,$out7,v24
	stvx_u		$in7,$x70,$out
	addi		$out,$out,0x80

	b		Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi		$len,-0x60
	blt		Lctr32_enc8x_one
	nop
	beq		Lctr32_enc8x_two
	cmpwi		$len,-0x40
	blt		Lctr32_enc8x_three
	nop
	beq		Lctr32_enc8x_four
	cmpwi		$len,-0x20
	blt		Lctr32_enc8x_five
	nop
	beq		Lctr32_enc8x_six
	cmpwi		$len,0x00
	blt		Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	stvx_u		$out6,$x60,$out
	addi		$out,$out,0x70
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	stvx_u		$out5,$x50,$out
	addi		$out,$out,0x60
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u		$out0,0,$out
	addi		$out,$out,0x10

Lctr32_enc8x_done:
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}} }}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#			      const AES_KEY *key1, const AES_KEY *key2,	#
#			      [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be encrypted already, and the	#
# last tweak value, suitable for a consecutive call on the same chunk	#
# of data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

($inp,$idx) = ($idx,$inp);				# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr		$inp,r3			# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff0
	mfspr		r12,256			# save vrsave
	li		r11,0
	mtspr		256,r0

	vspltisb	$seven,0x07		# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0			# key2==NULL?
	beq		Lxts_enc_no_key2

	?lvsl		$keyperm,0,$key2	# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0			# don't chain the tweak
	b		Lxts_enc

Lxts_enc_no_key2:
	li		$idx,-16
	and		$len,$len,$idx		# in "tweak chaining"
						# mode only complete
						# blocks are processed
Lxts_enc:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1	# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven	# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
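	# $eighty7 = 0x870101..01 is the XTS tweak-update constant:
	# doubling a tweak in GF(2^128) is a 1-bit left shift with the
	# reduction polynomial byte 0x87 folded back in on carry-out,
	# which the vsrab/vaddubm/vsldoi/vand/vxor sequences below
	# implement lane-wise.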

	${UCMP}i	$len,96
	bge		_aesp8_xts_encrypt6x

	andi.		$taillen,$len,15
	subic		r0,$len,32
	subi		$taillen,$taillen,16
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds
	b		Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_enc_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	subic		r0,$len,32
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$output,$output,$rndkey0	# just in case $len<16
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_enc

	vxor		$output,$output,$tweak
	lvsr		$inpperm,0,$len		# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	subi		r11,$out,17
	subi		$out,$out,16
	mtctr		$len
	li		$len,16
2097 Loop_xts_enc_steal:
2098 lbzu r0,1(r11)
2099 stb r0,16(r11)
2100 bdnz Loop_xts_enc_steal
2101
2102 mtctr $rounds
2103 b Loop_xts_enc # one more time...
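	# Ciphertext stealing: the vsel above spliced the $len plaintext
	# tail bytes over the head of the previous ciphertext block (both
	# already xored with tweak and round key 0), Loop_xts_enc_steal
	# copied the head of that stored ciphertext forward to become the
	# short final output block, $out was rewound by 16, and one more
	# Loop_xts_enc pass re-encrypts the spliced block in its place.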
2104
2105 Lxts_enc_done:
2106 ${UCMP}i $ivp,0
2107 beq Lxts_enc_ret
2108
2109 vsrab $tmp,$tweak,$seven # next tweak value
2110 vaddubm $tweak,$tweak,$tweak
2111 vsldoi $tmp,$tmp,$tmp,15
2112 vand $tmp,$tmp,$eighty7
2113 vxor $tweak,$tweak,$tmp
2114
2115 le?vperm $tweak,$tweak,$tweak,$leperm
2116 stvx_u $tweak,0,$ivp
2117
2118 Lxts_enc_ret:
2119 mtspr 256,r12 # restore vrsave
2120 li r3,0
2121 blr
2122 .long 0
2123 .byte 0,12,0x04,0,0x80,6,6,0
2124 .long 0
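	# The .long/.byte words above are the traceback tags perlasm
	# places after every routine so PPC debuggers and unwinders can
	# size up the frame; the exact encoding follows the ppc-xlate
	# conventions rather than anything specific to this routine.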
2125 .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2126
2127 .globl .${prefix}_xts_decrypt
2128 mr $inp,r3 # reassign
2129 li r3,-1
2130 ${UCMP}i $len,16
2131 bltlr-
2132
2133 lis r0,0xfff8
2134 mfspr r12,256 # save vrsave
2135 li r11,0
2136 mtspr 256,r0
2137
2138 andi. r0,$len,15
2139 neg r0,r0
2140 andi. r0,r0,16
2141 sub $len,$len,r0
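	# If the ciphertext length is not a multiple of 16, hold one full
	# block back from the bulk path: decrypt-side stealing has to
	# process the last complete block together with the tail, and with
	# the two tweaks in swapped order (see Ltail_xts_dec).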
2142
2143 vspltisb $seven,0x07 # 0x070707..07
2144 le?lvsl $leperm,r11,r11
2145 le?vspltisb $tmp,0x0f
2146 le?vxor $leperm,$leperm,$seven
2147
2148 li $idx,15
2149 lvx $tweak,0,$ivp # load [unaligned] iv
2150 lvsl $inpperm,0,$ivp
2151 lvx $inptail,$idx,$ivp
2152 le?vxor $inpperm,$inpperm,$tmp
2153 vperm $tweak,$tweak,$inptail,$inpperm
2154
2155 neg r11,$inp
2156 lvsr $inpperm,0,r11 # prepare for unaligned load
2157 lvx $inout,0,$inp
2158 addi $inp,$inp,15 # 15 is not a typo
2159 le?vxor $inpperm,$inpperm,$tmp
2160
2161 ${UCMP}i $key2,0 # key2==NULL?
2162 beq Lxts_dec_no_key2
2163
2164 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2165 lwz $rounds,240($key2)
2166 srwi $rounds,$rounds,1
2167 subi $rounds,$rounds,1
2168 li $idx,16
2169
2170 lvx $rndkey0,0,$key2
2171 lvx $rndkey1,$idx,$key2
2172 addi $idx,$idx,16
2173 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2174 vxor $tweak,$tweak,$rndkey0
2175 lvx $rndkey0,$idx,$key2
2176 addi $idx,$idx,16
2177 mtctr $rounds
2178
2179 Ltweak_xts_dec:
2180 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2181 vcipher $tweak,$tweak,$rndkey1
2182 lvx $rndkey1,$idx,$key2
2183 addi $idx,$idx,16
2184 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2185 vcipher $tweak,$tweak,$rndkey0
2186 lvx $rndkey0,$idx,$key2
2187 addi $idx,$idx,16
2188 bdnz Ltweak_xts_dec
2189
2190 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2191 vcipher $tweak,$tweak,$rndkey1
2192 lvx $rndkey1,$idx,$key2
2193 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2194 vcipherlast $tweak,$tweak,$rndkey0
2195
2196 li $ivp,0 # don't chain the tweak
2197 b Lxts_dec
2198
2199 Lxts_dec_no_key2:
2200 neg $idx,$len
2201 andi. $idx,$idx,15
2202 add $len,$len,$idx # in "tweak chaining"
2203 # mode only complete
2204 # blocks are processed
2205 Lxts_dec:
2206 lvx $inptail,0,$inp
2207 addi $inp,$inp,16
2208
2209 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2210 lwz $rounds,240($key1)
2211 srwi $rounds,$rounds,1
2212 subi $rounds,$rounds,1
2213 li $idx,16
2214
2215 vslb $eighty7,$seven,$seven # 0x808080..80
2216 vor $eighty7,$eighty7,$seven # 0x878787..87
2217 vspltisb $tmp,1 # 0x010101..01
2218 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2219
2220 ${UCMP}i $len,96
2221 bge _aesp8_xts_decrypt6x
2222
2223 lvx $rndkey0,0,$key1
2224 lvx $rndkey1,$idx,$key1
2225 addi $idx,$idx,16
2226 vperm $inout,$inout,$inptail,$inpperm
2227 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2228 vxor $inout,$inout,$tweak
2229 vxor $inout,$inout,$rndkey0
2230 lvx $rndkey0,$idx,$key1
2231 addi $idx,$idx,16
2232 mtctr $rounds
2233
2234 ${UCMP}i $len,16
2235 blt Ltail_xts_dec
2236 be?b Loop_xts_dec
2237
2238 .align 5
2239 Loop_xts_dec:
2240 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2241 vncipher $inout,$inout,$rndkey1
2242 lvx $rndkey1,$idx,$key1
2243 addi $idx,$idx,16
2244 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2245 vncipher $inout,$inout,$rndkey0
2246 lvx $rndkey0,$idx,$key1
2247 addi $idx,$idx,16
2248 bdnz Loop_xts_dec
2249
2250 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2251 vncipher $inout,$inout,$rndkey1
2252 lvx $rndkey1,$idx,$key1
2253 li $idx,16
2254 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2255 vxor $rndkey0,$rndkey0,$tweak
2256 vncipherlast $output,$inout,$rndkey0
2257
2258 le?vperm $tmp,$output,$output,$leperm
2259 be?nop
2260 le?stvx_u $tmp,0,$out
2261 be?stvx_u $output,0,$out
2262 addi $out,$out,16
2263
2264 subic. $len,$len,16
2265 beq Lxts_dec_done
2266
2267 vmr $inout,$inptail
2268 lvx $inptail,0,$inp
2269 addi $inp,$inp,16
2270 lvx $rndkey0,0,$key1
2271 lvx $rndkey1,$idx,$key1
2272 addi $idx,$idx,16
2273
2274 vsrab $tmp,$tweak,$seven # next tweak value
2275 vaddubm $tweak,$tweak,$tweak
2276 vsldoi $tmp,$tmp,$tmp,15
2277 vand $tmp,$tmp,$eighty7
2278 vxor $tweak,$tweak,$tmp
2279
2280 vperm $inout,$inout,$inptail,$inpperm
2281 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2282 vxor $inout,$inout,$tweak
2283 vxor $inout,$inout,$rndkey0
2284 lvx $rndkey0,$idx,$key1
2285 addi $idx,$idx,16
2286
2287 mtctr $rounds
2288 ${UCMP}i $len,16
2289 bge Loop_xts_dec
2290
2291 Ltail_xts_dec:
2292 vsrab $tmp,$tweak,$seven # next tweak value
2293 vaddubm $tweak1,$tweak,$tweak
2294 vsldoi $tmp,$tmp,$tmp,15
2295 vand $tmp,$tmp,$eighty7
2296 vxor $tweak1,$tweak1,$tmp
2297
2298 subi $inp,$inp,16
2299 add $inp,$inp,$len
2300
2301 vxor $inout,$inout,$tweak # :-( undo the $tweak applied above
2302 vxor $inout,$inout,$tweak1 # :-) this block takes the next tweak
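	# Stealing decrypts out of order: the last complete ciphertext
	# block is deciphered under the next tweak ($tweak1, just
	# computed), while the short stolen block is deciphered under the
	# current $tweak, re-applied below before looping back through
	# Loop_xts_dec.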
2303
2304 Loop_xts_dec_short:
2305 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2306 vncipher $inout,$inout,$rndkey1
2307 lvx $rndkey1,$idx,$key1
2308 addi $idx,$idx,16
2309 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2310 vncipher $inout,$inout,$rndkey0
2311 lvx $rndkey0,$idx,$key1
2312 addi $idx,$idx,16
2313 bdnz Loop_xts_dec_short
2314
2315 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2316 vncipher $inout,$inout,$rndkey1
2317 lvx $rndkey1,$idx,$key1
2318 li $idx,16
2319 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2320 vxor $rndkey0,$rndkey0,$tweak1
2321 vncipherlast $output,$inout,$rndkey0
2322
2323 le?vperm $tmp,$output,$output,$leperm
2324 be?nop
2325 le?stvx_u $tmp,0,$out
2326 be?stvx_u $output,0,$out
2327
2328 vmr $inout,$inptail
2329 lvx $inptail,0,$inp
2330 #addi $inp,$inp,16
2331 lvx $rndkey0,0,$key1
2332 lvx $rndkey1,$idx,$key1
2333 addi $idx,$idx,16
2334 vperm $inout,$inout,$inptail,$inpperm
2335 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2336
2337 lvsr $inpperm,0,$len # $inpperm is no longer needed
2338 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2339 vspltisb $tmp,-1
2340 vperm $inptail,$inptail,$tmp,$inpperm
2341 vsel $inout,$inout,$output,$inptail
2342
2343 vxor $rndkey0,$rndkey0,$tweak
2344 vxor $inout,$inout,$rndkey0
2345 lvx $rndkey0,$idx,$key1
2346 addi $idx,$idx,16
2347
2348 subi r11,$out,1
2349 mtctr $len
2350 li $len,16
2351 Loop_xts_dec_steal:
2352 lbzu r0,1(r11)
2353 stb r0,16(r11)
2354 bdnz Loop_xts_dec_steal
2355
2356 mtctr $rounds
2357 b Loop_xts_dec # one more time...
2358
2359 Lxts_dec_done:
2360 ${UCMP}i $ivp,0
2361 beq Lxts_dec_ret
2362
2363 vsrab $tmp,$tweak,$seven # next tweak value
2364 vaddubm $tweak,$tweak,$tweak
2365 vsldoi $tmp,$tmp,$tmp,15
2366 vand $tmp,$tmp,$eighty7
2367 vxor $tweak,$tweak,$tmp
2368
2369 le?vperm $tweak,$tweak,$tweak,$leperm
2370 stvx_u $tweak,0,$ivp
2371
2372 Lxts_dec_ret:
2373 mtspr 256,r12 # restore vrsave
2374 li r3,0
2375 blr
2376 .long 0
2377 .byte 0,12,0x04,0,0x80,6,6,0
2378 .long 0
2379 .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2380 ___
2381 #########################################################################
2382 {{ # Optimized XTS procedures #
2383 my $key_=$key2;
2384 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2385 $x00=0 if ($flavour =~ /osx/);
2386 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2387 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2388 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2389 my $rndkey0="v23"; # v24-v25 rotating buffer for the early round keys
2390 # v26-v31 last 6 round keys
2391 my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2392 my $taillen=$x70;
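# Layout of the 6x path, as the dataflow below suggests: six independent
# AES flows ($out0-$out5) are interleaved so that the latency of one
# vcipher is hidden behind the other five, v26-v31 keep the last six
# round keys resident, and the earlier round keys stream through the
# rotating v24/v25 pair from a stack-side copy of the key schedule.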
2393
2394 $code.=<<___;
2395 .align 5
2396 _aesp8_xts_encrypt6x:
2397 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2398 mflr r11
2399 li r7,`$FRAME+8*16+15`
2400 li r3,`$FRAME+8*16+31`
2401 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2402 stvx v20,r7,$sp # ABI says so
2403 addi r7,r7,32
2404 stvx v21,r3,$sp
2405 addi r3,r3,32
2406 stvx v22,r7,$sp
2407 addi r7,r7,32
2408 stvx v23,r3,$sp
2409 addi r3,r3,32
2410 stvx v24,r7,$sp
2411 addi r7,r7,32
2412 stvx v25,r3,$sp
2413 addi r3,r3,32
2414 stvx v26,r7,$sp
2415 addi r7,r7,32
2416 stvx v27,r3,$sp
2417 addi r3,r3,32
2418 stvx v28,r7,$sp
2419 addi r7,r7,32
2420 stvx v29,r3,$sp
2421 addi r3,r3,32
2422 stvx v30,r7,$sp
2423 stvx v31,r3,$sp
2424 li r0,-1
2425 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2426 li $x10,0x10
2427 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2428 li $x20,0x20
2429 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2430 li $x30,0x30
2431 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2432 li $x40,0x40
2433 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2434 li $x50,0x50
2435 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2436 li $x60,0x60
2437 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2438 li $x70,0x70
2439 mtspr 256,r0
2440
2441 subi $rounds,$rounds,3 # -4 in total
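	# $rounds arrived as nrounds/2 - 1 from the scalar prologue, so
	# the extra -3 here leaves nrounds/2 - 4 iterations both for the
	# key off-load loop and for Loop_xts_enc6x; the remaining rounds
	# use the keys kept resident in v24-v31 and run straight-line
	# after the bdnz.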
2442
2443 lvx $rndkey0,$x00,$key1 # load key schedule
2444 lvx v30,$x10,$key1
2445 addi $key1,$key1,0x20
2446 lvx v31,$x00,$key1
2447 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2448 addi $key_,$sp,$FRAME+15
2449 mtctr $rounds
2450
2451 Load_xts_enc_key:
2452 ?vperm v24,v30,v31,$keyperm
2453 lvx v30,$x10,$key1
2454 addi $key1,$key1,0x20
2455 stvx v24,$x00,$key_ # off-load round[1]
2456 ?vperm v25,v31,v30,$keyperm
2457 lvx v31,$x00,$key1
2458 stvx v25,$x10,$key_ # off-load round[2]
2459 addi $key_,$key_,0x20
2460 bdnz Load_xts_enc_key
2461
2462 lvx v26,$x10,$key1
2463 ?vperm v24,v30,v31,$keyperm
2464 lvx v27,$x20,$key1
2465 stvx v24,$x00,$key_ # off-load round[3]
2466 ?vperm v25,v31,v26,$keyperm
2467 lvx v28,$x30,$key1
2468 stvx v25,$x10,$key_ # off-load round[4]
2469 addi $key_,$sp,$FRAME+15 # rewind $key_
2470 ?vperm v26,v26,v27,$keyperm
2471 lvx v29,$x40,$key1
2472 ?vperm v27,v27,v28,$keyperm
2473 lvx v30,$x50,$key1
2474 ?vperm v28,v28,v29,$keyperm
2475 lvx v31,$x60,$key1
2476 ?vperm v29,v29,v30,$keyperm
2477 lvx $twk5,$x70,$key1 # borrow $twk5
2478 ?vperm v30,v30,v31,$keyperm
2479 lvx v24,$x00,$key_ # pre-load round[1]
2480 ?vperm v31,v31,$twk5,$keyperm
2481 lvx v25,$x10,$key_ # pre-load round[2]
2482
2483 vperm $in0,$inout,$inptail,$inpperm
2484 subi $inp,$inp,31 # undo "caller"
2485 vxor $twk0,$tweak,$rndkey0
2486 vsrab $tmp,$tweak,$seven # next tweak value
2487 vaddubm $tweak,$tweak,$tweak
2488 vsldoi $tmp,$tmp,$tmp,15
2489 vand $tmp,$tmp,$eighty7
2490 vxor $out0,$in0,$twk0
2491 vxor $tweak,$tweak,$tmp
2492
2493 lvx_u $in1,$x10,$inp
2494 vxor $twk1,$tweak,$rndkey0
2495 vsrab $tmp,$tweak,$seven # next tweak value
2496 vaddubm $tweak,$tweak,$tweak
2497 vsldoi $tmp,$tmp,$tmp,15
2498 le?vperm $in1,$in1,$in1,$leperm
2499 vand $tmp,$tmp,$eighty7
2500 vxor $out1,$in1,$twk1
2501 vxor $tweak,$tweak,$tmp
2502
2503 lvx_u $in2,$x20,$inp
2504 andi. $taillen,$len,15
2505 vxor $twk2,$tweak,$rndkey0
2506 vsrab $tmp,$tweak,$seven # next tweak value
2507 vaddubm $tweak,$tweak,$tweak
2508 vsldoi $tmp,$tmp,$tmp,15
2509 le?vperm $in2,$in2,$in2,$leperm
2510 vand $tmp,$tmp,$eighty7
2511 vxor $out2,$in2,$twk2
2512 vxor $tweak,$tweak,$tmp
2513
2514 lvx_u $in3,$x30,$inp
2515 sub $len,$len,$taillen
2516 vxor $twk3,$tweak,$rndkey0
2517 vsrab $tmp,$tweak,$seven # next tweak value
2518 vaddubm $tweak,$tweak,$tweak
2519 vsldoi $tmp,$tmp,$tmp,15
2520 le?vperm $in3,$in3,$in3,$leperm
2521 vand $tmp,$tmp,$eighty7
2522 vxor $out3,$in3,$twk3
2523 vxor $tweak,$tweak,$tmp
2524
2525 lvx_u $in4,$x40,$inp
2526 subi $len,$len,0x60
2527 vxor $twk4,$tweak,$rndkey0
2528 vsrab $tmp,$tweak,$seven # next tweak value
2529 vaddubm $tweak,$tweak,$tweak
2530 vsldoi $tmp,$tmp,$tmp,15
2531 le?vperm $in4,$in4,$in4,$leperm
2532 vand $tmp,$tmp,$eighty7
2533 vxor $out4,$in4,$twk4
2534 vxor $tweak,$tweak,$tmp
2535
2536 lvx_u $in5,$x50,$inp
2537 addi $inp,$inp,0x60
2538 vxor $twk5,$tweak,$rndkey0
2539 vsrab $tmp,$tweak,$seven # next tweak value
2540 vaddubm $tweak,$tweak,$tweak
2541 vsldoi $tmp,$tmp,$tmp,15
2542 le?vperm $in5,$in5,$in5,$leperm
2543 vand $tmp,$tmp,$eighty7
2544 vxor $out5,$in5,$twk5
2545 vxor $tweak,$tweak,$tmp
2546
2547 vxor v31,v31,$rndkey0
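	# Folding $rndkey0 into v31: every $twkN above is tweak_N xor
	# $rndkey0, so $twkN xor v31 collapses to tweak_N xor last-round
	# key, letting one vcipherlast per block perform the final
	# AddRoundKey and the closing XTS tweak xor together.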
2548 mtctr $rounds
2549 b Loop_xts_enc6x
2550
2551 .align 5
2552 Loop_xts_enc6x:
2553 vcipher $out0,$out0,v24
2554 vcipher $out1,$out1,v24
2555 vcipher $out2,$out2,v24
2556 vcipher $out3,$out3,v24
2557 vcipher $out4,$out4,v24
2558 vcipher $out5,$out5,v24
2559 lvx v24,$x20,$key_ # round[3]
2560 addi $key_,$key_,0x20
2561
2562 vcipher $out0,$out0,v25
2563 vcipher $out1,$out1,v25
2564 vcipher $out2,$out2,v25
2565 vcipher $out3,$out3,v25
2566 vcipher $out4,$out4,v25
2567 vcipher $out5,$out5,v25
2568 lvx v25,$x10,$key_ # round[4]
2569 bdnz Loop_xts_enc6x
2570
2571 subic $len,$len,96 # $len-=96
2572 vxor $in0,$twk0,v31 # xor with last round key
2573 vcipher $out0,$out0,v24
2574 vcipher $out1,$out1,v24
2575 vsrab $tmp,$tweak,$seven # next tweak value
2576 vxor $twk0,$tweak,$rndkey0
2577 vaddubm $tweak,$tweak,$tweak
2578 vcipher $out2,$out2,v24
2579 vcipher $out3,$out3,v24
2580 vsldoi $tmp,$tmp,$tmp,15
2581 vcipher $out4,$out4,v24
2582 vcipher $out5,$out5,v24
2583
2584 subfe. r0,r0,r0 # borrow?-1:0
2585 vand $tmp,$tmp,$eighty7
2586 vcipher $out0,$out0,v25
2587 vcipher $out1,$out1,v25
2588 vxor $tweak,$tweak,$tmp
2589 vcipher $out2,$out2,v25
2590 vcipher $out3,$out3,v25
2591 vxor $in1,$twk1,v31
2592 vsrab $tmp,$tweak,$seven # next tweak value
2593 vxor $twk1,$tweak,$rndkey0
2594 vcipher $out4,$out4,v25
2595 vcipher $out5,$out5,v25
2596
2597 and r0,r0,$len
2598 vaddubm $tweak,$tweak,$tweak
2599 vsldoi $tmp,$tmp,$tmp,15
2600 vcipher $out0,$out0,v26
2601 vcipher $out1,$out1,v26
2602 vand $tmp,$tmp,$eighty7
2603 vcipher $out2,$out2,v26
2604 vcipher $out3,$out3,v26
2605 vxor $tweak,$tweak,$tmp
2606 vcipher $out4,$out4,v26
2607 vcipher $out5,$out5,v26
2608
2609 add $inp,$inp,r0 # $inp is adjusted in such a
2610 # way that at exit from the
2611 # loop inX-in5 are loaded
2612 # with the last "words"
2613 vxor $in2,$twk2,v31
2614 vsrab $tmp,$tweak,$seven # next tweak value
2615 vxor $twk2,$tweak,$rndkey0
2616 vaddubm $tweak,$tweak,$tweak
2617 vcipher $out0,$out0,v27
2618 vcipher $out1,$out1,v27
2619 vsldoi $tmp,$tmp,$tmp,15
2620 vcipher $out2,$out2,v27
2621 vcipher $out3,$out3,v27
2622 vand $tmp,$tmp,$eighty7
2623 vcipher $out4,$out4,v27
2624 vcipher $out5,$out5,v27
2625
2626 addi $key_,$sp,$FRAME+15 # rewind $key_
2627 vxor $tweak,$tweak,$tmp
2628 vcipher $out0,$out0,v28
2629 vcipher $out1,$out1,v28
2630 vxor $in3,$twk3,v31
2631 vsrab $tmp,$tweak,$seven # next tweak value
2632 vxor $twk3,$tweak,$rndkey0
2633 vcipher $out2,$out2,v28
2634 vcipher $out3,$out3,v28
2635 vaddubm $tweak,$tweak,$tweak
2636 vsldoi $tmp,$tmp,$tmp,15
2637 vcipher $out4,$out4,v28
2638 vcipher $out5,$out5,v28
2639 lvx v24,$x00,$key_ # re-pre-load round[1]
2640 vand $tmp,$tmp,$eighty7
2641
2642 vcipher $out0,$out0,v29
2643 vcipher $out1,$out1,v29
2644 vxor $tweak,$tweak,$tmp
2645 vcipher $out2,$out2,v29
2646 vcipher $out3,$out3,v29
2647 vxor $in4,$twk4,v31
2648 vsrab $tmp,$tweak,$seven # next tweak value
2649 vxor $twk4,$tweak,$rndkey0
2650 vcipher $out4,$out4,v29
2651 vcipher $out5,$out5,v29
2652 lvx v25,$x10,$key_ # re-pre-load round[2]
2653 vaddubm $tweak,$tweak,$tweak
2654 vsldoi $tmp,$tmp,$tmp,15
2655
2656 vcipher $out0,$out0,v30
2657 vcipher $out1,$out1,v30
2658 vand $tmp,$tmp,$eighty7
2659 vcipher $out2,$out2,v30
2660 vcipher $out3,$out3,v30
2661 vxor $tweak,$tweak,$tmp
2662 vcipher $out4,$out4,v30
2663 vcipher $out5,$out5,v30
2664 vxor $in5,$twk5,v31
2665 vsrab $tmp,$tweak,$seven # next tweak value
2666 vxor $twk5,$tweak,$rndkey0
2667
2668 vcipherlast $out0,$out0,$in0
2669 lvx_u $in0,$x00,$inp # load next input block
2670 vaddubm $tweak,$tweak,$tweak
2671 vsldoi $tmp,$tmp,$tmp,15
2672 vcipherlast $out1,$out1,$in1
2673 lvx_u $in1,$x10,$inp
2674 vcipherlast $out2,$out2,$in2
2675 le?vperm $in0,$in0,$in0,$leperm
2676 lvx_u $in2,$x20,$inp
2677 vand $tmp,$tmp,$eighty7
2678 vcipherlast $out3,$out3,$in3
2679 le?vperm $in1,$in1,$in1,$leperm
2680 lvx_u $in3,$x30,$inp
2681 vcipherlast $out4,$out4,$in4
2682 le?vperm $in2,$in2,$in2,$leperm
2683 lvx_u $in4,$x40,$inp
2684 vxor $tweak,$tweak,$tmp
2685 vcipherlast $tmp,$out5,$in5 # last block might be needed
2686 # in stealing mode
2687 le?vperm $in3,$in3,$in3,$leperm
2688 lvx_u $in5,$x50,$inp
2689 addi $inp,$inp,0x60
2690 le?vperm $in4,$in4,$in4,$leperm
2691 le?vperm $in5,$in5,$in5,$leperm
2692
2693 le?vperm $out0,$out0,$out0,$leperm
2694 le?vperm $out1,$out1,$out1,$leperm
2695 stvx_u $out0,$x00,$out # store output
2696 vxor $out0,$in0,$twk0
2697 le?vperm $out2,$out2,$out2,$leperm
2698 stvx_u $out1,$x10,$out
2699 vxor $out1,$in1,$twk1
2700 le?vperm $out3,$out3,$out3,$leperm
2701 stvx_u $out2,$x20,$out
2702 vxor $out2,$in2,$twk2
2703 le?vperm $out4,$out4,$out4,$leperm
2704 stvx_u $out3,$x30,$out
2705 vxor $out3,$in3,$twk3
2706 le?vperm $out5,$tmp,$tmp,$leperm
2707 stvx_u $out4,$x40,$out
2708 vxor $out4,$in4,$twk4
2709 le?stvx_u $out5,$x50,$out
2710 be?stvx_u $tmp, $x50,$out
2711 vxor $out5,$in5,$twk5
2712 addi $out,$out,0x60
2713
2714 mtctr $rounds
2715 beq Loop_xts_enc6x # did $len-=96 borrow?
2716
2717 addic. $len,$len,0x60
2718 beq Lxts_enc6x_zero
2719 cmpwi $len,0x20
2720 blt Lxts_enc6x_one
2721 nop
2722 beq Lxts_enc6x_two
2723 cmpwi $len,0x40
2724 blt Lxts_enc6x_three
2725 nop
2726 beq Lxts_enc6x_four
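	# Remainder dispatch: addic. restores $len to the leftover byte
	# count (0x00-0x50) and sets cr0, then the cmpwi/blt/beq ladder
	# selects the 1..5 block epilogue; the nops presumably just space
	# the taken branches for the dispatcher.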
2727
2728 Lxts_enc6x_five:
2729 vxor $out0,$in1,$twk0
2730 vxor $out1,$in2,$twk1
2731 vxor $out2,$in3,$twk2
2732 vxor $out3,$in4,$twk3
2733 vxor $out4,$in5,$twk4
2734
2735 bl _aesp8_xts_enc5x
2736
2737 le?vperm $out0,$out0,$out0,$leperm
2738 vmr $twk0,$twk5 # unused tweak
2739 le?vperm $out1,$out1,$out1,$leperm
2740 stvx_u $out0,$x00,$out # store output
2741 le?vperm $out2,$out2,$out2,$leperm
2742 stvx_u $out1,$x10,$out
2743 le?vperm $out3,$out3,$out3,$leperm
2744 stvx_u $out2,$x20,$out
2745 vxor $tmp,$out4,$twk5 # last block prep for stealing
2746 le?vperm $out4,$out4,$out4,$leperm
2747 stvx_u $out3,$x30,$out
2748 stvx_u $out4,$x40,$out
2749 addi $out,$out,0x50
2750 bne Lxts_enc6x_steal
2751 b Lxts_enc6x_done
2752
2753 .align 4
2754 Lxts_enc6x_four:
2755 vxor $out0,$in2,$twk0
2756 vxor $out1,$in3,$twk1
2757 vxor $out2,$in4,$twk2
2758 vxor $out3,$in5,$twk3
2759 vxor $out4,$out4,$out4
2760
2761 bl _aesp8_xts_enc5x
2762
2763 le?vperm $out0,$out0,$out0,$leperm
2764 vmr $twk0,$twk4 # unused tweak
2765 le?vperm $out1,$out1,$out1,$leperm
2766 stvx_u $out0,$x00,$out # store output
2767 le?vperm $out2,$out2,$out2,$leperm
2768 stvx_u $out1,$x10,$out
2769 vxor $tmp,$out3,$twk4 # last block prep for stealing
2770 le?vperm $out3,$out3,$out3,$leperm
2771 stvx_u $out2,$x20,$out
2772 stvx_u $out3,$x30,$out
2773 addi $out,$out,0x40
2774 bne Lxts_enc6x_steal
2775 b Lxts_enc6x_done
2776
2777 .align 4
2778 Lxts_enc6x_three:
2779 vxor $out0,$in3,$twk0
2780 vxor $out1,$in4,$twk1
2781 vxor $out2,$in5,$twk2
2782 vxor $out3,$out3,$out3
2783 vxor $out4,$out4,$out4
2784
2785 bl _aesp8_xts_enc5x
2786
2787 le?vperm $out0,$out0,$out0,$leperm
2788 vmr $twk0,$twk3 # unused tweak
2789 le?vperm $out1,$out1,$out1,$leperm
2790 stvx_u $out0,$x00,$out # store output
2791 vxor $tmp,$out2,$twk3 # last block prep for stealing
2792 le?vperm $out2,$out2,$out2,$leperm
2793 stvx_u $out1,$x10,$out
2794 stvx_u $out2,$x20,$out
2795 addi $out,$out,0x30
2796 bne Lxts_enc6x_steal
2797 b Lxts_enc6x_done
2798
2799 .align 4
2800 Lxts_enc6x_two:
2801 vxor $out0,$in4,$twk0
2802 vxor $out1,$in5,$twk1
2803 vxor $out2,$out2,$out2
2804 vxor $out3,$out3,$out3
2805 vxor $out4,$out4,$out4
2806
2807 bl _aesp8_xts_enc5x
2808
2809 le?vperm $out0,$out0,$out0,$leperm
2810 vmr $twk0,$twk2 # unused tweak
2811 vxor $tmp,$out1,$twk2 # last block prep for stealing
2812 le?vperm $out1,$out1,$out1,$leperm
2813 stvx_u $out0,$x00,$out # store output
2814 stvx_u $out1,$x10,$out
2815 addi $out,$out,0x20
2816 bne Lxts_enc6x_steal
2817 b Lxts_enc6x_done
2818
2819 .align 4
2820 Lxts_enc6x_one:
2821 vxor $out0,$in5,$twk0
2822 nop
2823 Loop_xts_enc1x:
2824 vcipher $out0,$out0,v24
2825 lvx v24,$x20,$key_ # round[3]
2826 addi $key_,$key_,0x20
2827
2828 vcipher $out0,$out0,v25
2829 lvx v25,$x10,$key_ # round[4]
2830 bdnz Loop_xts_enc1x
2831
2832 add $inp,$inp,$taillen
2833 cmpwi $taillen,0
2834 vcipher $out0,$out0,v24
2835
2836 subi $inp,$inp,16
2837 vcipher $out0,$out0,v25
2838
2839 lvsr $inpperm,0,$taillen
2840 vcipher $out0,$out0,v26
2841
2842 lvx_u $in0,0,$inp
2843 vcipher $out0,$out0,v27
2844
2845 addi $key_,$sp,$FRAME+15 # rewind $key_
2846 vcipher $out0,$out0,v28
2847 lvx v24,$x00,$key_ # re-pre-load round[1]
2848
2849 vcipher $out0,$out0,v29
2850 lvx v25,$x10,$key_ # re-pre-load round[2]
2851 vxor $twk0,$twk0,v31
2852
2853 le?vperm $in0,$in0,$in0,$leperm
2854 vcipher $out0,$out0,v30
2855
2856 vperm $in0,$in0,$in0,$inpperm
2857 vcipherlast $out0,$out0,$twk0
2858
2859 vmr $twk0,$twk1 # unused tweak
2860 vxor $tmp,$out0,$twk1 # last block prep for stealing
2861 le?vperm $out0,$out0,$out0,$leperm
2862 stvx_u $out0,$x00,$out # store output
2863 addi $out,$out,0x10
2864 bne Lxts_enc6x_steal
2865 b Lxts_enc6x_done
2866
2867 .align 4
2868 Lxts_enc6x_zero:
2869 cmpwi $taillen,0
2870 beq Lxts_enc6x_done
2871
2872 add $inp,$inp,$taillen
2873 subi $inp,$inp,16
2874 lvx_u $in0,0,$inp
2875 lvsr $inpperm,0,$taillen # $in5 is no longer needed
2876 le?vperm $in0,$in0,$in0,$leperm
2877 vperm $in0,$in0,$in0,$inpperm
2878 vxor $tmp,$tmp,$twk0
2879 Lxts_enc6x_steal:
2880 vxor $in0,$in0,$twk0
2881 vxor $out0,$out0,$out0
2882 vspltisb $out1,-1
2883 vperm $out0,$out0,$out1,$inpperm
2884 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
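	# lvsr on $taillen gave a shift permute; pushing all-zero and
	# all-ones vectors through it (vperm above) yields a mask with
	# exactly $taillen leading 0x00 bytes, so the vsel takes the fresh
	# tail bytes from $in0 and the stolen bytes from $tmp, the last
	# ciphertext block (both already carry their tweak masks).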
2885
2886 subi r30,$out,17
2887 subi $out,$out,16
2888 mtctr $taillen
2889 Loop_xts_enc6x_steal:
2890 lbzu r0,1(r30)
2891 stb r0,16(r30)
2892 bdnz Loop_xts_enc6x_steal
2893
2894 li $taillen,0
2895 mtctr $rounds
2896 b Loop_xts_enc1x # one more time...
2897
2898 .align 4
2899 Lxts_enc6x_done:
2900 ${UCMP}i $ivp,0
2901 beq Lxts_enc6x_ret
2902
2903 vxor $tweak,$twk0,$rndkey0
2904 le?vperm $tweak,$tweak,$tweak,$leperm
2905 stvx_u $tweak,0,$ivp
2906
2907 Lxts_enc6x_ret:
2908 mtlr r11
2909 li r10,`$FRAME+15`
2910 li r11,`$FRAME+31`
2911 stvx $seven,r10,$sp # wipe copies of round keys
2912 addi r10,r10,32
2913 stvx $seven,r11,$sp
2914 addi r11,r11,32
2915 stvx $seven,r10,$sp
2916 addi r10,r10,32
2917 stvx $seven,r11,$sp
2918 addi r11,r11,32
2919 stvx $seven,r10,$sp
2920 addi r10,r10,32
2921 stvx $seven,r11,$sp
2922 addi r11,r11,32
2923 stvx $seven,r10,$sp
2924 addi r10,r10,32
2925 stvx $seven,r11,$sp
2926 addi r11,r11,32
2927
2928 mtspr 256,$vrsave
2929 lvx v20,r10,$sp # ABI says so
2930 addi r10,r10,32
2931 lvx v21,r11,$sp
2932 addi r11,r11,32
2933 lvx v22,r10,$sp
2934 addi r10,r10,32
2935 lvx v23,r11,$sp
2936 addi r11,r11,32
2937 lvx v24,r10,$sp
2938 addi r10,r10,32
2939 lvx v25,r11,$sp
2940 addi r11,r11,32
2941 lvx v26,r10,$sp
2942 addi r10,r10,32
2943 lvx v27,r11,$sp
2944 addi r11,r11,32
2945 lvx v28,r10,$sp
2946 addi r10,r10,32
2947 lvx v29,r11,$sp
2948 addi r11,r11,32
2949 lvx v30,r10,$sp
2950 lvx v31,r11,$sp
2951 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2952 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2953 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2954 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2955 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2956 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2957 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2958 blr
2959 .long 0
2960 .byte 0,12,0x04,1,0x80,6,6,0
2961 .long 0
2962
2963 .align 5
2964 _aesp8_xts_enc5x:
2965 vcipher $out0,$out0,v24
2966 vcipher $out1,$out1,v24
2967 vcipher $out2,$out2,v24
2968 vcipher $out3,$out3,v24
2969 vcipher $out4,$out4,v24
2970 lvx v24,$x20,$key_ # round[3]
2971 addi $key_,$key_,0x20
2972
2973 vcipher $out0,$out0,v25
2974 vcipher $out1,$out1,v25
2975 vcipher $out2,$out2,v25
2976 vcipher $out3,$out3,v25
2977 vcipher $out4,$out4,v25
2978 lvx v25,$x10,$key_ # round[4]
2979 bdnz _aesp8_xts_enc5x
2980
2981 add $inp,$inp,$taillen
2982 cmpwi $taillen,0
2983 vcipher $out0,$out0,v24
2984 vcipher $out1,$out1,v24
2985 vcipher $out2,$out2,v24
2986 vcipher $out3,$out3,v24
2987 vcipher $out4,$out4,v24
2988
2989 subi $inp,$inp,16
2990 vcipher $out0,$out0,v25
2991 vcipher $out1,$out1,v25
2992 vcipher $out2,$out2,v25
2993 vcipher $out3,$out3,v25
2994 vcipher $out4,$out4,v25
2995 vxor $twk0,$twk0,v31
2996
2997 vcipher $out0,$out0,v26
2998 lvsr $inpperm,r0,$taillen # $in5 is no longer needed
2999 vcipher $out1,$out1,v26
3000 vcipher $out2,$out2,v26
3001 vcipher $out3,$out3,v26
3002 vcipher $out4,$out4,v26
3003 vxor $in1,$twk1,v31
3004
3005 vcipher $out0,$out0,v27
3006 lvx_u $in0,0,$inp
3007 vcipher $out1,$out1,v27
3008 vcipher $out2,$out2,v27
3009 vcipher $out3,$out3,v27
3010 vcipher $out4,$out4,v27
3011 vxor $in2,$twk2,v31
3012
3013 addi $key_,$sp,$FRAME+15 # rewind $key_
3014 vcipher $out0,$out0,v28
3015 vcipher $out1,$out1,v28
3016 vcipher $out2,$out2,v28
3017 vcipher $out3,$out3,v28
3018 vcipher $out4,$out4,v28
3019 lvx v24,$x00,$key_ # re-pre-load round[1]
3020 vxor $in3,$twk3,v31
3021
3022 vcipher $out0,$out0,v29
3023 le?vperm $in0,$in0,$in0,$leperm
3024 vcipher $out1,$out1,v29
3025 vcipher $out2,$out2,v29
3026 vcipher $out3,$out3,v29
3027 vcipher $out4,$out4,v29
3028 lvx v25,$x10,$key_ # re-pre-load round[2]
3029 vxor $in4,$twk4,v31
3030
3031 vcipher $out0,$out0,v30
3032 vperm $in0,$in0,$in0,$inpperm
3033 vcipher $out1,$out1,v30
3034 vcipher $out2,$out2,v30
3035 vcipher $out3,$out3,v30
3036 vcipher $out4,$out4,v30
3037
3038 vcipherlast $out0,$out0,$twk0
3039 vcipherlast $out1,$out1,$in1
3040 vcipherlast $out2,$out2,$in2
3041 vcipherlast $out3,$out3,$in3
3042 vcipherlast $out4,$out4,$in4
3043 blr
3044 .long 0
3045 .byte 0,12,0x14,0,0,0,0,0
3046
3047 .align 5
3048 _aesp8_xts_decrypt6x:
3049 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3050 mflr r11
3051 li r7,`$FRAME+8*16+15`
3052 li r3,`$FRAME+8*16+31`
3053 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3054 stvx v20,r7,$sp # ABI says so
3055 addi r7,r7,32
3056 stvx v21,r3,$sp
3057 addi r3,r3,32
3058 stvx v22,r7,$sp
3059 addi r7,r7,32
3060 stvx v23,r3,$sp
3061 addi r3,r3,32
3062 stvx v24,r7,$sp
3063 addi r7,r7,32
3064 stvx v25,r3,$sp
3065 addi r3,r3,32
3066 stvx v26,r7,$sp
3067 addi r7,r7,32
3068 stvx v27,r3,$sp
3069 addi r3,r3,32
3070 stvx v28,r7,$sp
3071 addi r7,r7,32
3072 stvx v29,r3,$sp
3073 addi r3,r3,32
3074 stvx v30,r7,$sp
3075 stvx v31,r3,$sp
3076 li r0,-1
3077 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3078 li $x10,0x10
3079 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3080 li $x20,0x20
3081 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3082 li $x30,0x30
3083 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3084 li $x40,0x40
3085 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3086 li $x50,0x50
3087 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3088 li $x60,0x60
3089 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3090 li $x70,0x70
3091 mtspr 256,r0
3092
3093 subi $rounds,$rounds,3 # -4 in total
3094
3095 lvx $rndkey0,$x00,$key1 # load key schedule
3096 lvx v30,$x10,$key1
3097 addi $key1,$key1,0x20
3098 lvx v31,$x00,$key1
3099 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3100 addi $key_,$sp,$FRAME+15
3101 mtctr $rounds
3102
3103 Load_xts_dec_key:
3104 ?vperm v24,v30,v31,$keyperm
3105 lvx v30,$x10,$key1
3106 addi $key1,$key1,0x20
3107 stvx v24,$x00,$key_ # off-load round[1]
3108 ?vperm v25,v31,v30,$keyperm
3109 lvx v31,$x00,$key1
3110 stvx v25,$x10,$key_ # off-load round[2]
3111 addi $key_,$key_,0x20
3112 bdnz Load_xts_dec_key
3113
3114 lvx v26,$x10,$key1
3115 ?vperm v24,v30,v31,$keyperm
3116 lvx v27,$x20,$key1
3117 stvx v24,$x00,$key_ # off-load round[3]
3118 ?vperm v25,v31,v26,$keyperm
3119 lvx v28,$x30,$key1
3120 stvx v25,$x10,$key_ # off-load round[4]
3121 addi $key_,$sp,$FRAME+15 # rewind $key_
3122 ?vperm v26,v26,v27,$keyperm
3123 lvx v29,$x40,$key1
3124 ?vperm v27,v27,v28,$keyperm
3125 lvx v30,$x50,$key1
3126 ?vperm v28,v28,v29,$keyperm
3127 lvx v31,$x60,$key1
3128 ?vperm v29,v29,v30,$keyperm
3129 lvx $twk5,$x70,$key1 # borrow $twk5
3130 ?vperm v30,v30,v31,$keyperm
3131 lvx v24,$x00,$key_ # pre-load round[1]
3132 ?vperm v31,v31,$twk5,$keyperm
3133 lvx v25,$x10,$key_ # pre-load round[2]
3134
3135 vperm $in0,$inout,$inptail,$inpperm
3136 subi $inp,$inp,31 # undo "caller"
3137 vxor $twk0,$tweak,$rndkey0
3138 vsrab $tmp,$tweak,$seven # next tweak value
3139 vaddubm $tweak,$tweak,$tweak
3140 vsldoi $tmp,$tmp,$tmp,15
3141 vand $tmp,$tmp,$eighty7
3142 vxor $out0,$in0,$twk0
3143 vxor $tweak,$tweak,$tmp
3144
3145 lvx_u $in1,$x10,$inp
3146 vxor $twk1,$tweak,$rndkey0
3147 vsrab $tmp,$tweak,$seven # next tweak value
3148 vaddubm $tweak,$tweak,$tweak
3149 vsldoi $tmp,$tmp,$tmp,15
3150 le?vperm $in1,$in1,$in1,$leperm
3151 vand $tmp,$tmp,$eighty7
3152 vxor $out1,$in1,$twk1
3153 vxor $tweak,$tweak,$tmp
3154
3155 lvx_u $in2,$x20,$inp
3156 andi. $taillen,$len,15
3157 vxor $twk2,$tweak,$rndkey0
3158 vsrab $tmp,$tweak,$seven # next tweak value
3159 vaddubm $tweak,$tweak,$tweak
3160 vsldoi $tmp,$tmp,$tmp,15
3161 le?vperm $in2,$in2,$in2,$leperm
3162 vand $tmp,$tmp,$eighty7
3163 vxor $out2,$in2,$twk2
3164 vxor $tweak,$tweak,$tmp
3165
3166 lvx_u $in3,$x30,$inp
3167 sub $len,$len,$taillen
3168 vxor $twk3,$tweak,$rndkey0
3169 vsrab $tmp,$tweak,$seven # next tweak value
3170 vaddubm $tweak,$tweak,$tweak
3171 vsldoi $tmp,$tmp,$tmp,15
3172 le?vperm $in3,$in3,$in3,$leperm
3173 vand $tmp,$tmp,$eighty7
3174 vxor $out3,$in3,$twk3
3175 vxor $tweak,$tweak,$tmp
3176
3177 lvx_u $in4,$x40,$inp
3178 subi $len,$len,0x60
3179 vxor $twk4,$tweak,$rndkey0
3180 vsrab $tmp,$tweak,$seven # next tweak value
3181 vaddubm $tweak,$tweak,$tweak
3182 vsldoi $tmp,$tmp,$tmp,15
3183 le?vperm $in4,$in4,$in4,$leperm
3184 vand $tmp,$tmp,$eighty7
3185 vxor $out4,$in4,$twk4
3186 vxor $tweak,$tweak,$tmp
3187
3188 lvx_u $in5,$x50,$inp
3189 addi $inp,$inp,0x60
3190 vxor $twk5,$tweak,$rndkey0
3191 vsrab $tmp,$tweak,$seven # next tweak value
3192 vaddubm $tweak,$tweak,$tweak
3193 vsldoi $tmp,$tmp,$tmp,15
3194 le?vperm $in5,$in5,$in5,$leperm
3195 vand $tmp,$tmp,$eighty7
3196 vxor $out5,$in5,$twk5
3197 vxor $tweak,$tweak,$tmp
3198
3199 vxor v31,v31,$rndkey0
3200 mtctr $rounds
3201 b Loop_xts_dec6x
3202
3203 .align 5
3204 Loop_xts_dec6x:
3205 vncipher $out0,$out0,v24
3206 vncipher $out1,$out1,v24
3207 vncipher $out2,$out2,v24
3208 vncipher $out3,$out3,v24
3209 vncipher $out4,$out4,v24
3210 vncipher $out5,$out5,v24
3211 lvx v24,$x20,$key_ # round[3]
3212 addi $key_,$key_,0x20
3213
3214 vncipher $out0,$out0,v25
3215 vncipher $out1,$out1,v25
3216 vncipher $out2,$out2,v25
3217 vncipher $out3,$out3,v25
3218 vncipher $out4,$out4,v25
3219 vncipher $out5,$out5,v25
3220 lvx v25,$x10,$key_ # round[4]
3221 bdnz Loop_xts_dec6x
3222
3223 subic $len,$len,96 # $len-=96
3224 vxor $in0,$twk0,v31 # xor with last round key
3225 vncipher $out0,$out0,v24
3226 vncipher $out1,$out1,v24
3227 vsrab $tmp,$tweak,$seven # next tweak value
3228 vxor $twk0,$tweak,$rndkey0
3229 vaddubm $tweak,$tweak,$tweak
3230 vncipher $out2,$out2,v24
3231 vncipher $out3,$out3,v24
3232 vsldoi $tmp,$tmp,$tmp,15
3233 vncipher $out4,$out4,v24
3234 vncipher $out5,$out5,v24
3235
3236 subfe. r0,r0,r0 # borrow?-1:0
3237 vand $tmp,$tmp,$eighty7
3238 vncipher $out0,$out0,v25
3239 vncipher $out1,$out1,v25
3240 vxor $tweak,$tweak,$tmp
3241 vncipher $out2,$out2,v25
3242 vncipher $out3,$out3,v25
3243 vxor $in1,$twk1,v31
3244 vsrab $tmp,$tweak,$seven # next tweak value
3245 vxor $twk1,$tweak,$rndkey0
3246 vncipher $out4,$out4,v25
3247 vncipher $out5,$out5,v25
3248
3249 and r0,r0,$len
3250 vaddubm $tweak,$tweak,$tweak
3251 vsldoi $tmp,$tmp,$tmp,15
3252 vncipher $out0,$out0,v26
3253 vncipher $out1,$out1,v26
3254 vand $tmp,$tmp,$eighty7
3255 vncipher $out2,$out2,v26
3256 vncipher $out3,$out3,v26
3257 vxor $tweak,$tweak,$tmp
3258 vncipher $out4,$out4,v26
3259 vncipher $out5,$out5,v26
3260
3261 add $inp,$inp,r0 # $inp is adjusted in such a
3262 # way that at exit from the
3263 # loop inX-in5 are loaded
3264 # with the last "words"
3265 vxor $in2,$twk2,v31
3266 vsrab $tmp,$tweak,$seven # next tweak value
3267 vxor $twk2,$tweak,$rndkey0
3268 vaddubm $tweak,$tweak,$tweak
3269 vncipher $out0,$out0,v27
3270 vncipher $out1,$out1,v27
3271 vsldoi $tmp,$tmp,$tmp,15
3272 vncipher $out2,$out2,v27
3273 vncipher $out3,$out3,v27
3274 vand $tmp,$tmp,$eighty7
3275 vncipher $out4,$out4,v27
3276 vncipher $out5,$out5,v27
3277
3278 addi $key_,$sp,$FRAME+15 # rewind $key_
3279 vxor $tweak,$tweak,$tmp
3280 vncipher $out0,$out0,v28
3281 vncipher $out1,$out1,v28
3282 vxor $in3,$twk3,v31
3283 vsrab $tmp,$tweak,$seven # next tweak value
3284 vxor $twk3,$tweak,$rndkey0
3285 vncipher $out2,$out2,v28
3286 vncipher $out3,$out3,v28
3287 vaddubm $tweak,$tweak,$tweak
3288 vsldoi $tmp,$tmp,$tmp,15
3289 vncipher $out4,$out4,v28
3290 vncipher $out5,$out5,v28
3291 lvx v24,$x00,$key_ # re-pre-load round[1]
3292 vand $tmp,$tmp,$eighty7
3293
3294 vncipher $out0,$out0,v29
3295 vncipher $out1,$out1,v29
3296 vxor $tweak,$tweak,$tmp
3297 vncipher $out2,$out2,v29
3298 vncipher $out3,$out3,v29
3299 vxor $in4,$twk4,v31
3300 vsrab $tmp,$tweak,$seven # next tweak value
3301 vxor $twk4,$tweak,$rndkey0
3302 vncipher $out4,$out4,v29
3303 vncipher $out5,$out5,v29
3304 lvx v25,$x10,$key_ # re-pre-load round[2]
3305 vaddubm $tweak,$tweak,$tweak
3306 vsldoi $tmp,$tmp,$tmp,15
3307
3308 vncipher $out0,$out0,v30
3309 vncipher $out1,$out1,v30
3310 vand $tmp,$tmp,$eighty7
3311 vncipher $out2,$out2,v30
3312 vncipher $out3,$out3,v30
3313 vxor $tweak,$tweak,$tmp
3314 vncipher $out4,$out4,v30
3315 vncipher $out5,$out5,v30
3316 vxor $in5,$twk5,v31
3317 vsrab $tmp,$tweak,$seven # next tweak value
3318 vxor $twk5,$tweak,$rndkey0
3319
3320 vncipherlast $out0,$out0,$in0
3321 lvx_u $in0,$x00,$inp # load next input block
3322 vaddubm $tweak,$tweak,$tweak
3323 vsldoi $tmp,$tmp,$tmp,15
3324 vncipherlast $out1,$out1,$in1
3325 lvx_u $in1,$x10,$inp
3326 vncipherlast $out2,$out2,$in2
3327 le?vperm $in0,$in0,$in0,$leperm
3328 lvx_u $in2,$x20,$inp
3329 vand $tmp,$tmp,$eighty7
3330 vncipherlast $out3,$out3,$in3
3331 le?vperm $in1,$in1,$in1,$leperm
3332 lvx_u $in3,$x30,$inp
3333 vncipherlast $out4,$out4,$in4
3334 le?vperm $in2,$in2,$in2,$leperm
3335 lvx_u $in4,$x40,$inp
3336 vxor $tweak,$tweak,$tmp
3337 vncipherlast $out5,$out5,$in5
3338 le?vperm $in3,$in3,$in3,$leperm
3339 lvx_u $in5,$x50,$inp
3340 addi $inp,$inp,0x60
3341 le?vperm $in4,$in4,$in4,$leperm
3342 le?vperm $in5,$in5,$in5,$leperm
3343
3344 le?vperm $out0,$out0,$out0,$leperm
3345 le?vperm $out1,$out1,$out1,$leperm
3346 stvx_u $out0,$x00,$out # store output
3347 vxor $out0,$in0,$twk0
3348 le?vperm $out2,$out2,$out2,$leperm
3349 stvx_u $out1,$x10,$out
3350 vxor $out1,$in1,$twk1
3351 le?vperm $out3,$out3,$out3,$leperm
3352 stvx_u $out2,$x20,$out
3353 vxor $out2,$in2,$twk2
3354 le?vperm $out4,$out4,$out4,$leperm
3355 stvx_u $out3,$x30,$out
3356 vxor $out3,$in3,$twk3
3357 le?vperm $out5,$out5,$out5,$leperm
3358 stvx_u $out4,$x40,$out
3359 vxor $out4,$in4,$twk4
3360 stvx_u $out5,$x50,$out
3361 vxor $out5,$in5,$twk5
3362 addi $out,$out,0x60
3363
3364 mtctr $rounds
3365 beq Loop_xts_dec6x # did $len-=96 borrow?
3366
3367 addic. $len,$len,0x60
3368 beq Lxts_dec6x_zero
3369 cmpwi $len,0x20
3370 blt Lxts_dec6x_one
3371 nop
3372 beq Lxts_dec6x_two
3373 cmpwi $len,0x40
3374 blt Lxts_dec6x_three
3375 nop
3376 beq Lxts_dec6x_four
3377
3378 Lxts_dec6x_five:
3379 vxor $out0,$in1,$twk0
3380 vxor $out1,$in2,$twk1
3381 vxor $out2,$in3,$twk2
3382 vxor $out3,$in4,$twk3
3383 vxor $out4,$in5,$twk4
3384
3385 bl _aesp8_xts_dec5x
3386
3387 le?vperm $out0,$out0,$out0,$leperm
3388 vmr $twk0,$twk5 # unused tweak
3389 vxor $twk1,$tweak,$rndkey0
3390 le?vperm $out1,$out1,$out1,$leperm
3391 stvx_u $out0,$x00,$out # store output
3392 vxor $out0,$in0,$twk1
3393 le?vperm $out2,$out2,$out2,$leperm
3394 stvx_u $out1,$x10,$out
3395 le?vperm $out3,$out3,$out3,$leperm
3396 stvx_u $out2,$x20,$out
3397 le?vperm $out4,$out4,$out4,$leperm
3398 stvx_u $out3,$x30,$out
3399 stvx_u $out4,$x40,$out
3400 addi $out,$out,0x50
3401 bne Lxts_dec6x_steal
3402 b Lxts_dec6x_done
3403
3404 .align 4
3405 Lxts_dec6x_four:
3406 vxor $out0,$in2,$twk0
3407 vxor $out1,$in3,$twk1
3408 vxor $out2,$in4,$twk2
3409 vxor $out3,$in5,$twk3
3410 vxor $out4,$out4,$out4
3411
3412 bl _aesp8_xts_dec5x
3413
3414 le?vperm $out0,$out0,$out0,$leperm
3415 vmr $twk0,$twk4 # unused tweak
3416 vmr $twk1,$twk5
3417 le?vperm $out1,$out1,$out1,$leperm
3418 stvx_u $out0,$x00,$out # store output
3419 vxor $out0,$in0,$twk5
3420 le?vperm $out2,$out2,$out2,$leperm
3421 stvx_u $out1,$x10,$out
3422 le?vperm $out3,$out3,$out3,$leperm
3423 stvx_u $out2,$x20,$out
3424 stvx_u $out3,$x30,$out
3425 addi $out,$out,0x40
3426 bne Lxts_dec6x_steal
3427 b Lxts_dec6x_done
3428
3429 .align 4
3430 Lxts_dec6x_three:
3431 vxor $out0,$in3,$twk0
3432 vxor $out1,$in4,$twk1
3433 vxor $out2,$in5,$twk2
3434 vxor $out3,$out3,$out3
3435 vxor $out4,$out4,$out4
3436
3437 bl _aesp8_xts_dec5x
3438
3439 le?vperm $out0,$out0,$out0,$leperm
3440 vmr $twk0,$twk3 # unused tweak
3441 vmr $twk1,$twk4
3442 le?vperm $out1,$out1,$out1,$leperm
3443 stvx_u $out0,$x00,$out # store output
3444 vxor $out0,$in0,$twk4
3445 le?vperm $out2,$out2,$out2,$leperm
3446 stvx_u $out1,$x10,$out
3447 stvx_u $out2,$x20,$out
3448 addi $out,$out,0x30
3449 bne Lxts_dec6x_steal
3450 b Lxts_dec6x_done
3451
3452 .align 4
3453 Lxts_dec6x_two:
3454 vxor $out0,$in4,$twk0
3455 vxor $out1,$in5,$twk1
3456 vxor $out2,$out2,$out2
3457 vxor $out3,$out3,$out3
3458 vxor $out4,$out4,$out4
3459
3460 bl _aesp8_xts_dec5x
3461
3462 le?vperm $out0,$out0,$out0,$leperm
3463 vmr $twk0,$twk2 # unused tweak
3464 vmr $twk1,$twk3
3465 le?vperm $out1,$out1,$out1,$leperm
3466 stvx_u $out0,$x00,$out # store output
3467 vxor $out0,$in0,$twk3
3468 stvx_u $out1,$x10,$out
3469 addi $out,$out,0x20
3470 bne Lxts_dec6x_steal
3471 b Lxts_dec6x_done
3472
3473 .align 4
3474 Lxts_dec6x_one:
3475 vxor $out0,$in5,$twk0
3476 nop
3477 Loop_xts_dec1x:
3478 vncipher $out0,$out0,v24
3479 lvx v24,$x20,$key_ # round[3]
3480 addi $key_,$key_,0x20
3481
3482 vncipher $out0,$out0,v25
3483 lvx v25,$x10,$key_ # round[4]
3484 bdnz Loop_xts_dec1x
3485
3486 subi r0,$taillen,1
3487 vncipher $out0,$out0,v24
3488
3489 andi. r0,r0,16
3490 cmpwi $taillen,0
3491 vncipher $out0,$out0,v25
3492
3493 sub $inp,$inp,r0
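	# Branchless reload offset: r0 = ($taillen == 0) ? 16 : 0, so
	# $inp steps back one block only when no stolen tail follows and
	# the lvx_u below reloads the proper block either way (our
	# reading; compare the enc path, which always backs up by
	# 16 - $taillen).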
3494 vncipher $out0,$out0,v26
3495
3496 lvx_u $in0,0,$inp
3497 vncipher $out0,$out0,v27
3498
3499 addi $key_,$sp,$FRAME+15 # rewind $key_
3500 vncipher $out0,$out0,v28
3501 lvx v24,$x00,$key_ # re-pre-load round[1]
3502
3503 vncipher $out0,$out0,v29
3504 lvx v25,$x10,$key_ # re-pre-load round[2]
3505 vxor $twk0,$twk0,v31
3506
3507 le?vperm $in0,$in0,$in0,$leperm
3508 vncipher $out0,$out0,v30
3509
3510 mtctr $rounds
3511 vncipherlast $out0,$out0,$twk0
3512
3513 vmr $twk0,$twk1 # unused tweak
3514 vmr $twk1,$twk2
3515 le?vperm $out0,$out0,$out0,$leperm
3516 stvx_u $out0,$x00,$out # store output
3517 addi $out,$out,0x10
3518 vxor $out0,$in0,$twk2
3519 bne Lxts_dec6x_steal
3520 b Lxts_dec6x_done
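	# The vmr shuffling above keeps two tweaks live: with a stolen
	# tail pending, Lxts_dec6x_steal deciphers the last complete block
	# under $twk1 while the spliced final block goes back through
	# Loop_xts_dec1x under $twk0, matching the swapped tweak order of
	# the scalar Ltail_xts_dec path.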
3521
3522 .align 4
3523 Lxts_dec6x_zero:
3524 cmpwi $taillen,0
3525 beq Lxts_dec6x_done
3526
3527 lvx_u $in0,0,$inp
3528 le?vperm $in0,$in0,$in0,$leperm
3529 vxor $out0,$in0,$twk1
3530 Lxts_dec6x_steal:
3531 vncipher $out0,$out0,v24
3532 lvx v24,$x20,$key_ # round[3]
3533 addi $key_,$key_,0x20
3534
3535 vncipher $out0,$out0,v25
3536 lvx v25,$x10,$key_ # round[4]
3537 bdnz Lxts_dec6x_steal
3538
3539 add $inp,$inp,$taillen
3540 vncipher $out0,$out0,v24
3541
3542 cmpwi $taillen,0
3543 vncipher $out0,$out0,v25
3544
3545 lvx_u $in0,0,$inp
3546 vncipher $out0,$out0,v26
3547
3548 lvsr $inpperm,0,$taillen # $in5 is no longer needed
3549 vncipher $out0,$out0,v27
3550
3551 addi $key_,$sp,$FRAME+15 # rewind $key_
3552 vncipher $out0,$out0,v28
3553 lvx v24,$x00,$key_ # re-pre-load round[1]
3554
3555 vncipher $out0,$out0,v29
3556 lvx v25,$x10,$key_ # re-pre-load round[2]
3557 vxor $twk1,$twk1,v31
3558
3559 le?vperm $in0,$in0,$in0,$leperm
3560 vncipher $out0,$out0,v30
3561
3562 vperm $in0,$in0,$in0,$inpperm
3563 vncipherlast $tmp,$out0,$twk1
3564
3565 le?vperm $out0,$tmp,$tmp,$leperm
3566 le?stvx_u $out0,0,$out
3567 be?stvx_u $tmp,0,$out
3568
3569 vxor $out0,$out0,$out0
3570 vspltisb $out1,-1
3571 vperm $out0,$out0,$out1,$inpperm
3572 vsel $out0,$in0,$tmp,$out0
3573 vxor $out0,$out0,$twk0
3574
3575 subi r30,$out,1
3576 mtctr $taillen
3577 Loop_xts_dec6x_steal:
3578 lbzu r0,1(r30)
3579 stb r0,16(r30)
3580 bdnz Loop_xts_dec6x_steal
3581
3582 li $taillen,0
3583 mtctr $rounds
3584 b Loop_xts_dec1x # one more time...
3585
3586 .align 4
3587 Lxts_dec6x_done:
3588 ${UCMP}i $ivp,0
3589 beq Lxts_dec6x_ret
3590
3591 vxor $tweak,$twk0,$rndkey0
3592 le?vperm $tweak,$tweak,$tweak,$leperm
3593 stvx_u $tweak,0,$ivp
3594
3595 Lxts_dec6x_ret:
3596 mtlr r11
3597 li r10,`$FRAME+15`
3598 li r11,`$FRAME+31`
3599 stvx $seven,r10,$sp # wipe copies of round keys
3600 addi r10,r10,32
3601 stvx $seven,r11,$sp
3602 addi r11,r11,32
3603 stvx $seven,r10,$sp
3604 addi r10,r10,32
3605 stvx $seven,r11,$sp
3606 addi r11,r11,32
3607 stvx $seven,r10,$sp
3608 addi r10,r10,32
3609 stvx $seven,r11,$sp
3610 addi r11,r11,32
3611 stvx $seven,r10,$sp
3612 addi r10,r10,32
3613 stvx $seven,r11,$sp
3614 addi r11,r11,32
3615
3616 mtspr 256,$vrsave
3617 lvx v20,r10,$sp # ABI says so
3618 addi r10,r10,32
3619 lvx v21,r11,$sp
3620 addi r11,r11,32
3621 lvx v22,r10,$sp
3622 addi r10,r10,32
3623 lvx v23,r11,$sp
3624 addi r11,r11,32
3625 lvx v24,r10,$sp
3626 addi r10,r10,32
3627 lvx v25,r11,$sp
3628 addi r11,r11,32
3629 lvx v26,r10,$sp
3630 addi r10,r10,32
3631 lvx v27,r11,$sp
3632 addi r11,r11,32
3633 lvx v28,r10,$sp
3634 addi r10,r10,32
3635 lvx v29,r11,$sp
3636 addi r11,r11,32
3637 lvx v30,r10,$sp
3638 lvx v31,r11,$sp
3639 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3640 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3641 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3642 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3643 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3644 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3645 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3646 blr
3647 .long 0
3648 .byte 0,12,0x04,1,0x80,6,6,0
3649 .long 0
3650
3651 .align 5
3652 _aesp8_xts_dec5x:
3653 vncipher $out0,$out0,v24
3654 vncipher $out1,$out1,v24
3655 vncipher $out2,$out2,v24
3656 vncipher $out3,$out3,v24
3657 vncipher $out4,$out4,v24
3658 lvx v24,$x20,$key_ # round[3]
3659 addi $key_,$key_,0x20
3660
3661 vncipher $out0,$out0,v25
3662 vncipher $out1,$out1,v25
3663 vncipher $out2,$out2,v25
3664 vncipher $out3,$out3,v25
3665 vncipher $out4,$out4,v25
3666 lvx v25,$x10,$key_ # round[4]
3667 bdnz _aesp8_xts_dec5x
3668
3669 subi r0,$taillen,1
3670 vncipher $out0,$out0,v24
3671 vncipher $out1,$out1,v24
3672 vncipher $out2,$out2,v24
3673 vncipher $out3,$out3,v24
3674 vncipher $out4,$out4,v24
3675
3676 andi. r0,r0,16
3677 cmpwi $taillen,0
3678 vncipher $out0,$out0,v25
3679 vncipher $out1,$out1,v25
3680 vncipher $out2,$out2,v25
3681 vncipher $out3,$out3,v25
3682 vncipher $out4,$out4,v25
3683 vxor $twk0,$twk0,v31
3684
3685 sub $inp,$inp,r0
3686 vncipher $out0,$out0,v26
3687 vncipher $out1,$out1,v26
3688 vncipher $out2,$out2,v26
3689 vncipher $out3,$out3,v26
3690 vncipher $out4,$out4,v26
3691 vxor $in1,$twk1,v31
3692
3693 vncipher $out0,$out0,v27
3694 lvx_u $in0,0,$inp
3695 vncipher $out1,$out1,v27
3696 vncipher $out2,$out2,v27
3697 vncipher $out3,$out3,v27
3698 vncipher $out4,$out4,v27
3699 vxor $in2,$twk2,v31
3700
3701 addi $key_,$sp,$FRAME+15 # rewind $key_
3702 vncipher $out0,$out0,v28
3703 vncipher $out1,$out1,v28
3704 vncipher $out2,$out2,v28
3705 vncipher $out3,$out3,v28
3706 vncipher $out4,$out4,v28
3707 lvx v24,$x00,$key_ # re-pre-load round[1]
3708 vxor $in3,$twk3,v31
3709
3710 vncipher $out0,$out0,v29
3711 le?vperm $in0,$in0,$in0,$leperm
3712 vncipher $out1,$out1,v29
3713 vncipher $out2,$out2,v29
3714 vncipher $out3,$out3,v29
3715 vncipher $out4,$out4,v29
3716 lvx v25,$x10,$key_ # re-pre-load round[2]
3717 vxor $in4,$twk4,v31
3718
3719 vncipher $out0,$out0,v30
3720 vncipher $out1,$out1,v30
3721 vncipher $out2,$out2,v30
3722 vncipher $out3,$out3,v30
3723 vncipher $out4,$out4,v30
3724
3725 vncipherlast $out0,$out0,$twk0
3726 vncipherlast $out1,$out1,$in1
3727 vncipherlast $out2,$out2,$in2
3728 vncipherlast $out3,$out3,$in3
3729 vncipherlast $out4,$out4,$in4
3730 mtctr $rounds
3731 blr
3732 .long 0
3733 .byte 0,12,0x14,0,0,0,0,0
3734 ___
3735 }} }}}
3736
3737 my $consts=1;
3738 foreach(split("\n",$code)) {
3739 s/\`([^\`]*)\`/eval($1)/geo;
3740
3741 # constants table endian-specific conversion
3742 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3743 my $conv=$3;
3744 my @bytes=();
3745
3746 # convert to endian-agnostic format
3747 if ($1 eq "long") {
3748 foreach (split(/,\s*/,$2)) {
3749 my $l = /^0/?oct:int;
3750 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3751 }
3752 } else {
3753 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3754 }
3755
3756 # little-endian conversion
3757 if ($flavour =~ /le$/o) {
3758 SWITCH: for($conv) {
3759 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3760 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3761 }
3762 }
3763
3764 # emit
3765 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3766 next;
3767 }
3768 $consts=0 if (m/Lconsts:/o); # end of table
3769
3770 # instructions prefixed with '?' are endian-specific and need
3771 # to be adjusted accordingly...
3772 if ($flavour =~ /le$/o) { # little-endian
3773 s/le\?//o or
3774 s/be\?/#be#/o or
3775 s/\?lvsr/lvsl/o or
3776 s/\?lvsl/lvsr/o or
3777 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3778 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3779 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3780 } else { # big-endian
3781 s/le\?/#le#/o or
3782 s/be\?//o or
3783 s/\?([a-z]+)/$1/o;
3784 }
3785
3786 print $_,"\n";
3787 }
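# Example of the rewriting above for a little-endian flavour: the line
#	?vperm	v24,v30,v31,v29
# comes out as
#	vperm	v24,v31,v30,v29
# (the two source operands swap), and a "be?"-prefixed line becomes
# "#be#..." so it assembles as a comment; big-endian builds instead
# strip the '?' and comment out "le?" lines the same way.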
3788
3789 close STDOUT or die "error closing STDOUT: $!";