@ A little glue here to select the correct code below for the ARM CPU
@ that is being targeted.
+#include <linux/linkage.h>
+
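+@ ENTRY()/ENDPROC() below come from <linux/linkage.h> and replace the
+@ hand-rolled .global/.type/.size directives.  Roughly, as a sketch of
+@ the generic definitions (modulo per-arch alignment):
+@
+@   ENTRY(name)    ->  .globl name ; .align ; name:
+@   ENDPROC(name)  ->  .type name, %function ; .size name, . - name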
.text
-.code 32
.type AES_Te,%object
.align 5
@ void AES_encrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
-.global AES_encrypt
-.type AES_encrypt,%function
.align 5
-AES_encrypt:
+ENTRY(AES_encrypt)
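+@ AAPCS arguments per the prototype above: r0 = in, r1 = out, r2 = key.
+@ In ARM state pc reads as the current instruction's address + 8, so
+@ this sub leaves the address of AES_encrypt itself in r3, anchoring
+@ pc-relative addressing of the AES_Te tables.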
sub r3,pc,#8 @ AES_encrypt
stmdb sp!,{r1,r4-r12,lr}
mov r12,r0 @ inp
strb r6,[r12,#14]
strb r3,[r12,#15]
#endif
-#if __ARM_ARCH__>=5
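+@ The in-kernel build can return straight through the ldmia into pc;
+@ the ARMv4 interworking tail (moveq pc,lr plus a hand-encoded bx lr)
+@ that the portable OpenSSL version carries is not needed here.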
ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size AES_encrypt,.-AES_encrypt
+ENDPROC(AES_encrypt)
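+@ A minimal sketch of the matching C-side declaration (assuming the
+@ kernel glue's u8/AES_KEY types; not part of this file):
+@
+@   asmlinkage void AES_encrypt(const u8 *in, u8 *out,
+@                               const AES_KEY *key);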
.type _armv4_AES_encrypt,%function
.align 2
ldr pc,[sp],#4 @ pop and return
.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
-.global private_AES_set_encrypt_key
-.type private_AES_set_encrypt_key,%function
.align 5
-private_AES_set_encrypt_key:
+ENTRY(private_AES_set_encrypt_key)
_armv4_AES_set_encrypt_key:
sub r3,pc,#8 @ AES_set_encrypt_key
teq r0,#0
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
-.Labrt: tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+.Labrt: mov pc,lr
+ENDPROC(private_AES_set_encrypt_key)
-.global private_AES_set_decrypt_key
-.type private_AES_set_decrypt_key,%function
.align 5
-private_AES_set_decrypt_key:
+ENTRY(private_AES_set_decrypt_key)
str lr,[sp,#-4]! @ push lr
#if 0
@ kernel does both of these in setkey so optimise this bit out by
bne .Lmix
mov r0,#0
-#if __ARM_ARCH__>=5
ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+ENDPROC(private_AES_set_decrypt_key)
.type AES_Td,%object
.align 5
@ void AES_decrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
-.global AES_decrypt
-.type AES_decrypt,%function
.align 5
-AES_decrypt:
+ENTRY(AES_decrypt)
sub r3,pc,#8 @ AES_decrypt
stmdb sp!,{r1,r4-r12,lr}
mov r12,r0 @ inp
strb r6,[r12,#14]
strb r3,[r12,#15]
#endif
-#if __ARM_ARCH__>=5
ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size AES_decrypt,.-AES_decrypt
+ENDPROC(AES_decrypt)
.type _armv4_AES_decrypt,%function
.align 2
and r9,lr,r1,lsr#8
ldrb r7,[r10,r7] @ Td4[s1>>0]
- ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
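+@ Thumb-2 loads cannot encode a shifted register offset other than
+@ LSL #0-3, so the THUMB path forms the address with an add first;
+@ the same split repeats for s2 and s3 below.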
+ ARM( ldrb r1,[r10,r1,lsr#24] ) @ Td4[s1>>24]
+ THUMB( add r1,r10,r1,lsr#24 ) @ Td4[s1>>24]
+ THUMB( ldrb r1,[r1] )
ldrb r8,[r10,r8] @ Td4[s1>>16]
eor r0,r7,r0,lsl#24
ldrb r9,[r10,r9] @ Td4[s1>>8]
ldrb r8,[r10,r8] @ Td4[s2>>0]
and r9,lr,r2,lsr#16
- ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
+ ARM( ldrb r2,[r10,r2,lsr#24] ) @ Td4[s2>>24]
+ THUMB( add r2,r10,r2,lsr#24 ) @ Td4[s2>>24]
+ THUMB( ldrb r2,[r2] )
eor r0,r0,r7,lsl#8
ldrb r9,[r10,r9] @ Td4[s2>>16]
eor r1,r8,r1,lsl#16
and r9,lr,r3 @ i2
ldrb r9,[r10,r9] @ Td4[s3>>0]
- ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
+ ARM( ldrb r3,[r10,r3,lsr#24] ) @ Td4[s3>>24]
+ THUMB( add r3,r10,r3,lsr#24 ) @ Td4[s3>>24]
+ THUMB( ldrb r3,[r3] )
eor r0,r0,r7,lsl#16
ldr r7,[r11,#0]
eor r1,r1,r8,lsl#8
@ Profiler-assisted and platform-specific optimization resulted in 10%
@ improvement on Cortex A8 core and 12.2 cycles per byte.
-.text
+#include <linux/linkage.h>
-.global sha1_block_data_order
-.type sha1_block_data_order,%function
+.text
.align 2
-sha1_block_data_order:
+ENTRY(sha1_block_data_order)
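+@ CRYPTOGAMS calling convention: r0 = 5-word SHA-1 state, r1 = input
+@ data, r2 = number of 64-byte blocks (hence the lsl#6 below).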
stmdb sp!,{r4-r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
ldmia r0,{r3,r4,r5,r6,r7}
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r3,r3,r10 @ E+=F_00_19(B,C,D)
- teq r14,sp
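+@ Thumb-2 forbids sp as a teq operand; cmp accepts sp, and only the
+@ Z flag is consumed here, so the carry clobber is harmless.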
+ cmp r14,sp
bne .L_00_15 @ [((11+4)*5+2)*3]
#if __ARM_ARCH__<7
ldrb r10,[r1,#2]
@ F_xx_xx
add r3,r3,r9 @ E+=X[i]
add r3,r3,r10 @ E+=F_20_39(B,C,D)
- teq r14,sp @ preserve carry
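+@ Here carry must survive the comparison, so cmp (which rewrites C)
+@ will not do, and Thumb-2 cannot teq against sp; copy sp to r11 and
+@ teq against that instead.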
+ ARM( teq r14,sp ) @ preserve carry
+ THUMB( mov r11,sp )
+ THUMB( teq r14,r11 ) @ preserve carry
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
add r3,r3,r9 @ E+=X[i]
add r3,r3,r10 @ E+=F_40_59(B,C,D)
add r3,r3,r11,ror#2
- teq r14,sp
+ cmp r14,sp
bne .L_40_59 @ [+((12+5)*5+2)*4]
ldr r8,.LK_60_79
teq r1,r2
bne .Lloop @ [+18], total 1307
-#if __ARM_ARCH__>=5
ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
.align 2
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
-.size sha1_block_data_order,.-sha1_block_data_order
+ENDPROC(sha1_block_data_order)
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align 2