From aa955f54ac25981e75206aa99b9d50c6b2fdaf74 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 23 Aug 2018 17:48:45 +0100
Subject: [PATCH] BACKPORT: crypto: arm64/aes-modes - get rid of literal load
 of addend vector

commit ed6ed11830a9ded520db31a6e2b69b6b0a1eb0e2 upstream.

Replace the literal load of the addend vector with a sequence that
performs each add individually. This sequence is only 2 instructions
longer than the original, and 2% faster on Cortex-A53.

This is an improvement by itself, but also works around a Clang issue,
whose integrated assembler does not implement the GNU ARM asm syntax
completely, and does not support the =literal notation for FP registers
(more info at https://bugs.llvm.org/show_bug.cgi?id=38642)

Cc: Nick Desaulniers
Signed-off-by: Ard Biesheuvel
Reviewed-by: Nick Desaulniers
Signed-off-by: Herbert Xu
Signed-off-by: Greg Kroah-Hartman
Change-Id: Ic8f7adcd28bd2da57b465a8e11e9d55b5669a539
---
 arch/arm64/crypto/aes-modes.S | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 838dad5c209f..067b4e222f74 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -297,17 +297,19 @@ AES_ENTRY(aes_ctr_encrypt)
 		eor		v1.16b, v1.16b, v3.16b
 		st1		{v0.16b-v1.16b}, [x0], #32
 #else
-		ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
-		dup		v7.4s, w8
+		add		w7, w6, #1
 		mov		v0.16b, v4.16b
-		add		v7.4s, v7.4s, v8.4s
+		add		w8, w6, #2
 		mov		v1.16b, v4.16b
-		rev32		v8.16b, v7.16b
+		add		w9, w6, #3
 		mov		v2.16b, v4.16b
+		rev		w7, w7
 		mov		v3.16b, v4.16b
-		mov		v1.s[3], v8.s[0]
-		mov		v2.s[3], v8.s[1]
-		mov		v3.s[3], v8.s[2]
+		rev		w8, w8
+		mov		v1.s[3], w7
+		rev		w9, w9
+		mov		v2.s[3], w8
+		mov		v3.s[3], w9
 		ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
 		do_encrypt_block4x
 		eor		v0.16b, v5.16b, v0.16b
-- 
2.20.1
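
Note (illustrative only, not part of the patch): a minimal C sketch of what the
new instruction sequence computes. v4 holds the big-endian CTR block and w6 its
byte-swapped (host-order) low 32 bits; each of the three following blocks gets
counter+1..3 byte-swapped back with "rev" and inserted into lane 3, while block
0 keeps the original counter. It assumes a little-endian host, as on AArch64
Linux; the names ctr_setup_4x, c and blk are hypothetical and exist only for
this example.

    #include <stdint.h>
    #include <string.h>

    /*
     * Build four consecutive CTR blocks from one big-endian counter block.
     * "ctr" mirrors v4 (the current counter block); "c" mirrors w6, the
     * byte-swapped low 32 bits of that counter.
     */
    static void ctr_setup_4x(const uint8_t ctr[16], uint32_t c,
                             uint8_t blk[4][16])
    {
    	for (int i = 0; i < 4; i++) {
    		memcpy(blk[i], ctr, 16);		/* mov  vN.16b, v4.16b */

    		if (i) {				/* block 0 keeps v4 as is */
    			/* add wN, w6, #i ; rev wN, wN */
    			uint32_t lane3 = __builtin_bswap32(c + i);

    			memcpy(&blk[i][12], &lane3, 4);	/* mov  vN.s[3], wN */
    		}
    	}
    }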