crypto: arm/aes - replace scalar AES cipher
author Ard Biesheuvel <ard.biesheuvel@linaro.org>
Wed, 11 Jan 2017 16:41:53 +0000 (16:41 +0000)
committer Herbert Xu <herbert@gondor.apana.org.au>
Thu, 12 Jan 2017 16:26:50 +0000 (00:26 +0800)
This replaces the scalar AES cipher that originates in the OpenSSL project
with a new implementation that is ~15% (*) faster (on modern cores), and
reuses the lookup tables and the key schedule generation routines from the
generic C implementation (which is usually compiled in anyway due to
networking and other subsystems depending on it).

Note that the bit sliced NEON code for AES still depends on the scalar cipher
that this patch replaces, so it is not removed entirely yet.

* On Cortex-A57, performance improves from 17.0 to 14.9 cycles per byte
  for 128-bit keys.
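  (For reference: 17.0 / 14.9 ≈ 1.14, i.e. roughly 14% higher throughput,
  consistent with the ~15% figure quoted above.)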

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm/crypto/Kconfig
arch/arm/crypto/Makefile
arch/arm/crypto/aes-cipher-core.S [new file with mode: 0644]
arch/arm/crypto/aes-cipher-glue.c [new file with mode: 0644]
arch/arm/crypto/aes_glue.c [deleted file]

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 2f3339f015d36c22c4b7c58bd24ab0a87071b2ac..f1de658c3c8f99a1288dc6da93fee3755ceb26f5 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -62,33 +62,15 @@ config CRYPTO_SHA512_ARM
          using optimized ARM assembler and NEON, when available.
 
 config CRYPTO_AES_ARM
-       tristate "AES cipher algorithms (ARM-asm)"
-       depends on ARM
+       tristate "Scalar AES cipher for ARM"
        select CRYPTO_ALGAPI
        select CRYPTO_AES
        help
          Use optimized AES assembler routines for ARM platforms.
 
-         AES cipher algorithms (FIPS-197). AES uses the Rijndael
-         algorithm.
-
-         Rijndael appears to be consistently a very good performer in
-         both hardware and software across a wide range of computing
-         environments regardless of its use in feedback or non-feedback
-         modes. Its key setup time is excellent, and its key agility is
-         good. Rijndael's very low memory requirements make it very well
-         suited for restricted-space environments, in which it also
-         demonstrates excellent performance. Rijndael's operations are
-         among the easiest to defend against power and timing attacks.
-
-         The AES specifies three key sizes: 128, 192 and 256 bits
-
-         See <http://csrc.nist.gov/encryption/aes/> for more information.
-
 config CRYPTO_AES_ARM_BS
        tristate "Bit sliced AES using NEON instructions"
        depends on KERNEL_MODE_NEON
-       select CRYPTO_AES_ARM
        select CRYPTO_BLKCIPHER
        select CRYPTO_SIMD
        help
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8d74e55eacd41e21609b8f3903ce4a4e8c79739e..8f5de2db701c310356c9b0a68c072fa3cfd74deb 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -27,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m))
 endif
 endif
 
-aes-arm-y      := aes-armv4.o aes_glue.o
-aes-arm-bs-y   := aesbs-core.o aesbs-glue.o
+aes-arm-y      := aes-cipher-core.o aes-cipher-glue.o
+aes-arm-bs-y   := aes-armv4.o aesbs-core.o aesbs-glue.o
 sha1-arm-y     := sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y        := sha1-armv7-neon.o sha1_neon_glue.o
 sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
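
(Note the aes-armv4.o move above: the old OpenSSL-derived scalar code is
now built into the aes-arm-bs module instead, since, as the commit message
says, the bit sliced NEON implementation still depends on it.)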
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
new file mode 100644
index 0000000..b04261e
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -0,0 +1,179 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+       .text
+       .align          5
+
+       rk              .req    r0
+       rounds          .req    r1
+       in              .req    r2
+       out             .req    r3
+       tt              .req    ip
+
+       t0              .req    lr
+       t1              .req    r2
+       t2              .req    r3
+
+       .macro          __select, out, in, idx
+       .if             __LINUX_ARM_ARCH__ < 7
+       and             \out, \in, #0xff << (8 * \idx)
+       .else
+       ubfx            \out, \in, #(8 * \idx), #8
+       .endif
+       .endm
+
+       .macro          __load, out, in, idx
+       .if             __LINUX_ARM_ARCH__ < 7 && \idx > 0
+       ldr             \out, [tt, \in, lsr #(8 * \idx) - 2]
+       .else
+       ldr             \out, [tt, \in, lsl #2]
+       .endif
+       .endm
+
+       .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+       __select        \out0, \in0, 0
+       __select        t0, \in1, 1
+       __load          \out0, \out0, 0
+       __load          t0, t0, 1
+
+       .if             \enc
+       __select        \out1, \in1, 0
+       __select        t1, \in2, 1
+       .else
+       __select        \out1, \in3, 0
+       __select        t1, \in0, 1
+       .endif
+       __load          \out1, \out1, 0
+       __select        t2, \in2, 2
+       __load          t1, t1, 1
+       __load          t2, t2, 2
+
+       eor             \out0, \out0, t0, ror #24
+
+       __select        t0, \in3, 3
+       .if             \enc
+       __select        \t3, \in3, 2
+       __select        \t4, \in0, 3
+       .else
+       __select        \t3, \in1, 2
+       __select        \t4, \in2, 3
+       .endif
+       __load          \t3, \t3, 2
+       __load          t0, t0, 3
+       __load          \t4, \t4, 3
+
+       eor             \out1, \out1, t1, ror #24
+       eor             \out0, \out0, t2, ror #16
+       ldm             rk!, {t1, t2}
+       eor             \out1, \out1, \t3, ror #16
+       eor             \out0, \out0, t0, ror #8
+       eor             \out1, \out1, \t4, ror #8
+       eor             \out0, \out0, t1
+       eor             \out1, \out1, t2
+       .endm
+
+       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3
+       __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
+       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+       .endm
+
+       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3
+       __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
+       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+       .endm
+
+       .macro          __rev, out, in
+       .if             __LINUX_ARM_ARCH__ < 6
+       lsl             t0, \in, #24
+       and             t1, \in, #0xff00
+       and             t2, \in, #0xff0000
+       orr             \out, t0, \in, lsr #24
+       orr             \out, \out, t1, lsl #8
+       orr             \out, \out, t2, lsr #8
+       .else
+       rev             \out, \in
+       .endif
+       .endm
+
+       .macro          __adrl, out, sym, c
+       .if             __LINUX_ARM_ARCH__ < 7
+       ldr\c           \out, =\sym
+       .else
+       movw\c          \out, #:lower16:\sym
+       movt\c          \out, #:upper16:\sym
+       .endif
+       .endm
+
+       .macro          do_crypt, round, ttab, ltab
+       push            {r3-r11, lr}
+
+       ldr             r4, [in]
+       ldr             r5, [in, #4]
+       ldr             r6, [in, #8]
+       ldr             r7, [in, #12]
+
+       ldm             rk!, {r8-r11}
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       __rev           r4, r4
+       __rev           r5, r5
+       __rev           r6, r6
+       __rev           r7, r7
+#endif
+
+       eor             r4, r4, r8
+       eor             r5, r5, r9
+       eor             r6, r6, r10
+       eor             r7, r7, r11
+
+       __adrl          tt, \ttab
+
+       tst             rounds, #2
+       bne             1f
+
+0:     \round          r8, r9, r10, r11, r4, r5, r6, r7
+       \round          r4, r5, r6, r7, r8, r9, r10, r11
+
+1:     subs            rounds, rounds, #4
+       \round          r8, r9, r10, r11, r4, r5, r6, r7
+       __adrl          tt, \ltab, ls
+       \round          r4, r5, r6, r7, r8, r9, r10, r11
+       bhi             0b
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       __rev           r4, r4
+       __rev           r5, r5
+       __rev           r6, r6
+       __rev           r7, r7
+#endif
+
+       ldr             out, [sp]
+
+       str             r4, [out]
+       str             r5, [out, #4]
+       str             r6, [out, #8]
+       str             r7, [out, #12]
+
+       pop             {r3-r11, pc}
+
+       .align          3
+       .ltorg
+       .endm
+
+ENTRY(__aes_arm_encrypt)
+       do_crypt        fround, crypto_ft_tab, crypto_fl_tab
+ENDPROC(__aes_arm_encrypt)
+
+ENTRY(__aes_arm_decrypt)
+       do_crypt        iround, crypto_it_tab, crypto_il_tab
+ENDPROC(__aes_arm_decrypt)
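
For readers tracing the macros above: each fround/iround call expands
__hround twice, and each __hround computes two output columns of the round
via four table lookups per column, rotating the loaded words by 0, 24, 16
and 8 bits before XOR-ing in two round-key words fetched with ldm rk!.
The loop in do_crypt retires the rounds in pairs; the tst rounds, #2 entry
branch handles round counts that are not a multiple of four (10 and 14),
and the conditional __adrl ls switches tt to the final-round table just
before the last round. As a rough illustration only (not part of the
patch), one forward-round output column corresponds to the following
hypothetical C helper, where ft[] stands for the first 256-entry column of
crypto_ft_tab and ror32() is the kernel's rotate-right helper from
<linux/bitops.h>:

	static u32 fround_column(const u32 *ft, u32 s0, u32 s1,
				 u32 s2, u32 s3, u32 rk)
	{
		/* byte 0 of s0: looked up directly, no rotation */
		return ft[s0 & 0xff] ^
		       /* byte 1 of s1: the eor ..., ror #24 in __hround */
		       ror32(ft[(s1 >> 8) & 0xff], 24) ^
		       /* byte 2 of s2: eor ..., ror #16 */
		       ror32(ft[(s2 >> 16) & 0xff], 16) ^
		       /* byte 3 of s3: eor ..., ror #8 */
		       ror32(ft[(s3 >> 24) & 0xff], 8) ^
		       /* round-key word loaded by ldm rk! */
		       rk;
	}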
diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
new file mode 100644
index 0000000..c222f6e
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-glue.c
@@ -0,0 +1,74 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL(__aes_arm_encrypt);
+
+asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL(__aes_arm_decrypt);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+       struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+       int rounds = 6 + ctx->key_length / 4;
+
+       __aes_arm_encrypt(ctx->key_enc, rounds, in, out);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+       struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+       int rounds = 6 + ctx->key_length / 4;
+
+       __aes_arm_decrypt(ctx->key_dec, rounds, in, out);
+}
+
+static struct crypto_alg aes_alg = {
+       .cra_name                       = "aes",
+       .cra_driver_name                = "aes-arm",
+       .cra_priority                   = 200,
+       .cra_flags                      = CRYPTO_ALG_TYPE_CIPHER,
+       .cra_blocksize                  = AES_BLOCK_SIZE,
+       .cra_ctxsize                    = sizeof(struct crypto_aes_ctx),
+       .cra_module                     = THIS_MODULE,
+
+       .cra_cipher.cia_min_keysize     = AES_MIN_KEY_SIZE,
+       .cra_cipher.cia_max_keysize     = AES_MAX_KEY_SIZE,
+       .cra_cipher.cia_setkey          = crypto_aes_set_key,
+       .cra_cipher.cia_encrypt         = aes_encrypt,
+       .cra_cipher.cia_decrypt         = aes_decrypt,
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       .cra_alignmask                  = 3,
+#endif
+};
+
+static int __init aes_init(void)
+{
+       return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+       crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Scalar AES cipher for ARM");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("aes");
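
As a quick check of the rounds = 6 + ctx->key_length / 4 expression in the
glue code above: key_length is in bytes, so a 16-byte AES-128 key gives
16/4 + 6 = 10 rounds, a 24-byte AES-192 key gives 12, and a 32-byte
AES-256 key gives 14, matching FIPS-197.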
diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c
deleted file mode 100644
index 0409b8f..0000000
--- a/arch/arm/crypto/aes_glue.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Glue Code for the asm optimized version of the AES Cipher Algorithm
- */
-
-#include <linux/module.h>
-#include <linux/crypto.h>
-#include <crypto/aes.h>
-
-#include "aes_glue.h"
-
-EXPORT_SYMBOL(AES_encrypt);
-EXPORT_SYMBOL(AES_decrypt);
-EXPORT_SYMBOL(private_AES_set_encrypt_key);
-EXPORT_SYMBOL(private_AES_set_decrypt_key);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
-       AES_encrypt(src, dst, &ctx->enc_key);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
-       AES_decrypt(src, dst, &ctx->dec_key);
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-               unsigned int key_len)
-{
-       struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
-
-       switch (key_len) {
-       case AES_KEYSIZE_128:
-               key_len = 128;
-               break;
-       case AES_KEYSIZE_192:
-               key_len = 192;
-               break;
-       case AES_KEYSIZE_256:
-               key_len = 256;
-               break;
-       default:
-               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-               return -EINVAL;
-       }
-
-       if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
-               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-               return -EINVAL;
-       }
-       /* private_AES_set_decrypt_key expects an encryption key as input */
-       ctx->dec_key = ctx->enc_key;
-       if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
-               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static struct crypto_alg aes_alg = {
-       .cra_name               = "aes",
-       .cra_driver_name        = "aes-asm",
-       .cra_priority           = 200,
-       .cra_flags              = CRYPTO_ALG_TYPE_CIPHER,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct AES_CTX),
-       .cra_module             = THIS_MODULE,
-       .cra_list               = LIST_HEAD_INIT(aes_alg.cra_list),
-       .cra_u  = {
-               .cipher = {
-                       .cia_min_keysize        = AES_MIN_KEY_SIZE,
-                       .cia_max_keysize        = AES_MAX_KEY_SIZE,
-                       .cia_setkey             = aes_set_key,
-                       .cia_encrypt            = aes_encrypt,
-                       .cia_decrypt            = aes_decrypt
-               }
-       }
-};
-
-static int __init aes_init(void)
-{
-       return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-       crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("aes");
-MODULE_ALIAS_CRYPTO("aes-asm");
-MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");