crypto: arm64/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Thu, 9 Apr 2015 10:55:45 +0000 (12:55 +0200)
committer: Herbert Xu <herbert@gondor.apana.org.au>
Fri, 10 Apr 2015 13:39:46 +0000 (21:39 +0800)
This removes all the boilerplate from the existing implementation,
and replaces it with calls into the base layer.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm64/crypto/sha2-ce-core.S
arch/arm64/crypto/sha2-ce-glue.c

index 7f29fc031ea8de7520713f9d35a6265a7288c70b..5df9d9d470adb53156ae88acd785fdf0b78c43bd 100644 (file)
@@ -73,8 +73,8 @@
        .word           0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
 
        /*
-        * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
-        *                        u8 *head, long bytes)
+        * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+        *                        int blocks)
         */
 ENTRY(sha2_ce_transform)
        /* load round constants */
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform)
        ld1             {v12.4s-v15.4s}, [x8]
 
        /* load state */
-       ldp             dga, dgb, [x2]
+       ldp             dga, dgb, [x0]
 
-       /* load partial input (if supplied) */
-       cbz             x3, 0f
-       ld1             {v16.4s-v19.4s}, [x3]
-       b               1f
+       /* load sha256_ce_state::finalize */
+       ldr             w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
 
        /* load input */
 0:     ld1             {v16.4s-v19.4s}, [x1], #64
-       sub             w0, w0, #1
+       sub             w2, w2, #1
 
-1:
 CPU_LE(        rev32           v16.16b, v16.16b        )
 CPU_LE(        rev32           v17.16b, v17.16b        )
 CPU_LE(        rev32           v18.16b, v18.16b        )
 CPU_LE(        rev32           v19.16b, v19.16b        )
 
-2:     add             t0.4s, v16.4s, v0.4s
+1:     add             t0.4s, v16.4s, v0.4s
        mov             dg0v.16b, dgav.16b
        mov             dg1v.16b, dgbv.16b
 
@@ -131,15 +128,15 @@ CPU_LE(   rev32           v19.16b, v19.16b        )
        add             dgbv.4s, dgbv.4s, dg1v.4s
 
        /* handled all input blocks? */
-       cbnz            w0, 0b
+       cbnz            w2, 0b
 
        /*
         * Final block: add padding and total bit count.
-        * Skip if we have no total byte count in x4. In that case, the input
-        * size was not a round multiple of the block size, and the padding is
-        * handled by the C code.
+        * Skip if the input size was not a round multiple of the block size,
+        * the padding is handled by the C code in that case.
         */
        cbz             x4, 3f
+       ldr             x4, [x0, #:lo12:sha256_ce_offsetof_count]
        movi            v17.2d, #0
        mov             x8, #0x80000000
        movi            v18.2d, #0
@@ -148,9 +145,9 @@ CPU_LE(     rev32           v19.16b, v19.16b        )
        mov             x4, #0
        mov             v19.d[0], xzr
        mov             v19.d[1], x7
-       b               2b
+       b               1b
 
        /* store new state */
-3:     stp             dga, dgb, [x2]
+3:     stp             dga, dgb, [x0]
        ret
 ENDPROC(sha2_ce_transform)
index ae67e88c28b99497b4ae394bc849bdb20cacdd02..1340e44c048beab91279b6dd317f10590a6f06b9 100644 (file)
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
+#include <crypto/sha256_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#define ASM_EXPORT(sym, val) \
+       asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
-                                u8 *head, long bytes);
-
-static int sha224_init(struct shash_desc *desc)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-
-       *sctx = (struct sha256_state){
-               .state = {
-                       SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
-                       SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
-               }
-       };
-       return 0;
-}
-
-static int sha256_init(struct shash_desc *desc)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-
-       *sctx = (struct sha256_state){
-               .state = {
-                       SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
-                       SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
-               }
-       };
-       return 0;
-}
-
-static int sha2_update(struct shash_desc *desc, const u8 *data,
-                      unsigned int len)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
-       sctx->count += len;
-
-       if ((partial + len) >= SHA256_BLOCK_SIZE) {
-               int blocks;
-
-               if (partial) {
-                       int p = SHA256_BLOCK_SIZE - partial;
-
-                       memcpy(sctx->buf + partial, data, p);
-                       data += p;
-                       len -= p;
-               }
+struct sha256_ce_state {
+       struct sha256_state     sst;
+       u32                     finalize;
+};
 
-               blocks = len / SHA256_BLOCK_SIZE;
-               len %= SHA256_BLOCK_SIZE;
+asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+                                 int blocks);
 
-               kernel_neon_begin_partial(28);
-               sha2_ce_transform(blocks, data, sctx->state,
-                                 partial ? sctx->buf : NULL, 0);
-               kernel_neon_end();
-
-               data += blocks * SHA256_BLOCK_SIZE;
-               partial = 0;
-       }
-       if (len)
-               memcpy(sctx->buf + partial, data, len);
-       return 0;
-}
-
-static void sha2_final(struct shash_desc *desc)
+static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
+                           unsigned int len)
 {
-       static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       __be64 bits = cpu_to_be64(sctx->count << 3);
-       u32 padlen = SHA256_BLOCK_SIZE
-                    - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
-
-       sha2_update(desc, padding, padlen);
-       sha2_update(desc, (const u8 *)&bits, sizeof(bits));
-}
-
-static int sha224_final(struct shash_desc *desc, u8 *out)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       __be32 *dst = (__be32 *)out;
-       int i;
-
-       sha2_final(desc);
-
-       for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
-               put_unaligned_be32(sctx->state[i], dst++);
-
-       *sctx = (struct sha256_state){};
-       return 0;
-}
+       struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
-static int sha256_final(struct shash_desc *desc, u8 *out)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       __be32 *dst = (__be32 *)out;
-       int i;
-
-       sha2_final(desc);
-
-       for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
-               put_unaligned_be32(sctx->state[i], dst++);
+       sctx->finalize = 0;
+       kernel_neon_begin_partial(28);
+       sha256_base_do_update(desc, data, len,
+                             (sha256_block_fn *)sha2_ce_transform);
+       kernel_neon_end();
 
-       *sctx = (struct sha256_state){};
        return 0;
 }
 
-static void sha2_finup(struct shash_desc *desc, const u8 *data,
-                      unsigned int len)
+/*
+ * Hash the trailing @len bytes at @data, finalize, and write the digest to
+ * @out. Returns the result of sha256_base_finish().
+ */
+static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
+                          unsigned int len, u8 *out)
 {
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       int blocks;
+       struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+       bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) && len;
 
-       if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
-               sha2_update(desc, data, len);
-               sha2_final(desc);
-               return;
-       }
+       ASM_EXPORT(sha256_ce_offsetof_count,
+                  offsetof(struct sha256_ce_state, sst.count));
+       ASM_EXPORT(sha256_ce_offsetof_finalize,
+                  offsetof(struct sha256_ce_state, finalize));
 
        /*
-        * Use a fast path if the input is a multiple of 64 bytes. In
-        * this case, there is no need to copy data around, and we can
-        * perform the entire digest calculation in a single invocation
-        * of sha2_ce_transform()
+        * Allow the asm code to perform the finalization if there is no
+        * partial data and the input is a non-empty round multiple of the
+        * block size. Empty input must take the C path: with no blocks to
+        * process the asm code is never entered, so it could not finalize.
         */
-       blocks = len / SHA256_BLOCK_SIZE;
+       sctx->finalize = finalize;
 
        kernel_neon_begin_partial(28);
-       sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+       sha256_base_do_update(desc, data, len,
+                             (sha256_block_fn *)sha2_ce_transform);
+       if (!finalize)
+               sha256_base_do_finalize(desc,
+                                       (sha256_block_fn *)sha2_ce_transform);
        kernel_neon_end();
+       return sha256_base_finish(desc, out);
 }
 
-static int sha224_finup(struct shash_desc *desc, const u8 *data,
-                       unsigned int len, u8 *out)
+/*
+ * Pad and hash whatever is still buffered, then write the digest to @out.
+ * Returns the result of sha256_base_finish().
+ */
+static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       __be32 *dst = (__be32 *)out;
-       int i;
-
-       sha2_finup(desc, data, len);
-
-       for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
-               put_unaligned_be32(sctx->state[i], dst++);
-
-       *sctx = (struct sha256_state){};
-       return 0;
-}
-
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
-                       unsigned int len, u8 *out)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       __be32 *dst = (__be32 *)out;
-       int i;
-
-       sha2_finup(desc, data, len);
-
-       for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
-               put_unaligned_be32(sctx->state[i], dst++);
-
-       *sctx = (struct sha256_state){};
-       return 0;
-}
-
-static int sha2_export(struct shash_desc *desc, void *out)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       struct sha256_state *dst = out;
-
-       *dst = *sctx;
-       return 0;
-}
-
-static int sha2_import(struct shash_desc *desc, const void *in)
-{
-       struct sha256_state *sctx = shash_desc_ctx(desc);
-       struct sha256_state const *src = in;
-
-       *sctx = *src;
-       return 0;
+       struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+       /*
+        * The flag is only written by update() and finup(). Clear it here so
+        * a value that was never set, or was left over, cannot make the asm
+        * code add padding on top of the padding generated by
+        * sha256_base_do_finalize().
+        */
+       sctx->finalize = 0;
+       kernel_neon_begin_partial(28);
+       sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+       kernel_neon_end();
+       return sha256_base_finish(desc, out);
 }
 
 static struct shash_alg algs[] = { {
-       .init                   = sha224_init,
-       .update                 = sha2_update,
-       .final                  = sha224_final,
-       .finup                  = sha224_finup,
-       .export                 = sha2_export,
-       .import                 = sha2_import,
-       .descsize               = sizeof(struct sha256_state),
+       .init                   = sha224_base_init,
+       .update                 = sha256_ce_update,
+       .final                  = sha256_ce_final,
+       .finup                  = sha256_ce_finup,
+       .descsize               = sizeof(struct sha256_ce_state),
        .digestsize             = SHA224_DIGEST_SIZE,
-       .statesize              = sizeof(struct sha256_state),
        .base                   = {
                .cra_name               = "sha224",
                .cra_driver_name        = "sha224-ce",
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { {
                .cra_module             = THIS_MODULE,
        }
 }, {
-       .init                   = sha256_init,
-       .update                 = sha2_update,
-       .final                  = sha256_final,
-       .finup                  = sha256_finup,
-       .export                 = sha2_export,
-       .import                 = sha2_import,
-       .descsize               = sizeof(struct sha256_state),
+       .init                   = sha256_base_init,
+       .update                 = sha256_ce_update,
+       .final                  = sha256_ce_final,
+       .finup                  = sha256_ce_finup,
+       .descsize               = sizeof(struct sha256_ce_state),
        .digestsize             = SHA256_DIGEST_SIZE,
-       .statesize              = sizeof(struct sha256_state),
        .base                   = {
                .cra_name               = "sha256",
                .cra_driver_name        = "sha256-ce",