crypto: x86/glue_helper - use le128 instead of u128 for CTR mode
author Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Sat, 20 Oct 2012 12:06:36 +0000 (15:06 +0300)
committer Herbert Xu <herbert@gondor.apana.org.au>
Wed, 24 Oct 2012 13:10:54 +0000 (21:10 +0800)
The 'u128' type currently used for CTR mode has its two 'long long' halves
swapped relative to little-endian order, so SSE/AVX code would need extra
swap operations. Using le128 instead of u128 makes it easier to do the IV
calculations in vector registers.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/camellia_glue.c
arch/x86/crypto/cast6_avx_glue.c
arch/x86/crypto/glue_helper.c
arch/x86/crypto/serpent_avx_glue.c
arch/x86/crypto/serpent_sse2_glue.c
arch/x86/crypto/twofish_avx_glue.c
arch/x86/crypto/twofish_glue_3way.c
arch/x86/include/asm/crypto/glue_helper.h
arch/x86/include/asm/crypto/twofish.h

index 42ffd2bbab5bb9e18de2695d1f5814e4c5b43958..021a0086186bb28a4cc0cd39ce3f11eda553a378 100644 (file)
@@ -1317,21 +1317,21 @@ static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
        u128_xor(&dst[1], &dst[1], &iv);
 }
 
-static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
        be128 ctrblk;
 
        if (dst != src)
                *dst = *src;
 
-       u128_to_be128(&ctrblk, iv);
-       u128_inc(iv);
+       le128_to_be128(&ctrblk, iv);
+       le128_inc(iv);
 
        camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
 }
 
 static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
-                                   u128 *iv)
+                                   le128 *iv)
 {
        be128 ctrblks[2];
 
@@ -1340,10 +1340,10 @@ static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
                dst[1] = src[1];
        }
 
-       u128_to_be128(&ctrblks[0], iv);
-       u128_inc(iv);
-       u128_to_be128(&ctrblks[1], iv);
-       u128_inc(iv);
+       le128_to_be128(&ctrblks[0], iv);
+       le128_inc(iv);
+       le128_to_be128(&ctrblks[1], iv);
+       le128_inc(iv);
 
        camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
 }
index 15e5f85a50115a6eba3659011e35d5b95ff97558..1dfd33b5b4fb42c57ace3a07187d169a8b9a8bff 100644 (file)
@@ -78,19 +78,19 @@ static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
                u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
 }
 
-static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
        be128 ctrblk;
 
-       u128_to_be128(&ctrblk, iv);
-       u128_inc(iv);
+       le128_to_be128(&ctrblk, iv);
+       le128_inc(iv);
 
        __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
        u128_xor(dst, src, (u128 *)&ctrblk);
 }
 
 static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
-                                  u128 *iv)
+                                le128 *iv)
 {
        be128 ctrblks[CAST6_PARALLEL_BLOCKS];
        unsigned int i;
@@ -99,8 +99,8 @@ static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
                if (dst != src)
                        dst[i] = src[i];
 
-               u128_to_be128(&ctrblks[i], iv);
-               u128_inc(iv);
+               le128_to_be128(&ctrblks[i], iv);
+               le128_inc(iv);
        }
 
        cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
index 30b3927bd733b6f0c17c0d492621ee1666de2b5a..22ce4f683e550da4b464c99d8fedfce6414ea885 100644 (file)
@@ -221,16 +221,16 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
        u8 *src = (u8 *)walk->src.virt.addr;
        u8 *dst = (u8 *)walk->dst.virt.addr;
        unsigned int nbytes = walk->nbytes;
-       u128 ctrblk;
+       le128 ctrblk;
        u128 tmp;
 
-       be128_to_u128(&ctrblk, (be128 *)walk->iv);
+       be128_to_le128(&ctrblk, (be128 *)walk->iv);
 
        memcpy(&tmp, src, nbytes);
        fn_ctr(ctx, &tmp, &tmp, &ctrblk);
        memcpy(dst, &tmp, nbytes);
 
-       u128_to_be128((be128 *)walk->iv, &ctrblk);
+       le128_to_be128((be128 *)walk->iv, &ctrblk);
 }
 EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
 
@@ -243,11 +243,11 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
        unsigned int nbytes = walk->nbytes;
        u128 *src = (u128 *)walk->src.virt.addr;
        u128 *dst = (u128 *)walk->dst.virt.addr;
-       u128 ctrblk;
+       le128 ctrblk;
        unsigned int num_blocks, func_bytes;
        unsigned int i;
 
-       be128_to_u128(&ctrblk, (be128 *)walk->iv);
+       be128_to_le128(&ctrblk, (be128 *)walk->iv);
 
        /* Process multi-block batch */
        for (i = 0; i < gctx->num_funcs; i++) {
@@ -269,7 +269,7 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
        }
 
 done:
-       u128_to_be128((be128 *)walk->iv, &ctrblk);
+       le128_to_be128((be128 *)walk->iv, &ctrblk);
        return nbytes;
 }
 
index 3f543a04cf1ee2c697e2b2ea1b427ea7f8631386..2aa31ade1e68312ba48d61e56dfff1e377eba1ad 100644 (file)
@@ -56,19 +56,19 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
                u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
 }
 
-static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
        be128 ctrblk;
 
-       u128_to_be128(&ctrblk, iv);
-       u128_inc(iv);
+       le128_to_be128(&ctrblk, iv);
+       le128_inc(iv);
 
        __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
        u128_xor(dst, src, (u128 *)&ctrblk);
 }
 
 static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
-                                  u128 *iv)
+                                  le128 *iv)
 {
        be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
        unsigned int i;
@@ -77,8 +77,8 @@ static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
                if (dst != src)
                        dst[i] = src[i];
 
-               u128_to_be128(&ctrblks[i], iv);
-               u128_inc(iv);
+               le128_to_be128(&ctrblks[i], iv);
+               le128_inc(iv);
        }
 
        serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
index 9107a9908c41ecb4a3e591f0c10380487d5c5b47..97a356ece24d2b74d18090760e988c45d2bc914a 100644 (file)
@@ -59,19 +59,19 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
                u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
 }
 
-static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
        be128 ctrblk;
 
-       u128_to_be128(&ctrblk, iv);
-       u128_inc(iv);
+       le128_to_be128(&ctrblk, iv);
+       le128_inc(iv);
 
        __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
        u128_xor(dst, src, (u128 *)&ctrblk);
 }
 
 static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
-                                  u128 *iv)
+                                  le128 *iv)
 {
        be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
        unsigned int i;
@@ -80,8 +80,8 @@ static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
                if (dst != src)
                        dst[i] = src[i];
 
-               u128_to_be128(&ctrblks[i], iv);
-               u128_inc(iv);
+               le128_to_be128(&ctrblks[i], iv);
+               le128_inc(iv);
        }
 
        serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
index e7708b5442e0b59ab7478306124c8c84493a551c..810e45d5118695b70e52dee7a61bfb0599c976e2 100644 (file)
@@ -90,7 +90,7 @@ static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
 }
 
 static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
-                                    u128 *iv)
+                                    le128 *iv)
 {
        be128 ctrblks[TWOFISH_PARALLEL_BLOCKS];
        unsigned int i;
@@ -99,8 +99,8 @@ static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
                if (dst != src)
                        dst[i] = src[i];
 
-               u128_to_be128(&ctrblks[i], iv);
-               u128_inc(iv);
+               le128_to_be128(&ctrblks[i], iv);
+               le128_inc(iv);
        }
 
        twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
index aa3eb358b7e81b82d1816cbafbc58a04349d3428..13e63b3e1dfb44593ea2274a63adebfbdce7e6ce 100644 (file)
@@ -62,15 +62,15 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
 }
 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
 
-void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
        be128 ctrblk;
 
        if (dst != src)
                *dst = *src;
 
-       u128_to_be128(&ctrblk, iv);
-       u128_inc(iv);
+       le128_to_be128(&ctrblk, iv);
+       le128_inc(iv);
 
        twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
        u128_xor(dst, dst, (u128 *)&ctrblk);
@@ -78,7 +78,7 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
 
 void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
-                                    u128 *iv)
+                             le128 *iv)
 {
        be128 ctrblks[3];
 
@@ -88,12 +88,12 @@ void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
                dst[2] = src[2];
        }
 
-       u128_to_be128(&ctrblks[0], iv);
-       u128_inc(iv);
-       u128_to_be128(&ctrblks[1], iv);
-       u128_inc(iv);
-       u128_to_be128(&ctrblks[2], iv);
-       u128_inc(iv);
+       le128_to_be128(&ctrblks[0], iv);
+       le128_inc(iv);
+       le128_to_be128(&ctrblks[1], iv);
+       le128_inc(iv);
+       le128_to_be128(&ctrblks[2], iv);
+       le128_inc(iv);
 
        twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
 }
index 3e408bddc96f1f80ae66c60152c14533fdd12012..e2d65b061d27801c0c515dda3d1bc956ada4b563 100644 (file)
@@ -13,7 +13,7 @@
 typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
 typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
 typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
-                                      u128 *iv);
+                                      le128 *iv);
 
 #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
 #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
@@ -71,23 +71,29 @@ static inline void glue_fpu_end(bool fpu_enabled)
                kernel_fpu_end();
 }
 
-static inline void u128_to_be128(be128 *dst, const u128 *src)
+static inline void le128_to_be128(be128 *dst, const le128 *src)
 {
-       dst->a = cpu_to_be64(src->a);
-       dst->b = cpu_to_be64(src->b);
+       dst->a = cpu_to_be64(le64_to_cpu(src->a));
+       dst->b = cpu_to_be64(le64_to_cpu(src->b));
 }
 
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static inline void be128_to_le128(le128 *dst, const be128 *src)
 {
-       dst->a = be64_to_cpu(src->a);
-       dst->b = be64_to_cpu(src->b);
+       dst->a = cpu_to_le64(be64_to_cpu(src->a));
+       dst->b = cpu_to_le64(be64_to_cpu(src->b));
 }
 
-static inline void u128_inc(u128 *i)
+static inline void le128_inc(le128 *i)
 {
-       i->b++;
-       if (!i->b)
-               i->a++;
+       u64 a = le64_to_cpu(i->a);
+       u64 b = le64_to_cpu(i->b);
+
+       b++;
+       if (!b)
+               a++;
+
+       i->a = cpu_to_le64(a);
+       i->b = cpu_to_le64(b);
 }
 
 extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
index 9d2c514bd5f90020b3d22036a73ab816735b3416..878c51ceebb57f408d78eb94abb13d16d0c2801d 100644 (file)
@@ -31,9 +31,9 @@ asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 /* helpers from twofish_x86_64-3way module */
 extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
 extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
-                               u128 *iv);
+                               le128 *iv);
 extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
-                                    u128 *iv);
+                                    le128 *iv);
 
 extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
                              unsigned int keylen);