f2fs: split bio cache

[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] / crypto / gf128mul.c
diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c

index 72015fee533deed95aff7e41866472358244682e..dc012129c0631ab11a9d8afa69d8c57b06dc3ead 100644
--- a/crypto/gf128mul.c
+++ b/crypto/gf128mul.c
@@ -44,7 +44,7 @@
   ---------------------------------------------------------------------------
   Issue 31/01/2006
  
- This file provides fast multiplication in GF(128) as required by several
+ This file provides fast multiplication in GF(2^128) as required by several
   cryptographic authentication modes
  */
  
@@ -88,76 +88,59 @@
         q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
  }
  
-/*     Given the value i in 0..255 as the byte overflow when a field element
-    in GHASH is multiplied by x^8, this function will return the values that
-    are generated in the lo 16-bit word of the field value by applying the
-    modular polynomial. The values lo_byte and hi_byte are returned via the
-    macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
-    memory as required by a suitable definition of this macro operating on
-    the table above
-*/
-
-#define xx(p, q)       0x##p##q
+/*
+ * Given a value i in 0..255 as the byte overflow when a field element
+ * in GF(2^128) is multiplied by x^8, the following macro returns the
+ * 16-bit value that must be XOR-ed into the low-degree end of the
+ * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
+ *
+ * There are two versions of the macro, and hence two tables: one for
+ * the "be" convention where the highest-order bit is the coefficient of
+ * the highest-degree polynomial term, and one for the "le" convention
+ * where the highest-order bit is the coefficient of the lowest-degree
+ * polynomial term.  In both cases the values are stored in CPU byte
+ * endianness such that the coefficients are ordered consistently across
+ * bytes, i.e. in the "be" table bits 15..0 of the stored value
+ * correspond to the coefficients of x^15..x^0, and in the "le" table
+ * bits 15..0 correspond to the coefficients of x^0..x^15.
+ *
+ * Therefore, provided that the appropriate byte endianness conversions
+ * are done by the multiplication functions (and these must be in place
+ * anyway to support both little endian and big endian CPUs), the "be"
+ * table can be used for multiplications of both "bbe" and "ble"
+ * elements, and the "le" table can be used for multiplications of both
+ * "lle" and "lbe" elements.
+ */
  
-#define xda_bbe(i) ( \
-       (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
-       (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
-       (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
-       (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
+#define xda_be(i) ( \
+       (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
+       (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
+       (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
+       (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
  )
  
-#define xda_lle(i) ( \
-       (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
-       (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
-       (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
-       (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
+#define xda_le(i) ( \
+       (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
+       (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
+       (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
+       (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
  )
  
-static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
-static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
+static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
+static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
  
-/* These functions multiply a field element by x, by x^4 and by x^8
- * in the polynomial field representation. It uses 32-bit word operations
- * to gain speed but compensates for machine endianess and hence works
+/*
+ * The following functions multiply a field element by x^8 in
+ * the polynomial field representation.  They use 64-bit word operations
+ * to gain speed but compensate for machine endianness and hence work
   * correctly on both styles of machine.
   */
  
-static void gf128mul_x_lle(be128 *r, const be128 *x)
-{
-       u64 a = be64_to_cpu(x->a);
-       u64 b = be64_to_cpu(x->b);
-       u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
-
-       r->b = cpu_to_be64((b >> 1) | (a << 63));
-       r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
-}
-
-static void gf128mul_x_bbe(be128 *r, const be128 *x)
-{
-       u64 a = be64_to_cpu(x->a);
-       u64 b = be64_to_cpu(x->b);
-       u64 _tt = gf128mul_table_bbe[a >> 63];
-
-       r->a = cpu_to_be64((a << 1) | (b >> 63));
-       r->b = cpu_to_be64((b << 1) ^ _tt);
-}
-
-void gf128mul_x_ble(be128 *r, const be128 *x)
-{
-       u64 a = le64_to_cpu(x->a);
-       u64 b = le64_to_cpu(x->b);
-       u64 _tt = gf128mul_table_bbe[b >> 63];
-
-       r->a = cpu_to_le64((a << 1) ^ _tt);
-       r->b = cpu_to_le64((b << 1) | (a >> 63));
-}
-EXPORT_SYMBOL(gf128mul_x_ble);
-
  static void gf128mul_x8_lle(be128 *x)
  {
         u64 a = be64_to_cpu(x->a);
         u64 b = be64_to_cpu(x->b);
-       u64 _tt = gf128mul_table_lle[b & 0xff];
+       u64 _tt = gf128mul_table_le[b & 0xff];
  
         x->b = cpu_to_be64((b >> 8) | (a << 56));
         x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
@@ -167,7 +150,7 @@ static void gf128mul_x8_bbe(be128 *x)
  {
         u64 a = be64_to_cpu(x->a);
         u64 b = be64_to_cpu(x->b);
-       u64 _tt = gf128mul_table_bbe[a >> 56];
+       u64 _tt = gf128mul_table_be[a >> 56];
  
         x->a = cpu_to_be64((a << 8) | (b >> 56));
         x->b = cpu_to_be64((b << 8) ^ _tt);
@@ -251,7 +234,7 @@ EXPORT_SYMBOL(gf128mul_bbe);
  
  /*      This version uses 64k bytes of table space.
      A 16 byte buffer has to be multiplied by a 16 byte key
-    value in GF(128).  If we consider a GF(128) value in
+    value in GF(2^128).  If we consider a GF(2^128) value in
      the buffer's lowest byte, we can construct a table of
      the 256 16 byte values that result from the 256 values
      of this byte.  This requires 4096 bytes. But we also
@@ -315,7 +298,7 @@ void gf128mul_free_64k(struct gf128mul_64k *t)
  }
  EXPORT_SYMBOL(gf128mul_free_64k);
  
-void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
+void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
  {
         u8 *ap = (u8 *)a;
         be128 r[1];
@@ -330,7 +313,7 @@ EXPORT_SYMBOL(gf128mul_64k_bbe);
  
  /*      This version uses 4k bytes of table space.
      A 16 byte buffer has to be multiplied by a 16 byte key
-    value in GF(128).  If we consider a GF(128) value in a
+    value in GF(2^128).  If we consider a GF(2^128) value in a
      single byte, we can construct a table of the 256 16 byte
      values that result from the 256 values of this byte.
      This requires 4096 bytes. If we take the highest byte in
@@ -388,7 +371,7 @@ out:
  }
  EXPORT_SYMBOL(gf128mul_init_4k_bbe);
  
-void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
+void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
  {
         u8 *ap = (u8 *)a;
         be128 r[1];
@@ -403,7 +386,7 @@ void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
  }
  EXPORT_SYMBOL(gf128mul_4k_lle);
  
-void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
+void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
  {
         u8 *ap = (u8 *)a;
         be128 r[1];