Commit | Line | Data |
---|---|---|
c494e070 RS |
1 | /* gf128mul.c - GF(2^128) multiplication functions |
2 | * | |
3 | * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. | |
4 | * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org> | |
5 | * | |
6 | * Based on Dr Brian Gladman's (GPL'd) work published at | |
8c882f64 | 7 | * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php |
c494e070 RS |
8 | * See the original copyright notice below. |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify it | |
11 | * under the terms of the GNU General Public License as published by the Free | |
12 | * Software Foundation; either version 2 of the License, or (at your option) | |
13 | * any later version. | |
14 | */ | |
15 | ||
16 | /* | |
17 | --------------------------------------------------------------------------- | |
18 | Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. | |
19 | ||
20 | LICENSE TERMS | |
21 | ||
22 | The free distribution and use of this software in both source and binary | |
23 | form is allowed (with or without changes) provided that: | |
24 | ||
25 | 1. distributions of this source code include the above copyright | |
26 | notice, this list of conditions and the following disclaimer; | |
27 | ||
28 | 2. distributions in binary form include the above copyright | |
29 | notice, this list of conditions and the following disclaimer | |
30 | in the documentation and/or other associated materials; | |
31 | ||
32 | 3. the copyright holder's name is not used to endorse products | |
33 | built using this software without specific written permission. | |
34 | ||
35 | ALTERNATIVELY, provided that this notice is retained in full, this product | |
36 | may be distributed under the terms of the GNU General Public License (GPL), | |
37 | in which case the provisions of the GPL apply INSTEAD OF those given above. | |
38 | ||
39 | DISCLAIMER | |
40 | ||
41 | This software is provided 'as is' with no explicit or implied warranties | |
42 | in respect of its properties, including, but not limited to, correctness | |
43 | and/or fitness for purpose. | |
44 | --------------------------------------------------------------------------- | |
45 | Issue 31/01/2006 | |
46 | ||
ce2ace45 | 47 | This file provides fast multiplication in GF(2^128) as required by several |
c494e070 RS |
48 | cryptographic authentication modes |
49 | */ | |
50 | ||
51 | #include <crypto/gf128mul.h> | |
52 | #include <linux/kernel.h> | |
53 | #include <linux/module.h> | |
54 | #include <linux/slab.h> | |
55 | ||
/*
 * gf128mul_dat(q) expands to a brace-enclosed initializer list made of
 * q(0x00), q(0x01), ..., q(0xff) in ascending order, i.e. one entry per
 * possible byte value.  q is any function-like macro (or function) that
 * takes a single integer argument.
 *
 * gf128mul_dat_row() emits the 16 consecutive entries that share the
 * high nibble given in hi.  Every argument handed to q is fully
 * parenthesized so that q may safely combine it with other operators.
 */
#define gf128mul_dat_row(q, hi)						\
	q(((hi) + 0x0)), q(((hi) + 0x1)), q(((hi) + 0x2)), q(((hi) + 0x3)), \
	q(((hi) + 0x4)), q(((hi) + 0x5)), q(((hi) + 0x6)), q(((hi) + 0x7)), \
	q(((hi) + 0x8)), q(((hi) + 0x9)), q(((hi) + 0xa)), q(((hi) + 0xb)), \
	q(((hi) + 0xc)), q(((hi) + 0xd)), q(((hi) + 0xe)), q(((hi) + 0xf))

#define gf128mul_dat(q) {						\
	gf128mul_dat_row(q, 0x00), gf128mul_dat_row(q, 0x10),		\
	gf128mul_dat_row(q, 0x20), gf128mul_dat_row(q, 0x30),		\
	gf128mul_dat_row(q, 0x40), gf128mul_dat_row(q, 0x50),		\
	gf128mul_dat_row(q, 0x60), gf128mul_dat_row(q, 0x70),		\
	gf128mul_dat_row(q, 0x80), gf128mul_dat_row(q, 0x90),		\
	gf128mul_dat_row(q, 0xa0), gf128mul_dat_row(q, 0xb0),		\
	gf128mul_dat_row(q, 0xc0), gf128mul_dat_row(q, 0xd0),		\
	gf128mul_dat_row(q, 0xe0), gf128mul_dat_row(q, 0xf0)		\
}
90 | ||
3eaf06b7 EB |
91 | /* |
92 | * Given a value i in 0..255 as the byte overflow when a field element | |
93 | * in GF(2^128) is multiplied by x^8, the following macro returns the | |
94 | * 16-bit value that must be XOR-ed into the low-degree end of the | |
95 | * product to reduce it modulo the irreducible polynomial x^128 + x^7 + | |
96 | * x^2 + x + 1. | |
97 | * | |
98 | * There are two versions of the macro, and hence two tables: one for | |
99 | * the "be" convention where the highest-order bit is the coefficient of | |
100 | * the highest-degree polynomial term, and one for the "le" convention | |
101 | * where the highest-order bit is the coefficient of the lowest-degree | |
102 | * polynomial term. In both cases the values are stored in CPU byte | |
103 | * endianness such that the coefficients are ordered consistently across | |
104 | * bytes, i.e. in the "be" table bits 15..0 of the stored value | |
105 | * correspond to the coefficients of x^15..x^0, and in the "le" table | |
106 | * bits 15..0 correspond to the coefficients of x^0..x^15. | |
107 | * | |
108 | * Therefore, provided that the appropriate byte endianness conversions | |
109 | * are done by the multiplication functions (and these must be in place | |
110 | * anyway to support both little endian and big endian CPUs), the "be" | |
111 | * table can be used for multiplications of both "bbe" and "ble" | |
112 | * elements, and the "le" table can be used for multiplications of both | |
113 | * "lle" and "lbe" elements. | |
114 | */ | |
c494e070 | 115 | |
3eaf06b7 EB |
/*
 * xda_be(i): 16-bit reduction value for overflow byte i in the "be"
 * convention.  Bit 0 of i contributes 0x0087 (x^7 + x^2 + x + 1) and
 * every higher bit contributes that constant shifted left by one more
 * position; the contributions of all set bits are XOR-ed together.
 *
 * The parameter is parenthesized at every use so that an expression
 * argument (e.g. "a | b") is masked as a whole instead of being torn
 * apart by the higher precedence of '&'.
 */
#define xda_be(i) ( \
	((i) & 0x80 ? 0x4380 : 0) ^ ((i) & 0x40 ? 0x21c0 : 0) ^ \
	((i) & 0x20 ? 0x10e0 : 0) ^ ((i) & 0x10 ? 0x0870 : 0) ^ \
	((i) & 0x08 ? 0x0438 : 0) ^ ((i) & 0x04 ? 0x021c : 0) ^ \
	((i) & 0x02 ? 0x010e : 0) ^ ((i) & 0x01 ? 0x0087 : 0) \
)
122 | ||
3eaf06b7 EB |
/*
 * xda_le(i): 16-bit reduction value for overflow byte i in the "le"
 * convention.  Bit 7 of i contributes 0xe100 and every lower bit
 * contributes that constant shifted right by one more position; the
 * contributions of all set bits are XOR-ed together.
 *
 * The parameter is parenthesized at every use so that an expression
 * argument (e.g. "a | b") is masked as a whole instead of being torn
 * apart by the higher precedence of '&'.
 */
#define xda_le(i) ( \
	((i) & 0x80 ? 0xe100 : 0) ^ ((i) & 0x40 ? 0x7080 : 0) ^ \
	((i) & 0x20 ? 0x3840 : 0) ^ ((i) & 0x10 ? 0x1c20 : 0) ^ \
	((i) & 0x08 ? 0x0e10 : 0) ^ ((i) & 0x04 ? 0x0708 : 0) ^ \
	((i) & 0x02 ? 0x0384 : 0) ^ ((i) & 0x01 ? 0x01c2 : 0) \
)
129 | ||
3eaf06b7 EB |
130 | static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le); |
131 | static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be); | |
c494e070 | 132 | |
ce2ace45 AC |
133 | /* |
134 | * The following functions multiply a field element by x or by x^8 in | |
135 | * the polynomial field representation. They use 64-bit word operations | |
136 | * to gain speed but compensate for machine endianness and hence work | |
c494e070 RS |
137 | * correctly on both styles of machine. |
138 | */ | |
139 | ||
140 | static void gf128mul_x_lle(be128 *r, const be128 *x) | |
141 | { | |
142 | u64 a = be64_to_cpu(x->a); | |
143 | u64 b = be64_to_cpu(x->b); | |
3eaf06b7 | 144 | u64 _tt = gf128mul_table_le[(b << 7) & 0xff]; |
c494e070 RS |
145 | |
146 | r->b = cpu_to_be64((b >> 1) | (a << 63)); | |
147 | r->a = cpu_to_be64((a >> 1) ^ (_tt << 48)); | |
148 | } | |
149 | ||
150 | static void gf128mul_x_bbe(be128 *r, const be128 *x) | |
151 | { | |
152 | u64 a = be64_to_cpu(x->a); | |
153 | u64 b = be64_to_cpu(x->b); | |
3eaf06b7 | 154 | u64 _tt = gf128mul_table_be[a >> 63]; |
c494e070 RS |
155 | |
156 | r->a = cpu_to_be64((a << 1) | (b >> 63)); | |
157 | r->b = cpu_to_be64((b << 1) ^ _tt); | |
158 | } | |
159 | ||
f19f5111 RS |
160 | void gf128mul_x_ble(be128 *r, const be128 *x) |
161 | { | |
162 | u64 a = le64_to_cpu(x->a); | |
163 | u64 b = le64_to_cpu(x->b); | |
3eaf06b7 | 164 | u64 _tt = gf128mul_table_be[b >> 63]; |
f19f5111 RS |
165 | |
166 | r->a = cpu_to_le64((a << 1) ^ _tt); | |
167 | r->b = cpu_to_le64((b << 1) | (a >> 63)); | |
168 | } | |
169 | EXPORT_SYMBOL(gf128mul_x_ble); | |
170 | ||
c494e070 RS |
171 | static void gf128mul_x8_lle(be128 *x) |
172 | { | |
173 | u64 a = be64_to_cpu(x->a); | |
174 | u64 b = be64_to_cpu(x->b); | |
3eaf06b7 | 175 | u64 _tt = gf128mul_table_le[b & 0xff]; |
c494e070 RS |
176 | |
177 | x->b = cpu_to_be64((b >> 8) | (a << 56)); | |
178 | x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); | |
179 | } | |
180 | ||
181 | static void gf128mul_x8_bbe(be128 *x) | |
182 | { | |
183 | u64 a = be64_to_cpu(x->a); | |
184 | u64 b = be64_to_cpu(x->b); | |
3eaf06b7 | 185 | u64 _tt = gf128mul_table_be[a >> 56]; |
c494e070 RS |
186 | |
187 | x->a = cpu_to_be64((a << 8) | (b >> 56)); | |
188 | x->b = cpu_to_be64((b << 8) ^ _tt); | |
189 | } | |
190 | ||
ce2ace45 AC |
191 | static void gf128mul_x8_ble(be128 *x) |
192 | { | |
193 | u64 a = le64_to_cpu(x->b); | |
194 | u64 b = le64_to_cpu(x->a); | |
195 | u64 _tt = gf128mul_table_be[a >> 56]; | |
196 | ||
197 | x->b = cpu_to_le64((a << 8) | (b >> 56)); | |
198 | x->a = cpu_to_le64((b << 8) ^ _tt); | |
199 | } | |
200 | ||
c494e070 RS |
201 | void gf128mul_lle(be128 *r, const be128 *b) |
202 | { | |
203 | be128 p[8]; | |
204 | int i; | |
205 | ||
206 | p[0] = *r; | |
207 | for (i = 0; i < 7; ++i) | |
208 | gf128mul_x_lle(&p[i + 1], &p[i]); | |
209 | ||
62542663 | 210 | memset(r, 0, sizeof(*r)); |
c494e070 RS |
211 | for (i = 0;;) { |
212 | u8 ch = ((u8 *)b)[15 - i]; | |
213 | ||
214 | if (ch & 0x80) | |
215 | be128_xor(r, r, &p[0]); | |
216 | if (ch & 0x40) | |
217 | be128_xor(r, r, &p[1]); | |
218 | if (ch & 0x20) | |
219 | be128_xor(r, r, &p[2]); | |
220 | if (ch & 0x10) | |
221 | be128_xor(r, r, &p[3]); | |
222 | if (ch & 0x08) | |
223 | be128_xor(r, r, &p[4]); | |
224 | if (ch & 0x04) | |
225 | be128_xor(r, r, &p[5]); | |
226 | if (ch & 0x02) | |
227 | be128_xor(r, r, &p[6]); | |
228 | if (ch & 0x01) | |
229 | be128_xor(r, r, &p[7]); | |
230 | ||
231 | if (++i >= 16) | |
232 | break; | |
233 | ||
234 | gf128mul_x8_lle(r); | |
235 | } | |
236 | } | |
237 | EXPORT_SYMBOL(gf128mul_lle); | |
238 | ||
239 | void gf128mul_bbe(be128 *r, const be128 *b) | |
240 | { | |
241 | be128 p[8]; | |
242 | int i; | |
243 | ||
244 | p[0] = *r; | |
245 | for (i = 0; i < 7; ++i) | |
246 | gf128mul_x_bbe(&p[i + 1], &p[i]); | |
247 | ||
62542663 | 248 | memset(r, 0, sizeof(*r)); |
c494e070 RS |
249 | for (i = 0;;) { |
250 | u8 ch = ((u8 *)b)[i]; | |
251 | ||
252 | if (ch & 0x80) | |
253 | be128_xor(r, r, &p[7]); | |
254 | if (ch & 0x40) | |
255 | be128_xor(r, r, &p[6]); | |
256 | if (ch & 0x20) | |
257 | be128_xor(r, r, &p[5]); | |
258 | if (ch & 0x10) | |
259 | be128_xor(r, r, &p[4]); | |
260 | if (ch & 0x08) | |
261 | be128_xor(r, r, &p[3]); | |
262 | if (ch & 0x04) | |
263 | be128_xor(r, r, &p[2]); | |
264 | if (ch & 0x02) | |
265 | be128_xor(r, r, &p[1]); | |
266 | if (ch & 0x01) | |
267 | be128_xor(r, r, &p[0]); | |
268 | ||
269 | if (++i >= 16) | |
270 | break; | |
271 | ||
272 | gf128mul_x8_bbe(r); | |
273 | } | |
274 | } | |
275 | EXPORT_SYMBOL(gf128mul_bbe); | |
276 | ||
ce2ace45 AC |
277 | void gf128mul_ble(be128 *r, const be128 *b) |
278 | { | |
279 | be128 p[8]; | |
280 | int i; | |
281 | ||
282 | p[0] = *r; | |
283 | for (i = 0; i < 7; ++i) | |
284 | gf128mul_x_ble((be128 *)&p[i + 1], (be128 *)&p[i]); | |
285 | ||
286 | memset(r, 0, sizeof(*r)); | |
287 | for (i = 0;;) { | |
288 | u8 ch = ((u8 *)b)[15 - i]; | |
289 | ||
290 | if (ch & 0x80) | |
291 | be128_xor(r, r, &p[7]); | |
292 | if (ch & 0x40) | |
293 | be128_xor(r, r, &p[6]); | |
294 | if (ch & 0x20) | |
295 | be128_xor(r, r, &p[5]); | |
296 | if (ch & 0x10) | |
297 | be128_xor(r, r, &p[4]); | |
298 | if (ch & 0x08) | |
299 | be128_xor(r, r, &p[3]); | |
300 | if (ch & 0x04) | |
301 | be128_xor(r, r, &p[2]); | |
302 | if (ch & 0x02) | |
303 | be128_xor(r, r, &p[1]); | |
304 | if (ch & 0x01) | |
305 | be128_xor(r, r, &p[0]); | |
306 | ||
307 | if (++i >= 16) | |
308 | break; | |
309 | ||
310 | gf128mul_x8_ble(r); | |
311 | } | |
312 | } | |
313 | EXPORT_SYMBOL(gf128mul_ble); | |
314 | ||
315 | ||
c494e070 RS |
316 | /* This version uses 64k bytes of table space. |
317 | A 16 byte buffer has to be multiplied by a 16 byte key | |
ce2ace45 | 318 | value in GF(2^128). If we consider a GF(2^128) value in |
c494e070 RS |
319 | the buffer's lowest byte, we can construct a table of |
320 | the 256 16 byte values that result from the 256 values | |
321 | of this byte. This requires 4096 bytes. But we also | |
322 | need tables for each of the 15 higher bytes in the | |
323 | buffer as well, which makes 64 kbytes in total. | |
324 | */ | |
325 | /* additional explanation | |
326 | * t[0][BYTE] contains g*BYTE | |
327 | * t[1][BYTE] contains g*x^8*BYTE | |
328 | * .. | |
329 | * t[15][BYTE] contains g*x^120*BYTE */ | |
330 | struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g) | |
331 | { | |
332 | struct gf128mul_64k *t; | |
333 | int i, j, k; | |
334 | ||
335 | t = kzalloc(sizeof(*t), GFP_KERNEL); | |
336 | if (!t) | |
337 | goto out; | |
338 | ||
339 | for (i = 0; i < 16; i++) { | |
340 | t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); | |
341 | if (!t->t[i]) { | |
342 | gf128mul_free_64k(t); | |
343 | t = NULL; | |
344 | goto out; | |
345 | } | |
346 | } | |
347 | ||
348 | t->t[0]->t[128] = *g; | |
349 | for (j = 64; j > 0; j >>= 1) | |
350 | gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]); | |
351 | ||
352 | for (i = 0;;) { | |
353 | for (j = 2; j < 256; j += j) | |
354 | for (k = 1; k < j; ++k) | |
355 | be128_xor(&t->t[i]->t[j + k], | |
356 | &t->t[i]->t[j], &t->t[i]->t[k]); | |
357 | ||
358 | if (++i >= 16) | |
359 | break; | |
360 | ||
361 | for (j = 128; j > 0; j >>= 1) { | |
362 | t->t[i]->t[j] = t->t[i - 1]->t[j]; | |
363 | gf128mul_x8_lle(&t->t[i]->t[j]); | |
364 | } | |
365 | } | |
366 | ||
367 | out: | |
368 | return t; | |
369 | } | |
370 | EXPORT_SYMBOL(gf128mul_init_64k_lle); | |
371 | ||
372 | struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g) | |
373 | { | |
374 | struct gf128mul_64k *t; | |
375 | int i, j, k; | |
376 | ||
377 | t = kzalloc(sizeof(*t), GFP_KERNEL); | |
378 | if (!t) | |
379 | goto out; | |
380 | ||
381 | for (i = 0; i < 16; i++) { | |
382 | t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); | |
383 | if (!t->t[i]) { | |
384 | gf128mul_free_64k(t); | |
385 | t = NULL; | |
386 | goto out; | |
387 | } | |
388 | } | |
389 | ||
390 | t->t[0]->t[1] = *g; | |
391 | for (j = 1; j <= 64; j <<= 1) | |
392 | gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]); | |
393 | ||
394 | for (i = 0;;) { | |
395 | for (j = 2; j < 256; j += j) | |
396 | for (k = 1; k < j; ++k) | |
397 | be128_xor(&t->t[i]->t[j + k], | |
398 | &t->t[i]->t[j], &t->t[i]->t[k]); | |
399 | ||
400 | if (++i >= 16) | |
401 | break; | |
402 | ||
403 | for (j = 128; j > 0; j >>= 1) { | |
404 | t->t[i]->t[j] = t->t[i - 1]->t[j]; | |
405 | gf128mul_x8_bbe(&t->t[i]->t[j]); | |
406 | } | |
407 | } | |
408 | ||
409 | out: | |
410 | return t; | |
411 | } | |
412 | EXPORT_SYMBOL(gf128mul_init_64k_bbe); | |
413 | ||
414 | void gf128mul_free_64k(struct gf128mul_64k *t) | |
415 | { | |
416 | int i; | |
417 | ||
418 | for (i = 0; i < 16; i++) | |
8ea7531e AC |
419 | kzfree(t->t[i]); |
420 | kzfree(t); | |
c494e070 RS |
421 | } |
422 | EXPORT_SYMBOL(gf128mul_free_64k); | |
423 | ||
424 | void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t) | |
425 | { | |
426 | u8 *ap = (u8 *)a; | |
427 | be128 r[1]; | |
428 | int i; | |
429 | ||
430 | *r = t->t[0]->t[ap[0]]; | |
431 | for (i = 1; i < 16; ++i) | |
432 | be128_xor(r, r, &t->t[i]->t[ap[i]]); | |
433 | *a = *r; | |
434 | } | |
435 | EXPORT_SYMBOL(gf128mul_64k_lle); | |
436 | ||
437 | void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t) | |
438 | { | |
439 | u8 *ap = (u8 *)a; | |
440 | be128 r[1]; | |
441 | int i; | |
442 | ||
443 | *r = t->t[0]->t[ap[15]]; | |
444 | for (i = 1; i < 16; ++i) | |
445 | be128_xor(r, r, &t->t[i]->t[ap[15 - i]]); | |
446 | *a = *r; | |
447 | } | |
448 | EXPORT_SYMBOL(gf128mul_64k_bbe); | |
449 | ||
450 | /* This version uses 4k bytes of table space. | |
451 | A 16 byte buffer has to be multiplied by a 16 byte key | |
ce2ace45 | 452 | value in GF(2^128). If we consider a GF(2^128) value in a |
c494e070 RS |
453 | single byte, we can construct a table of the 256 16 byte |
454 | values that result from the 256 values of this byte. | |
455 | This requires 4096 bytes. If we take the highest byte in | |
456 | the buffer and use this table to get the result, we then | |
457 | have to multiply by x^120 to get the final value. For the | |
458 | next highest byte the result has to be multiplied by x^112 | |
459 | and so on. But we can do this by accumulating the result | |
460 | in an accumulator starting with the result for the top | |
461 | byte. We repeatedly multiply the accumulator value by | |
462 | x^8 and then add in (i.e. xor) the 16 bytes of the next | |
463 | lower byte in the buffer, stopping when we reach the | |
464 | lowest byte. This requires a 4096 byte table. | |
465 | */ | |
466 | struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g) | |
467 | { | |
468 | struct gf128mul_4k *t; | |
469 | int j, k; | |
470 | ||
471 | t = kzalloc(sizeof(*t), GFP_KERNEL); | |
472 | if (!t) | |
473 | goto out; | |
474 | ||
475 | t->t[128] = *g; | |
476 | for (j = 64; j > 0; j >>= 1) | |
477 | gf128mul_x_lle(&t->t[j], &t->t[j+j]); | |
478 | ||
479 | for (j = 2; j < 256; j += j) | |
480 | for (k = 1; k < j; ++k) | |
481 | be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); | |
482 | ||
483 | out: | |
484 | return t; | |
485 | } | |
486 | EXPORT_SYMBOL(gf128mul_init_4k_lle); | |
487 | ||
488 | struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g) | |
489 | { | |
490 | struct gf128mul_4k *t; | |
491 | int j, k; | |
492 | ||
493 | t = kzalloc(sizeof(*t), GFP_KERNEL); | |
494 | if (!t) | |
495 | goto out; | |
496 | ||
497 | t->t[1] = *g; | |
498 | for (j = 1; j <= 64; j <<= 1) | |
499 | gf128mul_x_bbe(&t->t[j + j], &t->t[j]); | |
500 | ||
501 | for (j = 2; j < 256; j += j) | |
502 | for (k = 1; k < j; ++k) | |
503 | be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); | |
504 | ||
505 | out: | |
506 | return t; | |
507 | } | |
508 | EXPORT_SYMBOL(gf128mul_init_4k_bbe); | |
509 | ||
ce2ace45 AC |
510 | struct gf128mul_4k *gf128mul_init_4k_ble(const be128 *g) |
511 | { | |
512 | struct gf128mul_4k *t; | |
513 | int j, k; | |
514 | ||
515 | t = kzalloc(sizeof(*t), GFP_KERNEL); | |
516 | if (!t) | |
517 | goto out; | |
518 | ||
519 | t->t[1] = *g; | |
520 | for (j = 1; j <= 64; j <<= 1) | |
521 | gf128mul_x_ble(&t->t[j + j], &t->t[j]); | |
522 | ||
523 | for (j = 2; j < 256; j += j) | |
524 | for (k = 1; k < j; ++k) | |
525 | be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); | |
526 | ||
527 | out: | |
528 | return t; | |
529 | } | |
530 | EXPORT_SYMBOL(gf128mul_init_4k_ble); | |
531 | ||
c494e070 RS |
532 | void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) |
533 | { | |
534 | u8 *ap = (u8 *)a; | |
535 | be128 r[1]; | |
536 | int i = 15; | |
537 | ||
538 | *r = t->t[ap[15]]; | |
539 | while (i--) { | |
540 | gf128mul_x8_lle(r); | |
541 | be128_xor(r, r, &t->t[ap[i]]); | |
542 | } | |
543 | *a = *r; | |
544 | } | |
545 | EXPORT_SYMBOL(gf128mul_4k_lle); | |
546 | ||
547 | void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t) | |
548 | { | |
549 | u8 *ap = (u8 *)a; | |
550 | be128 r[1]; | |
551 | int i = 0; | |
552 | ||
553 | *r = t->t[ap[0]]; | |
554 | while (++i < 16) { | |
555 | gf128mul_x8_bbe(r); | |
556 | be128_xor(r, r, &t->t[ap[i]]); | |
557 | } | |
558 | *a = *r; | |
559 | } | |
560 | EXPORT_SYMBOL(gf128mul_4k_bbe); | |
561 | ||
ce2ace45 AC |
562 | void gf128mul_4k_ble(be128 *a, struct gf128mul_4k *t) |
563 | { | |
564 | u8 *ap = (u8 *)a; | |
565 | be128 r[1]; | |
566 | int i = 15; | |
567 | ||
568 | *r = t->t[ap[15]]; | |
569 | while (i--) { | |
570 | gf128mul_x8_ble(r); | |
571 | be128_xor(r, r, &t->t[ap[i]]); | |
572 | } | |
573 | *a = *r; | |
574 | } | |
575 | EXPORT_SYMBOL(gf128mul_4k_ble); | |
576 | ||
c494e070 RS |
577 | MODULE_LICENSE("GPL"); |
578 | MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)"); |