]> git.lizzy.rs Git - plan9front.git/blob - sys/src/libsec/port/aes.c
libsec: fix probably_prime() endless loop for n == 3
[plan9front.git] / sys / src / libsec / port / aes.c
1 /*
2  * this code is derived from the following source,
3  * and modified to fit into the plan 9 libsec interface.
4  * most of the changes are confined to the top section,
5  * with the exception of converting Te4 and Td4 into u8 rather than u32 arrays.
6  *
7  * rijndael-alg-fst.c
8  *
9  * @version 3.0 (December 2000)
10  *
11  * Optimised ANSI C code for the Rijndael cipher (now AES)
12  *
13  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
14  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
15  * @author Paulo Barreto <paulo.barreto@terra.com.br>
16  *
17  * This code is hereby placed in the public domain.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 #include <u.h>
32 #include <libc.h>
33 #include <mp.h>
34 #include <libsec.h>
35
/*
 * Short integer aliases used by the lookup tables in this file.
 * NOTE(review): u32 is assumed to be exactly 32 bits wide (ulong on the
 * target platform) -- confirm before building elsewhere.
 */
36 typedef uchar   u8;
37 typedef ulong   u32;
38
/*
 * FULL_UNROLL is not referenced in the part of the file shown here;
 * presumably it selects the unrolled round code in aes_encrypt and
 * aes_decrypt -- confirm.
 *
 * "#define const" makes every later use of the keyword `const' in this
 * file expand to nothing, so the "static const" tables and the const
 * parameters below are compiled as ordinary, non-const objects.
 * NOTE(review): looks like a workaround for the target compiler's
 * handling of const -- confirm.
 */
39 #define FULL_UNROLL
40 #define const
41
/*
 * Forward declarations of lookup tables whose initialisers appear further
 * down, so that aes_setup() can index them before they are defined.
 */
42 static const u32 Td0[256];
43 static const u32 Td1[256];
44 static const u32 Td2[256];
45 static const u32 Td3[256];
46 static const u8  Te4[256];
/*
 * The three constant 16-byte blocks (all 0x01, all 0x02, all 0x03) that
 * setupAESXCBCstate() encrypts under the user key to derive the three
 * AES-XCBC-MAC sub-keys stored in s->mackey.
 */
47 static uchar basekey[3][16] = {
48         {
49         0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
50         0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
51         },
52         {
53         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
54         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
55         },
56         {
57         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
58         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
59         },
60 };
61
/*
 * Key-schedule routines.  rk/erk/drk receive 4*(Nr + 1) words, where Nr is
 * the round count that the routine returns.  keyBits is the key length in
 * bits.  aes_setupEnc has external linkage; the other two are file-local.
 */
62 int aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
63                 int keyBits);
64 static int aes_setupDec(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
65                 int keyBits);
66 static int aes_setup(ulong erk[/*4*(Nr + 1)*/], ulong drk[/*4*(Nr + 1)*/],
67                 const uchar cipherKey[], int keyBits);
68
/*
 * Single-block primitives: transform one 16-byte block (pt <-> ct) using
 * the expanded key rk and Nr rounds.
 */
69 void    aes_encrypt(const ulong rk[], int Nr, const uchar pt[16], uchar ct[16]);
70 void    aes_decrypt(const ulong rk[], int Nr, const uchar ct[16], uchar pt[16]);
71
72 void
73 setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
74 {
75         memset(s, 0, sizeof(*s));
76         if(keybytes > AESmaxkey)
77                 keybytes = AESmaxkey;
78         memmove(s->key, key, keybytes);
79         s->keybytes = keybytes;
80         s->rounds = aes_setup(s->ekey, s->dkey, s->key, keybytes * 8);
81         if(ivec != nil)
82                 memmove(s->ivec, ivec, AESbsize);
83         if(keybytes==16 || keybytes==24 || keybytes==32)
84                 s->setup = 0xcafebabe;
85         /* else aes_setup was invalid */
86 }
87
88 /*
89  * AES-XCBC-MAC-96 message authentication, per rfc3566.
90  */
91
92 void
93 setupAESXCBCstate(AESstate *s)          /* was setupmac96 */
94 {
95         int i, j;
96         uint q[16 / sizeof(uint)];
97         uchar *p;
98
99         assert(s->keybytes == 16);
100         for(i = 0; i < 3; i++)
101                 aes_encrypt(s->ekey, s->rounds, basekey[i],
102                         s->mackey + AESbsize*i);
103
104         p = s->mackey;
105         memset(q, 0, AESbsize);
106
107         /*
108          * put the in the right endian.  once figured, probably better
109          * to use some fcall macros.
110          * keys for encryption in local endianness for the algorithm...
111          * only key1 is used for encryption;
112          * BUG!!: I think this is what I got wrong.
113          */
114         for(i = 0; i < 16 / sizeof(uint); i ++){
115                 for(j = 0; j < sizeof(uint); j++)
116                         q[i] |= p[sizeof(uint)-j-1] << 8*j;
117                 p += sizeof(uint);
118         }
119         memmove(s->mackey, q, 16);
120 }
121
122 /*
123  * Not dealing with > 128-bit keys, not dealing with strange corner cases like
124  * empty message.  Should be fine for AES-XCBC-MAC-96.
125  */
126 uchar*
127 aesXCBCmac(uchar *p, int len, AESstate *s)
128 {
129         uchar *p2, *ip, *eip, *mackey;
130         uchar q[AESbsize];
131
132         assert(s->keybytes == 16);      /* more complicated for bigger */
133         memset(s->ivec, 0, AESbsize);   /* E[0] is 0+ */
134
135         for(; len > AESbsize; len -= AESbsize){
136                 memmove(q, p, AESbsize);
137                 p2 = q;
138                 ip = s->ivec;
139                 for(eip = ip + AESbsize; ip < eip; )
140                         *p2++ ^= *ip++;
141                 aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
142                 p += AESbsize;
143         }
144         /* the last one */
145
146         memmove(q, p, len);
147         p2 = q+len;
148         if(len == AESbsize)
149                 mackey = s->mackey + AESbsize;  /* k2 */
150         else{
151                 mackey = s->mackey+2*AESbsize;  /* k3 */
152                 *p2++ = 1 << 7;                 /* padding */
153                 len = AESbsize - len - 1;
154                 memset(p2, 0, len);
155         }
156
157         ip = s->ivec;
158         p2 = q;
159         for(eip = ip + AESbsize; ip < eip; )
160                 *p2++ ^= *ip++ ^ *mackey++;
161         aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
162         return s->ivec;                 /* only the 12 bytes leftmost */
163 }
164
165 /*
166  * Define by analogy with desCBCencrypt;  AES modes are not standardized yet.
167  * Because of the way that non-multiple-of-16 buffers are handled,
168  * the decryptor must be fed buffers of the same size as the encryptor.
169  */
170 void
171 aesCBCencrypt(uchar *p, int len, AESstate *s)
172 {
173         uchar *p2, *ip, *eip;
174         uchar q[AESbsize];
175
176         for(; len >= AESbsize; len -= AESbsize){
177                 p2 = p;
178                 ip = s->ivec;
179                 for(eip = ip+AESbsize; ip < eip; )
180                         *p2++ ^= *ip++;
181                 aes_encrypt(s->ekey, s->rounds, p, q);
182                 memmove(s->ivec, q, AESbsize);
183                 memmove(p, q, AESbsize);
184                 p += AESbsize;
185         }
186
187         if(len > 0){
188                 ip = s->ivec;
189                 aes_encrypt(s->ekey, s->rounds, ip, q);
190                 memmove(s->ivec, q, AESbsize);
191                 for(eip = ip+len; ip < eip; )
192                         *p++ ^= *ip++;
193         }
194 }
195
196 void
197 aesCBCdecrypt(uchar *p, int len, AESstate *s)
198 {
199         uchar *ip, *eip, *tp;
200         uchar tmp[AESbsize], q[AESbsize];
201
202         for(; len >= AESbsize; len -= AESbsize){
203                 memmove(tmp, p, AESbsize);
204                 aes_decrypt(s->dkey, s->rounds, p, q);
205                 memmove(p, q, AESbsize);
206                 tp = tmp;
207                 ip = s->ivec;
208                 for(eip = ip+AESbsize; ip < eip; ){
209                         *p++ ^= *ip;
210                         *ip++ = *tp++;
211                 }
212         }
213
214         if(len > 0){
215                 ip = s->ivec;
216                 aes_encrypt(s->ekey, s->rounds, ip, q);
217                 memmove(s->ivec, q, AESbsize);
218                 for(eip = ip+len; ip < eip; )
219                         *p++ ^= *ip++;
220         }
221 }
222
/* taken from sha1; TODO: verify suitability (esp. byte order) for aes */
/*
 *      Serialise the words at input into bytes at output, most significant
 *      byte first, using the low 32 bits of each word.  len is the number
 *      of output bytes and is assumed to be a multiple of 4; whole words
 *      are written until at least len bytes have been produced.
 */
static void
encode(uchar *output, ulong *input, ulong len)
{
        uchar *dst, *end;
        ulong word;
        int shift;

        end = output + len;
        for(dst = output; dst < end; input++){
                word = *input;
                for(shift = 24; shift >= 0; shift -= 8)
                        *dst++ = word >> shift;
        }
}
242
243 /* TODO: verify use of aes_encrypt here */
/*
 * aes: incremental digest-style interface built around aes_encrypt.  The
 * buffering, padding and length-encoding logic is carried over from the
 * sha1 implementation (see the commented-out _sha1block calls).
 *
 *      p, len  next chunk of input
 *      digest  nil (0) while more input will follow; otherwise the buffer
 *              that receives the final AESdlen bytes
 *      s       running state; nil on the first call makes the function
 *              allocate (and later free) one itself
 *
 * Returns s while accumulating (digest == 0), nil after the final call or
 * when the state could not be allocated.
 *
 * NOTE(review): every aes_encrypt call below uses s->buf as the round-key
 * array with Nr = 1, and aes_encrypt's prototype transforms a single
 * 16-byte block, while the surrounding bookkeeping counts 64-byte blocks.
 * The TODO above this function already questions this use -- confirm the
 * intended construction before relying on the output.
 */
244 AEShstate*
245 aes(uchar *p, ulong len, uchar *digest, AEShstate *s)
246 {
        /* buf: scratch for the final padded data when nothing is buffered in s */
247         uchar buf[128];
        /* x: only x[0] and x[1] are used, for the 64-bit bit count */
248         ulong x[16];
249         int i;
250         uchar *e;
251
        /* no state supplied: allocate a zeroed one and remember to free it */
252         if(s == nil){
253                 s = malloc(sizeof(*s));
254                 if(s == nil)
255                         return nil;
256                 memset(s, 0, sizeof(*s));
257                 s->malloced = 1;
258         }
259
260         if(s->seeded == 0){
261                 /* seed the state, these constants would look nicer big-endian */
262                 s->state[0] = 0x67452301;
263                 s->state[1] = 0xefcdab89;
264                 s->state[2] = 0x98badcfe;
265                 s->state[3] = 0x10325476;
266                 /* in sha1 (20-byte digest), but not md5 (16 bytes)*/
267                 s->state[4] = 0xc3d2e1f0;
268                 s->seeded = 1;
269         }
270
271         /* fill out the partial 64 byte block from previous calls */
272         if(s->blen){
                /* i = room left in s->buf, capped at the input available */
273                 i = 64 - s->blen;
274                 if(len < i)
275                         i = len;
276                 memmove(s->buf + s->blen, p, i);
277                 len -= i;
278                 s->blen += i;
279                 p += i;
280                 if(s->blen == 64){
281                         /* encrypt s->buf into s->state */
282                         // _sha1block(s->buf, s->blen, s->state);
283                         aes_encrypt((ulong *)s->buf, 1, s->buf, (uchar *)s->state);
284                         s->len += s->blen;
285                         s->blen = 0;
286                 }
287         }
288
289         /* do 64 byte blocks */
        /* i = len rounded down to a multiple of 64 */
290         i = len & ~0x3f;
291         if(i){
292                 /* encrypt p into s->state */
293                 // _sha1block(p, i, s->state);
294                 aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
295                 s->len += i;
296                 len -= i;
297                 p += i;
298         }
299
300         /* save the left overs if not last call */
301         if(digest == 0){
302                 if(len){
303                         memmove(s->buf, p, len);
304                         s->blen += len;
305                 }
306                 return s;
307         }
308
309         /*
310          *  this is the last time through, pad what's left with 0x80,
311          *  0's, and the input count to create a multiple of 64 bytes
312          */
313         if(s->blen){
314                 p = s->buf;
315                 len = s->blen;
316         } else {
317                 memmove(buf, p, len);
318                 p = buf;
319         }
320         s->len += len;
321         e = p + len;
        /* pad to 56 (or 120) bytes so that the 8 count bytes end a 64-byte block */
322         if(len < 56)
323                 i = 56 - len;
324         else
325                 i = 120 - len;
326         memset(e, 0, i);
327         *e = 0x80;
328         len += i;
329
330         /* append the count */
        /* s->len is in bytes; x[0]:x[1] is the same count in bits, high word first */
331         x[0] = s->len>>29;              /* byte-order dependent */
332         x[1] = s->len<<3;
333         encode(p+len, x, 8);
334
335         /* digest the last part */
336         /* encrypt p into s->state */
337         // _sha1block(p, len+8, s->state);
338         aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
339         s->len += len+8;                /* sha1: +8 */
340
341         /* return result and free state */
342         encode((uchar *)digest, (ulong *)s->state, AESdlen);
343         if(s->malloced == 1)
344                 free(s);
345         return nil;
346 }
347
348 DigestState*
349 hmac_aes(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest,
350         DigestState *s)
351 {
352         return hmac_x(p, len, key, klen, digest, s, aes, AESdlen);
353 }
354
355
356
357 /*
358  * this function has been changed for plan 9.
359  * Expand the cipher key into the encryption and decryption key schedules.
360  *
361  * @return      the number of rounds for the given cipher key size.
362  */
363 static int
364 aes_setup(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */],
365         const uchar cipherKey[], int keyBits)
366 {
367         int Nr, i;
368
369         /* expand the cipher key: */
370         Nr = aes_setupEnc(erk, cipherKey, keyBits);
371
372         /*
373          * invert the order of the round keys and apply the inverse MixColumn
374          * transform to all round keys but the first and the last
375          */
376         drk[0       ] = erk[4*Nr    ];
377         drk[1       ] = erk[4*Nr + 1];
378         drk[2       ] = erk[4*Nr + 2];
379         drk[3       ] = erk[4*Nr + 3];
380         drk[4*Nr    ] = erk[0       ];
381         drk[4*Nr + 1] = erk[1       ];
382         drk[4*Nr + 2] = erk[2       ];
383         drk[4*Nr + 3] = erk[3       ];
384         erk += 4 * Nr;
385         for (i = 1; i < Nr; i++) {
386                 drk += 4;
387                 erk -= 4;
388                 drk[0] =
389                     Td0[Te4[(erk[0] >> 24)       ]] ^
390                     Td1[Te4[(erk[0] >> 16) & 0xff]] ^
391                     Td2[Te4[(erk[0] >>  8) & 0xff]] ^
392                     Td3[Te4[(erk[0]      ) & 0xff]];
393                 drk[1] =
394                     Td0[Te4[(erk[1] >> 24)       ]] ^
395                     Td1[Te4[(erk[1] >> 16) & 0xff]] ^
396                     Td2[Te4[(erk[1] >>  8) & 0xff]] ^
397                     Td3[Te4[(erk[1]      ) & 0xff]];
398                 drk[2] =
399                     Td0[Te4[(erk[2] >> 24)       ]] ^
400                     Td1[Te4[(erk[2] >> 16) & 0xff]] ^
401                     Td2[Te4[(erk[2] >>  8) & 0xff]] ^
402                     Td3[Te4[(erk[2]      ) & 0xff]];
403                 drk[3] =
404                     Td0[Te4[(erk[3] >> 24)       ]] ^
405                     Td1[Te4[(erk[3] >> 16) & 0xff]] ^
406                     Td2[Te4[(erk[3] >>  8) & 0xff]] ^
407                     Td3[Te4[(erk[3]      ) & 0xff]];
408         }
409         return Nr;
410 }
411
412
413 /*
414 Te0[x] = S [x].[02, 01, 01, 03];
415 Te1[x] = S [x].[03, 02, 01, 01];
416 Te2[x] = S [x].[01, 03, 02, 01];
417 Te3[x] = S [x].[01, 01, 03, 02];
418 Te4[x] = S [x]
419
420 Td0[x] = Si[x].[0e, 09, 0d, 0b];
421 Td1[x] = Si[x].[0b, 0e, 09, 0d];
422 Td2[x] = Si[x].[0d, 0b, 0e, 09];
423 Td3[x] = Si[x].[09, 0d, 0b, 0e];
424 Td4[x] = Si[x]
425 */
426
/*
 * Te0: 256-entry encryption lookup table.  Per the legend in the comment
 * above, Te0[x] = S[x].[02, 01, 01, 03]; e.g. with S[0] = 0x63 (Te4[0]),
 * Te0[0] = 0xc66363a5.
 */
427 static const u32 Te0[256] = {
428     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
429     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
430     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
431     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
432     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
433     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
434     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
435     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
436     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
437     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
438     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
439     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
440     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
441     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
442     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
443     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
444     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
445     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
446     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
447     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
448     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
449     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
450     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
451     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
452     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
453     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
454     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
455     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
456     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
457     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
458     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
459     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
460     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
461     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
462     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
463     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
464     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
465     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
466     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
467     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
468     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
469     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
470     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
471     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
472     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
473     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
474     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
475     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
476     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
477     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
478     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
479     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
480     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
481     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
482     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
483     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
484     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
485     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
486     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
487     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
488     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
489     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
490     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
491     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
492 };
/*
 * Te1: 256-entry encryption lookup table.  Per the legend in the comment
 * above, Te1[x] = S[x].[03, 02, 01, 01]; each entry is the matching Te0
 * entry rotated right by one byte (Te1[0] = 0xa5c66363).
 */
493 static const u32 Te1[256] = {
494     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
495     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
496     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
497     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
498     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
499     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
500     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
501     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
502     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
503     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
504     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
505     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
506     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
507     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
508     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
509     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
510     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
511     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
512     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
513     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
514     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
515     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
516     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
517     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
518     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
519     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
520     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
521     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
522     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
523     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
524     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
525     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
526     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
527     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
528     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
529     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
530     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
531     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
532     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
533     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
534     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
535     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
536     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
537     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
538     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
539     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
540     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
541     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
542     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
543     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
544     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
545     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
546     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
547     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
548     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
549     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
550     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
551     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
552     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
553     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
554     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
555     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
556     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
557     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
558 };
/*
 * Te2: 256-entry encryption lookup table.  Per the legend in the comment
 * above, Te2[x] = S[x].[01, 03, 02, 01]; each entry is the matching Te0
 * entry rotated right by two bytes (Te2[0] = 0x63a5c663).
 */
559 static const u32 Te2[256] = {
560     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
561     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
562     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
563     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
564     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
565     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
566     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
567     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
568     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
569     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
570     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
571     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
572     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
573     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
574     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
575     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
576     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
577     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
578     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
579     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
580     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
581     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
582     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
583     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
584     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
585     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
586     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
587     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
588     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
589     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
590     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
591     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
592     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
593     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
594     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
595     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
596     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
597     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
598     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
599     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
600     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
601     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
602     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
603     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
604     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
605     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
606     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
607     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
608     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
609     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
610     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
611     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
612     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
613     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
614     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
615     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
616     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
617     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
618     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
619     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
620     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
621     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
622     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
623     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
624 };
/*
 * Te3: 256-entry encryption lookup table.  Per the legend in the comment
 * above, Te3[x] = S[x].[01, 01, 03, 02]; each entry is the matching Te0
 * entry rotated right by three bytes (Te3[0] = 0x6363a5c6).
 */
625 static const u32 Te3[256] = {
626
627     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
628     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
629     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
630     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
631     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
632     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
633     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
634     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
635     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
636     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
637     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
638     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
639     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
640     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
641     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
642     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
643     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
644     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
645     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
646     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
647     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
648     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
649     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
650     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
651     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
652     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
653     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
654     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
655     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
656     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
657     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
658     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
659     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
660     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
661     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
662     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
663     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
664     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
665     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
666     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
667     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
668     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
669     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
670     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
671     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
672     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
673     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
674     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
675     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
676     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
677     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
678     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
679     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
680     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
681     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
682     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
683     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
684     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
685     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
686     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
687     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
688     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
689     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
690     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
691 };
/*
 * Te4: the S-box itself, one byte per entry (Te4[x] = S[x] in the legend
 * above).  Stored as u8 rather than u32, as noted in the file header.
 * aes_setup() indexes the Td tables with Te4[b] when building the
 * decryption key schedule.
 */
692 static const u8 Te4[256] = {
693     0x63U, 0x7cU, 0x77U, 0x7bU,
694     0xf2U, 0x6bU, 0x6fU, 0xc5U,
695     0x30U, 0x01U, 0x67U, 0x2bU,
696     0xfeU, 0xd7U, 0xabU, 0x76U,
697     0xcaU, 0x82U, 0xc9U, 0x7dU,
698     0xfaU, 0x59U, 0x47U, 0xf0U,
699     0xadU, 0xd4U, 0xa2U, 0xafU,
700     0x9cU, 0xa4U, 0x72U, 0xc0U,
701     0xb7U, 0xfdU, 0x93U, 0x26U,
702     0x36U, 0x3fU, 0xf7U, 0xccU,
703     0x34U, 0xa5U, 0xe5U, 0xf1U,
704     0x71U, 0xd8U, 0x31U, 0x15U,
705     0x04U, 0xc7U, 0x23U, 0xc3U,
706     0x18U, 0x96U, 0x05U, 0x9aU,
707     0x07U, 0x12U, 0x80U, 0xe2U,
708     0xebU, 0x27U, 0xb2U, 0x75U,
709     0x09U, 0x83U, 0x2cU, 0x1aU,
710     0x1bU, 0x6eU, 0x5aU, 0xa0U,
711     0x52U, 0x3bU, 0xd6U, 0xb3U,
712     0x29U, 0xe3U, 0x2fU, 0x84U,
713     0x53U, 0xd1U, 0x00U, 0xedU,
714     0x20U, 0xfcU, 0xb1U, 0x5bU,
715     0x6aU, 0xcbU, 0xbeU, 0x39U,
716     0x4aU, 0x4cU, 0x58U, 0xcfU,
717     0xd0U, 0xefU, 0xaaU, 0xfbU,
718     0x43U, 0x4dU, 0x33U, 0x85U,
719     0x45U, 0xf9U, 0x02U, 0x7fU,
720     0x50U, 0x3cU, 0x9fU, 0xa8U,
721     0x51U, 0xa3U, 0x40U, 0x8fU,
722     0x92U, 0x9dU, 0x38U, 0xf5U,
723     0xbcU, 0xb6U, 0xdaU, 0x21U,
724     0x10U, 0xffU, 0xf3U, 0xd2U,
725     0xcdU, 0x0cU, 0x13U, 0xecU,
726     0x5fU, 0x97U, 0x44U, 0x17U,
727     0xc4U, 0xa7U, 0x7eU, 0x3dU,
728     0x64U, 0x5dU, 0x19U, 0x73U,
729     0x60U, 0x81U, 0x4fU, 0xdcU,
730     0x22U, 0x2aU, 0x90U, 0x88U,
731     0x46U, 0xeeU, 0xb8U, 0x14U,
732     0xdeU, 0x5eU, 0x0bU, 0xdbU,
733     0xe0U, 0x32U, 0x3aU, 0x0aU,
734     0x49U, 0x06U, 0x24U, 0x5cU,
735     0xc2U, 0xd3U, 0xacU, 0x62U,
736     0x91U, 0x95U, 0xe4U, 0x79U,
737     0xe7U, 0xc8U, 0x37U, 0x6dU,
738     0x8dU, 0xd5U, 0x4eU, 0xa9U,
739     0x6cU, 0x56U, 0xf4U, 0xeaU,
740     0x65U, 0x7aU, 0xaeU, 0x08U,
741     0xbaU, 0x78U, 0x25U, 0x2eU,
742     0x1cU, 0xa6U, 0xb4U, 0xc6U,
743     0xe8U, 0xddU, 0x74U, 0x1fU,
744     0x4bU, 0xbdU, 0x8bU, 0x8aU,
745     0x70U, 0x3eU, 0xb5U, 0x66U,
746     0x48U, 0x03U, 0xf6U, 0x0eU,
747     0x61U, 0x35U, 0x57U, 0xb9U,
748     0x86U, 0xc1U, 0x1dU, 0x9eU,
749     0xe1U, 0xf8U, 0x98U, 0x11U,
750     0x69U, 0xd9U, 0x8eU, 0x94U,
751     0x9bU, 0x1eU, 0x87U, 0xe9U,
752     0xceU, 0x55U, 0x28U, 0xdfU,
753     0x8cU, 0xa1U, 0x89U, 0x0dU,
754     0xbfU, 0xe6U, 0x42U, 0x68U,
755     0x41U, 0x99U, 0x2dU, 0x0fU,
756     0xb0U, 0x54U, 0xbbU, 0x16U,
757 };
/*
 * Td0[x] = Si[x].[0e, 09, 0d, 0b]: the inverse S-box value of x
 * multiplied in GF(2^8) by the InvMixColumn coefficients, packed
 * most significant byte first (e.g. Td0[0] = 0x51f4a750 for
 * Si[0] = 0x52).  Td1, Td2 and Td3 hold the same words rotated
 * right by 8, 16 and 24 bits.
 */
758 static const u32 Td0[256] = {
759     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
760     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
761     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
762     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
763     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
764     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
765     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
766     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
767     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
768     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
769     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
770     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
771     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
772     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
773     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
774     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
775     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
776     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
777     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
778     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
779     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
780     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
781     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
782     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
783     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
784     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
785     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
786     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
787     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
788     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
789     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
790     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
791     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
792     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
793     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
794     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
795     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
796     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
797     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
798     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
799     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
800     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
801     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
802     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
803     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
804     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
805     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
806     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
807     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
808     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
809     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
810     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
811     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
812     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
813     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
814     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
815     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
816     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
817     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
818     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
819     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
820     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
821     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
822     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
823 };
/*
 * Td1[x] = Si[x].[0b, 0e, 09, 0d]: each entry is the matching Td0
 * entry rotated right by 8 bits (Td0[0] = 0x51f4a750 becomes
 * Td1[0] = 0x5051f4a7).
 */
824 static const u32 Td1[256] = {
825     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
826     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
827     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
828     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
829     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
830     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
831     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
832     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
833     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
834     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
835     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
836     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
837     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
838     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
839     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
840     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
841     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
842     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
843     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
844     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
845     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
846     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
847     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
848     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
849     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
850     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
851     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
852     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
853     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
854     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
855     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
856     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
857     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
858     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
859     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
860     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
861     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
862     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
863     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
864     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
865     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
866     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
867     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
868     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
869     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
870     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
871     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
872     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
873     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
874     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
875     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
876     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
877     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
878     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
879     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
880     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
881     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
882     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
883     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
884     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
885     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
886     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
887     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
888     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
889 };
/*
 * Td2[x] = Si[x].[0d, 0b, 0e, 09]: each entry is the matching Td0
 * entry rotated right by 16 bits (Td0[0] = 0x51f4a750 becomes
 * Td2[0] = 0xa75051f4).
 */
890 static const u32 Td2[256] = {
891     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
892     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
893     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
894     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
895     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
896     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
897     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
898     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
899     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
900     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
901     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
902     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
903     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
904     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
905     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
906     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
907     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
908     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
909     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
910     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
911
912     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
913     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
914     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
915     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
916     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
917     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
918     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
919     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
920     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
921     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
922     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
923     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
924     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
925     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
926     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
927     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
928     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
929     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
930     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
931     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
932     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
933     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
934     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
935     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
936     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
937     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
938     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
939     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
940     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
941     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
942     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
943     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
944     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
945     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
946     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
947     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
948     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
949     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
950     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
951     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
952     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
953     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
954     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
955     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
956 };
/*
 * Td3[x] = Si[x].[09, 0d, 0b, 0e]: each entry is the matching Td0
 * entry rotated right by 24 bits (Td0[0] = 0x51f4a750 becomes
 * Td3[0] = 0xf4a75051).
 */
957 static const u32 Td3[256] = {
958     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
959     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
960     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
961     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
962     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
963     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
964     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
965     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
966     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
967     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
968     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
969     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
970     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
971     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
972     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
973     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
974     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
975     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
976     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
977     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
978     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
979     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
980     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
981     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
982     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
983     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
984     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
985     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
986     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
987     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
988     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
989     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
990     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
991     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
992     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
993     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
994     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
995     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
996     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
997     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
998     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
999     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
1000     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
1001     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
1002     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
1003     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
1004     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
1005     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
1006     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
1007     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
1008     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
1009     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
1010     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
1011     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
1012     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
1013     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
1014     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
1015     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
1016     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
1017     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
1018     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
1019     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
1020     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
1021     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
1022 };
/*
 * Td4[x] = Si[x]: the AES inverse S-box, one byte per entry
 * (the file header notes it was narrowed from u32 to u8).
 * It undoes Te4: Te4[0] = 0x63 and Td4[0x63] = 0x00.
 */
1023 static const u8 Td4[256] = {
1024     0x52U, 0x09U, 0x6aU, 0xd5U,
1025     0x30U, 0x36U, 0xa5U, 0x38U,
1026     0xbfU, 0x40U, 0xa3U, 0x9eU,
1027     0x81U, 0xf3U, 0xd7U, 0xfbU,
1028     0x7cU, 0xe3U, 0x39U, 0x82U,
1029     0x9bU, 0x2fU, 0xffU, 0x87U,
1030     0x34U, 0x8eU, 0x43U, 0x44U,
1031     0xc4U, 0xdeU, 0xe9U, 0xcbU,
1032     0x54U, 0x7bU, 0x94U, 0x32U,
1033     0xa6U, 0xc2U, 0x23U, 0x3dU,
1034     0xeeU, 0x4cU, 0x95U, 0x0bU,
1035     0x42U, 0xfaU, 0xc3U, 0x4eU,
1036     0x08U, 0x2eU, 0xa1U, 0x66U,
1037     0x28U, 0xd9U, 0x24U, 0xb2U,
1038     0x76U, 0x5bU, 0xa2U, 0x49U,
1039     0x6dU, 0x8bU, 0xd1U, 0x25U,
1040     0x72U, 0xf8U, 0xf6U, 0x64U,
1041     0x86U, 0x68U, 0x98U, 0x16U,
1042     0xd4U, 0xa4U, 0x5cU, 0xccU,
1043     0x5dU, 0x65U, 0xb6U, 0x92U,
1044     0x6cU, 0x70U, 0x48U, 0x50U,
1045     0xfdU, 0xedU, 0xb9U, 0xdaU,
1046     0x5eU, 0x15U, 0x46U, 0x57U,
1047     0xa7U, 0x8dU, 0x9dU, 0x84U,
1048     0x90U, 0xd8U, 0xabU, 0x00U,
1049     0x8cU, 0xbcU, 0xd3U, 0x0aU,
1050     0xf7U, 0xe4U, 0x58U, 0x05U,
1051     0xb8U, 0xb3U, 0x45U, 0x06U,
1052     0xd0U, 0x2cU, 0x1eU, 0x8fU,
1053     0xcaU, 0x3fU, 0x0fU, 0x02U,
1054     0xc1U, 0xafU, 0xbdU, 0x03U,
1055     0x01U, 0x13U, 0x8aU, 0x6bU,
1056     0x3aU, 0x91U, 0x11U, 0x41U,
1057     0x4fU, 0x67U, 0xdcU, 0xeaU,
1058     0x97U, 0xf2U, 0xcfU, 0xceU,
1059     0xf0U, 0xb4U, 0xe6U, 0x73U,
1060     0x96U, 0xacU, 0x74U, 0x22U,
1061     0xe7U, 0xadU, 0x35U, 0x85U,
1062     0xe2U, 0xf9U, 0x37U, 0xe8U,
1063     0x1cU, 0x75U, 0xdfU, 0x6eU,
1064     0x47U, 0xf1U, 0x1aU, 0x71U,
1065     0x1dU, 0x29U, 0xc5U, 0x89U,
1066     0x6fU, 0xb7U, 0x62U, 0x0eU,
1067     0xaaU, 0x18U, 0xbeU, 0x1bU,
1068     0xfcU, 0x56U, 0x3eU, 0x4bU,
1069     0xc6U, 0xd2U, 0x79U, 0x20U,
1070     0x9aU, 0xdbU, 0xc0U, 0xfeU,
1071     0x78U, 0xcdU, 0x5aU, 0xf4U,
1072     0x1fU, 0xddU, 0xa8U, 0x33U,
1073     0x88U, 0x07U, 0xc7U, 0x31U,
1074     0xb1U, 0x12U, 0x10U, 0x59U,
1075     0x27U, 0x80U, 0xecU, 0x5fU,
1076     0x60U, 0x51U, 0x7fU, 0xa9U,
1077     0x19U, 0xb5U, 0x4aU, 0x0dU,
1078     0x2dU, 0xe5U, 0x7aU, 0x9fU,
1079     0x93U, 0xc9U, 0x9cU, 0xefU,
1080     0xa0U, 0xe0U, 0x3bU, 0x4dU,
1081     0xaeU, 0x2aU, 0xf5U, 0xb0U,
1082     0xc8U, 0xebU, 0xbbU, 0x3cU,
1083     0x83U, 0x53U, 0x99U, 0x61U,
1084     0x17U, 0x2bU, 0x04U, 0x7eU,
1085     0xbaU, 0x77U, 0xd6U, 0x26U,
1086     0xe1U, 0x69U, 0x14U, 0x63U,
1087     0x55U, 0x21U, 0x0cU, 0x7dU,
1088 };
1089 static const u32 rcon[] = {
1090         0x01000000, 0x02000000, 0x04000000, 0x08000000,
1091         0x10000000, 0x20000000, 0x40000000, 0x80000000,
1092         0x1B000000, 0x36000000,
1093         /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1094 };
1095
/*
 * GETU32 assembles a 32-bit word from the four bytes at pt,
 * most significant byte first.
 * PUTU32 stores the low 32 bits of st into the four bytes at ct,
 * most significant byte first.  It expands to a single statement
 * (do ... while (0)) so that it is safe in an unbraced if/else.
 * Both macros evaluate their arguments more than once: do not pass
 * expressions with side effects.
 */
#define GETU32(pt) (((u32)(pt)[0]<<24) ^ ((u32)(pt)[1]<<16) ^ \
                    ((u32)(pt)[2]<< 8) ^ ((u32)(pt)[3]))
#define PUTU32(ct, st) do { \
                (ct)[0] = (u8)((st)>>24); (ct)[1] = (u8)((st)>>16); \
                (ct)[2] = (u8)((st)>> 8); (ct)[3] = (u8)(st); \
        } while (0)
1100
1101 /*
1102  * Expand the cipher key into the encryption key schedule.
1103  *
1104  * @return      the number of rounds for the given cipher key size.
1105  */
1106 int
1107 aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
1108 {
1109         int i = 0;
1110         u32 temp;
1111
1112         rk[0] = GETU32(cipherKey     );
1113         rk[1] = GETU32(cipherKey +  4);
1114         rk[2] = GETU32(cipherKey +  8);
1115         rk[3] = GETU32(cipherKey + 12);
1116         if (keyBits == 128) {
1117                 for (;;) {
1118                         temp  = rk[3];
1119                         rk[4] = rk[0] ^
1120                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1121                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1122                                 (Te4[(temp      ) & 0xff] <<  8) ^
1123                                 (Te4[(temp >> 24)       ]      ) ^
1124                                 rcon[i];
1125                         rk[5] = rk[1] ^ rk[4];
1126                         rk[6] = rk[2] ^ rk[5];
1127                         rk[7] = rk[3] ^ rk[6];
1128                         if (++i == 10) {
1129                                 return 10;
1130                         }
1131                         rk += 4;
1132                 }
1133         }
1134         rk[4] = GETU32(cipherKey + 16);
1135         rk[5] = GETU32(cipherKey + 20);
1136         if (keyBits == 192) {
1137                 for (;;) {
1138                         temp = rk[ 5];
1139                         rk[ 6] = rk[ 0] ^
1140                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1141                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1142                                 (Te4[(temp      ) & 0xff] <<  8) ^
1143                                 (Te4[(temp >> 24)       ]      ) ^
1144                                 rcon[i];
1145                         rk[ 7] = rk[ 1] ^ rk[ 6];
1146                         rk[ 8] = rk[ 2] ^ rk[ 7];
1147                         rk[ 9] = rk[ 3] ^ rk[ 8];
1148                         if (++i == 8) {
1149                                 return 12;
1150                         }
1151                         rk[10] = rk[ 4] ^ rk[ 9];
1152                         rk[11] = rk[ 5] ^ rk[10];
1153                         rk += 6;
1154                 }
1155         }
1156         rk[6] = GETU32(cipherKey + 24);
1157         rk[7] = GETU32(cipherKey + 28);
1158         if (keyBits == 256) {
1159                 for (;;) {
1160                         temp = rk[ 7];
1161                         rk[ 8] = rk[ 0] ^
1162                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1163                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1164                                 (Te4[(temp      ) & 0xff] <<  8) ^
1165                                 (Te4[(temp >> 24)       ]      ) ^
1166                                 rcon[i];
1167                         rk[ 9] = rk[ 1] ^ rk[ 8];
1168                         rk[10] = rk[ 2] ^ rk[ 9];
1169                         rk[11] = rk[ 3] ^ rk[10];
1170                         if (++i == 7) {
1171                                 return 14;
1172                         }
1173                         temp = rk[11];
1174                         rk[12] = rk[ 4] ^
1175                                 (Te4[(temp >> 24)       ] << 24) ^
1176                                 (Te4[(temp >> 16) & 0xff] << 16) ^
1177                                 (Te4[(temp >>  8) & 0xff] <<  8) ^
1178                                 (Te4[(temp      ) & 0xff]      );
1179                         rk[13] = rk[ 5] ^ rk[12];
1180                         rk[14] = rk[ 6] ^ rk[13];
1181                         rk[15] = rk[ 7] ^ rk[14];
1182                         rk += 8;
1183                 }
1184         }
1185         return 0;
1186 }
1187
1188 /**
1189  * Expand the cipher key into the decryption key schedule.
1190  *
1191  * @return      the number of rounds for the given cipher key size.
1192  */
1193 static int
1194 aes_setupDec(ulong rk[/* 4*(Nr + 1) */], const uchar cipherKey[], int keyBits)
1195 {
1196         int Nr, i, j;
1197         ulong temp;
1198
1199         /* expand the cipher key: */
1200         Nr = aes_setupEnc(rk, cipherKey, keyBits);
1201         /* invert the order of the round keys: */
1202         for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
1203                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1204                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1205                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1206                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1207         }
1208         /*
1209          * apply the inverse MixColumn transform to all round keys
1210          * but the first and the last:
1211          */
1212         for (i = 1; i < Nr; i++) {
1213                 rk += 4;
1214                 rk[0] =
1215                         Td0[Te4[(rk[0] >> 24)       ]] ^
1216                         Td1[Te4[(rk[0] >> 16) & 0xff]] ^
1217                         Td2[Te4[(rk[0] >>  8) & 0xff]] ^
1218                         Td3[Te4[(rk[0]      ) & 0xff]];
1219                 rk[1] =
1220                         Td0[Te4[(rk[1] >> 24)       ]] ^
1221                         Td1[Te4[(rk[1] >> 16) & 0xff]] ^
1222                         Td2[Te4[(rk[1] >>  8) & 0xff]] ^
1223                         Td3[Te4[(rk[1]      ) & 0xff]];
1224                 rk[2] =
1225                         Td0[Te4[(rk[2] >> 24)       ]] ^
1226                         Td1[Te4[(rk[2] >> 16) & 0xff]] ^
1227                         Td2[Te4[(rk[2] >>  8) & 0xff]] ^
1228                         Td3[Te4[(rk[2]      ) & 0xff]];
1229                 rk[3] =
1230                         Td0[Te4[(rk[3] >> 24)       ]] ^
1231                         Td1[Te4[(rk[3] >> 16) & 0xff]] ^
1232                         Td2[Te4[(rk[3] >>  8) & 0xff]] ^
1233                         Td3[Te4[(rk[3]      ) & 0xff]];
1234         }
1235         return Nr;
1236 }
1237
1238 /* using round keys in rk, perform Nr rounds of encrypting pt into ct */
1239 void
1240 aes_encrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar pt[16],
1241         uchar ct[16])
1242 {
1243         ulong s0, s1, s2, s3, t0, t1, t2, t3;
1244 #ifndef FULL_UNROLL
1245         int r;
1246 #endif /* ?FULL_UNROLL */
1247
1248         /*
1249          * map byte array block to cipher state
1250          * and add initial round key:
1251          */
1252         s0 = GETU32(pt     ) ^ rk[0];
1253         s1 = GETU32(pt +  4) ^ rk[1];
1254         s2 = GETU32(pt +  8) ^ rk[2];
1255         s3 = GETU32(pt + 12) ^ rk[3];
1256 #ifdef FULL_UNROLL
1257         /* round 1: */
1258         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
1259         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
1260         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
1261         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
1262         /* round 2: */
1263         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
1264         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
1265         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
1266         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
1267         /* round 3: */
1268         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
1269         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
1270         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
1271         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
1272         /* round 4: */
1273         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
1274         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
1275         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
1276         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
1277         /* round 5: */
1278         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
1279         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
1280         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
1281         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
1282         /* round 6: */
1283         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
1284         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
1285         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
1286         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
1287         /* round 7: */
1288         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
1289         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
1290         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
1291         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
1292         /* round 8: */
1293         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
1294         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
1295         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
1296         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
1297         /* round 9: */
1298         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
1299         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
1300         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
1301         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
1302         if (Nr > 10) {
1303                 /* round 10: */
1304                 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
1305                 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
1306                 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
1307                 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
1308                 /* round 11: */
1309                 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
1310                 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
1311                 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
1312                 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
1313                 if (Nr > 12) {
1314                         /* round 12: */
1315                         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
1316                         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
1317                         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
1318                         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
1319                         /* round 13: */
1320                         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
1321                         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
1322                         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
1323                         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
1324                 }
1325         }
1326         rk += Nr << 2;
1327 #else                                   /* !FULL_UNROLL */
1328         /*
1329          * Nr - 1 full rounds:
1330          */
1331         r = Nr >> 1;
1332         for (;;) {
1333                 t0 =
1334                     Te0[(s0 >> 24)       ] ^
1335                     Te1[(s1 >> 16) & 0xff] ^
1336                     Te2[(s2 >>  8) & 0xff] ^
1337                     Te3[(s3      ) & 0xff] ^
1338                     rk[4];
1339                 t1 =
1340                     Te0[(s1 >> 24)       ] ^
1341                     Te1[(s2 >> 16) & 0xff] ^
1342                     Te2[(s3 >>  8) & 0xff] ^
1343                     Te3[(s0      ) & 0xff] ^
1344                     rk[5];
1345                 t2 =
1346                     Te0[(s2 >> 24)       ] ^
1347                     Te1[(s3 >> 16) & 0xff] ^
1348                     Te2[(s0 >>  8) & 0xff] ^
1349                     Te3[(s1      ) & 0xff] ^
1350                     rk[6];
1351                 t3 =
1352                     Te0[(s3 >> 24)       ] ^
1353                     Te1[(s0 >> 16) & 0xff] ^
1354                     Te2[(s1 >>  8) & 0xff] ^
1355                     Te3[(s2      ) & 0xff] ^
1356                     rk[7];
1357
1358                 rk += 8;
1359                 if (--r == 0)
1360                     break;
1361
1362                 s0 =
1363                     Te0[(t0 >> 24)       ] ^
1364                     Te1[(t1 >> 16) & 0xff] ^
1365                     Te2[(t2 >>  8) & 0xff] ^
1366                     Te3[(t3      ) & 0xff] ^
1367                     rk[0];
1368                 s1 =
1369                     Te0[(t1 >> 24)       ] ^
1370                     Te1[(t2 >> 16) & 0xff] ^
1371                     Te2[(t3 >>  8) & 0xff] ^
1372                     Te3[(t0      ) & 0xff] ^
1373                     rk[1];
1374                 s2 =
1375                     Te0[(t2 >> 24)       ] ^
1376                     Te1[(t3 >> 16) & 0xff] ^
1377                     Te2[(t0 >>  8) & 0xff] ^
1378                     Te3[(t1      ) & 0xff] ^
1379                     rk[2];
1380                 s3 =
1381                     Te0[(t3 >> 24)       ] ^
1382                     Te1[(t0 >> 16) & 0xff] ^
1383                     Te2[(t1 >>  8) & 0xff] ^
1384                     Te3[(t2      ) & 0xff] ^
1385                     rk[3];
1386         }
1387 #endif                                  /* ?FULL_UNROLL */
1388         /*
1389          * apply last round and
1390          * map cipher state to byte array block:
1391          */
1392         s0 =
1393                 (Te4[(t0 >> 24)       ] << 24) ^
1394                 (Te4[(t1 >> 16) & 0xff] << 16) ^
1395                 (Te4[(t2 >>  8) & 0xff] <<  8) ^
1396                 (Te4[(t3      ) & 0xff]      ) ^
1397                 rk[0];
1398         PUTU32(ct     , s0);
1399         s1 =
1400                 (Te4[(t1 >> 24)       ] << 24) ^
1401                 (Te4[(t2 >> 16) & 0xff] << 16) ^
1402                 (Te4[(t3 >>  8) & 0xff] <<  8) ^
1403                 (Te4[(t0      ) & 0xff]      ) ^
1404                 rk[1];
1405         PUTU32(ct +  4, s1);
1406         s2 =
1407                 (Te4[(t2 >> 24)       ] << 24) ^
1408                 (Te4[(t3 >> 16) & 0xff] << 16) ^
1409                 (Te4[(t0 >>  8) & 0xff] <<  8) ^
1410                 (Te4[(t1      ) & 0xff]      ) ^
1411                 rk[2];
1412         PUTU32(ct +  8, s2);
1413         s3 =
1414                 (Te4[(t3 >> 24)       ] << 24) ^
1415                 (Te4[(t0 >> 16) & 0xff] << 16) ^
1416                 (Te4[(t1 >>  8) & 0xff] <<  8) ^
1417                 (Te4[(t2      ) & 0xff]      ) ^
1418                 rk[3];
1419         PUTU32(ct + 12, s3);
1420 }
1421
1422 void
1423 aes_decrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar ct[16],
1424         uchar pt[16])
1425 {
1426         ulong s0, s1, s2, s3, t0, t1, t2, t3;
1427 #ifndef FULL_UNROLL
1428         int r;
1429 #endif          /* ?FULL_UNROLL */
1430
1431         /*
1432          * map byte array block to cipher state
1433          * and add initial round key:
1434          */
1435     s0 = GETU32(ct     ) ^ rk[0];
1436     s1 = GETU32(ct +  4) ^ rk[1];
1437     s2 = GETU32(ct +  8) ^ rk[2];
1438     s3 = GETU32(ct + 12) ^ rk[3];
1439 #ifdef FULL_UNROLL
1440     /* round 1: */
1441     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
1442     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
1443     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
1444     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
1445     /* round 2: */
1446     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1447     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1448     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1449     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1450     /* round 3: */
1451     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1452     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1453     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1454     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1455     /* round 4: */
1456     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1457     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1458     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1459     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1460     /* round 5: */
1461     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1462     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1463     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1464     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1465     /* round 6: */
1466     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1467     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1468     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1469     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1470     /* round 7: */
1471     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1472     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1473     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1474     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1475     /* round 8: */
1476     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1477     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1478     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1479     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1480     /* round 9: */
1481     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1482     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1483     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1484     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1485     if (Nr > 10) {
1486         /* round 10: */
1487         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1488         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1489         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1490         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1491         /* round 11: */
1492         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1493         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1494         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1495         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1496         if (Nr > 12) {
1497             /* round 12: */
1498             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1499             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1500             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1501             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1502             /* round 13: */
1503             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1504             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1505             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1506             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1507         }
1508     }
1509     rk += Nr << 2;
1510 #else                                   /* !FULL_UNROLL */
1511     /*
1512      * Nr - 1 full rounds:
1513      */
1514     r = Nr >> 1;
1515     for (;;) {
1516         t0 =
1517             Td0[(s0 >> 24)       ] ^
1518             Td1[(s3 >> 16) & 0xff] ^
1519             Td2[(s2 >>  8) & 0xff] ^
1520             Td3[(s1      ) & 0xff] ^
1521             rk[4];
1522         t1 =
1523             Td0[(s1 >> 24)       ] ^
1524             Td1[(s0 >> 16) & 0xff] ^
1525             Td2[(s3 >>  8) & 0xff] ^
1526             Td3[(s2      ) & 0xff] ^
1527             rk[5];
1528         t2 =
1529             Td0[(s2 >> 24)       ] ^
1530             Td1[(s1 >> 16) & 0xff] ^
1531             Td2[(s0 >>  8) & 0xff] ^
1532             Td3[(s3      ) & 0xff] ^
1533             rk[6];
1534         t3 =
1535             Td0[(s3 >> 24)       ] ^
1536             Td1[(s2 >> 16) & 0xff] ^
1537             Td2[(s1 >>  8) & 0xff] ^
1538             Td3[(s0      ) & 0xff] ^
1539             rk[7];
1540
1541         rk += 8;
1542         if (--r == 0)
1543             break;
1544
1545         s0 =
1546             Td0[(t0 >> 24)       ] ^
1547             Td1[(t3 >> 16) & 0xff] ^
1548             Td2[(t2 >>  8) & 0xff] ^
1549             Td3[(t1      ) & 0xff] ^
1550             rk[0];
1551         s1 =
1552             Td0[(t1 >> 24)       ] ^
1553             Td1[(t0 >> 16) & 0xff] ^
1554             Td2[(t3 >>  8) & 0xff] ^
1555             Td3[(t2      ) & 0xff] ^
1556             rk[1];
1557         s2 =
1558             Td0[(t2 >> 24)       ] ^
1559             Td1[(t1 >> 16) & 0xff] ^
1560             Td2[(t0 >>  8) & 0xff] ^
1561             Td3[(t3      ) & 0xff] ^
1562             rk[2];
1563         s3 =
1564             Td0[(t3 >> 24)       ] ^
1565             Td1[(t2 >> 16) & 0xff] ^
1566             Td2[(t1 >>  8) & 0xff] ^
1567             Td3[(t0      ) & 0xff] ^
1568             rk[3];
1569     }
1570 #endif                                  /* ?FULL_UNROLL */
1571         /*
1572          * apply last round and
1573          * map cipher state to byte array block:
1574          */
1575         s0 =
1576                 (Td4[(t0 >> 24)       ] << 24) ^
1577                 (Td4[(t3 >> 16) & 0xff] << 16) ^
1578                 (Td4[(t2 >>  8) & 0xff] <<  8) ^
1579                 (Td4[(t1      ) & 0xff]      ) ^
1580                 rk[0];
1581         PUTU32(pt     , s0);
1582         s1 =
1583                 (Td4[(t1 >> 24)       ] << 24) ^
1584                 (Td4[(t0 >> 16) & 0xff] << 16) ^
1585                 (Td4[(t3 >>  8) & 0xff] <<  8) ^
1586                 (Td4[(t2      ) & 0xff]      ) ^
1587                 rk[1];
1588         PUTU32(pt +  4, s1);
1589         s2 =
1590                 (Td4[(t2 >> 24)       ] << 24) ^
1591                 (Td4[(t1 >> 16) & 0xff] << 16) ^
1592                 (Td4[(t0 >>  8) & 0xff] <<  8) ^
1593                 (Td4[(t3      ) & 0xff]      ) ^
1594                 rk[2];
1595         PUTU32(pt +  8, s2);
1596         s3 =
1597                 (Td4[(t3 >> 24)       ] << 24) ^
1598                 (Td4[(t2 >> 16) & 0xff] << 16) ^
1599                 (Td4[(t1 >>  8) & 0xff] <<  8) ^
1600                 (Td4[(t0      ) & 0xff]      ) ^
1601                 rk[3];
1602         PUTU32(pt + 12, s3);
1603 }
1604
1605 #ifdef INTERMEDIATE_VALUE_KAT
1606
1607 static void
1608 aes_encryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
1609         int rounds)
1610 {
1611         int r;
1612         u32 s0, s1, s2, s3, t0, t1, t2, t3;
1613
1614         /*
1615          * map byte array block to cipher state
1616          * and add initial round key:
1617          */
1618         s0 = GETU32(block     ) ^ rk[0];
1619         s1 = GETU32(block +  4) ^ rk[1];
1620         s2 = GETU32(block +  8) ^ rk[2];
1621         s3 = GETU32(block + 12) ^ rk[3];
1622         rk += 4;
1623
1624         /*
1625          * Nr - 1 full rounds:
1626          */
1627         for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) {
1628                 t0 =
1629                         Te0[(s0 >> 24)       ] ^
1630                         Te1[(s1 >> 16) & 0xff] ^
1631                         Te2[(s2 >>  8) & 0xff] ^
1632                         Te3[(s3      ) & 0xff] ^
1633                         rk[0];
1634                 t1 =
1635                         Te0[(s1 >> 24)       ] ^
1636                         Te1[(s2 >> 16) & 0xff] ^
1637                         Te2[(s3 >>  8) & 0xff] ^
1638                         Te3[(s0      ) & 0xff] ^
1639                         rk[1];
1640                 t2 =
1641                         Te0[(s2 >> 24)       ] ^
1642                         Te1[(s3 >> 16) & 0xff] ^
1643                         Te2[(s0 >>  8) & 0xff] ^
1644                         Te3[(s1      ) & 0xff] ^
1645                         rk[2];
1646                 t3 =
1647                         Te0[(s3 >> 24)       ] ^
1648                         Te1[(s0 >> 16) & 0xff] ^
1649                         Te2[(s1 >>  8) & 0xff] ^
1650                         Te3[(s2      ) & 0xff] ^
1651                         rk[3];
1652                 s0 = t0;
1653                 s1 = t1;
1654                 s2 = t2;
1655                 s3 = t3;
1656                 rk += 4;
1657         }
1658
1659         /*
1660          * apply last round and
1661          * map cipher state to byte array block:
1662          */
1663         if (rounds == Nr) {
1664                 t0 =
1665                         (Te4[(s0 >> 24)       ] << 24) ^
1666                         (Te4[(s1 >> 16) & 0xff] << 16) ^
1667                         (Te4[(s2 >>  8) & 0xff] <<  8) ^
1668                         (Te4[(s3      ) & 0xff]      ) ^
1669                         rk[0];
1670                 t1 =
1671                         (Te4[(s1 >> 24)       ] << 24) ^
1672                         (Te4[(s2 >> 16) & 0xff] << 16) ^
1673                         (Te4[(s3 >>  8) & 0xff] <<  8) ^
1674                         (Te4[(s0      ) & 0xff]      ) ^
1675                         rk[1];
1676                 t2 =
1677                         (Te4[(s2 >> 24)       ] << 24) ^
1678                         (Te4[(s3 >> 16) & 0xff] << 16) ^
1679                         (Te4[(s0 >>  8) & 0xff] <<  8) ^
1680                         (Te4[(s1      ) & 0xff]      ) ^
1681                         rk[2];
1682                 t3 =
1683                         (Te4[(s3 >> 24)       ] << 24) ^
1684                         (Te4[(s0 >> 16) & 0xff] << 16) ^
1685                         (Te4[(s1 >>  8) & 0xff] <<  8) ^
1686                         (Te4[(s2      ) & 0xff]      ) ^
1687                         rk[3];
1688                 s0 = t0;
1689                 s1 = t1;
1690                 s2 = t2;
1691                 s3 = t3;
1692         }
1693
1694         PUTU32(block     , s0);
1695         PUTU32(block +  4, s1);
1696         PUTU32(block +  8, s2);
1697         PUTU32(block + 12, s3);
1698 }
1699
1700 static void
1701 aes_decryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
1702         int rounds)
1703 {
1704         int r;
1705         u32 s0, s1, s2, s3, t0, t1, t2, t3;
1706
1707         /*
1708          * map byte array block to cipher state
1709          * and add initial round key:
1710          */
1711         s0 = GETU32(block     ) ^ rk[0];
1712         s1 = GETU32(block +  4) ^ rk[1];
1713         s2 = GETU32(block +  8) ^ rk[2];
1714         s3 = GETU32(block + 12) ^ rk[3];
1715         rk += 4;
1716
1717         /*
1718          * Nr - 1 full rounds:
1719          */
1720         for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) {
1721                 t0 =
1722                         Td0[(s0 >> 24)       ] ^
1723                         Td1[(s3 >> 16) & 0xff] ^
1724                         Td2[(s2 >>  8) & 0xff] ^
1725                         Td3[(s1      ) & 0xff] ^
1726                         rk[0];
1727                 t1 =
1728                         Td0[(s1 >> 24)       ] ^
1729                         Td1[(s0 >> 16) & 0xff] ^
1730                         Td2[(s3 >>  8) & 0xff] ^
1731                         Td3[(s2      ) & 0xff] ^
1732                         rk[1];
1733                 t2 =
1734                         Td0[(s2 >> 24)       ] ^
1735                         Td1[(s1 >> 16) & 0xff] ^
1736                         Td2[(s0 >>  8) & 0xff] ^
1737                         Td3[(s3      ) & 0xff] ^
1738                         rk[2];
1739                 t3 =
1740                         Td0[(s3 >> 24)       ] ^
1741                         Td1[(s2 >> 16) & 0xff] ^
1742                         Td2[(s1 >>  8) & 0xff] ^
1743                         Td3[(s0      ) & 0xff] ^
1744                         rk[3];
1745
1746                 s0 = t0;
1747                 s1 = t1;
1748                 s2 = t2;
1749                 s3 = t3;
1750                 rk += 4;
1751         }
1752
1753         /*
1754          * complete the last round and
1755          * map cipher state to byte array block:
1756          */
1757         t0 =
1758                 (Td4[(s0 >> 24)       ] << 24) ^
1759                 (Td4[(s3 >> 16) & 0xff] << 16) ^
1760                 (Td4[(s2 >>  8) & 0xff] <<  8) ^
1761                 (Td4[(s1      ) & 0xff]      );
1762         t1 =
1763                 (Td4[(s1 >> 24)       ] << 24) ^
1764                 (Td4[(s0 >> 16) & 0xff] << 16) ^
1765                 (Td4[(s3 >>  8) & 0xff] <<  8) ^
1766                 (Td4[(s2      ) & 0xff]      );
1767         t2 =
1768                 (Td4[(s2 >> 24)       ] << 24) ^
1769                 (Td4[(s1 >> 16) & 0xff] << 16) ^
1770                 (Td4[(s0 >>  8) & 0xff] <<  8) ^
1771                 (Td4[(s3      ) & 0xff]      );
1772         t3 =
1773                 (Td4[(s3 >> 24)       ] << 24) ^
1774                 (Td4[(s2 >> 16) & 0xff] << 16) ^
1775                 (Td4[(s1 >>  8) & 0xff] <<  8) ^
1776                 (Td4[(s0      ) & 0xff]      );
1777
1778         if (rounds == Nr) {
1779                 t0 ^= rk[0];
1780                 t1 ^= rk[1];
1781                 t2 ^= rk[2];
1782                 t3 ^= rk[3];
1783         }
1784
1785         PUTU32(block     , t0);
1786         PUTU32(block +  4, t1);
1787         PUTU32(block +  8, t2);
1788         PUTU32(block + 12, t3);
1789 }
1790
1791 #endif                  /* INTERMEDIATE_VALUE_KAT */