]> git.lizzy.rs Git - plan9front.git/blob - sys/src/libsec/port/aes.c
libsec: add diffie-hellman functions
[plan9front.git] / sys / src / libsec / port / aes.c
1 /*
2  * this code is derived from the following source,
3  * and modified to fit into the plan 9 libsec interface.
4  * most of the changes are confined to the top section,
5  * with the exception of converting Te4 and Td4 into u8 rather than u32 arrays.
6  *
7  * rijndael-alg-fst.c
8  *
9  * @version 3.0 (December 2000)
10  *
11  * Optimised ANSI C code for the Rijndael cipher (now AES)
12  *
13  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
14  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
15  * @author Paulo Barreto <paulo.barreto@terra.com.br>
16  *
17  * This code is hereby placed in the public domain.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 #include <u.h>
32 #include <libc.h>
33 #include <mp.h>
34 #include <libsec.h>
35
36 typedef uchar   u8;
37 typedef ulong   u32;
38
39 #define FULL_UNROLL
40 #define const
41
42 static const u32 Td0[256];
43 static const u32 Td1[256];
44 static const u32 Td2[256];
45 static const u32 Td3[256];
46 static const u8  Te4[256];
47 static uchar basekey[3][16] = {
48         {
49         0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
50         0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
51         },
52         {
53         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
54         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
55         },
56         {
57         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
58         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
59         },
60 };
61
62 int aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
63                 int keyBits);
64 static int aes_setupDec(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
65                 int keyBits);
66 static int aes_setup(ulong erk[/*4*(Nr + 1)*/], ulong drk[/*4*(Nr + 1)*/],
67                 const uchar cipherKey[], int keyBits);
68
69 void    aes_encrypt(const ulong rk[], int Nr, const uchar pt[16], uchar ct[16]);
70 void    aes_decrypt(const ulong rk[], int Nr, const uchar ct[16], uchar pt[16]);
71
72 void
73 setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
74 {
75         memset(s, 0, sizeof(*s));
76         if(keybytes > AESmaxkey)
77                 keybytes = AESmaxkey;
78         memmove(s->key, key, keybytes);
79         s->keybytes = keybytes;
80         s->rounds = aes_setup(s->ekey, s->dkey, s->key, keybytes * 8);
81         if(ivec != nil)
82                 memmove(s->ivec, ivec, AESbsize);
83         if(keybytes==16 || keybytes==24 || keybytes==32)
84                 s->setup = 0xcafebabe;
85         /* else aes_setup was invalid */
86 }
87
88 /*
89  * AES-XCBC-MAC-96 message authentication, per rfc3566.
90  */
91
92 void
93 setupAESXCBCstate(AESstate *s)          /* was setupmac96 */
94 {
95         int i, j;
96         uint q[16 / sizeof(uint)];
97         uchar *p;
98
99         assert(s->keybytes == 16);
100         for(i = 0; i < 3; i++)
101                 aes_encrypt(s->ekey, s->rounds, basekey[i],
102                         s->mackey + AESbsize*i);
103
104         p = s->mackey;
105         memset(q, 0, AESbsize);
106
107         /*
108          * put the in the right endian.  once figured, probably better
109          * to use some fcall macros.
110          * keys for encryption in local endianness for the algorithm...
111          * only key1 is used for encryption;
112          * BUG!!: I think this is what I got wrong.
113          */
114         for(i = 0; i < 16 / sizeof(uint); i ++){
115                 for(j = 0; j < sizeof(uint); j++)
116                         q[i] |= p[sizeof(uint)-j-1] << 8*j;
117                 p += sizeof(uint);
118         }
119         memmove(s->mackey, q, 16);
120 }
121
122 /*
123  * Not dealing with > 128-bit keys, not dealing with strange corner cases like
124  * empty message.  Should be fine for AES-XCBC-MAC-96.
125  */
126 uchar*
127 aesXCBCmac(uchar *p, int len, AESstate *s)
128 {
129         uchar *p2, *ip, *eip, *mackey;
130         uchar q[AESbsize];
131
132         assert(s->keybytes == 16);      /* more complicated for bigger */
133         memset(s->ivec, 0, AESbsize);   /* E[0] is 0+ */
134
135         for(; len > AESbsize; len -= AESbsize){
136                 memmove(q, p, AESbsize);
137                 p2 = q;
138                 ip = s->ivec;
139                 for(eip = ip + AESbsize; ip < eip; )
140                         *p2++ ^= *ip++;
141                 aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
142                 p += AESbsize;
143         }
144         /* the last one */
145
146         memmove(q, p, len);
147         p2 = q+len;
148         if(len == AESbsize)
149                 mackey = s->mackey + AESbsize;  /* k2 */
150         else{
151                 mackey = s->mackey+2*AESbsize;  /* k3 */
152                 *p2++ = 1 << 7;                 /* padding */
153                 len = AESbsize - len - 1;
154                 memset(p2, 0, len);
155         }
156
157         ip = s->ivec;
158         p2 = q;
159         for(eip = ip + AESbsize; ip < eip; )
160                 *p2++ ^= *ip++ ^ *mackey++;
161         aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
162         return s->ivec;                 /* only the 12 bytes leftmost */
163 }
164
165 /*
166  * Define by analogy with desCBCencrypt;  AES modes are not standardized yet.
167  * Because of the way that non-multiple-of-16 buffers are handled,
168  * the decryptor must be fed buffers of the same size as the encryptor.
169  */
170 void
171 aesCBCencrypt(uchar *p, int len, AESstate *s)
172 {
173         uchar *p2, *ip, *eip;
174         uchar q[AESbsize];
175
176         for(; len >= AESbsize; len -= AESbsize){
177                 p2 = p;
178                 ip = s->ivec;
179                 for(eip = ip+AESbsize; ip < eip; )
180                         *p2++ ^= *ip++;
181                 aes_encrypt(s->ekey, s->rounds, p, q);
182                 memmove(s->ivec, q, AESbsize);
183                 memmove(p, q, AESbsize);
184                 p += AESbsize;
185         }
186
187         if(len > 0){
188                 ip = s->ivec;
189                 aes_encrypt(s->ekey, s->rounds, ip, q);
190                 memmove(s->ivec, q, AESbsize);
191                 for(eip = ip+len; ip < eip; )
192                         *p++ ^= *ip++;
193         }
194 }
195
196 void
197 aesCBCdecrypt(uchar *p, int len, AESstate *s)
198 {
199         uchar *ip, *eip, *tp;
200         uchar tmp[AESbsize], q[AESbsize];
201
202         for(; len >= AESbsize; len -= AESbsize){
203                 memmove(tmp, p, AESbsize);
204                 aes_decrypt(s->dkey, s->rounds, p, q);
205                 memmove(p, q, AESbsize);
206                 tp = tmp;
207                 ip = s->ivec;
208                 for(eip = ip+AESbsize; ip < eip; ){
209                         *p++ ^= *ip;
210                         *ip++ = *tp++;
211                 }
212         }
213
214         if(len > 0){
215                 ip = s->ivec;
216                 aes_encrypt(s->ekey, s->rounds, ip, q);
217                 memmove(s->ivec, q, AESbsize);
218                 for(eip = ip+len; ip < eip; )
219                         *p++ ^= *ip++;
220         }
221 }
222
223 /*
224  * AES-CTR mode, per rfc3686.
225  * CTRs could be precalculated for efficiency
226  * and there would also be less back and forth mp
227  */
228
229 static void
230 incrementCTR(uchar *p, uint ctrsz)
231 {
232         int len;
233         uchar *ctr;
234         mpint *mpctr, *mpctrsz;
235
236         ctr = p + AESbsize - ctrsz;
237         mpctr = betomp(ctr, ctrsz, nil);
238         mpctrsz = itomp(1 << (ctrsz*8), nil);
239         mpadd(mpctr, mpone, mpctr);
240         mpmod(mpctr, mpctrsz, mpctr);
241         len = mptobe(mpctr, ctr, ctrsz, nil);
242         assert(len == ctrsz);
243         mpfree(mpctrsz);
244         mpfree(mpctr);
245 }
246
247 void
248 aesCTRencrypt(uchar *p, int len, AESstate *s)
249 {
250         uchar q[AESbsize];
251         uchar *ip, *eip, *ctr;
252
253         ctr = s->ivec;
254         for(; len >= AESbsize; len -= AESbsize){
255                 ip = q;
256                 aes_encrypt(s->ekey, s->rounds, ctr, q);
257                 for(eip = p + AESbsize; p < eip; )
258                         *p++ ^= *ip++;
259                 incrementCTR(ctr, s->ctrsz);
260         }
261
262         if(len > 0){
263                 ip = q;
264                 aes_encrypt(s->ekey, s->rounds, ctr, q);
265                 for(eip = p + len; p < eip; )
266                         *p++ ^= *ip++;
267                 incrementCTR(ctr, s->ctrsz);
268         }
269 }
270
271 void
272 aesCTRdecrypt(uchar *p, int len, AESstate *s)
273 {
274         aesCTRencrypt(p, len, s);
275 }
276
277
278 /* taken from sha1; TODO: verify suitability (esp. byte order) for aes */
279 /*
280  *      encodes input (ulong) into output (uchar). Assumes len is
281  *      a multiple of 4.
282  */
283 static void
284 encode(uchar *output, ulong *input, ulong len)
285 {
286         ulong x;
287         uchar *e;
288
289         for(e = output + len; output < e;) {
290                 x = *input++;
291                 *output++ = x >> 24;
292                 *output++ = x >> 16;
293                 *output++ = x >> 8;
294                 *output++ = x;
295         }
296 }
297
298 /* TODO: verify use of aes_encrypt here */
299 AEShstate*
300 aes(uchar *p, ulong len, uchar *digest, AEShstate *s)
301 {
302         uchar buf[128];
303         ulong x[16];
304         int i;
305         uchar *e;
306
307         if(s == nil){
308                 s = malloc(sizeof(*s));
309                 if(s == nil)
310                         return nil;
311                 memset(s, 0, sizeof(*s));
312                 s->malloced = 1;
313         }
314
315         if(s->seeded == 0){
316                 /* seed the state, these constants would look nicer big-endian */
317                 s->state[0] = 0x67452301;
318                 s->state[1] = 0xefcdab89;
319                 s->state[2] = 0x98badcfe;
320                 s->state[3] = 0x10325476;
321                 /* in sha1 (20-byte digest), but not md5 (16 bytes)*/
322                 s->state[4] = 0xc3d2e1f0;
323                 s->seeded = 1;
324         }
325
326         /* fill out the partial 64 byte block from previous calls */
327         if(s->blen){
328                 i = 64 - s->blen;
329                 if(len < i)
330                         i = len;
331                 memmove(s->buf + s->blen, p, i);
332                 len -= i;
333                 s->blen += i;
334                 p += i;
335                 if(s->blen == 64){
336                         /* encrypt s->buf into s->state */
337                         // _sha1block(s->buf, s->blen, s->state);
338                         aes_encrypt((ulong *)s->buf, 1, s->buf, (uchar *)s->state);
339                         s->len += s->blen;
340                         s->blen = 0;
341                 }
342         }
343
344         /* do 64 byte blocks */
345         i = len & ~0x3f;
346         if(i){
347                 /* encrypt p into s->state */
348                 // _sha1block(p, i, s->state);
349                 aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
350                 s->len += i;
351                 len -= i;
352                 p += i;
353         }
354
355         /* save the left overs if not last call */
356         if(digest == 0){
357                 if(len){
358                         memmove(s->buf, p, len);
359                         s->blen += len;
360                 }
361                 return s;
362         }
363
364         /*
365          *  this is the last time through, pad what's left with 0x80,
366          *  0's, and the input count to create a multiple of 64 bytes
367          */
368         if(s->blen){
369                 p = s->buf;
370                 len = s->blen;
371         } else {
372                 memmove(buf, p, len);
373                 p = buf;
374         }
375         s->len += len;
376         e = p + len;
377         if(len < 56)
378                 i = 56 - len;
379         else
380                 i = 120 - len;
381         memset(e, 0, i);
382         *e = 0x80;
383         len += i;
384
385         /* append the count */
386         x[0] = s->len>>29;              /* byte-order dependent */
387         x[1] = s->len<<3;
388         encode(p+len, x, 8);
389
390         /* digest the last part */
391         /* encrypt p into s->state */
392         // _sha1block(p, len+8, s->state);
393         aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
394         s->len += len+8;                /* sha1: +8 */
395
396         /* return result and free state */
397         encode((uchar *)digest, (ulong *)s->state, AESdlen);
398         if(s->malloced == 1)
399                 free(s);
400         return nil;
401 }
402
403 DigestState*
404 hmac_aes(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest,
405         DigestState *s)
406 {
407         return hmac_x(p, len, key, klen, digest, s, aes, AESdlen);
408 }
409
410
411
412 /*
413  * this function has been changed for plan 9.
414  * Expand the cipher key into the encryption and decryption key schedules.
415  *
416  * @return      the number of rounds for the given cipher key size.
417  */
418 static int
419 aes_setup(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */],
420         const uchar cipherKey[], int keyBits)
421 {
422         int Nr, i;
423
424         /* expand the cipher key: */
425         Nr = aes_setupEnc(erk, cipherKey, keyBits);
426
427         /*
428          * invert the order of the round keys and apply the inverse MixColumn
429          * transform to all round keys but the first and the last
430          */
431         drk[0       ] = erk[4*Nr    ];
432         drk[1       ] = erk[4*Nr + 1];
433         drk[2       ] = erk[4*Nr + 2];
434         drk[3       ] = erk[4*Nr + 3];
435         drk[4*Nr    ] = erk[0       ];
436         drk[4*Nr + 1] = erk[1       ];
437         drk[4*Nr + 2] = erk[2       ];
438         drk[4*Nr + 3] = erk[3       ];
439         erk += 4 * Nr;
440         for (i = 1; i < Nr; i++) {
441                 drk += 4;
442                 erk -= 4;
443                 drk[0] =
444                     Td0[Te4[(erk[0] >> 24)       ]] ^
445                     Td1[Te4[(erk[0] >> 16) & 0xff]] ^
446                     Td2[Te4[(erk[0] >>  8) & 0xff]] ^
447                     Td3[Te4[(erk[0]      ) & 0xff]];
448                 drk[1] =
449                     Td0[Te4[(erk[1] >> 24)       ]] ^
450                     Td1[Te4[(erk[1] >> 16) & 0xff]] ^
451                     Td2[Te4[(erk[1] >>  8) & 0xff]] ^
452                     Td3[Te4[(erk[1]      ) & 0xff]];
453                 drk[2] =
454                     Td0[Te4[(erk[2] >> 24)       ]] ^
455                     Td1[Te4[(erk[2] >> 16) & 0xff]] ^
456                     Td2[Te4[(erk[2] >>  8) & 0xff]] ^
457                     Td3[Te4[(erk[2]      ) & 0xff]];
458                 drk[3] =
459                     Td0[Te4[(erk[3] >> 24)       ]] ^
460                     Td1[Te4[(erk[3] >> 16) & 0xff]] ^
461                     Td2[Te4[(erk[3] >>  8) & 0xff]] ^
462                     Td3[Te4[(erk[3]      ) & 0xff]];
463         }
464         return Nr;
465 }
466
467
468 /*
469 Te0[x] = S [x].[02, 01, 01, 03];
470 Te1[x] = S [x].[03, 02, 01, 01];
471 Te2[x] = S [x].[01, 03, 02, 01];
472 Te3[x] = S [x].[01, 01, 03, 02];
473 Te4[x] = S [x]
474
475 Td0[x] = Si[x].[0e, 09, 0d, 0b];
476 Td1[x] = Si[x].[0b, 0e, 09, 0d];
477 Td2[x] = Si[x].[0d, 0b, 0e, 09];
478 Td3[x] = Si[x].[09, 0d, 0b, 0e];
479 Td4[x] = Si[x]
480 */
481
/*
 * Te0: 256-entry encryption lookup table, Te0[x] = S[x].[02, 01, 01, 03].
 * Reading each word from its most significant byte down, the bytes are
 * 02*S[x], S[x], S[x], 03*S[x] (e.g. entry 0 is c6 63 63 a5 for S[0] = 63).
 * Not referenced in this part of the file; presumably used by the
 * encryption round code further down.
 */
482 static const u32 Te0[256] = {
483     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
484     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
485     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
486     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
487     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
488     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
489     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
490     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
491     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
492     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
493     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
494     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
495     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
496     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
497     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
498     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
499     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
500     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
501     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
502     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
503     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
504     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
505     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
506     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
507     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
508     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
509     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
510     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
511     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
512     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
513     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
514     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
515     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
516     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
517     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
518     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
519     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
520     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
521     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
522     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
523     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
524     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
525     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
526     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
527     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
528     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
529     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
530     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
531     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
532     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
533     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
534     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
535     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
536     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
537     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
538     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
539     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
540     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
541     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
542     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
543     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
544     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
545     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
546     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
547 };
/*
 * Te1: 256-entry encryption lookup table, Te1[x] = S[x].[03, 02, 01, 01].
 * Each entry is the corresponding Te0 entry rotated right by one byte
 * (entry 0 is a5 c6 63 63).
 * Not referenced in this part of the file; presumably used by the
 * encryption round code further down.
 */
548 static const u32 Te1[256] = {
549     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
550     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
551     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
552     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
553     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
554     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
555     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
556     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
557     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
558     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
559     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
560     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
561     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
562     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
563     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
564     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
565     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
566     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
567     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
568     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
569     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
570     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
571     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
572     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
573     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
574     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
575     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
576     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
577     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
578     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
579     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
580     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
581     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
582     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
583     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
584     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
585     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
586     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
587     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
588     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
589     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
590     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
591     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
592     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
593     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
594     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
595     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
596     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
597     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
598     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
599     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
600     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
601     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
602     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
603     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
604     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
605     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
606     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
607     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
608     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
609     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
610     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
611     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
612     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
613 };
/*
 * Te2: 256-entry encryption lookup table, Te2[x] = S[x].[01, 03, 02, 01].
 * Each entry is the corresponding Te0 entry rotated right by two bytes
 * (entry 0 is 63 a5 c6 63).
 * Not referenced in this part of the file; presumably used by the
 * encryption round code further down.
 */
614 static const u32 Te2[256] = {
615     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
616     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
617     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
618     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
619     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
620     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
621     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
622     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
623     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
624     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
625     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
626     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
627     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
628     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
629     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
630     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
631     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
632     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
633     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
634     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
635     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
636     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
637     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
638     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
639     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
640     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
641     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
642     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
643     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
644     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
645     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
646     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
647     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
648     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
649     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
650     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
651     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
652     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
653     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
654     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
655     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
656     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
657     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
658     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
659     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
660     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
661     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
662     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
663     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
664     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
665     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
666     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
667     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
668     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
669     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
670     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
671     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
672     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
673     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
674     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
675     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
676     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
677     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
678     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
679 };
/*
 * Te3: 256-entry encryption lookup table, Te3[x] = S[x].[01, 01, 03, 02].
 * Each entry is the corresponding Te0 entry rotated right by three bytes
 * (entry 0 is 63 63 a5 c6).
 * Not referenced in this part of the file; presumably used by the
 * encryption round code further down.
 */
680 static const u32 Te3[256] = {
681
682     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
683     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
684     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
685     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
686     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
687     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
688     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
689     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
690     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
691     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
692     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
693     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
694     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
695     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
696     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
697     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
698     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
699     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
700     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
701     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
702     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
703     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
704     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
705     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
706     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
707     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
708     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
709     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
710     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
711     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
712     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
713     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
714     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
715     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
716     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
717     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
718     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
719     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
720     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
721     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
722     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
723     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
724     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
725     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
726     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
727     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
728     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
729     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
730     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
731     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
732     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
733     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
734     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
735     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
736     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
737     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
738     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
739     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
740     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
741     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
742     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
743     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
744     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
745     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
746 };
747 static const u8 Te4[256] = {
748     0x63U, 0x7cU, 0x77U, 0x7bU,
749     0xf2U, 0x6bU, 0x6fU, 0xc5U,
750     0x30U, 0x01U, 0x67U, 0x2bU,
751     0xfeU, 0xd7U, 0xabU, 0x76U,
752     0xcaU, 0x82U, 0xc9U, 0x7dU,
753     0xfaU, 0x59U, 0x47U, 0xf0U,
754     0xadU, 0xd4U, 0xa2U, 0xafU,
755     0x9cU, 0xa4U, 0x72U, 0xc0U,
756     0xb7U, 0xfdU, 0x93U, 0x26U,
757     0x36U, 0x3fU, 0xf7U, 0xccU,
758     0x34U, 0xa5U, 0xe5U, 0xf1U,
759     0x71U, 0xd8U, 0x31U, 0x15U,
760     0x04U, 0xc7U, 0x23U, 0xc3U,
761     0x18U, 0x96U, 0x05U, 0x9aU,
762     0x07U, 0x12U, 0x80U, 0xe2U,
763     0xebU, 0x27U, 0xb2U, 0x75U,
764     0x09U, 0x83U, 0x2cU, 0x1aU,
765     0x1bU, 0x6eU, 0x5aU, 0xa0U,
766     0x52U, 0x3bU, 0xd6U, 0xb3U,
767     0x29U, 0xe3U, 0x2fU, 0x84U,
768     0x53U, 0xd1U, 0x00U, 0xedU,
769     0x20U, 0xfcU, 0xb1U, 0x5bU,
770     0x6aU, 0xcbU, 0xbeU, 0x39U,
771     0x4aU, 0x4cU, 0x58U, 0xcfU,
772     0xd0U, 0xefU, 0xaaU, 0xfbU,
773     0x43U, 0x4dU, 0x33U, 0x85U,
774     0x45U, 0xf9U, 0x02U, 0x7fU,
775     0x50U, 0x3cU, 0x9fU, 0xa8U,
776     0x51U, 0xa3U, 0x40U, 0x8fU,
777     0x92U, 0x9dU, 0x38U, 0xf5U,
778     0xbcU, 0xb6U, 0xdaU, 0x21U,
779     0x10U, 0xffU, 0xf3U, 0xd2U,
780     0xcdU, 0x0cU, 0x13U, 0xecU,
781     0x5fU, 0x97U, 0x44U, 0x17U,
782     0xc4U, 0xa7U, 0x7eU, 0x3dU,
783     0x64U, 0x5dU, 0x19U, 0x73U,
784     0x60U, 0x81U, 0x4fU, 0xdcU,
785     0x22U, 0x2aU, 0x90U, 0x88U,
786     0x46U, 0xeeU, 0xb8U, 0x14U,
787     0xdeU, 0x5eU, 0x0bU, 0xdbU,
788     0xe0U, 0x32U, 0x3aU, 0x0aU,
789     0x49U, 0x06U, 0x24U, 0x5cU,
790     0xc2U, 0xd3U, 0xacU, 0x62U,
791     0x91U, 0x95U, 0xe4U, 0x79U,
792     0xe7U, 0xc8U, 0x37U, 0x6dU,
793     0x8dU, 0xd5U, 0x4eU, 0xa9U,
794     0x6cU, 0x56U, 0xf4U, 0xeaU,
795     0x65U, 0x7aU, 0xaeU, 0x08U,
796     0xbaU, 0x78U, 0x25U, 0x2eU,
797     0x1cU, 0xa6U, 0xb4U, 0xc6U,
798     0xe8U, 0xddU, 0x74U, 0x1fU,
799     0x4bU, 0xbdU, 0x8bU, 0x8aU,
800     0x70U, 0x3eU, 0xb5U, 0x66U,
801     0x48U, 0x03U, 0xf6U, 0x0eU,
802     0x61U, 0x35U, 0x57U, 0xb9U,
803     0x86U, 0xc1U, 0x1dU, 0x9eU,
804     0xe1U, 0xf8U, 0x98U, 0x11U,
805     0x69U, 0xd9U, 0x8eU, 0x94U,
806     0x9bU, 0x1eU, 0x87U, 0xe9U,
807     0xceU, 0x55U, 0x28U, 0xdfU,
808     0x8cU, 0xa1U, 0x89U, 0x0dU,
809     0xbfU, 0xe6U, 0x42U, 0x68U,
810     0x41U, 0x99U, 0x2dU, 0x0fU,
811     0xb0U, 0x54U, 0xbbU, 0x16U,
812 };
813 static const u32 Td0[256] = {
814     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
815     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
816     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
817     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
818     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
819     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
820     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
821     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
822     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
823     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
824     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
825     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
826     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
827     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
828     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
829     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
830     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
831     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
832     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
833     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
834     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
835     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
836     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
837     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
838     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
839     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
840     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
841     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
842     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
843     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
844     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
845     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
846     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
847     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
848     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
849     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
850     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
851     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
852     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
853     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
854     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
855     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
856     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
857     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
858     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
859     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
860     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
861     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
862     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
863     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
864     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
865     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
866     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
867     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
868     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
869     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
870     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
871     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
872     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
873     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
874     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
875     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
876     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
877     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
878 };
879 static const u32 Td1[256] = {
880     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
881     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
882     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
883     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
884     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
885     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
886     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
887     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
888     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
889     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
890     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
891     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
892     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
893     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
894     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
895     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
896     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
897     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
898     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
899     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
900     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
901     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
902     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
903     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
904     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
905     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
906     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
907     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
908     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
909     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
910     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
911     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
912     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
913     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
914     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
915     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
916     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
917     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
918     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
919     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
920     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
921     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
922     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
923     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
924     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
925     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
926     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
927     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
928     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
929     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
930     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
931     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
932     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
933     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
934     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
935     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
936     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
937     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
938     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
939     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
940     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
941     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
942     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
943     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
944 };
945 static const u32 Td2[256] = {
946     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
947     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
948     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
949     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
950     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
951     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
952     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
953     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
954     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
955     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
956     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
957     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
958     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
959     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
960     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
961     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
962     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
963     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
964     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
965     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
966
967     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
968     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
969     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
970     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
971     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
972     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
973     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
974     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
975     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
976     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
977     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
978     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
979     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
980     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
981     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
982     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
983     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
984     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
985     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
986     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
987     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
988     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
989     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
990     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
991     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
992     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
993     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
994     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
995     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
996     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
997     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
998     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
999     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
1000     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
1001     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
1002     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
1003     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
1004     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
1005     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
1006     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
1007     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
1008     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
1009     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
1010     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
1011 };
1012 static const u32 Td3[256] = {
1013     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
1014     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
1015     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
1016     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
1017     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
1018     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
1019     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
1020     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
1021     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
1022     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
1023     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
1024     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
1025     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
1026     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
1027     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
1028     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
1029     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
1030     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
1031     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
1032     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
1033     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
1034     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
1035     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
1036     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
1037     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
1038     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
1039     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
1040     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
1041     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
1042     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
1043     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
1044     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
1045     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
1046     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
1047     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
1048     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
1049     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
1050     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
1051     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
1052     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
1053     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
1054     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
1055     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
1056     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
1057     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
1058     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
1059     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
1060     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
1061     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
1062     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
1063     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
1064     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
1065     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
1066     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
1067     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
1068     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
1069     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
1070     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
1071     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
1072     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
1073     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
1074     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
1075     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
1076     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
1077 };
1078 static const u8 Td4[256] = {
1079     0x52U, 0x09U, 0x6aU, 0xd5U,
1080     0x30U, 0x36U, 0xa5U, 0x38U,
1081     0xbfU, 0x40U, 0xa3U, 0x9eU,
1082     0x81U, 0xf3U, 0xd7U, 0xfbU,
1083     0x7cU, 0xe3U, 0x39U, 0x82U,
1084     0x9bU, 0x2fU, 0xffU, 0x87U,
1085     0x34U, 0x8eU, 0x43U, 0x44U,
1086     0xc4U, 0xdeU, 0xe9U, 0xcbU,
1087     0x54U, 0x7bU, 0x94U, 0x32U,
1088     0xa6U, 0xc2U, 0x23U, 0x3dU,
1089     0xeeU, 0x4cU, 0x95U, 0x0bU,
1090     0x42U, 0xfaU, 0xc3U, 0x4eU,
1091     0x08U, 0x2eU, 0xa1U, 0x66U,
1092     0x28U, 0xd9U, 0x24U, 0xb2U,
1093     0x76U, 0x5bU, 0xa2U, 0x49U,
1094     0x6dU, 0x8bU, 0xd1U, 0x25U,
1095     0x72U, 0xf8U, 0xf6U, 0x64U,
1096     0x86U, 0x68U, 0x98U, 0x16U,
1097     0xd4U, 0xa4U, 0x5cU, 0xccU,
1098     0x5dU, 0x65U, 0xb6U, 0x92U,
1099     0x6cU, 0x70U, 0x48U, 0x50U,
1100     0xfdU, 0xedU, 0xb9U, 0xdaU,
1101     0x5eU, 0x15U, 0x46U, 0x57U,
1102     0xa7U, 0x8dU, 0x9dU, 0x84U,
1103     0x90U, 0xd8U, 0xabU, 0x00U,
1104     0x8cU, 0xbcU, 0xd3U, 0x0aU,
1105     0xf7U, 0xe4U, 0x58U, 0x05U,
1106     0xb8U, 0xb3U, 0x45U, 0x06U,
1107     0xd0U, 0x2cU, 0x1eU, 0x8fU,
1108     0xcaU, 0x3fU, 0x0fU, 0x02U,
1109     0xc1U, 0xafU, 0xbdU, 0x03U,
1110     0x01U, 0x13U, 0x8aU, 0x6bU,
1111     0x3aU, 0x91U, 0x11U, 0x41U,
1112     0x4fU, 0x67U, 0xdcU, 0xeaU,
1113     0x97U, 0xf2U, 0xcfU, 0xceU,
1114     0xf0U, 0xb4U, 0xe6U, 0x73U,
1115     0x96U, 0xacU, 0x74U, 0x22U,
1116     0xe7U, 0xadU, 0x35U, 0x85U,
1117     0xe2U, 0xf9U, 0x37U, 0xe8U,
1118     0x1cU, 0x75U, 0xdfU, 0x6eU,
1119     0x47U, 0xf1U, 0x1aU, 0x71U,
1120     0x1dU, 0x29U, 0xc5U, 0x89U,
1121     0x6fU, 0xb7U, 0x62U, 0x0eU,
1122     0xaaU, 0x18U, 0xbeU, 0x1bU,
1123     0xfcU, 0x56U, 0x3eU, 0x4bU,
1124     0xc6U, 0xd2U, 0x79U, 0x20U,
1125     0x9aU, 0xdbU, 0xc0U, 0xfeU,
1126     0x78U, 0xcdU, 0x5aU, 0xf4U,
1127     0x1fU, 0xddU, 0xa8U, 0x33U,
1128     0x88U, 0x07U, 0xc7U, 0x31U,
1129     0xb1U, 0x12U, 0x10U, 0x59U,
1130     0x27U, 0x80U, 0xecU, 0x5fU,
1131     0x60U, 0x51U, 0x7fU, 0xa9U,
1132     0x19U, 0xb5U, 0x4aU, 0x0dU,
1133     0x2dU, 0xe5U, 0x7aU, 0x9fU,
1134     0x93U, 0xc9U, 0x9cU, 0xefU,
1135     0xa0U, 0xe0U, 0x3bU, 0x4dU,
1136     0xaeU, 0x2aU, 0xf5U, 0xb0U,
1137     0xc8U, 0xebU, 0xbbU, 0x3cU,
1138     0x83U, 0x53U, 0x99U, 0x61U,
1139     0x17U, 0x2bU, 0x04U, 0x7eU,
1140     0xbaU, 0x77U, 0xd6U, 0x26U,
1141     0xe1U, 0x69U, 0x14U, 0x63U,
1142     0x55U, 0x21U, 0x0cU, 0x7dU,
1143 };
1144 static const u32 rcon[] = {
1145         0x01000000, 0x02000000, 0x04000000, 0x08000000,
1146         0x10000000, 0x20000000, 0x40000000, 0x80000000,
1147         0x1B000000, 0x36000000,
1148         /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1149 };
1150
/*
 * GETU32 assembles a 32-bit word from four bytes at pt, most
 * significant byte first.  PUTU32 stores the low 32 bits of st into
 * four bytes at ct in the same order.  Both evaluate their arguments
 * more than once, so pass expressions without side effects.
 */
#define GETU32(pt) \
        (((u32)(pt)[0]<<24) ^ \
         ((u32)(pt)[1]<<16) ^ \
         ((u32)(pt)[2]<< 8) ^ \
         ((u32)(pt)[3]))
#define PUTU32(ct, st) { \
        (ct)[0] = (u8)((st)>>24); \
        (ct)[1] = (u8)((st)>>16); \
        (ct)[2] = (u8)((st)>> 8); \
        (ct)[3] = (u8)(st); }
1155
1156 /*
1157  * Expand the cipher key into the encryption key schedule.
1158  *
1159  * @return      the number of rounds for the given cipher key size.
1160  */
1161 int
1162 aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
1163 {
1164         int i = 0;
1165         u32 temp;
1166
1167         rk[0] = GETU32(cipherKey     );
1168         rk[1] = GETU32(cipherKey +  4);
1169         rk[2] = GETU32(cipherKey +  8);
1170         rk[3] = GETU32(cipherKey + 12);
1171         if (keyBits == 128) {
1172                 for (;;) {
1173                         temp  = rk[3];
1174                         rk[4] = rk[0] ^
1175                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1176                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1177                                 (Te4[(temp      ) & 0xff] <<  8) ^
1178                                 (Te4[(temp >> 24)       ]      ) ^
1179                                 rcon[i];
1180                         rk[5] = rk[1] ^ rk[4];
1181                         rk[6] = rk[2] ^ rk[5];
1182                         rk[7] = rk[3] ^ rk[6];
1183                         if (++i == 10) {
1184                                 return 10;
1185                         }
1186                         rk += 4;
1187                 }
1188         }
1189         rk[4] = GETU32(cipherKey + 16);
1190         rk[5] = GETU32(cipherKey + 20);
1191         if (keyBits == 192) {
1192                 for (;;) {
1193                         temp = rk[ 5];
1194                         rk[ 6] = rk[ 0] ^
1195                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1196                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1197                                 (Te4[(temp      ) & 0xff] <<  8) ^
1198                                 (Te4[(temp >> 24)       ]      ) ^
1199                                 rcon[i];
1200                         rk[ 7] = rk[ 1] ^ rk[ 6];
1201                         rk[ 8] = rk[ 2] ^ rk[ 7];
1202                         rk[ 9] = rk[ 3] ^ rk[ 8];
1203                         if (++i == 8) {
1204                                 return 12;
1205                         }
1206                         rk[10] = rk[ 4] ^ rk[ 9];
1207                         rk[11] = rk[ 5] ^ rk[10];
1208                         rk += 6;
1209                 }
1210         }
1211         rk[6] = GETU32(cipherKey + 24);
1212         rk[7] = GETU32(cipherKey + 28);
1213         if (keyBits == 256) {
1214                 for (;;) {
1215                         temp = rk[ 7];
1216                         rk[ 8] = rk[ 0] ^
1217                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1218                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1219                                 (Te4[(temp      ) & 0xff] <<  8) ^
1220                                 (Te4[(temp >> 24)       ]      ) ^
1221                                 rcon[i];
1222                         rk[ 9] = rk[ 1] ^ rk[ 8];
1223                         rk[10] = rk[ 2] ^ rk[ 9];
1224                         rk[11] = rk[ 3] ^ rk[10];
1225                         if (++i == 7) {
1226                                 return 14;
1227                         }
1228                         temp = rk[11];
1229                         rk[12] = rk[ 4] ^
1230                                 (Te4[(temp >> 24)       ] << 24) ^
1231                                 (Te4[(temp >> 16) & 0xff] << 16) ^
1232                                 (Te4[(temp >>  8) & 0xff] <<  8) ^
1233                                 (Te4[(temp      ) & 0xff]      );
1234                         rk[13] = rk[ 5] ^ rk[12];
1235                         rk[14] = rk[ 6] ^ rk[13];
1236                         rk[15] = rk[ 7] ^ rk[14];
1237                         rk += 8;
1238                 }
1239         }
1240         return 0;
1241 }
1242
1243 /**
1244  * Expand the cipher key into the decryption key schedule.
1245  *
1246  * @return      the number of rounds for the given cipher key size.
1247  */
1248 static int
1249 aes_setupDec(ulong rk[/* 4*(Nr + 1) */], const uchar cipherKey[], int keyBits)
1250 {
1251         int Nr, i, j;
1252         ulong temp;
1253
1254         /* expand the cipher key: */
1255         Nr = aes_setupEnc(rk, cipherKey, keyBits);
1256         /* invert the order of the round keys: */
1257         for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
1258                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1259                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1260                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1261                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1262         }
1263         /*
1264          * apply the inverse MixColumn transform to all round keys
1265          * but the first and the last:
1266          */
1267         for (i = 1; i < Nr; i++) {
1268                 rk += 4;
1269                 rk[0] =
1270                         Td0[Te4[(rk[0] >> 24)       ]] ^
1271                         Td1[Te4[(rk[0] >> 16) & 0xff]] ^
1272                         Td2[Te4[(rk[0] >>  8) & 0xff]] ^
1273                         Td3[Te4[(rk[0]      ) & 0xff]];
1274                 rk[1] =
1275                         Td0[Te4[(rk[1] >> 24)       ]] ^
1276                         Td1[Te4[(rk[1] >> 16) & 0xff]] ^
1277                         Td2[Te4[(rk[1] >>  8) & 0xff]] ^
1278                         Td3[Te4[(rk[1]      ) & 0xff]];
1279                 rk[2] =
1280                         Td0[Te4[(rk[2] >> 24)       ]] ^
1281                         Td1[Te4[(rk[2] >> 16) & 0xff]] ^
1282                         Td2[Te4[(rk[2] >>  8) & 0xff]] ^
1283                         Td3[Te4[(rk[2]      ) & 0xff]];
1284                 rk[3] =
1285                         Td0[Te4[(rk[3] >> 24)       ]] ^
1286                         Td1[Te4[(rk[3] >> 16) & 0xff]] ^
1287                         Td2[Te4[(rk[3] >>  8) & 0xff]] ^
1288                         Td3[Te4[(rk[3]      ) & 0xff]];
1289         }
1290         return Nr;
1291 }
1292
1293 /* using round keys in rk, perform Nr rounds of encrypting pt into ct */
1294 void
1295 aes_encrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar pt[16],
1296         uchar ct[16])
1297 {
1298         ulong s0, s1, s2, s3, t0, t1, t2, t3;
1299 #ifndef FULL_UNROLL
1300         int r;
1301 #endif /* ?FULL_UNROLL */
1302
1303         /*
1304          * map byte array block to cipher state
1305          * and add initial round key:
1306          */
1307         s0 = GETU32(pt     ) ^ rk[0];
1308         s1 = GETU32(pt +  4) ^ rk[1];
1309         s2 = GETU32(pt +  8) ^ rk[2];
1310         s3 = GETU32(pt + 12) ^ rk[3];
1311 #ifdef FULL_UNROLL
1312         /* round 1: */
1313         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
1314         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
1315         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
1316         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
1317         /* round 2: */
1318         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
1319         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
1320         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
1321         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
1322         /* round 3: */
1323         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
1324         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
1325         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
1326         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
1327         /* round 4: */
1328         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
1329         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
1330         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
1331         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
1332         /* round 5: */
1333         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
1334         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
1335         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
1336         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
1337         /* round 6: */
1338         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
1339         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
1340         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
1341         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
1342         /* round 7: */
1343         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
1344         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
1345         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
1346         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
1347         /* round 8: */
1348         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
1349         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
1350         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
1351         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
1352         /* round 9: */
1353         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
1354         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
1355         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
1356         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
1357         if (Nr > 10) {
1358                 /* round 10: */
1359                 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
1360                 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
1361                 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
1362                 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
1363                 /* round 11: */
1364                 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
1365                 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
1366                 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
1367                 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
1368                 if (Nr > 12) {
1369                         /* round 12: */
1370                         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
1371                         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
1372                         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
1373                         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
1374                         /* round 13: */
1375                         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
1376                         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
1377                         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
1378                         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
1379                 }
1380         }
1381         rk += Nr << 2;
1382 #else                                   /* !FULL_UNROLL */
1383         /*
1384          * Nr - 1 full rounds:
1385          */
1386         r = Nr >> 1;
1387         for (;;) {
1388                 t0 =
1389                     Te0[(s0 >> 24)       ] ^
1390                     Te1[(s1 >> 16) & 0xff] ^
1391                     Te2[(s2 >>  8) & 0xff] ^
1392                     Te3[(s3      ) & 0xff] ^
1393                     rk[4];
1394                 t1 =
1395                     Te0[(s1 >> 24)       ] ^
1396                     Te1[(s2 >> 16) & 0xff] ^
1397                     Te2[(s3 >>  8) & 0xff] ^
1398                     Te3[(s0      ) & 0xff] ^
1399                     rk[5];
1400                 t2 =
1401                     Te0[(s2 >> 24)       ] ^
1402                     Te1[(s3 >> 16) & 0xff] ^
1403                     Te2[(s0 >>  8) & 0xff] ^
1404                     Te3[(s1      ) & 0xff] ^
1405                     rk[6];
1406                 t3 =
1407                     Te0[(s3 >> 24)       ] ^
1408                     Te1[(s0 >> 16) & 0xff] ^
1409                     Te2[(s1 >>  8) & 0xff] ^
1410                     Te3[(s2      ) & 0xff] ^
1411                     rk[7];
1412
1413                 rk += 8;
1414                 if (--r == 0)
1415                     break;
1416
1417                 s0 =
1418                     Te0[(t0 >> 24)       ] ^
1419                     Te1[(t1 >> 16) & 0xff] ^
1420                     Te2[(t2 >>  8) & 0xff] ^
1421                     Te3[(t3      ) & 0xff] ^
1422                     rk[0];
1423                 s1 =
1424                     Te0[(t1 >> 24)       ] ^
1425                     Te1[(t2 >> 16) & 0xff] ^
1426                     Te2[(t3 >>  8) & 0xff] ^
1427                     Te3[(t0      ) & 0xff] ^
1428                     rk[1];
1429                 s2 =
1430                     Te0[(t2 >> 24)       ] ^
1431                     Te1[(t3 >> 16) & 0xff] ^
1432                     Te2[(t0 >>  8) & 0xff] ^
1433                     Te3[(t1      ) & 0xff] ^
1434                     rk[2];
1435                 s3 =
1436                     Te0[(t3 >> 24)       ] ^
1437                     Te1[(t0 >> 16) & 0xff] ^
1438                     Te2[(t1 >>  8) & 0xff] ^
1439                     Te3[(t2      ) & 0xff] ^
1440                     rk[3];
1441         }
1442 #endif                                  /* ?FULL_UNROLL */
1443         /*
1444          * apply last round and
1445          * map cipher state to byte array block:
1446          */
1447         s0 =
1448                 (Te4[(t0 >> 24)       ] << 24) ^
1449                 (Te4[(t1 >> 16) & 0xff] << 16) ^
1450                 (Te4[(t2 >>  8) & 0xff] <<  8) ^
1451                 (Te4[(t3      ) & 0xff]      ) ^
1452                 rk[0];
1453         PUTU32(ct     , s0);
1454         s1 =
1455                 (Te4[(t1 >> 24)       ] << 24) ^
1456                 (Te4[(t2 >> 16) & 0xff] << 16) ^
1457                 (Te4[(t3 >>  8) & 0xff] <<  8) ^
1458                 (Te4[(t0      ) & 0xff]      ) ^
1459                 rk[1];
1460         PUTU32(ct +  4, s1);
1461         s2 =
1462                 (Te4[(t2 >> 24)       ] << 24) ^
1463                 (Te4[(t3 >> 16) & 0xff] << 16) ^
1464                 (Te4[(t0 >>  8) & 0xff] <<  8) ^
1465                 (Te4[(t1      ) & 0xff]      ) ^
1466                 rk[2];
1467         PUTU32(ct +  8, s2);
1468         s3 =
1469                 (Te4[(t3 >> 24)       ] << 24) ^
1470                 (Te4[(t0 >> 16) & 0xff] << 16) ^
1471                 (Te4[(t1 >>  8) & 0xff] <<  8) ^
1472                 (Te4[(t2      ) & 0xff]      ) ^
1473                 rk[3];
1474         PUTU32(ct + 12, s3);
1475 }
1476
1477 void
1478 aes_decrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar ct[16],
1479         uchar pt[16])
1480 {
1481         ulong s0, s1, s2, s3, t0, t1, t2, t3;
1482 #ifndef FULL_UNROLL
1483         int r;
1484 #endif          /* ?FULL_UNROLL */
1485
1486         /*
1487          * map byte array block to cipher state
1488          * and add initial round key:
1489          */
1490     s0 = GETU32(ct     ) ^ rk[0];
1491     s1 = GETU32(ct +  4) ^ rk[1];
1492     s2 = GETU32(ct +  8) ^ rk[2];
1493     s3 = GETU32(ct + 12) ^ rk[3];
1494 #ifdef FULL_UNROLL
1495     /* round 1: */
1496     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
1497     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
1498     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
1499     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
1500     /* round 2: */
1501     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1502     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1503     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1504     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1505     /* round 3: */
1506     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1507     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1508     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1509     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1510     /* round 4: */
1511     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1512     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1513     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1514     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1515     /* round 5: */
1516     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1517     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1518     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1519     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1520     /* round 6: */
1521     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1522     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1523     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1524     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1525     /* round 7: */
1526     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1527     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1528     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1529     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1530     /* round 8: */
1531     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1532     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1533     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1534     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1535     /* round 9: */
1536     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1537     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1538     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1539     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1540     if (Nr > 10) {
1541         /* round 10: */
1542         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1543         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1544         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1545         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1546         /* round 11: */
1547         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1548         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1549         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1550         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1551         if (Nr > 12) {
1552             /* round 12: */
1553             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1554             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1555             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1556             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1557             /* round 13: */
1558             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1559             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1560             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1561             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1562         }
1563     }
1564     rk += Nr << 2;
1565 #else                                   /* !FULL_UNROLL */
1566     /*
1567      * Nr - 1 full rounds:
1568      */
1569     r = Nr >> 1;
1570     for (;;) {
1571         t0 =
1572             Td0[(s0 >> 24)       ] ^
1573             Td1[(s3 >> 16) & 0xff] ^
1574             Td2[(s2 >>  8) & 0xff] ^
1575             Td3[(s1      ) & 0xff] ^
1576             rk[4];
1577         t1 =
1578             Td0[(s1 >> 24)       ] ^
1579             Td1[(s0 >> 16) & 0xff] ^
1580             Td2[(s3 >>  8) & 0xff] ^
1581             Td3[(s2      ) & 0xff] ^
1582             rk[5];
1583         t2 =
1584             Td0[(s2 >> 24)       ] ^
1585             Td1[(s1 >> 16) & 0xff] ^
1586             Td2[(s0 >>  8) & 0xff] ^
1587             Td3[(s3      ) & 0xff] ^
1588             rk[6];
1589         t3 =
1590             Td0[(s3 >> 24)       ] ^
1591             Td1[(s2 >> 16) & 0xff] ^
1592             Td2[(s1 >>  8) & 0xff] ^
1593             Td3[(s0      ) & 0xff] ^
1594             rk[7];
1595
1596         rk += 8;
1597         if (--r == 0)
1598             break;
1599
1600         s0 =
1601             Td0[(t0 >> 24)       ] ^
1602             Td1[(t3 >> 16) & 0xff] ^
1603             Td2[(t2 >>  8) & 0xff] ^
1604             Td3[(t1      ) & 0xff] ^
1605             rk[0];
1606         s1 =
1607             Td0[(t1 >> 24)       ] ^
1608             Td1[(t0 >> 16) & 0xff] ^
1609             Td2[(t3 >>  8) & 0xff] ^
1610             Td3[(t2      ) & 0xff] ^
1611             rk[1];
1612         s2 =
1613             Td0[(t2 >> 24)       ] ^
1614             Td1[(t1 >> 16) & 0xff] ^
1615             Td2[(t0 >>  8) & 0xff] ^
1616             Td3[(t3      ) & 0xff] ^
1617             rk[2];
1618         s3 =
1619             Td0[(t3 >> 24)       ] ^
1620             Td1[(t2 >> 16) & 0xff] ^
1621             Td2[(t1 >>  8) & 0xff] ^
1622             Td3[(t0      ) & 0xff] ^
1623             rk[3];
1624     }
1625 #endif                                  /* ?FULL_UNROLL */
1626         /*
1627          * apply last round and
1628          * map cipher state to byte array block:
1629          */
1630         s0 =
1631                 (Td4[(t0 >> 24)       ] << 24) ^
1632                 (Td4[(t3 >> 16) & 0xff] << 16) ^
1633                 (Td4[(t2 >>  8) & 0xff] <<  8) ^
1634                 (Td4[(t1      ) & 0xff]      ) ^
1635                 rk[0];
1636         PUTU32(pt     , s0);
1637         s1 =
1638                 (Td4[(t1 >> 24)       ] << 24) ^
1639                 (Td4[(t0 >> 16) & 0xff] << 16) ^
1640                 (Td4[(t3 >>  8) & 0xff] <<  8) ^
1641                 (Td4[(t2      ) & 0xff]      ) ^
1642                 rk[1];
1643         PUTU32(pt +  4, s1);
1644         s2 =
1645                 (Td4[(t2 >> 24)       ] << 24) ^
1646                 (Td4[(t1 >> 16) & 0xff] << 16) ^
1647                 (Td4[(t0 >>  8) & 0xff] <<  8) ^
1648                 (Td4[(t3      ) & 0xff]      ) ^
1649                 rk[2];
1650         PUTU32(pt +  8, s2);
1651         s3 =
1652                 (Td4[(t3 >> 24)       ] << 24) ^
1653                 (Td4[(t2 >> 16) & 0xff] << 16) ^
1654                 (Td4[(t1 >>  8) & 0xff] <<  8) ^
1655                 (Td4[(t0      ) & 0xff]      ) ^
1656                 rk[3];
1657         PUTU32(pt + 12, s3);
1658 }
1659
1660 #ifdef INTERMEDIATE_VALUE_KAT
1661
1662 static void
1663 aes_encryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
1664         int rounds)
1665 {
1666         int r;
1667         u32 s0, s1, s2, s3, t0, t1, t2, t3;
1668
1669         /*
1670          * map byte array block to cipher state
1671          * and add initial round key:
1672          */
1673         s0 = GETU32(block     ) ^ rk[0];
1674         s1 = GETU32(block +  4) ^ rk[1];
1675         s2 = GETU32(block +  8) ^ rk[2];
1676         s3 = GETU32(block + 12) ^ rk[3];
1677         rk += 4;
1678
1679         /*
1680          * Nr - 1 full rounds:
1681          */
1682         for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) {
1683                 t0 =
1684                         Te0[(s0 >> 24)       ] ^
1685                         Te1[(s1 >> 16) & 0xff] ^
1686                         Te2[(s2 >>  8) & 0xff] ^
1687                         Te3[(s3      ) & 0xff] ^
1688                         rk[0];
1689                 t1 =
1690                         Te0[(s1 >> 24)       ] ^
1691                         Te1[(s2 >> 16) & 0xff] ^
1692                         Te2[(s3 >>  8) & 0xff] ^
1693                         Te3[(s0      ) & 0xff] ^
1694                         rk[1];
1695                 t2 =
1696                         Te0[(s2 >> 24)       ] ^
1697                         Te1[(s3 >> 16) & 0xff] ^
1698                         Te2[(s0 >>  8) & 0xff] ^
1699                         Te3[(s1      ) & 0xff] ^
1700                         rk[2];
1701                 t3 =
1702                         Te0[(s3 >> 24)       ] ^
1703                         Te1[(s0 >> 16) & 0xff] ^
1704                         Te2[(s1 >>  8) & 0xff] ^
1705                         Te3[(s2      ) & 0xff] ^
1706                         rk[3];
1707                 s0 = t0;
1708                 s1 = t1;
1709                 s2 = t2;
1710                 s3 = t3;
1711                 rk += 4;
1712         }
1713
1714         /*
1715          * apply last round and
1716          * map cipher state to byte array block:
1717          */
1718         if (rounds == Nr) {
1719                 t0 =
1720                         (Te4[(s0 >> 24)       ] << 24) ^
1721                         (Te4[(s1 >> 16) & 0xff] << 16) ^
1722                         (Te4[(s2 >>  8) & 0xff] <<  8) ^
1723                         (Te4[(s3      ) & 0xff]      ) ^
1724                         rk[0];
1725                 t1 =
1726                         (Te4[(s1 >> 24)       ] << 24) ^
1727                         (Te4[(s2 >> 16) & 0xff] << 16) ^
1728                         (Te4[(s3 >>  8) & 0xff] <<  8) ^
1729                         (Te4[(s0      ) & 0xff]      ) ^
1730                         rk[1];
1731                 t2 =
1732                         (Te4[(s2 >> 24)       ] << 24) ^
1733                         (Te4[(s3 >> 16) & 0xff] << 16) ^
1734                         (Te4[(s0 >>  8) & 0xff] <<  8) ^
1735                         (Te4[(s1      ) & 0xff]      ) ^
1736                         rk[2];
1737                 t3 =
1738                         (Te4[(s3 >> 24)       ] << 24) ^
1739                         (Te4[(s0 >> 16) & 0xff] << 16) ^
1740                         (Te4[(s1 >>  8) & 0xff] <<  8) ^
1741                         (Te4[(s2      ) & 0xff]      ) ^
1742                         rk[3];
1743                 s0 = t0;
1744                 s1 = t1;
1745                 s2 = t2;
1746                 s3 = t3;
1747         }
1748
1749         PUTU32(block     , s0);
1750         PUTU32(block +  4, s1);
1751         PUTU32(block +  8, s2);
1752         PUTU32(block + 12, s3);
1753 }
1754
1755 static void
1756 aes_decryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
1757         int rounds)
1758 {
1759         int r;
1760         u32 s0, s1, s2, s3, t0, t1, t2, t3;
1761
1762         /*
1763          * map byte array block to cipher state
1764          * and add initial round key:
1765          */
1766         s0 = GETU32(block     ) ^ rk[0];
1767         s1 = GETU32(block +  4) ^ rk[1];
1768         s2 = GETU32(block +  8) ^ rk[2];
1769         s3 = GETU32(block + 12) ^ rk[3];
1770         rk += 4;
1771
1772         /*
1773          * Nr - 1 full rounds:
1774          */
1775         for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) {
1776                 t0 =
1777                         Td0[(s0 >> 24)       ] ^
1778                         Td1[(s3 >> 16) & 0xff] ^
1779                         Td2[(s2 >>  8) & 0xff] ^
1780                         Td3[(s1      ) & 0xff] ^
1781                         rk[0];
1782                 t1 =
1783                         Td0[(s1 >> 24)       ] ^
1784                         Td1[(s0 >> 16) & 0xff] ^
1785                         Td2[(s3 >>  8) & 0xff] ^
1786                         Td3[(s2      ) & 0xff] ^
1787                         rk[1];
1788                 t2 =
1789                         Td0[(s2 >> 24)       ] ^
1790                         Td1[(s1 >> 16) & 0xff] ^
1791                         Td2[(s0 >>  8) & 0xff] ^
1792                         Td3[(s3      ) & 0xff] ^
1793                         rk[2];
1794                 t3 =
1795                         Td0[(s3 >> 24)       ] ^
1796                         Td1[(s2 >> 16) & 0xff] ^
1797                         Td2[(s1 >>  8) & 0xff] ^
1798                         Td3[(s0      ) & 0xff] ^
1799                         rk[3];
1800
1801                 s0 = t0;
1802                 s1 = t1;
1803                 s2 = t2;
1804                 s3 = t3;
1805                 rk += 4;
1806         }
1807
1808         /*
1809          * complete the last round and
1810          * map cipher state to byte array block:
1811          */
1812         t0 =
1813                 (Td4[(s0 >> 24)       ] << 24) ^
1814                 (Td4[(s3 >> 16) & 0xff] << 16) ^
1815                 (Td4[(s2 >>  8) & 0xff] <<  8) ^
1816                 (Td4[(s1      ) & 0xff]      );
1817         t1 =
1818                 (Td4[(s1 >> 24)       ] << 24) ^
1819                 (Td4[(s0 >> 16) & 0xff] << 16) ^
1820                 (Td4[(s3 >>  8) & 0xff] <<  8) ^
1821                 (Td4[(s2      ) & 0xff]      );
1822         t2 =
1823                 (Td4[(s2 >> 24)       ] << 24) ^
1824                 (Td4[(s1 >> 16) & 0xff] << 16) ^
1825                 (Td4[(s0 >>  8) & 0xff] <<  8) ^
1826                 (Td4[(s3      ) & 0xff]      );
1827         t3 =
1828                 (Td4[(s3 >> 24)       ] << 24) ^
1829                 (Td4[(s2 >> 16) & 0xff] << 16) ^
1830                 (Td4[(s1 >>  8) & 0xff] <<  8) ^
1831                 (Td4[(s0      ) & 0xff]      );
1832
1833         if (rounds == Nr) {
1834                 t0 ^= rk[0];
1835                 t1 ^= rk[1];
1836                 t2 ^= rk[2];
1837                 t3 ^= rk[3];
1838         }
1839
1840         PUTU32(block     , t0);
1841         PUTU32(block +  4, t1);
1842         PUTU32(block +  8, t2);
1843         PUTU32(block + 12, t3);
1844 }
1845
1846 #endif                  /* INTERMEDIATE_VALUE_KAT */