2 ---------------------------------------------------------------------------
\r
3 Copyright (c) 2003, Dr Brian Gladman < >, Worcester, UK.
\r
8 The free distribution and use of this software in both source and binary
\r
9 form is allowed (with or without changes) provided that:
\r
11 1. distributions of this source code include the above copyright
\r
12 notice, this list of conditions and the following disclaimer;
\r
14 2. distributions in binary form include the above copyright
\r
15 notice, this list of conditions and the following disclaimer
\r
16 in the documentation and/or other associated materials;
\r
18 3. the copyright holder's name is not used to endorse products
\r
19 built using this software without specific written permission.
\r
21 ALTERNATIVELY, provided that this notice is retained in full, this product
\r
22 may be distributed under the terms of the GNU General Public License (GPL),
\r
23 in which case the provisions of the GPL apply INSTEAD OF those given above.
\r
27 This software is provided 'as is' with no explicit or implied warranties
\r
28 in respect of its properties, including, but not limited to, correctness
\r
29 and/or fitness for purpose.
\r
30 ---------------------------------------------------------------------------
\r
31 Issue Date: 26/08/2003
\r
33 This file contains the code for implementing encryption and decryption
\r
34 for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It
\r
35 can optionally be replaced by code written in assembler using NASM. For
\r
36 further details see the file aesopt.h
\r
/* Initial key addition for one column: take the word that word_in() yields
   for (in, col), XOR it with round-key word key[col], and store the result
   in column col of state st. */
#define si(st, in, key, col) (s(st, col) = word_in(in, col) ^ (key)[col])
\r
/* Hand column col of state st to word_out() for storage in output block out. */
#define so(out, st, col) word_out(out, col, s(st, col))
\r
/* Declarator list for two 4-word state arrays (array form of the state).
   NOTE(review): the preprocessor conditional that selects between this and
   the scalar form of locals() is not visible in this listing. */
#define locals(second, first) first[4], second[4]
\r
/* Declarator list for two states held as four separately named scalars each
   (name0..name3), built by token pasting.
   NOTE(review): the preprocessor conditional that selects between this and
   the array form of locals() is not visible in this listing. */
#define locals(second, first) \
    first##0, first##1, first##2, first##3, \
    second##0, second##1, second##2, second##3
\r
/* Copy all four state columns from src to dst. The expansion already ends
   with a semicolon, exactly as in the original definition. */
#define l_copy(dst, src) \
    s(dst, 0) = s(src, 0); \
    s(dst, 1) = s(src, 1); \
    s(dst, 2) = s(src, 2); \
    s(dst, 3) = s(src, 3);
\r
/* Load a whole state: apply si() to columns 0..3 in order. Expands to four
   statements; the caller supplies the final semicolon. */
#define state_in(st, in, key) \
    si(st, in, key, 0); si(st, in, key, 1); \
    si(st, in, key, 2); si(st, in, key, 3)
\r
/* Store a whole state: apply so() to columns 0..3 in order. Expands to four
   statements; the caller supplies the final semicolon. */
#define state_out(out, st) \
    so(out, st, 0); so(out, st, 1); \
    so(out, st, 2); so(out, st, 3)
\r
/* One full round: invoke the per-column round macro step for columns 0..3,
   writing state dst from state src with round key words key[0..3]. Expands
   to four statements; the caller supplies the final semicolon. */
#define round(step, dst, src, key) \
    step(dst, src, key, 0); step(dst, src, key, 1); \
    step(dst, src, key, 2); step(dst, src, key, 3)
\r
56 #if defined(ENCRYPTION) && !defined(AES_ASM)
\r
/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimization with small code, but this is poor for decryption,
   so we need to control this with the following VC++ pragmas.
*/
\r
63 #if defined(_MSC_VER)
\r
64 #pragma optimize( "s", on )
\r
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values. In particular, the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile-time constant whenever possible, but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction).
*/
\r
/* Forward row-shift selector. For state row `row` and output column `col`,
   yields input column (col + row) mod 4 of state st. Any row or col value
   other than 0, 1 or 2 falls into the final branch and is treated as 3.
   The choice is spelled out as nested conditionals on literal comparisons so
   that it can fold to a single s(st, n) when row and col are constants. */
#define fwd_var(st, row, col) \
    ( row == 0 ? ( col == 0 ? s(st,0) : col == 1 ? s(st,1) : col == 2 ? s(st,2) : s(st,3)) \
    : row == 1 ? ( col == 0 ? s(st,1) : col == 1 ? s(st,2) : col == 2 ? s(st,3) : s(st,0)) \
    : row == 2 ? ( col == 0 ? s(st,2) : col == 1 ? s(st,3) : col == 2 ? s(st,0) : s(st,1)) \
    :            ( col == 0 ? s(st,3) : col == 1 ? s(st,0) : col == 2 ? s(st,1) : s(st,2)))
\r
84 #if defined(FT4_SET)
\r
/* Forward round for one column, FT4_SET variant: column col of dst becomes
   key[col] XOR the four_tables() result over the fwd_var-selected inputs. */
#define fwd_rnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ four_tables(src, t_use(f, n), fwd_var, rf1, col))
\r
87 #elif defined(FT1_SET)
\r
/* Forward round for one column, FT1_SET variant: column col of dst becomes
   key[col] XOR the one_table() result (with upr) over the fwd_var-selected
   inputs. */
#define fwd_rnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ one_table(src, upr, t_use(f, n), fwd_var, rf1, col))
\r
/* Forward round for one column, table-free variant: the no_table() result
   over the fwd_var-selected inputs is passed through fwd_mcol() and then
   XORed with key[col].
   NOTE(review): the directive introducing this branch (presumably #else) is
   not visible in this listing. */
#define fwd_rnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ fwd_mcol(no_table(src, t_use(s, box), fwd_var, rf1, col)))
\r
94 #if defined(FL4_SET)
\r
/* Last forward round for one column, FL4_SET variant: column col of dst
   becomes key[col] XOR the four_tables() result using the t_use(f, l)
   tables. */
#define fwd_lrnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ four_tables(src, t_use(f, l), fwd_var, rf1, col))
\r
96 #elif defined(FL1_SET)
\r
/* Last forward round for one column, FL1_SET variant: column col of dst
   becomes key[col] XOR the one_table() result (with ups) using the
   t_use(f, l) table. */
#define fwd_lrnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ one_table(src, ups, t_use(f, l), fwd_var, rf1, col))
\r
/* Last forward round for one column, table-free variant: column col of dst
   becomes key[col] XOR the no_table() result; unlike fwd_rnd there is no
   fwd_mcol() step.
   NOTE(review): the directive introducing this branch (presumably #else) is
   not visible in this listing. */
#define fwd_lrnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ no_table(src, t_use(s, box), fwd_var, rf1, col))
\r
/* Encrypt one block: the input is read from in_blk, the key schedule is
   taken from cx->ks, and the result is written to out_blk.
   NOTE(review): this listing omits lines of the original function (braces,
   some preprocessor conditionals, any case labels, key-pointer updates and
   the return statements). The comments below describe only the statements
   that are visible. */
102 aes_rval aes_encrypt(const void *in_blk, void *out_blk, const aes_encrypt_ctx cx[1])
\r
/* Two working states, b0 and b1; the rounds below alternate between them. */
103 { aes_32t locals(b0, b1);
\r
/* kp starts at the first word of the key schedule. */
104 const aes_32t *kp = cx->ks;
\r
106 dec_fmvars; /* declare variables for fwd_mcol() if needed */
\r
/* nr is used below as the round count: it is kp[52] when
   kp[45] ^ kp[52] ^ kp[53] is non-zero, and 14 otherwise. */
109 aes_32t nr = (kp[45] ^ kp[52] ^ kp[53] ? kp[52] : 14);
\r
/* Key-schedule sanity test: true when nr is none of 10, 12 or 14, or when
   the key words listed for the matching nr are all zero.
   NOTE(review): the guarded statement is not visible here; presumably an
   error return. */
112 if( (nr != 10 || !(kp[0] | kp[3] | kp[4]))
\r
113 && (nr != 12 || !(kp[0] | kp[5] | kp[6]))
\r
114 && (nr != 14 || !(kp[0] | kp[7] | kp[8])) )
\r
/* Load the input block into b0, XORing each column with kp[0..3]. */
118 state_in(b0, in_blk, kp);
\r
/* Fully unrolled variant: three visible groups of rounds separated by
   "Falls through" comments. The first two groups run two rounds each, the
   last runs nine fwd_rnd rounds followed by the final fwd_lrnd round.
   NOTE(review): the case labels and kp adjustments that presumably sit
   between the groups are on omitted lines. */
120 #if (ENC_UNROLL == FULL)
\r
125 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
\r
126 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
\r
128 /* Falls through. */
\r
130 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
\r
131 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
\r
133 /* Falls through. */
\r
135 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
\r
136 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
\r
137 round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
\r
138 round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
\r
139 round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
\r
140 round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
\r
141 round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
\r
142 round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
\r
143 round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
\r
/* Final round uses fwd_lrnd and leaves the result in b0. */
144 round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
\r
/* Partially unrolled variant: the loop header counts (nr >> 1) - 1
   iterations and two rounds (b0 -> b1 -> b0) are visible after it, followed
   by one more round into b1.
   NOTE(review): the declaration of rnd, the loop braces and the kp updates
   between rounds are not visible in this listing. */
149 #if (ENC_UNROLL == PARTIAL)
\r
151 for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
\r
154 round(fwd_rnd, b1, b0, kp);
\r
156 round(fwd_rnd, b0, b1, kp);
\r
159 round(fwd_rnd, b1, b0, kp);
\r
/* Non-unrolled variant: the loop header counts nr - 1 iterations, one round
   per iteration.
   NOTE(review): the directive selecting this branch and the statements that
   move the state back into b0 are not visible in this listing. */
162 for(rnd = 0; rnd < nr - 1; ++rnd)
\r
165 round(fwd_rnd, b1, b0, kp);
\r
/* Final round for the looped variants, leaving the result in b0. */
170 round(fwd_lrnd, b0, b1, kp);
\r
/* Write the four columns of b0 to the output block. */
174 state_out(out_blk, b0);
\r
182 #if defined(DECRYPTION) && !defined(AES_ASM)
\r
/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimization with small code, but this is poor for decryption,
   so we need to control this with the following VC++ pragmas.
*/
\r
189 #if defined(_MSC_VER)
\r
190 #pragma optimize( "t", on )
\r
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values. In particular, the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile-time constant whenever possible, but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction).
*/
\r
/* Inverse row-shift selector. For state row `row` and output column `col`,
   yields input column (col - row) mod 4 of state st. Any row or col value
   other than 0, 1 or 2 falls into the final branch and is treated as 3.
   The choice is spelled out as nested conditionals on literal comparisons so
   that it can fold to a single s(st, n) when row and col are constants. */
#define inv_var(st, row, col) \
    ( row == 0 ? ( col == 0 ? s(st,0) : col == 1 ? s(st,1) : col == 2 ? s(st,2) : s(st,3)) \
    : row == 1 ? ( col == 0 ? s(st,3) : col == 1 ? s(st,0) : col == 2 ? s(st,1) : s(st,2)) \
    : row == 2 ? ( col == 0 ? s(st,2) : col == 1 ? s(st,3) : col == 2 ? s(st,0) : s(st,1)) \
    :            ( col == 0 ? s(st,1) : col == 1 ? s(st,2) : col == 2 ? s(st,3) : s(st,0)))
\r
210 #if defined(IT4_SET)
\r
/* Inverse round for one column, IT4_SET variant: column col of dst becomes
   key[col] XOR the four_tables() result over the inv_var-selected inputs. */
#define inv_rnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ four_tables(src, t_use(i, n), inv_var, rf1, col))
\r
213 #elif defined(IT1_SET)
\r
/* Inverse round for one column, IT1_SET variant: column col of dst becomes
   key[col] XOR the one_table() result (with upr) over the inv_var-selected
   inputs. */
#define inv_rnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ one_table(src, upr, t_use(i, n), inv_var, rf1, col))
\r
/* Inverse round for one column, table-free variant: key[col] is XORed with
   the no_table() result first, and inv_mcol() is applied to that sum (note
   the order differs from the forward table-free round).
   NOTE(review): the directive introducing this branch (presumably #else) is
   not visible in this listing. */
#define inv_rnd(dst, src, key, col) \
    (s(dst, col) = inv_mcol((key)[col] ^ no_table(src, t_use(i, box), inv_var, rf1, col)))
\r
220 #if defined(IL4_SET)
\r
/* Last inverse round for one column, IL4_SET variant: column col of dst
   becomes key[col] XOR the four_tables() result using the t_use(i, l)
   tables. */
#define inv_lrnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ four_tables(src, t_use(i, l), inv_var, rf1, col))
\r
222 #elif defined(IL1_SET)
\r
/* Last inverse round for one column, IL1_SET variant: column col of dst
   becomes key[col] XOR the one_table() result (with ups) using the
   t_use(i, l) table. */
#define inv_lrnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ one_table(src, ups, t_use(i, l), inv_var, rf1, col))
\r
/* Last inverse round for one column, table-free variant: column col of dst
   becomes key[col] XOR the no_table() result; there is no inv_mcol() step.
   NOTE(review): the directive introducing this branch (presumably #else) is
   not visible in this listing. */
#define inv_lrnd(dst, src, key, col) \
    (s(dst, col) = (key)[col] ^ no_table(src, t_use(i, box), inv_var, rf1, col))
\r
/* Decrypt one block: the input is read from in_blk, the key schedule is
   taken from cx->ks, and the result is written to out_blk.
   NOTE(review): this listing omits lines of the original function (braces,
   some preprocessor conditionals, any case labels, key-pointer updates and
   the return statements). The comments below describe only the statements
   that are visible. */
228 aes_rval aes_decrypt(const void *in_blk, void *out_blk, const aes_decrypt_ctx cx[1])
\r
/* Two working states, b0 and b1; the rounds below alternate between them. */
229 { aes_32t locals(b0, b1);
\r
231 dec_imvars; /* declare variables for inv_mcol() if needed */
\r
/* nr is used below as the round count: it is ks[52] when
   ks[45] ^ ks[52] ^ ks[53] is non-zero, and 14 otherwise. */
234 aes_32t nr = (cx->ks[45] ^ cx->ks[52] ^ cx->ks[53] ? cx->ks[52] : 14);
\r
/* kp starts nr * N_COLS words into the key schedule; the unrolled rounds
   below index backwards from it. */
235 const aes_32t *kp = cx->ks + nr * N_COLS;
\r
/* Key-schedule sanity test: true when nr is none of 10, 12 or 14, or when
   the key words listed for the matching nr are all zero.
   NOTE(review): the guarded statement is not visible here; presumably an
   error return. */
238 if( (nr != 10 || !(cx->ks[0] | cx->ks[3] | cx->ks[4]))
\r
239 && (nr != 12 || !(cx->ks[0] | cx->ks[5] | cx->ks[6]))
\r
240 && (nr != 14 || !(cx->ks[0] | cx->ks[7] | cx->ks[8])) )
\r
/* Load the input block into b0, XORing each column with kp[0..3]. */
244 state_in(b0, in_blk, kp);
\r
/* Fully unrolled variant: three visible groups of rounds separated by
   "Falls through" comments. The first two groups run two rounds each, the
   last runs nine inv_rnd rounds followed by the final inv_lrnd round, all
   at decreasing offsets from kp.
   NOTE(review): the case labels and kp adjustments that presumably sit
   between the groups are on omitted lines. */
246 #if (DEC_UNROLL == FULL)
\r
251 round(inv_rnd, b1, b0, kp - 1 * N_COLS);
\r
252 round(inv_rnd, b0, b1, kp - 2 * N_COLS);
\r
254 /* Falls through. */
\r
256 round(inv_rnd, b1, b0, kp - 1 * N_COLS);
\r
257 round(inv_rnd, b0, b1, kp - 2 * N_COLS);
\r
259 /* Falls through. */
\r
261 round(inv_rnd, b1, b0, kp - 1 * N_COLS);
\r
262 round(inv_rnd, b0, b1, kp - 2 * N_COLS);
\r
263 round(inv_rnd, b1, b0, kp - 3 * N_COLS);
\r
264 round(inv_rnd, b0, b1, kp - 4 * N_COLS);
\r
265 round(inv_rnd, b1, b0, kp - 5 * N_COLS);
\r
266 round(inv_rnd, b0, b1, kp - 6 * N_COLS);
\r
267 round(inv_rnd, b1, b0, kp - 7 * N_COLS);
\r
268 round(inv_rnd, b0, b1, kp - 8 * N_COLS);
\r
269 round(inv_rnd, b1, b0, kp - 9 * N_COLS);
\r
/* Final round uses inv_lrnd and leaves the result in b0. */
270 round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
\r
/* Partially unrolled variant: the loop header counts (nr >> 1) - 1
   iterations and two rounds (b0 -> b1 -> b0) are visible after it, followed
   by one more round into b1.
   NOTE(review): the declaration of rnd, the loop braces and the kp updates
   between rounds are not visible in this listing. */
275 #if (DEC_UNROLL == PARTIAL)
\r
277 for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
\r
280 round(inv_rnd, b1, b0, kp);
\r
282 round(inv_rnd, b0, b1, kp);
\r
285 round(inv_rnd, b1, b0, kp);
\r
/* Non-unrolled variant: the loop header counts nr - 1 iterations, one round
   per iteration.
   NOTE(review): the directive selecting this branch and the statements that
   move the state back into b0 are not visible in this listing. */
288 for(rnd = 0; rnd < nr - 1; ++rnd)
\r
291 round(inv_rnd, b1, b0, kp);
\r
/* Final round for the looped variants, leaving the result in b0. */
296 round(inv_lrnd, b0, b1, kp);
\r
/* Write the four columns of b0 to the output block. */
300 state_out(out_blk, b0);
\r