1 /* Copyright (C) 2000-2009 Peter Selinger.
2 This file is part of ccrypt. It is free software and it is covered
3 by the GNU general public license. See the file COPYING for details. */
5 /* rijndael.c - optimized version of the Rijndael cipher */
6 /* $Id: rijndael.c 258 2009-08-26 17:46:10Z selinger $ */
8 /* derived from original source: rijndael-alg-ref.c v2.0 August '99
9 * Reference ANSI C code for NIST competition
10 * authors: Paulo Barreto
18 #include "ao_aes_int.h"
20 static const int xshifts[3][2][4] = {
31 /* Exor corresponding text input and round key input bytes */
32 /* the result is written to res, which can be the same as a */
/* Word-wise XOR: for every index j below BC, res[j] receives a[j] ^ rk[j].
   Each element is read before the same index is written, so passing the
   same array for res and a is fine. */
33 static inline void xKeyAddition(word32 res[MAXBC], word32 a[MAXBC],
34 word32 rk[MAXBC], int BC)
38 for (j = 0; j < BC; j++) {
39 res[j] = a[j] ^ rk[j];
43 #if 0 /* code included for reference */
/* Disabled reference versions of the row shift and the S-box substitution.
   xShiftRow copies byte row 0 unchanged and, for rows 1..3, takes the byte
   for column j from column (j + s) % BC; the assignment of s is not visible
   in this excerpt (presumably it comes from shift[i] -- confirm).
   NOTE(review): in xSubstitution the lookup `box[a[j][i]]` subscripts a,
   which is declared as word32 a[MAXBC]; the byte view a8 declared right
   above it is presumably what was intended (a8[j][i]).  Confirm and fix
   before ever enabling this section. */
45 /* shift rows a, return result in res. This avoids having to copy a
46 tmp array back to a. res must not be a. */
47 static inline void xShiftRow(word32 res[MAXBC], word32 a[MAXBC], int shift[4],
50 word8 (*a8)[4] = (word8 (*)[4]) a;
51 word8 (*res8)[4] = (word8 (*)[4]) res;
53 /* Row 0 remains unchanged
54 * The other three rows are shifted a variable amount
59 for (j = 0; j < BC; j++) {
60 res8[j][0] = a8[j][0];
62 for (i = 1; i < 4; i++) {
64 for (j = 0; j < BC; j++) {
65 res8[j][i] = a8[(j + s) % BC][i];
70 static inline void xSubstitution(word32 a[MAXBC], word8 box[256], int BC)
72 word8 (*a8)[4] = (word8 (*)[4]) a;
74 /* Replace every byte of the input by the byte at that place
75 * in the nonlinear S-box
79 for (i = 0; i < 4; i++) {
80 for (j = 0; j < BC; j++) {
81 a8[j][i] = box[a[j][i]];
86 #endif /* code included for reference */
88 /* profiling shows that the ccrypt program spends about 50% of its
89 time in the function xShiftSubst. Splitting the inner "for"
90 statement into two parts, instead of using the expensive "%" modulo
91 operation, makes this function about 44% faster, thereby making the
92 entire program about 28% faster. With -O3 optimization, the time
93 savings are even more dramatic - ccrypt runs between 55% and 65%
94 faster on most platforms. */
96 /* do ShiftRow and Substitution together. res must not be a. */
/* Every byte stored in res is the box[] image of one byte of a.  Byte row 0
   keeps its column; for rows 1..3 the source column is j + s, wrapping
   around once it reaches BC.
   NOTE(review): the declaration and assignment of s are not visible in this
   excerpt; presumably s is taken from shift[i] -- confirm. */
97 static inline void xShiftSubst(word32 res[MAXBC], word32 a[MAXBC],
98 int shift[4], int BC, const word8 box[256])
/* byte-level views of both arrays: element [j][i] is byte i of the j-th
   4-byte group */
102 word8 (*a8)[4] = (word8 (*)[4]) a;
103 word8 (*res8)[4] = (word8 (*)[4]) res;
/* row 0: substituted only, no column offset */
105 for (j = 0; j < BC; j++) {
106 res8[j][0] = box[a8[j][0]];
108 for (i = 1; i < 4; i++) {
/* columns whose source index j + s is still below BC */
110 for (j = 0; j < BC - s; j++) {
111 res8[j][i] = box[a8[(j + s)][i]];
/* remaining columns: wrap the source index by subtracting BC rather than
   applying the modulo operator */
113 for (j = BC - s; j < BC; j++) {
114 res8[j][i] = box[a8[(j + s) - BC][i]];
119 #if 0 /* code included for reference */
121 /* Mix the four bytes of every column in a linear way */
122 /* the result is written to res, which may equal a */
/* For each column j, b is the XOR of four lookup-table words: table M0[i]
   indexed by byte i of the column, for i = 0..3.  The statement that stores
   b is not visible in this excerpt. */
123 static inline void xMixColumn(word32 res[MAXBC], word32 a[MAXBC], int BC)
127 word8 (*a8)[4] = (word8 (*)[4]) a;
129 for (j = 0; j < BC; j++) {
130 b = M0[0][a8[j][0]].w32;
131 b ^= M0[1][a8[j][1]].w32;
132 b ^= M0[2][a8[j][2]].w32;
133 b ^= M0[3][a8[j][3]].w32;
138 #endif /* code included for reference */
140 /* do MixColumn and KeyAddition together */
/* For each column j, b starts as the XOR of four lookup-table words: table
   M0[i] indexed by byte i of the column, for i = 0..3.
   NOTE(review): the use of rk and the store into res are not visible in
   this excerpt; going by the comment above, b is presumably XORed with
   rk[j] before being written to res[j] -- confirm. */
141 static inline void xMixAdd(word32 res[MAXBC], word32 a[MAXBC],
142 word32 rk[MAXBC], int BC)
/* byte-level view of the input words */
146 word8 (*a8)[4] = (word8 (*)[4]) a;
148 for (j = 0; j < BC; j++) {
149 b = M0[0][a8[j][0]].w32;
150 b ^= M0[1][a8[j][1]].w32;
151 b ^= M0[2][a8[j][2]].w32;
152 b ^= M0[3][a8[j][3]].w32;
158 /* Mix the four bytes of every column in a linear way
159 * This is the opposite operation of xMixColumn */
160 /* the result is written to res, which may equal a */
/* For each column j, b is the XOR of four lookup-table words: table M1[i]
   indexed by byte i of the column, for i = 0..3.  The statement that stores
   b into res is not visible in this excerpt. */
161 static inline void xInvMixColumn(word32 res[MAXBC], word32 a[MAXBC], int BC)
/* byte-level view of the input words */
165 word8 (*a8)[4] = (word8 (*)[4]) a;
167 for (j = 0; j < BC; j++) {
168 b = M1[0][a8[j][0]].w32;
169 b ^= M1[1][a8[j][1]].w32;
170 b ^= M1[2][a8[j][2]].w32;
171 b ^= M1[3][a8[j][3]].w32;
176 #if 0 /* code included for reference */
178 /* do KeyAddition and InvMixColumn together */
/* Disabled reference code.  For each column j, b is the XOR of four M1
   lookup-table words selected by the four bytes read through a8.
   NOTE(review): the key-addition step suggested by the name and by the rk
   parameter, and the store of b, are not visible in this excerpt. */
179 static inline void xAddInvMix(word32 res[MAXBC], word32 a[MAXBC],
180 word32 rk[MAXBC], int BC)
184 word8 (*a8)[4] = (word8 (*)[4]) a;
186 for (j = 0; j < BC; j++) {
188 b = M1[0][a8[j][0]].w32;
189 b ^= M1[1][a8[j][1]].w32;
190 b ^= M1[2][a8[j][2]].w32;
191 b ^= M1[3][a8[j][3]].w32;
196 #endif /* code included for reference */
/* Expand key[] into (ROUNDS + 1) * BC round-key words and fill in the
   roundkey structure rkk (round count and shift tables).
   The expansion works in place: k8 aliases key, and both are written to in
   the loop below, so the caller's key[] contents are overwritten.
   NOTE(review): the rest of the parameter list, the derivation of KC and BC
   from keyBits and blockBits, the initialisation of t, and the return value
   are not visible in this excerpt. */
199 int xrijndaelKeySched(word32 key[], int keyBits, int blockBits,
202 /* Calculate the necessary round keys
203 * The number of calculations depends on keyBits and blockBits */
205 int i, j, t, rconpointer = 0;
/* byte-level view of the key words */
206 word8 (*k8)[4] = (word8 (*)[4]) key;
/* six more rounds than the larger of KC and BC */
236 ROUNDS = KC > BC ? KC + 6 : BC + 6;
239 /* copy values into round key array */
240 for (j = 0; (j < KC) && (t < (ROUNDS + 1) * BC); j++, t++)
243 while (t < (ROUNDS + 1) * BC) { /* while not enough round key material */
244 /* calculate new values */
/* fold the last key word into the first one: byte i of word 0 is XORed with
   the xS image of byte (i + 1) % 4 of word KC - 1 */
245 for (i = 0; i < 4; i++) {
246 k8[0][i] ^= xS[k8[KC - 1][(i + 1) % 4]];
/* mix the next xrcon entry into byte 0 of word 0 and advance the index */
248 k8[0][0] ^= xrcon[rconpointer++];
/* NOTE(review): two update variants follow; the condition that selects
   between them is not visible in this excerpt (presumably it depends on
   KC -- confirm).  First variant: each word is XORed with its predecessor. */
251 for (j = 1; j < KC; j++) {
252 key[j] ^= key[j - 1];
/* second variant, written for eight key words: chain words 1..3, pass word 3
   through xS into word 4, then chain words 5..7 */
255 for (j = 1; j < 4; j++) {
256 key[j] ^= key[j - 1];
258 for (i = 0; i < 4; i++) {
259 k8[4][i] ^= xS[k8[3][i]];
261 for (j = 5; j < 8; j++) {
262 key[j] ^= key[j - 1];
265 /* copy values into round key array */
266 for (j = 0; (j < KC) && (t < (ROUNDS + 1) * BC); j++, t++) {
271 /* make roundkey structure */
274 rkk->ROUNDS = ROUNDS;
/* copy both shift tables for this block size; (BC - 4) >> 1 maps a BC of
   4, 6 or 8 to xshifts index 0, 1 or 2 */
275 for (i = 0; i < 2; i++) {
276 for (j = 0; j < 4; j++) {
277 rkk->shift[i][j] = xshifts[(BC - 4) >> 1][i][j];
284 /* Encryption of one block. */
/* The block is transformed in place: the first and the last xKeyAddition
   both write into block.  Uses the first shift table (rkk->shift[0]) and
   the xS table.
   NOTE(review): the declarations of BC and r, and any statements that
   advance rp between steps, are not visible in this excerpt; presumably rp
   moves forward by BC words per round key -- confirm. */
287 void xrijndaelEncrypt(word32 block[], roundkey *rkk)
/* scratch buffer: xShiftSubst needs distinct source and destination */
289 word32 block2[MAXBC]; /* hold intermediate result */
292 int *shift = rkk->shift[0];
294 int ROUNDS = rkk->ROUNDS;
/* rp starts at the beginning of the round-key array */
295 word32 *rp = rkk->rk;
297 /* begin with a key addition */
298 xKeyAddition(block, block, rp, BC);
301 /* ROUNDS-1 ordinary rounds */
302 for (r = 1; r < ROUNDS; r++) {
/* block -> block2 (shift + substitute), then block2 -> block (mix + key) */
303 xShiftSubst(block2, block, shift, BC, xS);
304 xMixAdd(block, block2, rp, BC);
308 /* Last round is special: there is no xMixColumn */
309 xShiftSubst(block2, block, shift, BC, xS);
310 xKeyAddition(block, block2, rp, BC);
314 /* We don't actually need this in AltOS, so don't bother including it */
316 /* Decryption of one block. */
/* The block is transformed in place: the final xKeyAddition writes into
   block.  Uses the second shift table (rkk->shift[1]), the xSi table
   (presumably the inverse of xS -- confirm) and xInvMixColumn, and starts
   with rp pointing ROUNDS * BC words into the round-key array.
   NOTE(review): the declarations of BC and r, and any statements that move
   rp between steps, are not visible in this excerpt; presumably rp steps
   back by BC words per round -- confirm. */
318 void xrijndaelDecrypt(word32 block[], roundkey *rkk)
/* scratch buffer: xShiftSubst needs distinct source and destination */
320 word32 block2[MAXBC]; /* hold intermediate result */
323 int *shift = rkk->shift[1];
325 int ROUNDS = rkk->ROUNDS;
326 word32 *rp = rkk->rk + ROUNDS * BC;
328 /* To decrypt: apply the inverse operations of the encrypt routine,
331 * (xKeyAddition is an involution: it's equal to its inverse)
332 * (the inverse of xSubstitution with table S is xSubstitution with the
333 * inverse table of S)
334 * (the inverse of xShiftRow is xShiftRow over a suitable distance)
337 /* First the special round:
338 * without xInvMixColumn
339 * with extra xKeyAddition
341 xKeyAddition(block2, block, rp, BC);
342 xShiftSubst(block, block2, shift, BC, xSi);
345 /* ROUNDS-1 ordinary rounds
347 for (r = ROUNDS - 1; r > 0; r--) {
348 xKeyAddition(block, block, rp, BC);
349 xInvMixColumn(block2, block, BC);
350 xShiftSubst(block, block2, shift, BC, xSi);
354 /* End with the extra key addition
357 xKeyAddition(block, block, rp, BC);
361 uint8_t ao_aes_mutex;
362 static word32 key[16/4];
365 static word32 iv[16/4];
368 ao_aes_set_mode(enum ao_aes_mode mode)
371 /* we only do CBC_MAC anyways... */
/* Build the round keys in rkk from the file-level key buffer, for a 128-bit
   key and a 128-bit block, then clear iv to all zero bytes.
   NOTE(review): the return type and the code that consumes `in` are not
   visible in this excerpt; presumably `in` is copied into key before the
   schedule runs -- confirm.  The key schedule writes to the array it is
   given, so the key buffer does not keep its original contents afterwards. */
375 ao_aes_set_key(__xdata uint8_t *in)
378 xrijndaelKeySched((word32 *) key, 128, 128, &rkk);
384 memset(iv, '\0', sizeof (iv));
388 ao_aes_run(__xdata uint8_t *in,
389 __xdata uint8_t *out)
392 uint8_t *_iv = (uint8_t *) iv;
394 for (i = 0; i < 16; i++)
396 xrijndaelEncrypt(iv, &rkk);