// Source: GitHub repository wine-mirror/wine, path blob/master/libs/symcrypt/lib/aes-c.c
//
// aes-c.c   code for AES implementation
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//
// The fast-ish C implementation of the core AES functions
//
// Separate C file because at some point we want to be able to switch this out with a compact-C implementation
// that is smaller.
//

#include "precomp.h"
//
// Static vs. dynamically generated tables.
//
// AES uses about 13 kB of tables; it turns out that most of these tables can be generated
// algorithmically much faster than they can be read off the disk.
// This implementation does not do so.
// The reason is that generated tables live in the modifiable data segment, which means
// that they are not shared between different instances of a DLL.
// Static tables are shared. Especially for applications that have a very large number
// of processes (e.g. Terminal Servers) the extra cost of generating and storing a
// per-process copy of these tables is higher than the cost of loading it a few times
// from disk.
// Earlier versions of this implementation did generate the tables dynamically and ran into
// this very problem.
//
// Our tables are aligned to eliminate side-channels from TLB lookups if the TLB page size
// is big enough. For example, the SboxMatrixMult table is 1024-aligned. Each use of that
// table consists of 4 lookups, and each lookup is within its own 1kB aligned subtable.
// The side-channels from cache lines still remain, of course.
//

//extern BYTE SymCryptAesSbox[256]; // Basic S-box, not used
36
extern SYMCRYPT_ALIGN_AT( 256) BYTE SymCryptAesInvSbox[256]; // For final round in decryption
37
extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesSboxMatrixMult[4][256][4]; // Main encryption tables
38
extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesInvSboxMatrixMult[4][256][4];// Main decryption tables
39
extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesInvMatrixMult[4][256][4]; // For computing decryption round keys
40
41
//
// Throughout this implementation we use UINT32s to access byte arrays. The AES
// algorithm almost requires this; without it the performance would be abysmal.
// All data elements are SYMCRYPT_ALIGNed, which must be at least 4.
//

//
// Macro to check for alignment to support platforms that need alignment fix-ups.
// Evaluates to 1 when the address produced by the argument is a multiple of 4,
// and to 0 otherwise.
// The argument is parenthesized so that a pointer expression such as (p + 1)
// is converted to an integer as a whole before its low two bits are tested.
//
#define IS_UINT32_ALIGNED( p ) ((((intptr_t)(p)) & 3) == 0)

//
// Only need to enforce alignment on platforms that are not x86, AMD64, ARM or ARM64:
// NEED_ALIGN is nonzero exactly when none of those SYMCRYPT_CPU_* flags is set.
// Future improvement: should switch to using unaligned pointer accesses
// on some platforms.
//
#define NEED_ALIGN (!(SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64))

VOID
61
SYMCRYPT_CALL
62
SymCryptAes4SboxC(
63
_In_reads_(4) PCBYTE pIn,
64
_Out_writes_(4) PBYTE pOut )
65
//
66
// Perform 4 S-box lookups.
67
// This is a separate function as it can be done side-channel safe using
68
// AES-NI.
69
// Key expansion can actually be improved a lot more with AES-NI, but that
70
// requires major code changes for which we don't have time right now.
71
//
72
{
73
pOut[0] = SymCryptAesSboxMatrixMult[0][pIn[0]][1];
74
pOut[1] = SymCryptAesSboxMatrixMult[0][pIn[1]][1];
75
pOut[2] = SymCryptAesSboxMatrixMult[0][pIn[2]][1];
76
pOut[3] = SymCryptAesSboxMatrixMult[0][pIn[3]][1];
77
}
78
79
VOID
80
SYMCRYPT_CALL
81
SymCryptAesCreateDecryptionRoundKeyC(
82
_In_reads_(16) PCBYTE pEncryptionRoundKey,
83
_Out_writes_(16) PBYTE pDecryptionRoundKey )
84
//
85
// Convert an encryption round key to a decryption round key by applying the inverse
86
// mixcolumn function to each 4-byte subword.
87
// This is a separate function as with AES-NI there is an assembler version of this
88
// function that is side-channel safe.
89
//
90
{
91
int i;
92
PBYTE p = pDecryptionRoundKey;
93
PCBYTE q = pEncryptionRoundKey;
94
95
for( i=0; i<4; i++ ) {
96
*(UINT32 *)p =
97
*(UINT32 *)SymCryptAesInvMatrixMult[0][q[0]] ^
98
*(UINT32 *)SymCryptAesInvMatrixMult[1][q[1]] ^
99
*(UINT32 *)SymCryptAesInvMatrixMult[2][q[2]] ^
100
*(UINT32 *)SymCryptAesInvMatrixMult[3][q[3]];
101
p += 4;
102
q += 4;
103
}
104
105
}
106
107
//
108
// SymCryptAesEncrypt
109
// NOINLINE prevents the compiler from creating additional implementations
110
// that have to be FIPS selftested.
111
//
112
SYMCRYPT_NOINLINE
113
VOID
114
SYMCRYPT_CALL
115
SymCryptAesEncryptC(
116
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
117
_In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbPlaintext,
118
_Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbCiphertext )
119
{
120
SYMCRYPT_ALIGN BYTE state[4][4] = { 0 };
121
SYMCRYPT_ALIGN UINT32 state2[4] = { 0 };
122
123
const BYTE (*keyPtr)[4][4];
124
const BYTE (*keyLimit)[4][4];
125
126
#if NEED_ALIGN
127
SYMCRYPT_ALIGN BYTE alignBuffer[SYMCRYPT_AES_BLOCK_SIZE];
128
#endif
129
130
#if NEED_ALIGN
131
132
//
133
// Callers who don't have their buffers aligned don't care about speed,
134
// so we do this in the simplest way.
135
//
136
if( !(IS_UINT32_ALIGNED( pbPlaintext ) & IS_UINT32_ALIGNED( pbCiphertext )) ) {
137
memcpy( alignBuffer, pbPlaintext, SYMCRYPT_AES_BLOCK_SIZE );
138
SymCryptAesEncrypt( pExpandedKey, alignBuffer, alignBuffer );
139
memcpy( pbCiphertext, alignBuffer, SYMCRYPT_AES_BLOCK_SIZE );
140
SymCryptWipeKnownSize( alignBuffer, sizeof( alignBuffer ) );
141
return;
142
}
143
#endif
144
145
SYMCRYPT_CHECK_MAGIC( pExpandedKey );
146
147
//
148
// From this point on all our data is UINT32 aligned or better on those
149
// platforms that have alignment restrictions.
150
//
151
152
keyPtr = &pExpandedKey->RoundKey[0]; // First round key
153
keyLimit = &pExpandedKey->lastEncRoundKey[0]; // Last round key
154
155
// Initial round (AddRoundKey)
156
*((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ *(UINT32 *) &pbPlaintext[0];
157
*((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ *(UINT32 *) &pbPlaintext[4];
158
*((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ *(UINT32 *) &pbPlaintext[8];
159
*((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ *(UINT32 *) &pbPlaintext[12];
160
161
keyPtr += 1;
162
163
// Main rounds
164
while (keyPtr < keyLimit)
165
{
166
167
// SubBytes/ShiftRows/MixColumns for col. 0
168
state2[0] = *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[0][0] ]);
169
state2[3] = *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[0][1] ]);
170
state2[2] = *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[0][2] ]);
171
state2[1] = *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[0][3] ]);
172
173
// SubBytes/ShiftRows/MixColumns for col. 1
174
state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[1][0] ]);
175
state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[1][1] ]);
176
state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[1][2] ]);
177
state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[1][3] ]);
178
179
// SubBytes/ShiftRows/MixColumns for col. 2
180
state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[2][0] ]);
181
state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[2][1] ]);
182
state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[2][2] ]);
183
state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[2][3] ]);
184
185
// SubBytes/ShiftRows/MixColumns for col. 3
186
state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[3][0] ]);
187
state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[3][1] ]);
188
state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[3][2] ]);
189
state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[3][3] ]);
190
191
// AddRoundKey
192
*((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0];
193
*((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1];
194
*((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2];
195
*((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3];
196
197
keyPtr += 1;
198
}
199
200
// Final round
201
202
// SubBytes/ShiftRows for col. 0
203
state2[0] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][0] ][1];
204
state2[3] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][1] ][1] << 8;
205
state2[2] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][2] ][1] << 16;
206
state2[1] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][3] ][1] << 24;
207
208
// SubBytes/ShiftRows for col. 1
209
state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][0] ][1];
210
state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][1] ][1] << 8;
211
state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][2] ][1] << 16;
212
state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][3] ][1] << 24;
213
214
// SubBytes/ShiftRows for col. 2
215
state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][0] ][1];
216
state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][1] ][1] << 8;
217
state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][2] ][1] << 16;
218
state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][3] ][1] << 24;
219
220
// SubBytes/ShiftRows for col. 3
221
state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][0] ][1];
222
state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][1] ][1] << 8;
223
state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][2] ][1] << 16;
224
state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][3] ][1] << 24;
225
226
// AddRoundKey
227
*((UINT32 *) &pbCiphertext[0 ]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0];
228
*((UINT32 *) &pbCiphertext[4 ]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1];
229
*((UINT32 *) &pbCiphertext[8 ]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2];
230
*((UINT32 *) &pbCiphertext[12]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3];
231
232
SymCryptWipeKnownSize( state, sizeof( state ) );
233
SymCryptWipeKnownSize( state2, sizeof( state2 ) );
234
235
return;
236
}
237
238
239
SYMCRYPT_NOINLINE
240
VOID
241
SYMCRYPT_CALL
242
SymCryptAesDecryptC(
243
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
244
_In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbCiphertext,
245
_Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbPlaintext )
246
{
247
SYMCRYPT_ALIGN BYTE state[4][4] = { 0 };
248
SYMCRYPT_ALIGN UINT32 state2[4] = { 0 };
249
250
const BYTE (*keyPtr)[4][4];
251
const BYTE (*keyLimit)[4][4];
252
253
#if NEED_ALIGN
254
SYMCRYPT_ALIGN BYTE alignBuffer[SYMCRYPT_AES_BLOCK_SIZE];
255
#endif
256
257
#if NEED_ALIGN
258
//
259
// Callers who don't have their buffers aligned don't care about speed,
260
// so we do this in the simplest way.
261
//
262
if( !(IS_UINT32_ALIGNED( pbPlaintext ) & IS_UINT32_ALIGNED( pbCiphertext )) ) {
263
memcpy( alignBuffer, pbCiphertext, SYMCRYPT_AES_BLOCK_SIZE );
264
SymCryptAesDecrypt( pExpandedKey, alignBuffer, alignBuffer );
265
memcpy( pbPlaintext, alignBuffer, SYMCRYPT_AES_BLOCK_SIZE );
266
SymCryptWipeKnownSize( alignBuffer, sizeof( alignBuffer ) );
267
return;
268
}
269
#endif
270
271
SYMCRYPT_CHECK_MAGIC( pExpandedKey );
272
273
keyPtr = &pExpandedKey->lastEncRoundKey[0]; // First round key
274
keyLimit = &pExpandedKey->lastDecRoundKey[0]; // Last round key
275
276
// Initial round (AddRoundKey)
277
*((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ *(UINT32 *) &pbCiphertext[0];
278
*((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ *(UINT32 *) &pbCiphertext[4];
279
*((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ *(UINT32 *) &pbCiphertext[8];
280
*((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ *(UINT32 *) &pbCiphertext[12];
281
282
keyPtr += 1;
283
284
// Main rounds
285
while (keyPtr < keyLimit)
286
{
287
288
// SubBytes/ShiftRows/MixColumns for col. 0
289
state2[0] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[0][0] ]);
290
state2[1] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[0][1] ]);
291
state2[2] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[0][2] ]);
292
state2[3] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[0][3] ]);
293
294
// SubBytes/ShiftRows/MixColumns for col. 1
295
state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[1][0] ]);
296
state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[1][1] ]);
297
state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[1][2] ]);
298
state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[1][3] ]);
299
300
// SubBytes/ShiftRows/MixColumns for col. 2
301
state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[2][0] ]);
302
state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[2][1] ]);
303
state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[2][2] ]);
304
state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[2][3] ]);
305
306
// SubBytes/ShiftRows/MixColumns for col. 3
307
state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[3][0] ]);
308
state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[3][1] ]);
309
state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[3][2] ]);
310
state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[3][3] ]);
311
312
// AddRoundKey
313
*((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0];
314
*((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1];
315
*((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2];
316
*((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3];
317
318
keyPtr += 1;
319
}
320
321
// Final round
322
323
// SubBytes/ShiftRows for col. 0
324
state2[0] = (UINT32) SymCryptAesInvSbox[ state[0][0] ];
325
state2[1] = (UINT32) SymCryptAesInvSbox[ state[0][1] ] << 8;
326
state2[2] = (UINT32) SymCryptAesInvSbox[ state[0][2] ] << 16;
327
state2[3] = (UINT32) SymCryptAesInvSbox[ state[0][3] ] << 24;
328
329
// SubBytes/ShiftRows for col. 1
330
state2[1] |= (UINT32) SymCryptAesInvSbox[ state[1][0] ];
331
state2[2] |= (UINT32) SymCryptAesInvSbox[ state[1][1] ] << 8;
332
state2[3] |= (UINT32) SymCryptAesInvSbox[ state[1][2] ] << 16;
333
state2[0] |= (UINT32) SymCryptAesInvSbox[ state[1][3] ] << 24;
334
335
// SubBytes/ShiftRows for col. 2
336
state2[2] |= (UINT32) SymCryptAesInvSbox[ state[2][0] ];
337
state2[3] |= (UINT32) SymCryptAesInvSbox[ state[2][1] ] << 8;
338
state2[0] |= (UINT32) SymCryptAesInvSbox[ state[2][2] ] << 16;
339
state2[1] |= (UINT32) SymCryptAesInvSbox[ state[2][3] ] << 24;
340
341
// SubBytes/ShiftRows for col. 3
342
state2[3] |= (UINT32) SymCryptAesInvSbox[ state[3][0] ];
343
state2[0] |= (UINT32) SymCryptAesInvSbox[ state[3][1] ] << 8;
344
state2[1] |= (UINT32) SymCryptAesInvSbox[ state[3][2] ] << 16;
345
state2[2] |= (UINT32) SymCryptAesInvSbox[ state[3][3] ] << 24;
346
347
// AddRoundKey
348
*((UINT32 *) &pbPlaintext[0 ]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0];
349
*((UINT32 *) &pbPlaintext[4 ]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1];
350
*((UINT32 *) &pbPlaintext[8 ]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2];
351
*((UINT32 *) &pbPlaintext[12]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3];
352
353
SymCryptWipeKnownSize( state, sizeof( state ) );
354
SymCryptWipeKnownSize( state2, sizeof( state2 ) );
355
356
return;
357
}
358
359
VOID
360
SYMCRYPT_CALL
361
SymCryptAesEcbEncryptC(
362
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
363
_In_reads_( cbData ) PCBYTE pbSrc,
364
_Out_writes_( cbData ) PBYTE pbDst,
365
SIZE_T cbData )
366
{
367
while( cbData >= SYMCRYPT_AES_BLOCK_SIZE )
368
{
369
SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst );
370
pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
371
pbDst += SYMCRYPT_AES_BLOCK_SIZE;
372
cbData -= SYMCRYPT_AES_BLOCK_SIZE;
373
}
374
}
375
376
VOID
377
SYMCRYPT_CALL
378
SymCryptAesEcbDecryptC(
379
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
380
_In_reads_( cbData ) PCBYTE pbSrc,
381
_Out_writes_( cbData ) PBYTE pbDst,
382
SIZE_T cbData )
383
{
384
while( cbData >= SYMCRYPT_AES_BLOCK_SIZE )
385
{
386
SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst );
387
pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
388
pbDst += SYMCRYPT_AES_BLOCK_SIZE;
389
cbData -= SYMCRYPT_AES_BLOCK_SIZE;
390
}
391
}
392
393