Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/symcrypt/lib/aes-key.c
15010 views
1
//
2
// aes.c code for AES implementation
3
//
4
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
5
//
6
// The actual encryption and decryption routines here are not nearly as fast as the
7
// assembler ones. They are used on platforms that don't have assembler implementations
8
// and for various testing purposes.
9
//
10
// This code derives from the original fast AES code that Niels Ferguson wrote
11
// for BitLocker in Windows Vista.
12
// The C code is derived from the AES that was already in the RSA32 library,
13
// the assembler code was created new at that time.
14
//
15
16
17
#include "precomp.h"
18
19
20
///////////////////////////////////////////////////////////////////////////////
21
// Key expansion uses two functions, a 4-byte S-box lookup and one
22
// to create a decryption round key from an encryption round key.
23
// These are the C implementations of these functions
24
//
25
26
27
//
// AES key-schedule round constants, indexed by iteration number.
// The key expansion loops in this file start counting at 1, so entry 0 is a
// placeholder. Entries 1..10 are the successive powers of x in GF(2^8):
// 0x80 doubles to 0x1b after reduction, and 0x1b doubles to 0x36.
// The table is only ever read, so it is declared const.
//
static const BYTE g_SymCryptAesRoundConstant[11] =
{
    0, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
};
31
32
SYMCRYPT_NOINLINE
33
SYMCRYPT_ERROR
34
SYMCRYPT_CALL
35
SymCryptAesExpandKeyInternal(
36
_Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
37
_In_reads_(cbKey) PCBYTE pbKey,
38
SIZE_T cbKey,
39
BOOLEAN fCreateDecryptionKeys )
40
{
41
UINT32 nRounds;
42
BYTE * p;
43
BYTE * q;
44
UINT32 i;
45
UINT32 t;
46
47
BOOL UseSimd = FALSE;
48
SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR;
49
50
#if SYMCRYPT_CPU_X86
51
SYMCRYPT_EXTENDED_SAVE_DATA SaveData;
52
53
if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) )
54
{
55
if( SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR )
56
{
57
UseSimd = TRUE;
58
}
59
}
60
#elif SYMCRYPT_CPU_AMD64
61
if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) )
62
{
63
UseSimd = TRUE;
64
}
65
#elif SYMCRYPT_CPU_ARM64
66
if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) )
67
{
68
UseSimd = TRUE;
69
}
70
#endif
71
72
SYMCRYPT_SET_MAGIC( pExpandedKey );
73
74
//
75
// Separate code for each key size, this is significantly faster.
76
// We have a number of applications that do frequent key expansions.
77
//
78
switch( cbKey )
79
{
80
case 16:
81
nRounds = 10;
82
pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds];
83
pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds];
84
85
memcpy( &pExpandedKey->RoundKey[0], pbKey, 16 );
86
87
p = (BYTE *)&pExpandedKey->RoundKey[1];
88
89
for( i=1; i<=nRounds; i++ )
90
{
91
SymCryptAes4Sbox( &p[-4], p, UseSimd );
92
t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 16) ^ g_SymCryptAesRoundConstant[i];
93
SYMCRYPT_STORE_LSBFIRST32( p, t ); // this is a macro that re-evaluates its arguments
94
95
*(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 12);
96
*(UINT32 *)(p+8) = *(UINT32 *)(p+4) ^ *(UINT32 *)(p - 8);
97
*(UINT32 *)(p+12) = *(UINT32 *)(p+8) ^ *(UINT32 *)(p - 4);
98
99
p += 16;
100
}
101
102
break;
103
104
case 24:
105
nRounds = 12;
106
pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds];
107
pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds];
108
109
memcpy( &pExpandedKey->RoundKey[0], pbKey, 24 );
110
111
p = (BYTE *)&pExpandedKey->RoundKey[0] + 24;
112
113
//
114
// We have 12 rounds, 13 round keys, and 13*16 = 208 bytes of encryption key to generate.
115
// We have 24 already, so we need 184 more.
116
// Each iteration produces 24 bytes, so we need to loop 8 times.
117
//
118
for( i=1; i<=8; i++ )
119
{
120
SymCryptAes4Sbox( &p[-4], p, UseSimd );
121
t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 24) ^ g_SymCryptAesRoundConstant[i];
122
SYMCRYPT_STORE_LSBFIRST32( p, t );
123
124
*(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 20);
125
*(UINT32 *)(p+8) = *(UINT32 *)(p+ 4) ^ *(UINT32 *)(p - 16);
126
*(UINT32 *)(p+12) = *(UINT32 *)(p+ 8) ^ *(UINT32 *)(p - 12);
127
*(UINT32 *)(p+16) = *(UINT32 *)(p+12) ^ *(UINT32 *)(p - 8);
128
*(UINT32 *)(p+20) = *(UINT32 *)(p+16) ^ *(UINT32 *)(p - 4);
129
130
p += 24;
131
}
132
133
break;
134
135
case 32:
136
nRounds = 14;
137
pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds];
138
pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds];
139
140
memcpy( &pExpandedKey->RoundKey[0], pbKey, 32 );
141
142
p = (BYTE *)&pExpandedKey->RoundKey[0] + 32;
143
144
//
145
// We have 14 rounds, 15 round keys, and 15*16 = 240 bytes of encryption key to generate.
146
// We have 32 already, so we need 208 more.
147
// Each iteration produces 32 bytes, so we need to loop 6.5 times.
148
//
149
for( i=1; i<=6; i++ )
150
{
151
SymCryptAes4Sbox( &p[-4], p, UseSimd );
152
t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 32) ^ g_SymCryptAesRoundConstant[i];
153
SYMCRYPT_STORE_LSBFIRST32( p, t );
154
155
*(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 28);
156
*(UINT32 *)(p+8) = *(UINT32 *)(p + 4) ^ *(UINT32 *)(p - 24);
157
*(UINT32 *)(p+12) = *(UINT32 *)(p + 8) ^ *(UINT32 *)(p - 20);
158
159
SymCryptAes4Sbox( &p[12], &p[16], UseSimd );
160
*(UINT32 *)(p+16) = *(UINT32 *)(p + 16) ^ *(UINT32 *)(p - 16);
161
162
*(UINT32 *)(p+20) = *(UINT32 *)(p + 16) ^ *(UINT32 *)(p - 12);
163
*(UINT32 *)(p+24) = *(UINT32 *)(p + 20) ^ *(UINT32 *)(p - 8);
164
*(UINT32 *)(p+28) = *(UINT32 *)(p + 24) ^ *(UINT32 *)(p - 4);
165
166
p += 32;
167
}
168
169
// We looped 6 times, so here is the half-loop
170
171
SymCryptAes4Sbox( &p[-4], p, UseSimd );
172
t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 32) ^ g_SymCryptAesRoundConstant[i];
173
SYMCRYPT_STORE_LSBFIRST32( p, t );
174
175
*(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 28);
176
*(UINT32 *)(p+8) = *(UINT32 *)(p + 4) ^ *(UINT32 *)(p - 24);
177
*(UINT32 *)(p+12) = *(UINT32 *)(p + 8) ^ *(UINT32 *)(p - 20);
178
179
break;
180
181
default:
182
status = SYMCRYPT_WRONG_KEY_SIZE;
183
goto cleanup;
184
}
185
186
187
if( fCreateDecryptionKeys )
188
{
189
p = &pExpandedKey->RoundKey[0][0][0];
190
q = (PBYTE)(pExpandedKey->lastDecRoundKey);
191
192
// The first encryption round key is the last decryption round key
193
memcpy( q, p, SYMCRYPT_AES_BLOCK_SIZE );
194
p += 16;
195
q -= 16;
196
197
while( p < (PBYTE) pExpandedKey->lastEncRoundKey )
198
{
199
SymCryptAesCreateDecryptionRoundKey( p, q, UseSimd );
200
q -= 16;
201
p += 16;
202
}
203
}
204
205
cleanup:
206
207
#if SYMCRYPT_CPU_X86
208
if( UseSimd )
209
{
210
SymCryptRestoreXmm( &SaveData );
211
}
212
#endif
213
214
return status;
215
}
216
217
SYMCRYPT_ERROR
218
SYMCRYPT_CALL
219
SymCryptAesExpandKey(
220
_Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
221
_In_reads_(cbKey) PCBYTE pbKey,
222
SIZE_T cbKey )
223
224
{
225
return SymCryptAesExpandKeyInternal( pExpandedKey, pbKey, cbKey, TRUE );
226
}
227
228
SYMCRYPT_ERROR
229
SYMCRYPT_CALL
230
SymCryptAesExpandKeyEncryptOnly(
231
_Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
232
_In_reads_(cbKey) PCBYTE pbKey,
233
SIZE_T cbKey )
234
{
235
return SymCryptAesExpandKeyInternal( pExpandedKey, pbKey, cbKey, FALSE );
236
}
237
238
VOID
239
SYMCRYPT_CALL
240
SymCryptAesKeyCopy( _In_ PCSYMCRYPT_AES_EXPANDED_KEY pSrc,
241
_Out_ PSYMCRYPT_AES_EXPANDED_KEY pDst )
242
{
243
SYMCRYPT_CHECK_MAGIC( pSrc );
244
245
*pDst = *pSrc;
246
pDst->lastEncRoundKey = &pDst->RoundKey[0] + (pSrc->lastEncRoundKey - &pSrc->RoundKey[0]);
247
pDst->lastDecRoundKey = &pDst->RoundKey[0] + (pSrc->lastDecRoundKey - &pSrc->RoundKey[0]);
248
249
SYMCRYPT_SET_MAGIC( pDst );
250
}
251
252
//
253
// Self test code
254
//
255
256
257
//
// Expected 16-byte AES-128 ciphertext for the self test.
// The bytes match the ciphertext of the AES-128 example in FIPS-197 Appendix C.1;
// the corresponding key and plaintext are not defined in this file.
//
const BYTE SymCryptAesNistTestVector128Ciphertext[16] = {
    0x69, 0xc4, 0xe0, 0xd8,
    0x6a, 0x7b, 0x04, 0x30,
    0xd8, 0xcd, 0xb7, 0x80,
    0x70, 0xb4, 0xc5, 0x5a,
};
261
262
263
264
/****************************************************************
265
* OLD CODE
266
*
267
* Old code to generate the AES tables dynamically.
268
* Kept for future reference.
269
*
270
271
272
//
273
// Prototype; on some platforms this function is in assembler.
274
//
275
VOID
276
SYMCRYPT_CALL
277
SymCryptAesCreateRotatedTables( BYTE MatrixMult[4][256][4] );
278
279
VOID
280
SYMCRYPT_CALL
281
SymCryptAesCreateRotatedTables( _Inout_ BYTE MatrixMult[4][256][4] )
282
{
283
int i,j,k;
284
285
//
286
// We do this byte-by-byte, which is easiest.
287
// It would be faster to use UINT32 operations,
288
// but that is endian-specific, and therefore platform-specific.
289
// Endian-agnostic UINT32-based code would be a lot more complicated.
290
// All this is extremely easy to do in assembler, which we do on those
291
// platforms that have assembler implementations.
292
//
293
for( j=1; j<4; j++ ) {
294
for( i=0; i<256; i++ ) {
295
for( k=0; k<4; k++ ) {
296
MatrixMult[j][i][k] = MatrixMult[0][i][(k-j)&3];
297
}
298
}
299
}
300
}
301
302
303
304
//
305
// SymCryptAesInitMatrixMultiplyTable
306
//
307
// Initialize a matrix multiplication table.
308
// Each matrix multiplication table consists of 4 tables of 256 entries of 4 bytes each.
309
// The four tables are rotated copies of each other.
310
// This function generates the first of those four tables from the init
311
// value.
312
//
313
// After this call:
314
// At index i the table contains the four bytes
315
// i * init[0], i * init[1], i * init[2], i * init[3]
316
// where multiplication is in GF(2^8).
317
//
318
// We do not do a GF(2^8) multiplication for each entry, but rather use the
319
// relationship (a xor b) * init[.] = a * init[.] xor b * init[.]
320
// And only compute i*init[.] for i = 1,2,4,8,...,128. This can be done
321
// using repeated multiplication by x in the finite field.
322
//
323
// It is safe to call this function on two separate threads for the same table.
324
// All invocations will write the same data to the table, and within a thread each entry is written
325
// before it is read. Doing parallel initializations of the same table can be very inefficient
326
// as multiple cores will be fighting over the cache lines, but the result will be correct.
327
// We use this property to initialize the tables lazily.
328
//
329
static
330
VOID
331
SYMCRYPT_CALL
332
SymCryptAesInitMatrixMultiplyTable( _Out_ SYMCRYPT_ALIGN BYTE MatrixMult[256][4],
333
_In_ SYMCRYPT_ALIGN BYTE init[4]
334
)
335
{
336
int i,j;
337
SYMCRYPT_ALIGN BYTE initCopy[4];
338
UINT32 initCopyAsUint32;
339
340
//
341
// We copy the init value so that we can modify it without worrying about multi-threading
342
// issues.
343
//
344
*(UINT32 *)initCopy = *(UINT32 *)init;
345
346
*(UINT32 *)MatrixMult[0] = 0;
347
for( i=1; i<256; i<<=1 )
348
{
349
initCopyAsUint32 = *(UINT32 *)initCopy;
350
for( j=0; j<i; j++ )
351
{
352
*(UINT32 *)MatrixMult[i+j] = *(UINT32 *)MatrixMult[j] ^ initCopyAsUint32;
353
}
354
for( j=0; j<4; j++ )
355
{
356
initCopy[j] = MULT_BY_X( initCopy[j] );
357
}
358
}
359
}
360
361
362
//
363
// SymCryptAesInitialize
364
//
365
// Initialize the static tables for the AES implementation.
366
// This function is called by the key expansion function if it finds the
367
// tables not initialized.
368
//
369
// This leads to an interesting case where multiple threads running on multiple
370
// CPUs run this initialization code at the same time.
371
// This code is carefully structured to allow that. When global data is written it is
372
// always with the final value, and we never read uninitialized global data.
373
// Thus, even if two CPUs run this code at the same time, they will both initialize each
374
// memory location to the same correct value and the end result will be correct.
375
// (Performance will suffer due to the fact that cache lines will be bounced back and forth
376
// between the two CPUs, but that is not a significant concern as this code is used only once.)
377
//
378
// At the end of the initialization the flag is set to indicate that further
379
// key expansion invocations do not need to re-run the initialization.
380
// We use memory barriers to keep this multi-thread safe.
381
//
382
static
383
VOID
384
SYMCRYPT_CALL
385
SymCryptAesInitialize(void)
386
{
387
int i,j;
388
BYTE S;
389
BYTE Stimes2;
390
391
//
392
// We force alignment of these arrays as we sometimes treat them as a UINT32
393
//
394
SYMCRYPT_ALIGN BYTE InvMatrixEntry[4] = {0xe, 0x9, 0xd, 0xb};
395
SYMCRYPT_ALIGN BYTE MatrixEntry[4] = {2, 1, 1, 3};
396
SYMCRYPT_ALIGN BYTE MatrixScratch[256][4];
397
398
// Generate the forward MDS multiplication table in the scratch space
399
SymCryptAesInitMatrixMultiplyTable( MatrixScratch, MatrixEntry );
400
401
// Initialize first table of SymCryptAesInvMatrixMult
402
SymCryptAesInitMatrixMultiplyTable( SymCryptAesInvMatrixMult[0], InvMatrixEntry );
403
404
//
405
// Build the InvSbox table and the first table of SymCryptAesSboxMatrixMult and
406
// SymCryptAesInvSboxMatrixMult
407
//
408
for( i=0; i<256; i++ ) {
409
S = SymCryptAesSbox[i];
410
SymCryptAesInvSbox[S] = (BYTE) i;
411
*(UINT32 *)SymCryptAesSboxMatrixMult[0][i] = *(UINT32 *)MatrixScratch[S];
412
*(UINT32 *)SymCryptAesInvSboxMatrixMult[0][S] = *(UINT32 *)SymCryptAesInvMatrixMult[0][i];
413
}
414
415
//
416
// Now we generate the byte rotations of the tables
417
//
418
SymCryptAesCreateRotatedTables( SymCryptAesSboxMatrixMult );
419
SymCryptAesCreateRotatedTables( SymCryptAesInvSboxMatrixMult );
420
SymCryptAesCreateRotatedTables( SymCryptAesInvMatrixMult );
421
422
//
423
// This is a memory barrier. It ensures that all the memory writes we do before the barrier
424
// are globally visible to other CPUs before the memory writes we do after the fence.
425
// In this particular case, it ensures that every CPU sees the completed tables before
426
// it sees the flag as set.
427
//
428
MemoryBarrier();
429
430
//
431
// Set the flag to signal that the tables are initialized.
432
//
433
SymCryptAesTablesInitialized = TRUE;
434
}
435
436
437
*/
438
439