Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/symcrypt/lib/ScsTable.c
15010 views
1
//
2
// ScsTable.c
3
// Side-channel safe table
4
//
5
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
6
//
7
//
8
// These functions implement a table of large elements.
9
// Reading an element from the table is done in a way that does not reveal the
10
// element accessed through memory side channels.
11
// Basically, the whole table is read by the CPU, and the required data is selected
12
// using boolean operations.
13
//
14
15
#include "precomp.h"
16
17
//
18
// Items are multiple of SYMCRYPT_DIGIT_SIZE long.
19
//
20
// Format:
21
// The memory format is parameterized for optimal implementations on several
22
// different architectures.
23
//
24
// The following parameters define the format:
25
// - group_size
26
// - interleave_size
27
//
28
// Let nElements be the number of elements in the table.
29
// If necessary, the size of each element in the table is rounded up to a multiple of interleave_size.
30
// Each whole group of group_size elements is interleaved with each other.
31
// The last (nElements % group_size) elements are simply stored consecutively.
32
// (For now we simply require that nElements is a multiple of group_size.)
33
// Within each group of group_size, the data for the elements are interleaved in natural order
34
// using chunks of interleave_size bytes.
35
//
36
// The choice of group_size and interleave_size depends on the CPU architecture, CPU features,
37
// and even the element size. (E.g. 1024-bit elements might interleave @ 64 bytes on an AVX512
38
// capable CPU, but 256-bit elements would have to interleave at 16 or 32 bytes on that same CPU.)
39
//
40
41
// Currently these are constants as that allows easier optimizations...
42
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64
43
#define SYMCRYPT_SCSTABLE_USE64 1
44
#define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE 32
45
#define SYMCRYPT_SCSTABLE_GROUP_SIZE 4
46
typedef UINT64 SYMCRYPT_SCSTABLE_TYPE;
47
#else
48
#define SYMCRYPT_SCSTABLE_USE64 0
49
#define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE 16
50
#define SYMCRYPT_SCSTABLE_GROUP_SIZE 4
51
typedef UINT32 SYMCRYPT_SCSTABLE_TYPE;
52
#endif
53
54
UINT32
55
SYMCRYPT_CALL
56
SymCryptScsTableInit(
57
_Out_ PSYMCRYPT_SCSTABLE pScsTable,
58
UINT32 nElements,
59
UINT32 elementSize )
60
{
61
UINT32 groupSize;
62
UINT32 interleaveSize;
63
UINT32 cbBuffer;
64
65
SYMCRYPT_ASSERT( nElements > 0 );
66
67
#pragma warning( suppress: 4127 ) // conditional expression is constant
68
if( SYMCRYPT_CPU_AMD64 && elementSize == 128 )
69
{
70
// Highly optimized assembler mode for 1024-bit entries for RSA-2048...
71
interleaveSize = 128;
72
groupSize = 1;
73
} else {
74
// Standard C implementation
75
interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE;
76
groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE;
77
}
78
79
// Right now, we limit ourselves to element sizes that are a multiple of the interleaveSize and
80
// # elements that are a multiple of the group size.
81
// We also limit ourselves to sensible input sizes
82
SYMCRYPT_ASSERT( elementSize % interleaveSize == 0 && nElements % groupSize == 0 && (elementSize | nElements) < (1 << 16) && elementSize > 0 );
83
84
cbBuffer = elementSize * nElements; // Each factor is < 2^16, so there is no overflow in the mul
85
86
pScsTable->groupSize = groupSize;
87
pScsTable->interleaveSize = interleaveSize;
88
pScsTable->nElements = nElements;
89
pScsTable->elementSize = elementSize;
90
pScsTable->cbTableData = cbBuffer;
91
pScsTable->pbTableData = NULL;
92
93
return cbBuffer;
94
}
95
96
VOID
97
SYMCRYPT_CALL
98
SymCryptScsTableSetBuffer(
99
_Inout_ PSYMCRYPT_SCSTABLE pScsTable,
100
_Inout_updates_bytes_( cbBuffer ) PBYTE pbBuffer,
101
UINT32 cbBuffer )
102
{
103
SYMCRYPT_ASSERT(cbBuffer >= pScsTable->cbTableData);
104
UNREFERENCED_PARAMETER( cbBuffer );
105
106
pScsTable->pbTableData = pbBuffer;
107
}
108
109
110
// Compile-time checks for the assumptions hard-coded in the generic C
// store/load routines in this file:
//  - the interleave chunk is 16 or 32 bytes,
//  - an interleave chunk is exactly 4 table words (the routines copy/select
//    words [0..3] of each chunk),
//  - a group is exactly 4 elements (SymCryptScsTableLoadC uses four masks and
//    the fixed word offsets 0, 4, 8 and 12 within a group).
C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 16 || SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 32 );
C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 4 * sizeof( SYMCRYPT_SCSTABLE_TYPE ) );
C_ASSERT( SYMCRYPT_SCSTABLE_GROUP_SIZE == 4 );
113
114
VOID
115
SYMCRYPT_CALL
116
SymCryptScsTableStoreC(
117
_Inout_ PSYMCRYPT_SCSTABLE pScsTable,
118
UINT32 iIndex,
119
_In_reads_bytes_( cbData ) PCBYTE pbData,
120
UINT32 cbData )
121
{
122
UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE;
123
UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE;
124
UINT32 elementSize = pScsTable->elementSize;
125
UINT32 groupOffset;
126
127
SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize );
128
SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize );
129
130
SYMCRYPT_ASSERT( cbData == elementSize );
131
UNREFERENCED_PARAMETER( cbData );
132
133
SYMCRYPT_ASSERT(iIndex < pScsTable->nElements);
134
135
groupOffset = iIndex % groupSize;
136
137
// dcl - document why this can't be an integer overflow
138
SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) (pScsTable->pbTableData + (iIndex - groupOffset) * elementSize + groupOffset * interleaveSize);
139
SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pbData;
140
141
UINT32 nInterleaves = elementSize / interleaveSize;
142
143
do
144
{
145
pDst[0] = pSrc[0];
146
pDst[1] = pSrc[1];
147
pDst[2] = pSrc[2];
148
pDst[3] = pSrc[3];
149
150
pDst += interleaveSize * groupSize / sizeof( *pDst );
151
pSrc += interleaveSize / sizeof( *pSrc );
152
nInterleaves--;
153
} while( nInterleaves > 0 );
154
155
}
156
157
#if SYMCRYPT_CPU_AMD64
//
// SymCryptScsTableStore128Xmm
//
// AMD64-only store for tables of 128-byte elements with group size 1:
// element iIndex occupies bytes [iIndex * 128, iIndex * 128 + 128) of the table
// buffer and is copied as eight 128-bit values.
//
//  pScsTable - table with elementSize == 128 and groupSize == 1 (asserted)
//  iIndex    - element to write; asserted to be < nElements
//  pbData    - 128 bytes of element data
//  cbData    - asserted to be 128; otherwise unused
//
// NOTE(review): both buffers are accessed through __m128i pointers, which
// presumably requires 16-byte alignment - confirm against the callers.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableStore128Xmm(
    _Inout_                     PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _In_reads_bytes_( cbData )  PCBYTE              pbData,
                                UINT32              cbData )
{
    __m128i * pOut = (__m128i *) (pScsTable->pbTableData + iIndex * 128);
    const __m128i * pIn = (const __m128i *) pbData;
    UINT32 k;

    SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 );
    UNREFERENCED_PARAMETER( cbData );

    // 8 x 16 bytes = one 128-byte element
    for( k = 0; k < 8; k++ )
    {
        pOut[k] = pIn[k];
    }
}
#endif // AMD64
182
183
VOID
184
SYMCRYPT_CALL
185
SymCryptScsTableLoadC(
186
_In_ PSYMCRYPT_SCSTABLE pScsTable,
187
UINT32 iIndex,
188
_Out_writes_bytes_(cbData) PBYTE pbData,
189
UINT32 cbData )
190
{
191
UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE;
192
UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE;
193
UINT32 elementSize = pScsTable->elementSize;
194
195
SYMCRYPT_SCSTABLE_TYPE mask0, mask1, mask2, mask3;
196
UINT32 i;
197
UINT32 j;
198
UINT32 nElements = pScsTable->nElements;
199
200
const SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pScsTable->pbTableData;
201
SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) pbData;
202
SYMCRYPT_SCSTABLE_TYPE * pD;
203
204
UINT32 nInterleaves = elementSize / interleaveSize;
205
206
207
SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize );
208
SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize );
209
210
SYMCRYPT_ASSERT( cbData >= sizeof( SYMCRYPT_SCSTABLE_TYPE ) * SYMCRYPT_SCSTABLE_GROUP_SIZE );
211
SYMCRYPT_ASSERT( cbData == pScsTable->elementSize );
212
UNREFERENCED_PARAMETER( cbData );
213
214
#if SYMCRYPT_SCSTABLE_USE64
215
#define SCS_MASK_EQUAL32( _a, _b ) ( ~(UINT64) ((INT64) ((UINT64)0 - (_a ^ _b)) >> 32 ) )
216
#else
217
#define SCS_MASK_EQUAL32( _a, _b ) (SYMCRYPT_MASK32_EQ( _a, _b ))
218
#endif
219
220
i = 0;
221
222
mask0 = SCS_MASK_EQUAL32( i+0, iIndex );
223
mask1 = SCS_MASK_EQUAL32( i+1, iIndex );
224
mask2 = SCS_MASK_EQUAL32( i+2, iIndex );
225
mask3 = SCS_MASK_EQUAL32( i+3, iIndex );
226
227
j = nInterleaves;
228
pD = pDst;
229
230
do {
231
pD[0] = (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]);
232
pD[1] = (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]);
233
pD[2] = (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]);
234
pD[3] = (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]);
235
pD += interleaveSize / sizeof( *pD );
236
pSrc += interleaveSize * groupSize / sizeof( *pSrc );
237
j--;
238
} while( j > 0 );
239
240
i += groupSize;
241
242
while (i + groupSize <= nElements)
243
{
244
245
mask0 = SCS_MASK_EQUAL32( i+0, iIndex );
246
mask1 = SCS_MASK_EQUAL32( i+1, iIndex );
247
mask2 = SCS_MASK_EQUAL32( i+2, iIndex );
248
mask3 = SCS_MASK_EQUAL32( i+3, iIndex );
249
250
j = nInterleaves;
251
pD = pDst;
252
253
do {
254
pD[0] |= (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]);
255
pD[1] |= (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]);
256
pD[2] |= (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]);
257
pD[3] |= (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]);
258
pD += interleaveSize / sizeof( *pD );
259
pSrc += interleaveSize * groupSize / sizeof( *pSrc );
260
j--;
261
} while( j > 0 );
262
263
i += groupSize;
264
}
265
}
266
267
#if SYMCRYPT_CPU_AMD64
//
// SymCryptScsTableLoad128Xmm
//
// AMD64-only read for tables of 128-byte elements with group size 1.
// All nElements entries are read in order. A running counter is compared with
// iIndex to form a per-entry mask; each entry is ANDed with its mask and ORed
// into eight 128-bit accumulators, which are written to pbData at the end.
// The set of memory locations touched therefore does not depend on iIndex.
//
//  pScsTable - table with elementSize == 128 and groupSize == 1 (asserted)
//  iIndex    - element to read; asserted to be < nElements
//  pbData    - receives the 128-byte element
//  cbData    - asserted to be 128; otherwise unused
//
// NOTE(review): both buffers are accessed through __m128i pointers, which
// presumably requires 16-byte alignment - confirm against the callers.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableLoad128Xmm(
    _In_                        PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _Out_writes_bytes_(cbData)  PBYTE               pbData,
                                UINT32              cbData )
{
    UINT32 remaining;

    __m128i R0, R1, R2, R3, R4, R5, R6, R7;     // accumulators for the selected entry
    __m128i Mask;

    __m128i Counter = _mm_setzero_si128();                              // index of the entry about to be read
    __m128i Step    = _mm_set_epi32( 1, 1, 1, 1 );
    __m128i Wanted  = _mm_set_epi32( iIndex, iIndex, iIndex, iIndex );

    __m128i * pEntry = (__m128i *) pScsTable->pbTableData;
    __m128i * pOut = (__m128i *) pbData;

    SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 );
    UNREFERENCED_PARAMETER( cbData );

    // Entry 0 initializes the accumulators.
    Mask = _mm_cmpeq_epi32( Counter, Wanted );
    Counter = _mm_add_epi32( Counter, Step );

    R0 = _mm_and_si128( Mask, pEntry[0] );
    R1 = _mm_and_si128( Mask, pEntry[1] );
    R2 = _mm_and_si128( Mask, pEntry[2] );
    R3 = _mm_and_si128( Mask, pEntry[3] );
    R4 = _mm_and_si128( Mask, pEntry[4] );
    R5 = _mm_and_si128( Mask, pEntry[5] );
    R6 = _mm_and_si128( Mask, pEntry[6] );
    R7 = _mm_and_si128( Mask, pEntry[7] );

    pEntry += 8;

    // Entries 1 .. nElements-1 are merged into the accumulators.
    for( remaining = pScsTable->nElements - 1; remaining > 0; remaining-- )
    {
        Mask = _mm_cmpeq_epi32( Counter, Wanted );
        Counter = _mm_add_epi32( Counter, Step );

        R0 = _mm_or_si128( R0, _mm_and_si128( Mask, pEntry[0] ) );
        R1 = _mm_or_si128( R1, _mm_and_si128( Mask, pEntry[1] ) );
        R2 = _mm_or_si128( R2, _mm_and_si128( Mask, pEntry[2] ) );
        R3 = _mm_or_si128( R3, _mm_and_si128( Mask, pEntry[3] ) );
        R4 = _mm_or_si128( R4, _mm_and_si128( Mask, pEntry[4] ) );
        R5 = _mm_or_si128( R5, _mm_and_si128( Mask, pEntry[5] ) );
        R6 = _mm_or_si128( R6, _mm_and_si128( Mask, pEntry[6] ) );
        R7 = _mm_or_si128( R7, _mm_and_si128( Mask, pEntry[7] ) );

        pEntry += 8;
    }

    pOut[0] = R0;
    pOut[1] = R1;
    pOut[2] = R2;
    pOut[3] = R3;
    pOut[4] = R4;
    pOut[5] = R5;
    pOut[6] = R6;
    pOut[7] = R7;
}
#endif // AMD64
331
332
VOID
333
SYMCRYPT_CALL
334
SymCryptScsTableStore(
335
_Inout_ PSYMCRYPT_SCSTABLE pScsTable,
336
UINT32 iIndex,
337
_In_reads_bytes_( cbData ) PCBYTE pbData,
338
UINT32 cbData )
339
{
340
#if SYMCRYPT_CPU_AMD64
341
342
if( pScsTable->elementSize == 128 )
343
{
344
SymCryptScsTableStore128Xmm( pScsTable, iIndex, pbData, cbData );
345
} else {
346
SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData );
347
}
348
349
#else
350
351
SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData );
352
353
#endif
354
}
355
356
VOID
357
SYMCRYPT_CALL
358
SymCryptScsTableLoad(
359
_In_ PSYMCRYPT_SCSTABLE pScsTable,
360
UINT32 iIndex,
361
_Out_writes_bytes_(cbData) PBYTE pbData,
362
UINT32 cbData )
363
{
364
// This is the side-channel safe routine
365
366
#if SYMCRYPT_CPU_AMD64
367
368
if( pScsTable->elementSize == 128 )
369
{
370
SymCryptScsTableLoad128Xmm( pScsTable, iIndex, pbData, cbData );
371
} else {
372
SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData );
373
}
374
375
#else
376
377
SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData );
378
379
#endif
380
}
381
382
VOID
383
SYMCRYPT_CALL
384
SymCryptScsTableWipe(
385
_Inout_ PSYMCRYPT_SCSTABLE pScsTable )
386
{
387
SymCryptWipe( pScsTable->pbTableData, pScsTable->cbTableData );
388
}
389
390