CoCalc -- fdef_general.c

GitHub Repository: wine-mirror/wine
Path: blob/master/libs/symcrypt/lib/fdef_general.c
¹⁵⁰¹⁰ views
1
//
2
// fdef_general.c   General functions of the default format.
3
//
4
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
5
//
6
//
7
//
8

9
#include "precomp.h"
10

11
#include "smallPrimes32.h"      // For SymCryptTestTrialdivisionMaxSmallPrime
12

13
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64
14

15
#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES   (16)        // Measured on amd64
16
#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES       (2)         // Measured on amd64
17
#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES               (43000)     // Measured on amd64
18

19
#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM
20

21
#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES   (18)        // Measured on x86
22
#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES       (16)        // Measured on x86
23
#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES               (25300)     // Measured on x86
24

25
#else
26

27
#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES   (18)        // Measured on x86
28
#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES       (16)        // Measured on x86
29
#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES               (25300)     // Measured on x86
30

31
#endif
32

33

34
#define SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME          (1<<22)     // Some large limit to bound memory usage
35
C_ASSERT( SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME <= UINT32_MAX );
36
C_ASSERT( SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME == ((UINT32) SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME) );
37

38
VOID
39
SYMCRYPT_CALL
40
SymCryptFdefMaskedCopyC(
41
    _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )        PCBYTE      pbSrc,
42
    _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )   PBYTE       pbDst,
43
                                                                UINT32      nDigits,
44
                                                                UINT32      mask )
45
	/*
46
	This function is dangerous, and would create a buffer overflow if nDigits > nDigits for pbDst
47
	It also appears that it is never called. Consider removing it if it is not needed.
48
	*/
49
{
50
    UINT64 m64 = (UINT64)0 - (mask & 1);
51
    PUINT64 pSrc = (PUINT64) pbSrc; // should be a const pointer to match pSrc
52
    PUINT64 pDst = (PUINT64) pbDst;
53
    SIZE_T i;
54

55
	// This allows 0xffffffff and 0, is that what you wanted?
56
	// If so, ( mask == 0xffffffff || mask == 0 )
57
	// would be more readable. It is also odd that 1 is not valid, but it results in exactly the
58
	// same code flow as ~0.
59
    SYMCRYPT_ASSERT( (mask + 1) < 2 );      // Check that mask is valid
60

61
	// This - nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 )
62
	// seems to occur often. Consider a macro with a name that explains what you are doing
63
	// A comment on the macro which explains why this multiplication is never a problem would be
64
	// helpful - I'm fairly sure it is not a problem.
65
    for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); i += 2 )
66
    {
67
        pDst[i  ] = (pSrc[i  ] & m64) | (pDst[i  ] & ~m64 );
68
        pDst[i+1] = (pSrc[i+1] & m64) | (pDst[i+1] & ~m64 );
69
    }
70
}
71

72
VOID
73
SYMCRYPT_CALL
74
SymCryptFdefMaskedCopy(
75
    _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )        PCBYTE      pbSrc,
76
    _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )   PBYTE       pbDst,
77
                                                                UINT32      nDigits,
78
                                                                UINT32      mask )
79
{
80
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM
81
    SYMCRYPT_ASSERT_ASYM_ALIGNED( pbSrc );
82
    SYMCRYPT_ASSERT_ASYM_ALIGNED( pbDst );
83
    SymCryptFdefMaskedCopyAsm( pbSrc, pbDst, nDigits, mask );
84
#else
85
    SymCryptFdefMaskedCopyC( pbSrc, pbDst, nDigits, mask );
86
#endif
87
}
88

89
VOID
90
SYMCRYPT_CALL
91
SymCryptFdefConditionalSwapC(
92
    _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )   PBYTE       pbSrc1,
93
    _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )   PBYTE       pbSrc2,
94
                                                                UINT32      nDigits,
95
                                                                UINT32      cond )
96
{
97
	/*
98
	Some documentation as to what the cond argument means would be helpful.
99
	*/
100
    UINT64  m64 = (UINT64)0 - (cond & 1);
101
    PUINT64 pSrc1 = (PUINT64) pbSrc1;
102
    PUINT64 pSrc2 = (PUINT64) pbSrc2;
103
    UINT64  tmp1 = 0;
104
    UINT64  tmp2 = 0;
105
    SIZE_T  i;
106

107
	// Unlike the previous function, this only allows 0 and 1 why?
108
    SYMCRYPT_ASSERT( cond < 2 );      // Check that the condition is valid
109

110
    for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); i += 2 )
111
    {
112
        tmp1 = (pSrc1[i  ] ^ pSrc2[i  ]) & m64;
113
        tmp2 = (pSrc1[i+1] ^ pSrc2[i+1]) & m64;
114

115
        pSrc1[i  ] ^= tmp1;    pSrc2[i  ] ^= tmp1;
116
        pSrc1[i+1] ^= tmp2;    pSrc2[i+1] ^= tmp2;
117
    }
118
}
119

120
VOID
121
SYMCRYPT_CALL
122
SymCryptFdefConditionalSwap(
123
    _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )   PBYTE       pbSrc1,
124
    _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )   PBYTE       pbSrc2,
125
                                                                UINT32      nDigits,
126
                                                                UINT32      cond )
127
{
128
    SymCryptFdefConditionalSwapC( pbSrc1, pbSrc2, nDigits, cond );
129
}
130

131

132
UINT32
133
SymCryptFdefDigitsFromBits( UINT32 nBits )
134
{
135
    UINT32  res;
136

137
    if( nBits == 0 )
138
    {
139
        res = 1;
140
    }
141
    else
142
    {
143
        SYMCRYPT_ASSERT( nBits <= SYMCRYPT_INT_MAX_BITS );
144

145
        // Callers with integers larger than SYMCRYPT_INT_MAX_BITS should not occur in real use cases
146
        // To avoid overflow issues, return the 0 digits to indicate an error which can be handled by
147
        // callers, or flow through into object allocation which will in turn recognize the invalid
148
        // digit count.
149
        if( nBits > SYMCRYPT_INT_MAX_BITS )
150
        {
151
            res = 0;
152
        } else {
153
            res = SYMCRYPT_FDEF_DIGITS_FROM_BITS( nBits );
154
        }
155
    }
156

157
    return res;
158
}
159

160
// Let's limit max bits to the number of bits we actually test
161
C_ASSERT( SYMCRYPT_INT_MAX_BITS < (1 << 30) );      // Larger values can cause overflows and sign confusion
162

163
PSYMCRYPT_INT
164
SYMCRYPT_CALL
165
SymCryptFdefIntAllocate( UINT32 nDigits )
166
{
167
    PVOID               p = NULL;
168
    UINT32              cb;
169
    PSYMCRYPT_INT       res = NULL;
170

171
    //
172
    // The nDigits requirements are enforced by SymCryptFdefSizeofIntFromDigits. Thus
173
    // the result does not overflow and is upper bounded by 2^18.
174
    //
175
    cb = SymCryptFdefSizeofIntFromDigits( nDigits );
176

177
    if( cb != 0 )
178
    {
179
        p = SymCryptCallbackAlloc( cb );
180
    }
181

182
    if( p == NULL )
183
    {
184
        goto cleanup;
185
    }
186

187
    res = SymCryptIntCreate( p, cb, nDigits );
188

189
cleanup:
190
    return res;
191
}
192

193

194
UINT32
195
SYMCRYPT_CALL
196
SymCryptFdefSizeofIntFromDigits( UINT32 nDigits )
197
{
198
    SYMCRYPT_ASSERT( nDigits != 0 );
199
    SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS );
200

201
    // Ensure we do not overflow the following calculation when provided with invalid inputs
202
    if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS )
203
    {
204
        return 0;
205
    }
206

207
    // Note: ti stands for 'Type-Int' and it helps catch type errors when type-casting macros are used.
208
    return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_INT, ti ) + nDigits * SYMCRYPT_FDEF_DIGIT_SIZE;
209
}
210

211
PSYMCRYPT_INT
212
SYMCRYPT_CALL
213
SymCryptFdefIntCreate(
214
    _Out_writes_bytes_( cbBuffer )  PBYTE   pbBuffer,
215
                                    SIZE_T  cbBuffer,
216
                                    UINT32  nDigits )
217
{
218
    PSYMCRYPT_INT pInt = NULL;
219
    UINT32 cb = SymCryptFdefSizeofIntFromDigits( nDigits );
220

221
    SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_INT) );
222
    SYMCRYPT_ASSERT( cbBuffer >= cb );
223
    if( (cb == 0) || (cbBuffer < cb) )
224
    {
225
        goto cleanup; // return NULL
226
    }
227

228
    SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer );
229
    pInt = (PSYMCRYPT_INT) pbBuffer;
230

231
    pInt->type = 'gI' << 16;
232
    pInt->nDigits = nDigits;
233

234
    //
235
    // The nDigits requirements are enforced by SymCryptFdefSizeofIntFromDigits. Thus
236
    // the result does not overflow and is upper bounded by 2^18.
237
    //
238
    pInt->cbSize = cb;
239

240
    SYMCRYPT_SET_MAGIC( pInt );
241

242
cleanup:
243
    return pInt;
244
}
245

246

247
VOID
248
SymCryptFdefIntCopyFixup(
249
    _In_    PCSYMCRYPT_INT pSrc,
250
    _Out_   PSYMCRYPT_INT  pDst )
251
{
252
    UNREFERENCED_PARAMETER( pSrc );
253
    UNREFERENCED_PARAMETER( pDst ); // not used in FRE builds...
254

255
    SYMCRYPT_SET_MAGIC( pDst );
256
}
257

258
VOID
259
SymCryptFdefIntCopy(
260
    _In_    PCSYMCRYPT_INT  piSrc,
261
    _Out_   PSYMCRYPT_INT   piDst )
262
{
263
    SYMCRYPT_CHECK_MAGIC( piSrc );
264
    SYMCRYPT_CHECK_MAGIC( piDst );
265

266
    SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits );
267

268
    //
269
    // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy.
270
    //
271
    if( piSrc != piDst )
272
    {
273
        // This is normally considered a banned, unsafe function. A note about why it is safe in this use
274
        // would be good.
275
        memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), SYMCRYPT_OBJ_NBYTES( piDst ));
276
    }
277
}
278

279
VOID
280
SymCryptFdefIntMaskedCopy(
281
    _In_    PCSYMCRYPT_INT  piSrc,
282
    _Inout_ PSYMCRYPT_INT   piDst,
283
            UINT32          mask )
284
	/*
285
	Function notes would be helpful - what is mask, what does it do?
286
	*/
287
{
288
    SYMCRYPT_CHECK_MAGIC( piSrc );
289
    SYMCRYPT_CHECK_MAGIC( piDst );
290

291
    SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits );
292

293
    SymCryptFdefMaskedCopy( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piDst ), piSrc->nDigits, mask );
294
}
295

296
VOID
297
SYMCRYPT_CALL
298
SymCryptFdefIntConditionalCopy(
299
    _In_    PCSYMCRYPT_INT  piSrc,
300
    _Inout_ PSYMCRYPT_INT   piDst,
301
            UINT32          cond )
302
{
303
    SYMCRYPT_CHECK_MAGIC( piSrc );
304
    SYMCRYPT_CHECK_MAGIC( piDst );
305

306
    SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits );
307

308
    SymCryptFdefMaskedCopy( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piDst ), piSrc->nDigits, SYMCRYPT_MASK32_NONZERO( cond ) );
309
}
310

311
VOID
312
SYMCRYPT_CALL
313
SymCryptFdefIntConditionalSwap(
314
    _Inout_ PSYMCRYPT_INT   piSrc1,
315
    _Inout_ PSYMCRYPT_INT   piSrc2,
316
            UINT32          cond )
317
{
318
    SYMCRYPT_CHECK_MAGIC( piSrc1 );
319
    SYMCRYPT_CHECK_MAGIC( piSrc2 );
320

321
    SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits );
322

323
    SymCryptFdefConditionalSwap( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), piSrc1->nDigits, cond );
324
}
325

326
UINT32
327
SYMCRYPT_CALL
328
SymCryptFdefIntBitsizeOfObject( _In_ PCSYMCRYPT_INT  piSrc )
329
{
330
    // This does not overflow since the nDigits field is
331
    // bounded by SYMCRYPT_FDEF_UPB_DIGITS.
332
    return SYMCRYPT_FDEF_DIGIT_BITS * piSrc->nDigits;
333
}
334

335
UINT32
336
SYMCRYPT_CALL
337
SymCryptFdefNumberofDigitsFromInt( _In_ PCSYMCRYPT_INT piSrc )
338
{
339
    return piSrc->nDigits;
340
}
341

342
SYMCRYPT_ERROR
343
SymCryptFdefIntCopyMixedSize(
344
    _In_    PCSYMCRYPT_INT  piSrc,
345
    _Out_   PSYMCRYPT_INT   piDst )
346
{
347
    UINT32  n;
348
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
349

350
    SYMCRYPT_CHECK_MAGIC( piSrc );
351
    SYMCRYPT_CHECK_MAGIC( piDst );
352

353
    // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy.
354
    if( piSrc == piDst )
355
    {
356
        goto cleanup;
357
    }
358

359
    //
360
    // Copy the digits that are available in both
361
    //
362
    n = SYMCRYPT_MIN( piSrc->nDigits, piDst->nDigits );
363
    memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), n * SYMCRYPT_FDEF_DIGIT_SIZE );
364

365
    if( piDst->nDigits > n )
366
    {
367
        SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[n * SYMCRYPT_FDEF_DIGIT_NUINT32], (piDst->nDigits - n) * SYMCRYPT_FDEF_DIGIT_SIZE );
368
    }
369

370
    if( piSrc->nDigits > n )
371
    {
372
        // Check that the rest of the source is zero
373
        PUINT64 p = (PUINT64) &SYMCRYPT_FDEF_INT_PUINT32( piSrc )[n * SYMCRYPT_FDEF_DIGIT_NUINT32];
374
        UINT64 v = 0;
375
        UINT32 i = (piSrc->nDigits - n) * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 );
376
        while( i > 0 )
377
        {
378
            v |= *p++;
379
            i--;
380
        }
381

382
        //
383
        // If the Src doesn't fit, we are allowed to publish that fact, so we can use an IF.
384
        //
385
        if( v != 0 )
386
        {
387
            scError = SYMCRYPT_BUFFER_TOO_SMALL;
388
            goto cleanup;
389
        }
390
    }
391

392
cleanup:
393
    return scError;
394
}
395

396

397
UINT32
398
SYMCRYPT_CALL
399
SymCryptFdefBitsizeOfUint32( UINT32 v )
400
{
401
    UINT32 res;
402
    UINT32 mask;
403
    UINT32 vUpper;
404
    UINT32 vBit1;
405

406
    // This is tricky to do side-channel safe using only defined behaviour of the C language.
407

408
	// This is very difficult to make any sense of. A comment containing the C code that one would normally
409
	// write to do the same thing would be helpful. I will need to come back to this.
410
	// Also, there is no test coverage of this function. There should be a unit test to show that it does the same thing
411
	// as the code one would normally write.
412

413
    vUpper = v & 0xffff0000;
414
    mask = (UINT32) ( (0 -(UINT64)(vUpper)) >> 32 );      // mask = 0 or 0xffffffff
415
    res = mask & 16;                                      // Why do we want the 9th bit? Also, 0x10 would be better here
416
    v = ((v & 0xffff) & ~mask) | ((vUpper >> 16) & mask);
417

418
    vUpper = v & 0xff00;
419
    mask = (0 - vUpper) >> 16;                           // mask = 0 or 0xffff
420
    res |= mask & 8;
421
    v = ((v & 0xff) & ~mask) | ((v >> 8) & mask);
422

423
    vUpper = v & 0xf0;
424
    mask = (0 - vUpper) >> 16;
425
    res |= mask & 4;
426
    v = ((v & 0xf) & ~mask) | ((v >> 4) & mask );
427

428
    vUpper = v & 0xc;
429
    mask = (0 - vUpper) >> 16;
430
    res |= mask & 2;
431
    v = ((v & 0x3) & ~mask) | ((v >> 2) & mask);
432

433
    //
434
    // Only 2 bits left.
435
    //
436
    vBit1 = (v >> 1) & 1;
437
    res |= vBit1;
438

439
    //
440
    // Now we have the bit number of the MSbit set in res.
441
    // We need to increase this by one if v was nonzero, so that we
442
    // get 0 for v==0, and the # bits needed for v > 0
443
    //
444
    res += (v | vBit1) & 1;
445

446
    return res;
447
}
448

449
UINT32
450
SYMCRYPT_CALL
451
SymCryptFdefIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc )
452
{
453
    UINT32  nUint32 = SYMCRYPT_OBJ_NUINT32( piSrc );
454

455
    UINT32  res = 0;
456
    UINT32  msNonzeroWord = 0;      // most significant nonzero digit
457
    UINT32  searchingMask = SYMCRYPT_MASK32_SET; // Set if still searching, 0 otherwise
458
    UINT32  d;
459
    UINT32  dIsNonzeroMask;
460
    UINT32  foundMask;
461

462
    SYMCRYPT_CHECK_MAGIC( piSrc );
463

464
	// This while loop reveals the value of nUint32, is that OK?
465
	// If so, document why
466
    while( nUint32 > 0 )
467
    {
468
        //
469
        // Invariant:
470
        // If no nonzero digit has been found, res = 0 and updateMask = -1.
471
        // If a nonzero digit has been found:
472
        //  msNonzeroDigit = most significant nonzero digit in Src
473
        //  res = index where most-significant nonzero digit was found
474
        //  updateMask = 0
475
        //
476

477
        nUint32--;
478
        d = SYMCRYPT_FDEF_INT_PUINT32( piSrc )[nUint32];
479

480
        dIsNonzeroMask = SYMCRYPT_MASK32_NONZERO( d );
481
        foundMask = dIsNonzeroMask & searchingMask;
482
        res |= nUint32 & foundMask;
483
        msNonzeroWord |= d & foundMask;
484
        searchingMask &= ~foundMask;
485
    }
486

487
    //
488
    // If all words are zero, then res == 0 and msNonzeroDigit == 0.
489
    //
490
    res = res * 8 * sizeof( UINT32 ) + SymCryptFdefBitsizeOfUint32( msNonzeroWord );
491

492
    return res;
493
}
494

495
VOID
496
SYMCRYPT_CALL
497
SymCryptFdefIntSetValueUint32(
498
            UINT32          u32Src,
499
    _Out_   PSYMCRYPT_INT   piDst )
500
{
501
    SYMCRYPT_CHECK_MAGIC( piDst );
502

503
    SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_OBJ_NBYTES( piDst ) );
504
    SYMCRYPT_FDEF_INT_PUINT32( piDst )[0] = u32Src;
505
}
506

507
C_ASSERT( SYMCRYPT_FDEF_DIGIT_SIZE >= 8 );      // Code below fails if this doesn't hold
508

509
VOID
510
SYMCRYPT_CALL
511
SymCryptFdefIntSetValueUint64(
512
            UINT64          u64Src,
513
    _Out_   PSYMCRYPT_INT   piDst )
514
{
515
    SYMCRYPT_CHECK_MAGIC( piDst );
516

517
    SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_OBJ_NBYTES( piDst ) );
518
    SYMCRYPT_FDEF_INT_PUINT32( piDst )[0] = (UINT32) u64Src;
519
    SYMCRYPT_FDEF_INT_PUINT32( piDst )[1] = (UINT32)(u64Src >> 32);
520
}
521

522
SYMCRYPT_ERROR
523
SYMCRYPT_CALL
524
SymCryptFdefRawSetValue(
525
    _In_reads_bytes_(cbSrc)                             PCBYTE                  pbSrc,
526
                                                        SIZE_T                  cbSrc,
527
                                                        SYMCRYPT_NUMBER_FORMAT  format,
528
    _Out_writes_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32                 pDst,
529
                                                        UINT32                  nDigits )
530
{
531
    SYMCRYPT_ERROR scError;
532
    UINT32  b;
533
    INT32   step;
534
    UINT32  w;
535
    UINT32  windex;
536
    UINT32  i;
537
    UINT32  nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
538

539
    //
540
    // This is a very simple and slow generic implementation;
541
    // We'll create optimized versions for specific CPU platforms
542
    // (e.g. use of memcpy)
543
    //
544

545
    // I assume the number format is public?
546
    switch( format )
547
    {
548
    case SYMCRYPT_NUMBER_FORMAT_LSB_FIRST:
549
        step = 1;
550
        break;
551
    case SYMCRYPT_NUMBER_FORMAT_MSB_FIRST:
552
        step = -1;
553
        pbSrc += cbSrc; // avoid tripping pointer overflow sanitizer with cbSrc == 0
554
        pbSrc--;
555
        break;
556
    default:
557
        scError = SYMCRYPT_INVALID_ARGUMENT;
558
        goto cleanup;
559
    }
560

561
    for( windex = 0; windex < nWords; windex++ )
562
    {
563
        w = 0;
564
        for( i=0; i<4; i++ )
565
        {
566
            // read the next byte into b
567
            if( cbSrc > 0 )
568
            {
569
                b = *pbSrc;
570
                cbSrc -= 1;
571
                pbSrc += step;
572
                w |= b << 8*i;
573
            }
574
        }
575
        pDst[windex] = w;
576
    }
577

578
    // Inspect any remaining input bytes
579
    b = 0;
580
    while( cbSrc > 0 )
581
    {
582
        b |= *pbSrc;
583
        pbSrc += step;
584
        cbSrc -= 1;
585
    }
586

587
    if( b > 0 )
588
    {
589
        scError = SYMCRYPT_BUFFER_TOO_SMALL;
590
        goto cleanup;
591
    }
592

593
    scError = SYMCRYPT_NO_ERROR;
594

595
cleanup:
596
    return scError;
597
}
598

599
SYMCRYPT_ERROR
600
SYMCRYPT_CALL
601
SymCryptFdefIntSetValue(
602
    _In_reads_bytes_(cbSrc)     PCBYTE                  pbSrc,
603
                                SIZE_T                  cbSrc,
604
                                SYMCRYPT_NUMBER_FORMAT  format,
605
    _Out_                       PSYMCRYPT_INT           piDst )
606
{
607
    SYMCRYPT_ERROR scError;
608

609
    SYMCRYPT_CHECK_MAGIC( piDst );
610

611
    scError = SymCryptFdefRawSetValue( pbSrc, cbSrc, format, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits );
612

613
    return scError;
614
}
615

616

617
SYMCRYPT_ERROR
618
SYMCRYPT_CALL
619
SymCryptFdefRawGetValue(
620
    _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32)   PCUINT32                pSrc,
621
                                                        UINT32                  nDigits,
622
    _Out_writes_bytes_(cbDst)                           PBYTE                   pbDst,
623
                                                        SIZE_T                  cbDst,
624
                                                        SYMCRYPT_NUMBER_FORMAT  format )
625
{
626
    SYMCRYPT_ERROR scError;
627
    UINT32  b;
628
    INT32   step;
629
    UINT32  w;
630
    UINT32  windex;
631
    UINT32  i;
632
    UINT32  nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
633

634
    //
635
    // This is a very simple and slow generic implementation;
636
    // We'll create optimized versions for specific CPU platforms
637
    // (e.g. use of memcpy)
638
    //
639

640
    switch( format )
641
    {
642
    case SYMCRYPT_NUMBER_FORMAT_LSB_FIRST:
643
        step = 1;
644
        break;
645
    case SYMCRYPT_NUMBER_FORMAT_MSB_FIRST:
646
        step = -1;
647
        pbDst += cbDst; // avoid tripping pointer overflow sanitizer with cbSrc == 0
648
        pbDst--;
649
        break;
650
    default:
651
        scError = SYMCRYPT_INVALID_ARGUMENT;
652
        goto cleanup;
653
    }
654

655
    for( windex = 0; windex < nWords; windex++ )
656
    {
657
        w = pSrc[windex];
658
        for( i=0; i<4; i++ )
659
        {
660
            b = w & 0xff;
661
            w >>= 8;
662

663
            // write the next byte
664
            if( cbDst > 0 )
665
            {
666
                *pbDst = (BYTE)b;
667
                cbDst -= 1;
668
                pbDst += step;
669
            } else {
670
                if( b != 0 )
671
                {
672
                    scError = SYMCRYPT_BUFFER_TOO_SMALL;
673
                    goto cleanup;
674
                }
675
            }
676
        }
677
    }
678

679
    // Zero any remaining output bytes
680
    while( cbDst > 0 )
681
    {
682
        *pbDst = 0;
683
        pbDst += step;
684
        cbDst -= 1;
685
    }
686

687
    scError = SYMCRYPT_NO_ERROR;
688

689
cleanup:
690
    return scError;
691
}
692

693
SYMCRYPT_ERROR
694
SYMCRYPT_CALL
695
SymCryptFdefIntGetValue(
696
    _In_                        PCSYMCRYPT_INT          piSrc,
697
    _Out_writes_bytes_(cbDst)   PBYTE                   pbDst,
698
                                SIZE_T                  cbDst,
699
                                SYMCRYPT_NUMBER_FORMAT  format )
700
{
701
    SYMCRYPT_ERROR scError;
702

703
    SYMCRYPT_CHECK_MAGIC( piSrc );
704

705
    scError = SymCryptFdefRawGetValue( &SYMCRYPT_FDEF_INT_PUINT32( piSrc )[0], piSrc->nDigits, pbDst, cbDst, format );
706

707
    return scError;
708
}
709

710

711
UINT32
712
SYMCRYPT_CALL
713
SymCryptFdefIntGetValueLsbits32( _In_  PCSYMCRYPT_INT piSrc )
714
{
715
    // nDigits cannot be zero, so we don't have to test
716
    return SYMCRYPT_FDEF_INT_PUINT32( piSrc )[0];
717
}
718

719
UINT64
720
SYMCRYPT_CALL
721
SymCryptFdefIntGetValueLsbits64( _In_  PCSYMCRYPT_INT piSrc )
722
{
723
    // nDigits cannot be zero, so we don't have to test
724
    PCUINT32 p = SYMCRYPT_FDEF_INT_PUINT32( piSrc );
725
    return ((UINT64)(p[1]) << 32) | p[0];
726
}
727

728
UINT32
729
SYMCRYPT_CALL
730
SymCryptFdefRawIsEqualUint32(
731
    _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32        pSrc1,
732
                                                    UINT32          nDigits,
733
    _In_                                            UINT32          u32Src2 )
734
{
735
    UINT32 d;
736
    UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
737

738
    d = pSrc1[0] ^ u32Src2;
739
    for( UINT32 i=1; i<nWords; i++)
740
    {
741
        d |= pSrc1[i];
742
    }
743

744
    return SYMCRYPT_MASK32_ZERO( d );
745
}
746

747
UINT32
748
SYMCRYPT_CALL
749
SymCryptFdefIntIsEqualUint32(
750
    _In_    PCSYMCRYPT_INT  piSrc1,
751
    _In_    UINT32          u32Src2 )
752
{
753
    return SymCryptFdefRawIsEqualUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[0], piSrc1->nDigits, u32Src2 );
754
}
755

756
UINT32
757
SYMCRYPT_CALL
758
SymCryptFdefIntIsEqual(
759
    _In_    PCSYMCRYPT_INT  piSrc1,
760
    _In_    PCSYMCRYPT_INT  piSrc2 )
761
{
762
    UINT32 d;
763
    UINT32  n1 = SYMCRYPT_OBJ_NUINT32( piSrc1 );
764
    UINT32  n2 = SYMCRYPT_OBJ_NUINT32( piSrc2 );
765
    UINT32  i;
766
    UINT32  n;
767
    PCUINT32 pSrc1 = SYMCRYPT_FDEF_INT_PUINT32( piSrc1 );
768
    PCUINT32 pSrc2 = SYMCRYPT_FDEF_INT_PUINT32( piSrc2 );
769

770
    n = SYMCRYPT_MIN( n1, n2 );
771
    d = 0;
772
    for( i=0; i < n ; i++ )
773
    {
774
        d |= pSrc1[i] ^ pSrc2[i];
775
    }
776

777
    // i == n1 or i == n2, so at most one of the 2 loops below is ever run
778

779
    while( i < n1 )
780
    {
781
        d |= pSrc1[i];
782
        i++;
783
    }
784

785
    while( i < n2 )
786
    {
787
        d |= pSrc2[i];
788
        i++;
789
    }
790

791
    return SYMCRYPT_MASK32_ZERO( d );
792
}
793

794
PSYMCRYPT_DIVISOR
795
SYMCRYPT_CALL
796
SymCryptFdefDivisorAllocate( UINT32 nDigits )
797
{
798
    PVOID               p = NULL;
799
    UINT32              cb;
800
    PSYMCRYPT_DIVISOR   res = NULL;
801

802
    //
803
    // The nDigits requirements are enforced by SymCryptFdefSizeofDivisorFromDigits. Thus
804
    // the result does not overflow and is upper bounded by 2^19.
805
    //
806
    cb = SymCryptFdefSizeofDivisorFromDigits( nDigits );
807

808
    if( cb != 0 )
809
    {
810
        p = SymCryptCallbackAlloc( cb );
811
    }
812

813
    if( p == NULL )
814
    {
815
        goto cleanup;
816
    }
817

818
    res = SymCryptFdefDivisorCreate( p, cb, nDigits );
819

820
cleanup:
821
    return res;
822
}
823

824
UINT32
825
SYMCRYPT_CALL
826
SymCryptFdefSizeofDivisorFromDigits( UINT32 nDigits )
827
{
828
    SYMCRYPT_ASSERT( nDigits != 0 );
829
    SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS );
830

831
    // Ensure we do not overflow the following calculation when provided with invalid inputs
832
    if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS )
833
    {
834
        return 0;
835
    }
836

837
    return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_DIVISOR, Int ) + SymCryptFdefSizeofIntFromDigits( nDigits );
838
}
839

840
PSYMCRYPT_DIVISOR
841
SYMCRYPT_CALL
842
SymCryptFdefDivisorCreate(
843
    _Out_writes_bytes_( cbBuffer )  PBYTE   pbBuffer,
844
                                    SIZE_T  cbBuffer,
845
                                    UINT32  nDigits )
846
{
847
    PSYMCRYPT_DIVISOR  pdDiv = NULL;
848
    UINT32 cb = SymCryptSizeofDivisorFromDigits( nDigits );
849

850
    SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_DIVISOR) );
851
    SYMCRYPT_ASSERT( cbBuffer >= cb );
852
    if( (cb == 0) || (cbBuffer < cb) )
853
    {
854
        goto cleanup; // return NULL
855
    }
856

857
    SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer );
858
    pdDiv = (PSYMCRYPT_DIVISOR) pbBuffer;
859

860
    pdDiv->type = 'gD' << 16;
861
    pdDiv->nDigits = nDigits;
862

863
    //
864
    // The nDigits requirements are enforced by SymCryptFdefSizeofDivisorFromDigits. Thus
865
    // the result does not overflow and is upper bounded by 2^19.
866
    //
867
    pdDiv->cbSize = cb;
868

869
    SYMCRYPT_SET_MAGIC( pdDiv );
870

871
    SymCryptIntCreate( (PBYTE)&pdDiv->Int, cbBuffer - SYMCRYPT_FIELD_OFFSET( SYMCRYPT_DIVISOR, Int ), nDigits );
872

873
cleanup:
874
    return pdDiv;
875
}
876

877
VOID
878
SymCryptFdefDivisorCopyFixup(
879
    _In_    PCSYMCRYPT_DIVISOR pdSrc,
880
    _Out_   PSYMCRYPT_DIVISOR  pdDst )
881
{
882
    UNREFERENCED_PARAMETER( pdSrc );
883
    UNREFERENCED_PARAMETER( pdDst );
884

885
    SymCryptFdefIntCopyFixup( &pdSrc->Int, &pdDst->Int );
886

887
    SYMCRYPT_SET_MAGIC( pdDst );
888
}
889

890
VOID
891
SymCryptFdefDivisorCopy(
892
    _In_    PCSYMCRYPT_DIVISOR  pdSrc,
893
    _Out_   PSYMCRYPT_DIVISOR   pdDst )
894
{
895
    SYMCRYPT_CHECK_MAGIC( pdSrc );
896
    SYMCRYPT_CHECK_MAGIC( pdDst );
897

898
    SYMCRYPT_ASSERT( pdSrc->nDigits == pdDst->nDigits );
899

900
    // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy.
901
    if( pdSrc != pdDst )
902
    {
903
        memcpy( pdDst, pdSrc, pdDst->cbSize );
904

905
        SymCryptFdefDivisorCopyFixup( pdSrc, pdDst );
906
    }
907
}
908

909

910
VOID
911
SYMCRYPT_CALL
912
SymCryptFdefClaimScratch( PBYTE pbScratch, SIZE_T cbScratch, SIZE_T cbMin )
913
{
914
#if SYMCRYPT_DEBUG
915
    SYMCRYPT_ASSERT( cbScratch >= cbMin );
916
    SymCryptWipe( pbScratch, cbMin );
917
#else
918
    UNREFERENCED_PARAMETER( pbScratch );
919
    UNREFERENCED_PARAMETER( cbScratch );
920
    UNREFERENCED_PARAMETER( cbMin );
921
#endif
922
}
923

924
UINT32
925
SymCryptTestTrialdivisionMaxSmallPrime(
926
    _In_                            PCSYMCRYPT_TRIALDIVISION_CONTEXT    pContext )
927
{
928
    return pContext->maxTrialPrime;
929
}
930

931
UINT64
932
SymCryptInverseMod2e64( UINT64 m )
933
{
934
    // Compute the inv64 value such that inv64 * m = 1 mod 2^64 for odd m.
935
    // If m is even, there exists no inverse, this function will return a
936
    // useless value in constant time.
937
    //
938
    // We use Newton's method to search for a zero of f(x) := x^-1 - m, working modulo 2^64
939
    // We get the iteration formula
940
    //  x_{i+1} = x_i - f(x_i)/f'(x_i)
941
    //          = x_i - (x_i^-1 - m)/(-x_i^-2)
942
    //          = x_i + x_i^2(1/x_i - m)
943
    //          = x_i + x_i - (x_i^2 * m)
944
    //          = x_i (2 - x_i*m)
945
    //
946
    // Let x_i = d + 2^n * e where d = inv64 = m^-1 mod 2^64, and 2^n * e is the error term that is zero in the n least
947
    // significant bits. We have
948
    //  x_{i+1} = (d + 2^n * e) (2 - (d + 2^n * e) * m)
949
    //          = (d + 2^n * e) (2 - d*m - 2^n * e * m)
950
    //          = (d + 2^n * e) (2 - 1 - 2^n * e * m)
951
    //          = (d + 2^n * e) (1 - 2^n * e * m)
952
    //          = d - (2^n * e * (d*m)) + (2^n * e) - (2^{2n} * e^2 * m)
953
    //          = d - (2^{2n} * e^2 * m)
954
    // In other words, the error has been squared and multiplied by m. In our case, working modulo 2^64, the number of correct bits
955
    // on the least significant side is doubled.
956
    //
957
    // To get a 4-bit correct estimate for m^-1 given odd m, we consider the least significant 4 bits of m and inv:
958
    // m   = ... m_3 m_2 m_1 m_0
959
    // inv = ... i_3 i_2 i_1 i_0
960
    // We want to directly compute i_[3..0] s.t. (m*inv) & 0xf == 1
961
    // working through some simple simultaneous equations it is easily shown that:
962
    // i_0 = m_0 = 1
963
    // i_1 = m_1
964
    // i_2 = m_2
965
    // i_3 = m_1 ^ m_2 ^ m_3
966
    // Once we have 4 correct bits, we can double that multiple times using Newton's method.
967
    //
968
    // We use 32-bit operations for most of the iterations for speed on 32-bit platforms.
969
    //
970
    UINT32 inv32;
971
    UINT64 inv64;
972
    UINT32 m32;
973

974
    m32 = (UINT32)m;
975

976
    inv32 = m32 ^ (((m32 - 1) * 0x6) & 0x8); // sets inv32 bits [3..0]
977
    SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xf) == 1) );
978

979
    inv32 = inv32 * (2 - inv32 * m32 );
980
    SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xff) == 1) );
981

982
    inv32 = inv32 * (2 - inv32 * m32 );
983
    SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xffff) == 1) );
984

985
    inv32 = inv32 * (2 - inv32 * m32 );
986
    SYMCRYPT_ASSERT( ((m&1) == 0) || ((inv32 * m32) == 1) );
987

988
    inv64 = inv32;
989
    inv64 = inv64 * (2 - inv64 * m );
990
    SYMCRYPT_ASSERT( ((m&1) == 0) || ((inv64 * m) == 1) );
991

992
    return inv64;
993
}
994

995

996
VOID
997
SYMCRYPT_CALL
998
SymCryptFdefInitTrialdivisionPrime(
999
            UINT32                          prime,
1000
    _Out_   PSYMCRYPT_TRIALDIVISION_PRIME   pPrime )
1001
{
1002
    // Compute the inverse of the prime mod 2^64
1003
    pPrime->invMod2e64 = SymCryptInverseMod2e64( prime );
1004
    pPrime->compareLimit = ((UINT64) -1) / prime;
1005
}
1006

1007
FORCEINLINE
1008
UINT32
1009
SymCryptIsMultipleOfSmallPrime( UINT64 value, PCSYMCRYPT_TRIALDIVISION_PRIME pPrime )
1010
{
1011
    return (value * pPrime->invMod2e64) <= pPrime->compareLimit;
1012
}
1013

1014
VOID
1015
SYMCRYPT_CALL
1016
SymCryptFdefInitTrialDivisionGroup( PSYMCRYPT_TRIALDIVISION_GROUP pGroup, UINT32 nPrimes, UINT32 primeProd )
1017
{
1018
    UINT32 f;
1019
    UINT32 r;
1020
    UINT32 i;
1021

1022
    pGroup->nPrimes = nPrimes;
1023

1024
    // These % operations are expensive; maybe we can optimize this further.
1025
    // In assembler we can do the UINT64 % UINT32 -> UINT32
1026
    // hopefully the compiler is smart enough...
1027

1028
    f = (UINT32) (((UINT64)1 << 32) % primeProd);
1029
    pGroup->factor[0] = f;
1030
    r = f;
1031
    for( i=1; i<9; i++ )
1032
    {
1033
        r = (UINT32) (SYMCRYPT_MUL32x32TO64( r, f ) % primeProd);
1034
        pGroup->factor[i] = r;
1035
    }
1036
}
1037

1038
UINT32
1039
SYMCRYPT_CALL
1040
SymCryptGenerateSmallPrimes( UINT32 maxPrime, PUINT32 * ppList )
1041
{
1042
    // returns a list of small primes, excluding 2, 3, 5, and 17.
1043
    UINT32 nPrimes = 0;
1044
    PUINT32 pList = NULL;
1045

1046
    // pSieve[i] corresponds to 2*i+1
1047
    // value X is in location X/2
1048
    UINT32 nSieve;
1049
    PBYTE pSieve;
1050

1051
    UINT32 pi;
1052
    UINT32 p;
1053
    UINT32 si;
1054
    UINT32 i;
1055

1056
    maxPrime = SYMCRYPT_MAX( maxPrime, 32 );         // simplify error handling by always producing primes at least up to 32
1057
    maxPrime = SYMCRYPT_MIN( maxPrime, 1 << 24 );    // Limit prime list to something sane (sieve = 8 MB, list = 4 MB or so).
1058

1059
    // highest index is (maxPrime - 1)/2 which encodes maxPrime if odd, or maxPrime-1 if even
1060
    nSieve = (maxPrime - 1) / 2 + 1;
1061

1062
    pSieve = SymCryptCallbackAlloc( nSieve );
1063
    if( pSieve == NULL )
1064
    {
1065
        goto cleanup;
1066
    }
1067

1068
    SymCryptWipe( pSieve, nSieve );
1069

1070

1071
    pi = 1; // index of first prime 3
1072
    p = 2*pi + 1;  // prime value
1073
    for(;;)
1074
    {
1075
        si = 2*(pi*pi + pi);    // index of p^2
1076
        if( si > nSieve )
1077
        {
1078
            break;  // We're done sieving
1079
        }
1080
        while( si < nSieve )
1081
        {
1082
            pSieve[si] = 1;
1083
            si += p;
1084
        }
1085
        // Search for the next prime
1086
        do {
1087
            pi += 1;
1088
        } while( pSieve[pi] != 0 );
1089
        p = 2*pi + 1;
1090
    }
1091

1092
    // Eliminate 3, 5, and 17
1093
    pSieve[1] = 1;
1094
    pSieve[2] = 1;
1095
    pSieve[8] = 1;
1096

1097
    for( i=1; i<nSieve; i++ )
1098
    {
1099
        nPrimes += 1 - pSieve[i];
1100
    }
1101

1102
	// dcl - I suspect that this is not a problem, but please document
1103
	// why this multiplication cannot overflow. I assume there is a practical limit on nPrimes, but unsure
1104
	// what that would be.
1105
    pList = SymCryptCallbackAlloc( nPrimes * sizeof( UINT32 ) );
1106
    if( pList == NULL )
1107
    {
1108
        goto cleanup;
1109
    }
1110

1111
    pi = 0;
1112
    for( i=1; i<nSieve; i++ )
1113
    {
1114
        if( pSieve[i] == 0 )
1115
        {
1116
            pList[pi++] = 2*i+1;
1117
        }
1118
    }
1119

1120
    SYMCRYPT_ASSERT( pi == nPrimes );
1121

1122
cleanup:
1123
    if( pSieve != NULL )
1124
    {
1125
        SymCryptWipe( pSieve, nSieve );
1126
        SymCryptCallbackFree( pSieve );
1127
    }
1128

1129
    *ppList = pList;
1130
    return nPrimes;
1131
}
1132

1133

1134
PCSYMCRYPT_TRIALDIVISION_CONTEXT
1135
SYMCRYPT_CALL
1136
SymCryptFdefCreateTrialDivisionContext( UINT32 nDigits )
1137
{
1138
    PSYMCRYPT_TRIALDIVISION_CONTEXT pRes = NULL;
1139
    PBYTE pAlloc;
1140
    UINT32 nBytes;
1141
    UINT32 iPrime;
1142
    UINT32 iGroup;
1143
    UINT32 nPrimes;
1144
    UINT32 nGroups;
1145
    UINT32 M;
1146
    UINT32 iGroupSpec;
1147
    UINT32 i;
1148
    UINT32 j;
1149
    UINT64 cRabinMillerCost;
1150
    UINT64 cPerPrimeCost;
1151
    UINT64 tmp64;
1152
    UINT32 maxPrime;
1153
    UINT32 minPrime;
1154
    UINT32 nSmallPrimes = 0;
1155
    UINT32 n;
1156
    UINT32 nP;
1157
    UINT32 nG;
1158
    PUINT32 pSmallPrimeList = NULL;
1159

1160
    // First we estimate the largest prime we will do trial division with
1161
    // Inputs:
1162
    // - cycles/digit of reduction per group of primes
1163
    // - cycles/prime of divide test
1164
    // - cycles per digit^3 for a Rabin-Miller test
1165
    // We optimize in this model, which is pretty accurate for large inputs but underestimates the RM cost
1166
    // for smaller sizes.
1167

1168
    // Compute the Rabin-Miller cost estimate. We reduce it by 20% because our cost model does not take
1169
    // into account some of the trial-division cost such as memory footprint, cache pressure,
1170
    // setup cost, etc. Reducing the Rabin-Miller cost leads us to do fewer trial divisions to approximately
1171
    // balance the hidden costs.
1172

1173
    if( nDigits <= 1000 )
1174
    {
1175
        // nDigits is small enough to not have any overflows in this computation
1176
        if( nDigits == 0 )
1177
        {
1178
            goto cleanup; // return NULL
1179
        }
1180

1181
        cRabinMillerCost = (UINT64) nDigits * nDigits * nDigits * (SYMCRYPT_RABINMILLER_DIGIT_CYCLES * 8 / 10);
1182
        i = 0;
1183
        minPrime = 0;
1184
        for(;;)
1185
        {
1186
            nPrimes = g_SymCryptSmallPrimeGroupsSpec[i].nPrimes;
1187
            maxPrime = g_SymCryptSmallPrimeGroupsSpec[i].maxPrime;
1188
            nGroups = g_SymCryptSmallPrimeGroupsSpec[i].nGroups;
1189
            cPerPrimeCost = (UINT64) nDigits * SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES / nPrimes + SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES;
1190

1191
            // If the last group isn't worth it, we shouldn't go to even fewer primes
1192
            if( nGroups == 0 || maxPrime * cPerPrimeCost >=  cRabinMillerCost)
1193
            {
1194
                break;
1195
            }
1196
            i++;
1197
            minPrime = maxPrime;
1198
        }
1199

1200
        // Now we know how many primes are in the last groups, let's find out how large the largest prime should be
1201
        tmp64 = cRabinMillerCost / cPerPrimeCost;
1202
        tmp64 = SYMCRYPT_MIN( tmp64, SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME );
1203
        maxPrime = (UINT32) tmp64;
1204
        maxPrime = SYMCRYPT_MAX( maxPrime, minPrime );       // Make sure we don't fall into the previous group size that we don't want
1205
    }
1206
    else
1207
    {
1208
        maxPrime = SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME;
1209
    }
1210

1211
    nSmallPrimes = SymCryptGenerateSmallPrimes( maxPrime, &pSmallPrimeList );
1212

1213
    // Find out how many groups we'll have, and how many actual primes we'll use
1214
    n = nSmallPrimes;
1215
    nG = 0;
1216
    nP = 0;
1217
    i = 0;
1218
    for(;;)
1219
    {
1220
        nPrimes = g_SymCryptSmallPrimeGroupsSpec[i].nPrimes;
1221
        nGroups = g_SymCryptSmallPrimeGroupsSpec[i].nGroups;
1222

1223
        if( n < nPrimes * nGroups || nGroups == 0 )
1224
        {
1225
            // At the right nPrimes, compute exactly how many groups to add
1226
            n = n / nPrimes;
1227
            nG += n;
1228
            nP += n * nPrimes;
1229
            n = 0;      // No primes left
1230
            break;
1231
        }
1232

1233
        // Use up all the groups of this size...
1234
        nG += nGroups;
1235
        nP += nPrimes * nGroups;
1236
        n -= nPrimes * nGroups;
1237
        i++;
1238
    }
1239

1240
	// dcl - Potential integer overflow
1241
	// Need to document sizes, and limits of nG, nP, and confirm
1242
	// an overflow is not possible, also recall that size_t varies in size, but nBytes is 32-bit
1243
    nBytes = sizeof( SYMCRYPT_TRIALDIVISION_CONTEXT )
1244
            + (nG + 1) * sizeof( SYMCRYPT_TRIALDIVISION_GROUP )     // + 1 for 0 sentinel
1245
            + (nP + 1) * sizeof( SYMCRYPT_TRIALDIVISION_PRIME )     // + 1 for 0 sentinel
1246
            + (nP + 1) * sizeof( UINT32 );                          // + 1 for 0 sentinel
1247

1248
    pAlloc = SymCryptCallbackAlloc( nBytes );
1249
    if( pAlloc == NULL )
1250
    {
1251
        goto cleanup;
1252
    }
1253

1254
    pRes = (PSYMCRYPT_TRIALDIVISION_CONTEXT) pAlloc;
1255
    pAlloc += sizeof( *pRes );
1256

1257
    pRes->nBytesAlloc = nBytes;
1258

1259
    pRes->pGroupList = (PSYMCRYPT_TRIALDIVISION_GROUP)pAlloc;
1260
    pAlloc += (nG + 1) * sizeof( SYMCRYPT_TRIALDIVISION_GROUP );
1261

1262
    pRes->pPrimeList = (PSYMCRYPT_TRIALDIVISION_PRIME) pAlloc;
1263
    pAlloc += (nP + 1) * sizeof( SYMCRYPT_TRIALDIVISION_PRIME );
1264

1265
    pRes->pPrimes = (PUINT32) pAlloc;
1266
    pAlloc += (nP + 1) * sizeof( UINT32 );
1267

1268
    SYMCRYPT_ASSERT( nBytes == (SIZE_T)(pAlloc - (PBYTE)pRes) );
1269

1270
    // Initialize the primes 3, 5, and 17
1271
    SymCryptFdefInitTrialdivisionPrime(  3, &pRes->Primes3_5_17[0] );
1272
    SymCryptFdefInitTrialdivisionPrime(  5, &pRes->Primes3_5_17[1] );
1273
    SymCryptFdefInitTrialdivisionPrime( 17, &pRes->Primes3_5_17[2] );
1274

1275
    memcpy( pRes->pPrimes, pSmallPrimeList, nP * sizeof( UINT32 ) );
1276
    pRes->pPrimes[nP] = 0;
1277
    pRes->maxTrialPrime = pRes->pPrimes[nP-1];
1278

1279
    /*
1280
    *** Old code to decrypt the nibble encoding. Keep in case we want it back later...
1281
    // Generate the other primes from the difference table.
1282
    // We initialize the prime structures, and a list of the primes that is used to compute the group specs
1283

1284
    pNibs = &g_SymCryptSmallPrimeDifferenceNibbles[0];
1285

1286
    smallPrime = 3;
1287
    nPrimes = 0;
1288
    while( smallPrime < SYMCRYPT_MAX_SMALL_PRIME )
1289
    {
1290
        b = *pNibs++;
1291
        nib = b & 0xf;
1292

1293
        if( nib == 0 )
1294
        {
1295
            smallPrime += 30;
1296
            // No check for termination here as we wouldn't encode a 0 if there wasn't another prime.
1297
        } else {
1298
            smallPrime += 2*nib;
1299
            pRes->pPrimes[nPrimes] = smallPrime;
1300
            SymCryptFdefInitTrialdivisionPrime( smallPrime, &pRes->pPrimeList[nPrimes] );
1301
            nPrimes++;
1302
            if( smallPrime >= SYMCRYPT_MAX_SMALL_PRIME )
1303
            {
1304
                break;
1305
            }
1306
        }
1307
        nib = b >> 4;
1308
        if( nib == 0 )
1309
        {
1310
            smallPrime += 30;
1311
        } else {
1312
            smallPrime += 2*nib;
1313
            pRes->pPrimes[nPrimes] = smallPrime;
1314
            SymCryptFdefInitTrialdivisionPrime( smallPrime, &pRes->pPrimeList[nPrimes] );
1315
            nPrimes++;
1316
        }
1317
    }
1318
    SYMCRYPT_ASSERT( smallPrime == SYMCRYPT_MAX_SMALL_PRIME && nPrimes == SYMCRYPT_N_SMALL_PRIMES_ENCODED );
1319
    */
1320

1321
    for( iPrime = 0; iPrime < nP; iPrime++ )
1322
    {
1323
        SymCryptFdefInitTrialdivisionPrime( pRes->pPrimes[iPrime], &pRes->pPrimeList[iPrime] );
1324
    }
1325

1326
    // Add the trailing 0s
1327
    pRes->pPrimeList[nP].invMod2e64 = 0;
1328
    pRes->pPrimeList[nP].compareLimit = 0;
1329

1330
    // Make sure we have the 32-bit tables, not the 64-bit ones.
1331
	// dcl - warning suppression is not portable. Also, if it is a compile time constant, shouldn't it be a compile assert?
1332
#pragma warning( suppress: 4127 )       // conditional expression is constant
1333
    SYMCRYPT_ASSERT( SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT <= (UINT32)-1 );
1334

1335
    iGroup = 0;
1336
    iPrime = 0;
1337
    iGroupSpec = 0;
1338
    nPrimes = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nPrimes;
1339
    nGroups = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nGroups;
1340
    while( iPrime < nP )
1341
    {
1342
        if( nGroups == 0 )
1343
        {
1344
            iGroupSpec +=1 ;
1345
            nPrimes = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nPrimes;
1346
            nGroups = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nGroups;
1347
            if( nGroups == 0 )
1348
            {
1349
                nGroups = nG - iGroup;
1350
            }
1351
        }
1352

1353
        SYMCRYPT_ASSERT( iPrime + nPrimes <= nP );
1354
        M = pRes->pPrimes[iPrime++];
1355
        for( j=1; j<nPrimes; j++ )
1356
        {
1357
            SYMCRYPT_ASSERT( M <= SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT / pRes->pPrimes[iPrime] );
1358
            M *= pRes->pPrimes[iPrime++];
1359
        }
1360
        SymCryptFdefInitTrialDivisionGroup( &pRes->pGroupList[iGroup], nPrimes, M );
1361
        iGroup++;
1362

1363
        nGroups--;
1364
    }
1365

1366
    SYMCRYPT_ASSERT( iPrime == nP && iGroup == nG );
1367

1368
    // Add the trailing sentinel group
1369
    pRes->pGroupList[iGroup].nPrimes = 0;
1370

1371
cleanup:
1372
    if( pSmallPrimeList != NULL )
1373
    {
1374
        SymCryptWipe( pSmallPrimeList, nSmallPrimes * sizeof( UINT32 ) );
1375
        SymCryptCallbackFree( pSmallPrimeList );
1376
        pSmallPrimeList = NULL;
1377
    }
1378
    return pRes;
1379
}
1380

1381
VOID
1382
SYMCRYPT_CALL
1383
SymCryptFdefFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext )
1384
{
1385
    // No security reason to wipe it, but our test code verifies that we wipe everything...
1386
    // Perf cost is minor
1387
    SymCryptWipe( (PBYTE) pContext, pContext->nBytesAlloc );
1388
    SymCryptCallbackFree( (PSYMCRYPT_TRIALDIVISION_CONTEXT) pContext );
1389
}
1390

1391
UINT32
1392
SYMCRYPT_CALL
1393
SymCryptFdefIntFindSmallDivisor(
1394
    _In_                            PCSYMCRYPT_TRIALDIVISION_CONTEXT    pContext,
1395
    _In_                            PCSYMCRYPT_INT                      piSrc,
1396
    _Out_writes_bytes_( cbScratch ) PBYTE                               pbScratch,
1397
                                    SIZE_T                              cbScratch )
1398
{
1399
    PCUINT32    pSrc = SYMCRYPT_FDEF_INT_PUINT32( piSrc );
1400
    PCUINT32    p;
1401
    UINT32      nDigits = piSrc->nDigits;
1402
    UINT32      nUint32 = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
1403
    UINT64      Acc;
1404
    PCSYMCRYPT_TRIALDIVISION_GROUP pGroup;
1405
    PCSYMCRYPT_TRIALDIVISION_PRIME pPrime;
1406
    UINT32      nPrimes;
1407
    UINT32      res;
1408

1409
    // Check for 2. Not really needed for prime generation, but it makes the function easier to test/document/describe.
1410
    if( (*pSrc & 1) == 0 )
1411
    {
1412
        res = 2;
1413
        goto cleanup;
1414
    }
1415

1416
    // Check the factors 3, 5, 17. These are special as they divide 2^32 - 1
1417
    // (We could also do 257 and 65537 but that doesn't seem worth the added complexity.)
1418
    Acc = 0;
1419
    p = pSrc;
1420
    do {
1421
#if SYMCRYPT_FDEF_DIGIT_SIZE == 16
1422
        Acc = Acc + p[0] + p[1] + p[2] + p[3];
1423
        p += 4;
1424
#elif (SYMCRYPT_FDEF_DIGIT_SIZE % 32) == 0
1425
        Acc = Acc + p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7];
1426
        p += 8;
1427
#else
1428
		// dcl - ideally, #error would have a descriptive message so it is easily found in code if encountered, same below
1429
#error ??
1430
#endif
1431
    } while( p < pSrc + nUint32 );
1432

1433
    if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[0] ) )
1434
    {
1435
        res = 3;
1436
        goto cleanup;
1437
    }
1438

1439
    if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[1] ) )
1440
    {
1441
        res = 5;
1442
        goto cleanup;
1443
    }
1444

1445
    if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[2] ) )
1446
    {
1447
        res = 17;
1448
        goto cleanup;
1449
    }
1450

1451
    pGroup = pContext->pGroupList;
1452
    pPrime = pContext->pPrimeList;
1453
    while( (nPrimes = pGroup->nPrimes) != 0 )
1454
    {
1455
        // Reduce Src modulo the group product to a 64-bit value
1456
        Acc = 0;
1457
        p = pSrc + nUint32;
1458

1459
#if SYMCRYPT_FDEF_DIGIT_SIZE == 16
1460
        if( (nUint32 & 4) != 0 )
1461
        {
1462
            // nUInt32 is 4 mod 8, process the top 4 words only
1463
            p -= 4;
1464
            Acc =
1465
                p[0] +
1466
                SYMCRYPT_MUL32x32TO64( p[1],  pGroup->factor[0] ) +
1467
                SYMCRYPT_MUL32x32TO64( p[2],  pGroup->factor[1] ) +
1468
                SYMCRYPT_MUL32x32TO64( p[3],  pGroup->factor[2] );
1469
        } else {
1470
            // Process 8 words to start
1471
            p -= 8;
1472
            Acc =
1473
                p[0] +
1474
                SYMCRYPT_MUL32x32TO64( p[1],  pGroup->factor[0] ) +
1475
                SYMCRYPT_MUL32x32TO64( p[2],  pGroup->factor[1] ) +
1476
                SYMCRYPT_MUL32x32TO64( p[3],  pGroup->factor[2] ) +
1477
                SYMCRYPT_MUL32x32TO64( p[4],  pGroup->factor[3] ) +
1478
                SYMCRYPT_MUL32x32TO64( p[5],  pGroup->factor[4] ) +
1479
                SYMCRYPT_MUL32x32TO64( p[6],  pGroup->factor[5] ) +
1480
                SYMCRYPT_MUL32x32TO64( p[7],  pGroup->factor[6] );
1481
        }
1482
#elif (SYMCRYPT_FDEF_DIGIT_SIZE % 32) == 0
1483

1484
        p -= 8;
1485
        Acc =
1486
            p[0] +
1487
            SYMCRYPT_MUL32x32TO64( p[1],  pGroup->factor[0] ) +
1488
            SYMCRYPT_MUL32x32TO64( p[2],  pGroup->factor[1] ) +
1489
            SYMCRYPT_MUL32x32TO64( p[3],  pGroup->factor[2] ) +
1490
            SYMCRYPT_MUL32x32TO64( p[4],  pGroup->factor[3] ) +
1491
            SYMCRYPT_MUL32x32TO64( p[5],  pGroup->factor[4] ) +
1492
            SYMCRYPT_MUL32x32TO64( p[6],  pGroup->factor[5] ) +
1493
            SYMCRYPT_MUL32x32TO64( p[7],  pGroup->factor[6] );
1494

1495
#else
1496
#error ??
1497
#endif
1498
        while( p > pSrc )
1499
        {
1500
            p -= 8;
1501
            Acc =
1502
                p[0] +
1503
                SYMCRYPT_MUL32x32TO64( p[1],  pGroup->factor[0] ) +
1504
                SYMCRYPT_MUL32x32TO64( p[2],  pGroup->factor[1] ) +
1505
                SYMCRYPT_MUL32x32TO64( p[3],  pGroup->factor[2] ) +
1506
                SYMCRYPT_MUL32x32TO64( p[4],  pGroup->factor[3] ) +
1507
                SYMCRYPT_MUL32x32TO64( p[5],  pGroup->factor[4] ) +
1508
                SYMCRYPT_MUL32x32TO64( p[6],  pGroup->factor[5] ) +
1509
                SYMCRYPT_MUL32x32TO64( p[7],  pGroup->factor[6] ) +
1510
                SYMCRYPT_MUL32x32TO64( (UINT32) Acc       , pGroup->factor[7] ) +
1511
                SYMCRYPT_MUL32x32TO64( (UINT32)(Acc >> 32), pGroup->factor[8] );
1512
        }
1513

1514
        // Now we check whether we have a multiple of one of the primes
1515
        while( nPrimes > 0 )
1516
        {
1517
            if( SymCryptIsMultipleOfSmallPrime( Acc, pPrime ) )
1518
            {
1519
                res = pContext->pPrimes[ (pPrime - pContext->pPrimeList) ];    // pointer subtraction auto-divides by size...
1520
                goto cleanup;
1521
            }
1522
            pPrime++;
1523
            nPrimes--;
1524
        }
1525

1526
        pGroup++;
1527
    }
1528

1529
    UNREFERENCED_PARAMETER( pbScratch );
1530
    UNREFERENCED_PARAMETER( cbScratch );
1531

1532
    // Did not find a small factor, return zero
1533
    res = 0;
1534

1535
cleanup:
1536
    return res;
1537
}
1538

1539
/* Wine hack: asm not supported yet */
1540

1541
VOID
1542
SYMCRYPT_CALL
1543
SymCryptFdefMaskedCopyAsm(
1544
    _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )        PCBYTE      pbSrc,
1545
    _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE )   PBYTE       pbDst,
1546
                                                                UINT32      nDigits,
1547
                                                                UINT32      mask )
1548
{
1549
    SymCryptFdefMaskedCopyC( pbSrc, pbDst, nDigits, mask );
1550
}
1551

1552
Product

Resources

Company