Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/tomcrypt/mpi.c
4393 views
1
/* LibTomMath, multiple-precision integer library -- Tom St Denis
 *
 * LibTomMath is a library that provides multiple-precision
 * integer arithmetic as well as number theoretic functionality.
 *
 * The library was designed directly after the MPI library by
 * Michael Fromberger but has been written from scratch with
 * additional optimizations in place.
 *
 * SPDX-License-Identifier: Unlicense
 */
12
13
#include <stdarg.h>
14
#include "tommath_private.h"
15
16
/* Start: bn_fast_mp_invmod.c */
17
18
/* computes the modular inverse via binary extended euclidean algorithm,
19
* that is c = 1/a mod b
20
*
21
* Based on slow invmod except this is optimized for the case where b is
22
* odd as per HAC Note 14.64 on pp. 610
23
*/
24
int fast_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c)
25
{
26
mp_int x, y, u, v, B, D;
27
int res, neg;
28
29
/* 2. [modified] b must be odd */
30
if (mp_iseven(b) == MP_YES) {
31
return MP_VAL;
32
}
33
34
/* init all our temps */
35
if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
36
return res;
37
}
38
39
/* x == modulus, y == value to invert */
40
if ((res = mp_copy(b, &x)) != MP_OKAY) {
41
goto LBL_ERR;
42
}
43
44
/* we need y = |a| */
45
if ((res = mp_mod(a, b, &y)) != MP_OKAY) {
46
goto LBL_ERR;
47
}
48
49
/* if one of x,y is zero return an error! */
50
if ((mp_iszero(&x) == MP_YES) || (mp_iszero(&y) == MP_YES)) {
51
res = MP_VAL;
52
goto LBL_ERR;
53
}
54
55
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
56
if ((res = mp_copy(&x, &u)) != MP_OKAY) {
57
goto LBL_ERR;
58
}
59
if ((res = mp_copy(&y, &v)) != MP_OKAY) {
60
goto LBL_ERR;
61
}
62
mp_set(&D, 1uL);
63
64
top:
65
/* 4. while u is even do */
66
while (mp_iseven(&u) == MP_YES) {
67
/* 4.1 u = u/2 */
68
if ((res = mp_div_2(&u, &u)) != MP_OKAY) {
69
goto LBL_ERR;
70
}
71
/* 4.2 if B is odd then */
72
if (mp_isodd(&B) == MP_YES) {
73
if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {
74
goto LBL_ERR;
75
}
76
}
77
/* B = B/2 */
78
if ((res = mp_div_2(&B, &B)) != MP_OKAY) {
79
goto LBL_ERR;
80
}
81
}
82
83
/* 5. while v is even do */
84
while (mp_iseven(&v) == MP_YES) {
85
/* 5.1 v = v/2 */
86
if ((res = mp_div_2(&v, &v)) != MP_OKAY) {
87
goto LBL_ERR;
88
}
89
/* 5.2 if D is odd then */
90
if (mp_isodd(&D) == MP_YES) {
91
/* D = (D-x)/2 */
92
if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {
93
goto LBL_ERR;
94
}
95
}
96
/* D = D/2 */
97
if ((res = mp_div_2(&D, &D)) != MP_OKAY) {
98
goto LBL_ERR;
99
}
100
}
101
102
/* 6. if u >= v then */
103
if (mp_cmp(&u, &v) != MP_LT) {
104
/* u = u - v, B = B - D */
105
if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {
106
goto LBL_ERR;
107
}
108
109
if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {
110
goto LBL_ERR;
111
}
112
} else {
113
/* v - v - u, D = D - B */
114
if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {
115
goto LBL_ERR;
116
}
117
118
if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {
119
goto LBL_ERR;
120
}
121
}
122
123
/* if not zero goto step 4 */
124
if (mp_iszero(&u) == MP_NO) {
125
goto top;
126
}
127
128
/* now a = C, b = D, gcd == g*v */
129
130
/* if v != 1 then there is no inverse */
131
if (mp_cmp_d(&v, 1uL) != MP_EQ) {
132
res = MP_VAL;
133
goto LBL_ERR;
134
}
135
136
/* b is now the inverse */
137
neg = a->sign;
138
while (D.sign == MP_NEG) {
139
if ((res = mp_add(&D, b, &D)) != MP_OKAY) {
140
goto LBL_ERR;
141
}
142
}
143
144
/* too big */
145
while (mp_cmp_mag(&D, b) != MP_LT) {
146
if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
147
goto LBL_ERR;
148
}
149
}
150
151
mp_exch(&D, c);
152
c->sign = neg;
153
res = MP_OKAY;
154
155
LBL_ERR:
156
mp_clear_multi(&x, &y, &u, &v, &B, &D, NULL);
157
return res;
158
}
159
160
/* End: bn_fast_mp_invmod.c */
161
162
/* Start: bn_fast_mp_montgomery_reduce.c */
163
164
/* computes xR**-1 == x (mod N) via Montgomery Reduction
165
*
166
* This is an optimized implementation of montgomery_reduce
167
* which uses the comba method to quickly calculate the columns of the
168
* reduction.
169
*
170
* Based on Algorithm 14.32 on pp.601 of HAC.
171
*/
172
int fast_mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
173
{
174
int ix, res, olduse;
175
mp_word W[MP_WARRAY];
176
177
if (x->used > (int)MP_WARRAY) {
178
return MP_VAL;
179
}
180
181
/* get old used count */
182
olduse = x->used;
183
184
/* grow a as required */
185
if (x->alloc < (n->used + 1)) {
186
if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) {
187
return res;
188
}
189
}
190
191
/* first we have to get the digits of the input into
192
* an array of double precision words W[...]
193
*/
194
{
195
mp_word *_W;
196
mp_digit *tmpx;
197
198
/* alias for the W[] array */
199
_W = W;
200
201
/* alias for the digits of x*/
202
tmpx = x->dp;
203
204
/* copy the digits of a into W[0..a->used-1] */
205
for (ix = 0; ix < x->used; ix++) {
206
*_W++ = *tmpx++;
207
}
208
209
/* zero the high words of W[a->used..m->used*2] */
210
for (; ix < ((n->used * 2) + 1); ix++) {
211
*_W++ = 0;
212
}
213
}
214
215
/* now we proceed to zero successive digits
216
* from the least significant upwards
217
*/
218
for (ix = 0; ix < n->used; ix++) {
219
/* mu = ai * m' mod b
220
*
221
* We avoid a double precision multiplication (which isn't required)
222
* by casting the value down to a mp_digit. Note this requires
223
* that W[ix-1] have the carry cleared (see after the inner loop)
224
*/
225
mp_digit mu;
226
mu = ((W[ix] & MP_MASK) * rho) & MP_MASK;
227
228
/* a = a + mu * m * b**i
229
*
230
* This is computed in place and on the fly. The multiplication
231
* by b**i is handled by offseting which columns the results
232
* are added to.
233
*
234
* Note the comba method normally doesn't handle carries in the
235
* inner loop In this case we fix the carry from the previous
236
* column since the Montgomery reduction requires digits of the
237
* result (so far) [see above] to work. This is
238
* handled by fixing up one carry after the inner loop. The
239
* carry fixups are done in order so after these loops the
240
* first m->used words of W[] have the carries fixed
241
*/
242
{
243
int iy;
244
mp_digit *tmpn;
245
mp_word *_W;
246
247
/* alias for the digits of the modulus */
248
tmpn = n->dp;
249
250
/* Alias for the columns set by an offset of ix */
251
_W = W + ix;
252
253
/* inner loop */
254
for (iy = 0; iy < n->used; iy++) {
255
*_W++ += (mp_word)mu * (mp_word)*tmpn++;
256
}
257
}
258
259
/* now fix carry for next digit, W[ix+1] */
260
W[ix + 1] += W[ix] >> (mp_word)DIGIT_BIT;
261
}
262
263
/* now we have to propagate the carries and
264
* shift the words downward [all those least
265
* significant digits we zeroed].
266
*/
267
{
268
mp_digit *tmpx;
269
mp_word *_W, *_W1;
270
271
/* nox fix rest of carries */
272
273
/* alias for current word */
274
_W1 = W + ix;
275
276
/* alias for next word, where the carry goes */
277
_W = W + ++ix;
278
279
for (; ix <= ((n->used * 2) + 1); ix++) {
280
*_W++ += *_W1++ >> (mp_word)DIGIT_BIT;
281
}
282
283
/* copy out, A = A/b**n
284
*
285
* The result is A/b**n but instead of converting from an
286
* array of mp_word to mp_digit than calling mp_rshd
287
* we just copy them in the right order
288
*/
289
290
/* alias for destination word */
291
tmpx = x->dp;
292
293
/* alias for shifted double precision result */
294
_W = W + n->used;
295
296
for (ix = 0; ix < (n->used + 1); ix++) {
297
*tmpx++ = *_W++ & (mp_word)MP_MASK;
298
}
299
300
/* zero oldused digits, if the input a was larger than
301
* m->used+1 we'll have to clear the digits
302
*/
303
for (; ix < olduse; ix++) {
304
*tmpx++ = 0;
305
}
306
}
307
308
/* set the max used and clamp */
309
x->used = n->used + 1;
310
mp_clamp(x);
311
312
/* if A >= m then A = A - m */
313
if (mp_cmp_mag(x, n) != MP_LT) {
314
return s_mp_sub(x, n, x);
315
}
316
return MP_OKAY;
317
}
318
319
/* End: bn_fast_mp_montgomery_reduce.c */
320
321
/* Start: bn_fast_s_mp_mul_digs.c */
322
323
/* Fast (comba) multiplier
324
*
325
* This is the fast column-array [comba] multiplier. It is
326
* designed to compute the columns of the product first
327
* then handle the carries afterwards. This has the effect
328
* of making the nested loops that compute the columns very
329
* simple and schedulable on super-scalar processors.
330
*
331
* This has been modified to produce a variable number of
332
* digits of output so if say only a half-product is required
333
* you don't have to compute the upper half (a feature
334
* required for fast Barrett reduction).
335
*
336
* Based on Algorithm 14.12 on pp.595 of HAC.
337
*
338
*/
339
int fast_s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
340
{
341
int olduse, res, pa, ix, iz;
342
mp_digit W[MP_WARRAY];
343
mp_word _W;
344
345
/* grow the destination as required */
346
if (c->alloc < digs) {
347
if ((res = mp_grow(c, digs)) != MP_OKAY) {
348
return res;
349
}
350
}
351
352
/* number of output digits to produce */
353
pa = MIN(digs, a->used + b->used);
354
355
/* clear the carry */
356
_W = 0;
357
for (ix = 0; ix < pa; ix++) {
358
int tx, ty;
359
int iy;
360
mp_digit *tmpx, *tmpy;
361
362
/* get offsets into the two bignums */
363
ty = MIN(b->used-1, ix);
364
tx = ix - ty;
365
366
/* setup temp aliases */
367
tmpx = a->dp + tx;
368
tmpy = b->dp + ty;
369
370
/* this is the number of times the loop will iterrate, essentially
371
while (tx++ < a->used && ty-- >= 0) { ... }
372
*/
373
iy = MIN(a->used-tx, ty+1);
374
375
/* execute loop */
376
for (iz = 0; iz < iy; ++iz) {
377
_W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
378
379
}
380
381
/* store term */
382
W[ix] = (mp_digit)_W & MP_MASK;
383
384
/* make next carry */
385
_W = _W >> (mp_word)DIGIT_BIT;
386
}
387
388
/* setup dest */
389
olduse = c->used;
390
c->used = pa;
391
392
{
393
mp_digit *tmpc;
394
tmpc = c->dp;
395
for (ix = 0; ix < pa; ix++) {
396
/* now extract the previous digit [below the carry] */
397
*tmpc++ = W[ix];
398
}
399
400
/* clear unused digits [that existed in the old copy of c] */
401
for (; ix < olduse; ix++) {
402
*tmpc++ = 0;
403
}
404
}
405
mp_clamp(c);
406
return MP_OKAY;
407
}
408
409
/* End: bn_fast_s_mp_mul_digs.c */
410
411
/* Start: bn_fast_s_mp_mul_high_digs.c */
412
413
/* this is a modified version of fast_s_mul_digs that only produces
414
* output digits *above* digs. See the comments for fast_s_mul_digs
415
* to see how it works.
416
*
417
* This is used in the Barrett reduction since for one of the multiplications
418
* only the higher digits were needed. This essentially halves the work.
419
*
420
* Based on Algorithm 14.12 on pp.595 of HAC.
421
*/
422
int fast_s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
423
{
424
int olduse, res, pa, ix, iz;
425
mp_digit W[MP_WARRAY];
426
mp_word _W;
427
428
/* grow the destination as required */
429
pa = a->used + b->used;
430
if (c->alloc < pa) {
431
if ((res = mp_grow(c, pa)) != MP_OKAY) {
432
return res;
433
}
434
}
435
436
/* number of output digits to produce */
437
pa = a->used + b->used;
438
_W = 0;
439
for (ix = digs; ix < pa; ix++) {
440
int tx, ty, iy;
441
mp_digit *tmpx, *tmpy;
442
443
/* get offsets into the two bignums */
444
ty = MIN(b->used-1, ix);
445
tx = ix - ty;
446
447
/* setup temp aliases */
448
tmpx = a->dp + tx;
449
tmpy = b->dp + ty;
450
451
/* this is the number of times the loop will iterrate, essentially its
452
while (tx++ < a->used && ty-- >= 0) { ... }
453
*/
454
iy = MIN(a->used-tx, ty+1);
455
456
/* execute loop */
457
for (iz = 0; iz < iy; iz++) {
458
_W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
459
}
460
461
/* store term */
462
W[ix] = (mp_digit)_W & MP_MASK;
463
464
/* make next carry */
465
_W = _W >> (mp_word)DIGIT_BIT;
466
}
467
468
/* setup dest */
469
olduse = c->used;
470
c->used = pa;
471
472
{
473
mp_digit *tmpc;
474
475
tmpc = c->dp + digs;
476
for (ix = digs; ix < pa; ix++) {
477
/* now extract the previous digit [below the carry] */
478
*tmpc++ = W[ix];
479
}
480
481
/* clear unused digits [that existed in the old copy of c] */
482
for (; ix < olduse; ix++) {
483
*tmpc++ = 0;
484
}
485
}
486
mp_clamp(c);
487
return MP_OKAY;
488
}
489
490
/* End: bn_fast_s_mp_mul_high_digs.c */
491
492
/* Start: bn_fast_s_mp_sqr.c */
493
494
/* the jist of squaring...
495
* you do like mult except the offset of the tmpx [one that
496
* starts closer to zero] can't equal the offset of tmpy.
497
* So basically you set up iy like before then you min it with
498
* (ty-tx) so that it never happens. You double all those
499
* you add in the inner loop
500
501
After that loop you do the squares and add them in.
502
*/
503
504
int fast_s_mp_sqr(const mp_int *a, mp_int *b)
505
{
506
int olduse, res, pa, ix, iz;
507
mp_digit W[MP_WARRAY], *tmpx;
508
mp_word W1;
509
510
/* grow the destination as required */
511
pa = a->used + a->used;
512
if (b->alloc < pa) {
513
if ((res = mp_grow(b, pa)) != MP_OKAY) {
514
return res;
515
}
516
}
517
518
/* number of output digits to produce */
519
W1 = 0;
520
for (ix = 0; ix < pa; ix++) {
521
int tx, ty, iy;
522
mp_word _W;
523
mp_digit *tmpy;
524
525
/* clear counter */
526
_W = 0;
527
528
/* get offsets into the two bignums */
529
ty = MIN(a->used-1, ix);
530
tx = ix - ty;
531
532
/* setup temp aliases */
533
tmpx = a->dp + tx;
534
tmpy = a->dp + ty;
535
536
/* this is the number of times the loop will iterrate, essentially
537
while (tx++ < a->used && ty-- >= 0) { ... }
538
*/
539
iy = MIN(a->used-tx, ty+1);
540
541
/* now for squaring tx can never equal ty
542
* we halve the distance since they approach at a rate of 2x
543
* and we have to round because odd cases need to be executed
544
*/
545
iy = MIN(iy, ((ty-tx)+1)>>1);
546
547
/* execute loop */
548
for (iz = 0; iz < iy; iz++) {
549
_W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
550
}
551
552
/* double the inner product and add carry */
553
_W = _W + _W + W1;
554
555
/* even columns have the square term in them */
556
if (((unsigned)ix & 1u) == 0u) {
557
_W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1];
558
}
559
560
/* store it */
561
W[ix] = _W & MP_MASK;
562
563
/* make next carry */
564
W1 = _W >> (mp_word)DIGIT_BIT;
565
}
566
567
/* setup dest */
568
olduse = b->used;
569
b->used = a->used+a->used;
570
571
{
572
mp_digit *tmpb;
573
tmpb = b->dp;
574
for (ix = 0; ix < pa; ix++) {
575
*tmpb++ = W[ix] & MP_MASK;
576
}
577
578
/* clear unused digits [that existed in the old copy of c] */
579
for (; ix < olduse; ix++) {
580
*tmpb++ = 0;
581
}
582
}
583
mp_clamp(b);
584
return MP_OKAY;
585
}
586
587
/* End: bn_fast_s_mp_sqr.c */
588
589
/* Start: bn_mp_2expt.c */
590
591
/* computes a = 2**b
592
*
593
* Simple algorithm which zeroes the int, grows it then just sets one bit
594
* as required.
595
*/
596
int mp_2expt(mp_int *a, int b)
597
{
598
int res;
599
600
/* zero a as per default */
601
mp_zero(a);
602
603
/* grow a to accomodate the single bit */
604
if ((res = mp_grow(a, (b / DIGIT_BIT) + 1)) != MP_OKAY) {
605
return res;
606
}
607
608
/* set the used count of where the bit will go */
609
a->used = (b / DIGIT_BIT) + 1;
610
611
/* put the single bit in its place */
612
a->dp[b / DIGIT_BIT] = (mp_digit)1 << (mp_digit)(b % DIGIT_BIT);
613
614
return MP_OKAY;
615
}
616
617
/* End: bn_mp_2expt.c */
618
619
/* Start: bn_mp_abs.c */
620
621
/* b = |a|
622
*
623
* Simple function copies the input and fixes the sign to positive
624
*/
625
int mp_abs(const mp_int *a, mp_int *b)
626
{
627
int res;
628
629
/* copy a to b */
630
if (a != b) {
631
if ((res = mp_copy(a, b)) != MP_OKAY) {
632
return res;
633
}
634
}
635
636
/* force the sign of b to positive */
637
b->sign = MP_ZPOS;
638
639
return MP_OKAY;
640
}
641
642
/* End: bn_mp_abs.c */
643
644
/* Start: bn_mp_add.c */
645
646
/* high level addition (handles signs) */
647
int mp_add(const mp_int *a, const mp_int *b, mp_int *c)
648
{
649
int sa, sb, res;
650
651
/* get sign of both inputs */
652
sa = a->sign;
653
sb = b->sign;
654
655
/* handle two cases, not four */
656
if (sa == sb) {
657
/* both positive or both negative */
658
/* add their magnitudes, copy the sign */
659
c->sign = sa;
660
res = s_mp_add(a, b, c);
661
} else {
662
/* one positive, the other negative */
663
/* subtract the one with the greater magnitude from */
664
/* the one of the lesser magnitude. The result gets */
665
/* the sign of the one with the greater magnitude. */
666
if (mp_cmp_mag(a, b) == MP_LT) {
667
c->sign = sb;
668
res = s_mp_sub(b, a, c);
669
} else {
670
c->sign = sa;
671
res = s_mp_sub(a, b, c);
672
}
673
}
674
return res;
675
}
676
677
/* End: bn_mp_add.c */
678
679
/* Start: bn_mp_add_d.c */
680
681
/* single digit addition */
682
int mp_add_d(const mp_int *a, mp_digit b, mp_int *c)
683
{
684
int res, ix, oldused;
685
mp_digit *tmpa, *tmpc, mu;
686
687
/* grow c as required */
688
if (c->alloc < (a->used + 1)) {
689
if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
690
return res;
691
}
692
}
693
694
/* if a is negative and |a| >= b, call c = |a| - b */
695
if ((a->sign == MP_NEG) && ((a->used > 1) || (a->dp[0] >= b))) {
696
mp_int a_ = *a;
697
/* temporarily fix sign of a */
698
a_.sign = MP_ZPOS;
699
700
/* c = |a| - b */
701
res = mp_sub_d(&a_, b, c);
702
703
/* fix sign */
704
c->sign = MP_NEG;
705
706
/* clamp */
707
mp_clamp(c);
708
709
return res;
710
}
711
712
/* old number of used digits in c */
713
oldused = c->used;
714
715
/* source alias */
716
tmpa = a->dp;
717
718
/* destination alias */
719
tmpc = c->dp;
720
721
/* if a is positive */
722
if (a->sign == MP_ZPOS) {
723
/* add digit, after this we're propagating
724
* the carry.
725
*/
726
*tmpc = *tmpa++ + b;
727
mu = *tmpc >> DIGIT_BIT;
728
*tmpc++ &= MP_MASK;
729
730
/* now handle rest of the digits */
731
for (ix = 1; ix < a->used; ix++) {
732
*tmpc = *tmpa++ + mu;
733
mu = *tmpc >> DIGIT_BIT;
734
*tmpc++ &= MP_MASK;
735
}
736
/* set final carry */
737
ix++;
738
*tmpc++ = mu;
739
740
/* setup size */
741
c->used = a->used + 1;
742
} else {
743
/* a was negative and |a| < b */
744
c->used = 1;
745
746
/* the result is a single digit */
747
if (a->used == 1) {
748
*tmpc++ = b - a->dp[0];
749
} else {
750
*tmpc++ = b;
751
}
752
753
/* setup count so the clearing of oldused
754
* can fall through correctly
755
*/
756
ix = 1;
757
}
758
759
/* sign always positive */
760
c->sign = MP_ZPOS;
761
762
/* now zero to oldused */
763
while (ix++ < oldused) {
764
*tmpc++ = 0;
765
}
766
mp_clamp(c);
767
768
return MP_OKAY;
769
}
770
771
/* End: bn_mp_add_d.c */
772
773
/* Start: bn_mp_addmod.c */
774
775
/* d = a + b (mod c) */
776
int mp_addmod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)
777
{
778
int res;
779
mp_int t;
780
781
if ((res = mp_init(&t)) != MP_OKAY) {
782
return res;
783
}
784
785
if ((res = mp_add(a, b, &t)) != MP_OKAY) {
786
mp_clear(&t);
787
return res;
788
}
789
res = mp_mod(&t, c, d);
790
mp_clear(&t);
791
return res;
792
}
793
794
/* End: bn_mp_addmod.c */
795
796
/* Start: bn_mp_and.c */
797
798
/* AND two ints together */
799
int mp_and(const mp_int *a, const mp_int *b, mp_int *c)
800
{
801
int res, ix, px;
802
mp_int t;
803
const mp_int *x;
804
805
if (a->used > b->used) {
806
if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
807
return res;
808
}
809
px = b->used;
810
x = b;
811
} else {
812
if ((res = mp_init_copy(&t, b)) != MP_OKAY) {
813
return res;
814
}
815
px = a->used;
816
x = a;
817
}
818
819
for (ix = 0; ix < px; ix++) {
820
t.dp[ix] &= x->dp[ix];
821
}
822
823
/* zero digits above the last from the smallest mp_int */
824
for (; ix < t.used; ix++) {
825
t.dp[ix] = 0;
826
}
827
828
mp_clamp(&t);
829
mp_exch(c, &t);
830
mp_clear(&t);
831
return MP_OKAY;
832
}
833
834
/* End: bn_mp_and.c */
835
836
/* Start: bn_mp_clamp.c */
837
838
/* trim unused digits
839
*
840
* This is used to ensure that leading zero digits are
841
* trimed and the leading "used" digit will be non-zero
842
* Typically very fast. Also fixes the sign if there
843
* are no more leading digits
844
*/
845
void mp_clamp(mp_int *a)
846
{
847
/* decrease used while the most significant digit is
848
* zero.
849
*/
850
while ((a->used > 0) && (a->dp[a->used - 1] == 0u)) {
851
--(a->used);
852
}
853
854
/* reset the sign flag if used == 0 */
855
if (a->used == 0) {
856
a->sign = MP_ZPOS;
857
}
858
}
859
860
/* End: bn_mp_clamp.c */
861
862
/* Start: bn_mp_clear.c */
863
864
/* clear one (frees) */
865
void mp_clear(mp_int *a)
866
{
867
int i;
868
869
/* only do anything if a hasn't been freed previously */
870
if (a->dp != NULL) {
871
/* first zero the digits */
872
for (i = 0; i < a->used; i++) {
873
a->dp[i] = 0;
874
}
875
876
/* free ram */
877
XFREE(a->dp);
878
879
/* reset members to make debugging easier */
880
a->dp = NULL;
881
a->alloc = a->used = 0;
882
a->sign = MP_ZPOS;
883
}
884
}
885
886
/* End: bn_mp_clear.c */
887
888
/* Start: bn_mp_clear_multi.c */
889
890
/* Clears every mp_int in a NULL-terminated argument list by calling
 * mp_clear() on each pointer in turn. */
void mp_clear_multi(mp_int *mp, ...)
{
   va_list ap;
   mp_int *cur;

   va_start(ap, mp);
   for (cur = mp; cur != NULL; cur = va_arg(ap, mp_int *)) {
      mp_clear(cur);
   }
   va_end(ap);
}
901
902
/* End: bn_mp_clear_multi.c */
903
904
/* Start: bn_mp_cmp.c */
905
906
/* compare two ints (signed)*/
907
int mp_cmp(const mp_int *a, const mp_int *b)
908
{
909
/* compare based on sign */
910
if (a->sign != b->sign) {
911
if (a->sign == MP_NEG) {
912
return MP_LT;
913
} else {
914
return MP_GT;
915
}
916
}
917
918
/* compare digits */
919
if (a->sign == MP_NEG) {
920
/* if negative compare opposite direction */
921
return mp_cmp_mag(b, a);
922
} else {
923
return mp_cmp_mag(a, b);
924
}
925
}
926
927
/* End: bn_mp_cmp.c */
928
929
/* Start: bn_mp_cmp_d.c */
930
931
/* compare a digit */
932
int mp_cmp_d(const mp_int *a, mp_digit b)
933
{
934
/* compare based on sign */
935
if (a->sign == MP_NEG) {
936
return MP_LT;
937
}
938
939
/* compare based on magnitude */
940
if (a->used > 1) {
941
return MP_GT;
942
}
943
944
/* compare the only digit of a to b */
945
if (a->dp[0] > b) {
946
return MP_GT;
947
} else if (a->dp[0] < b) {
948
return MP_LT;
949
} else {
950
return MP_EQ;
951
}
952
}
953
954
/* End: bn_mp_cmp_d.c */
955
956
/* Start: bn_mp_cmp_mag.c */
957
958
/* compare maginitude of two ints (unsigned) */
959
int mp_cmp_mag(const mp_int *a, const mp_int *b)
960
{
961
int n;
962
mp_digit *tmpa, *tmpb;
963
964
/* compare based on # of non-zero digits */
965
if (a->used > b->used) {
966
return MP_GT;
967
}
968
969
if (a->used < b->used) {
970
return MP_LT;
971
}
972
973
/* alias for a */
974
tmpa = a->dp + (a->used - 1);
975
976
/* alias for b */
977
tmpb = b->dp + (a->used - 1);
978
979
/* compare based on digits */
980
for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
981
if (*tmpa > *tmpb) {
982
return MP_GT;
983
}
984
985
if (*tmpa < *tmpb) {
986
return MP_LT;
987
}
988
}
989
return MP_EQ;
990
}
991
992
/* End: bn_mp_cmp_mag.c */
993
994
/* Start: bn_mp_cnt_lsb.c */
995
996
static const int lnz[16] = {
997
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
998
};
999
1000
/* Counts the number of lsbs which are zero before the first zero bit */
1001
int mp_cnt_lsb(const mp_int *a)
1002
{
1003
int x;
1004
mp_digit q, qq;
1005
1006
/* easy out */
1007
if (mp_iszero(a) == MP_YES) {
1008
return 0;
1009
}
1010
1011
/* scan lower digits until non-zero */
1012
for (x = 0; (x < a->used) && (a->dp[x] == 0u); x++) {}
1013
q = a->dp[x];
1014
x *= DIGIT_BIT;
1015
1016
/* now scan this digit until a 1 is found */
1017
if ((q & 1u) == 0u) {
1018
do {
1019
qq = q & 15u;
1020
x += lnz[qq];
1021
q >>= 4;
1022
} while (qq == 0u);
1023
}
1024
return x;
1025
}
1026
1027
/* End: bn_mp_cnt_lsb.c */
1028
1029
/* Start: bn_mp_complement.c */
1030
1031
/* b = ~a */
1032
int mp_complement(const mp_int *a, mp_int *b)
1033
{
1034
int res = mp_neg(a, b);
1035
return (res == MP_OKAY) ? mp_sub_d(b, 1uL, b) : res;
1036
}
1037
1038
/* End: bn_mp_complement.c */
1039
1040
/* Start: bn_mp_copy.c */
1041
1042
/* copy, b = a */
1043
int mp_copy(const mp_int *a, mp_int *b)
1044
{
1045
int res, n;
1046
1047
/* if dst == src do nothing */
1048
if (a == b) {
1049
return MP_OKAY;
1050
}
1051
1052
/* grow dest */
1053
if (b->alloc < a->used) {
1054
if ((res = mp_grow(b, a->used)) != MP_OKAY) {
1055
return res;
1056
}
1057
}
1058
1059
/* zero b and copy the parameters over */
1060
{
1061
mp_digit *tmpa, *tmpb;
1062
1063
/* pointer aliases */
1064
1065
/* source */
1066
tmpa = a->dp;
1067
1068
/* destination */
1069
tmpb = b->dp;
1070
1071
/* copy all the digits */
1072
for (n = 0; n < a->used; n++) {
1073
*tmpb++ = *tmpa++;
1074
}
1075
1076
/* clear high digits */
1077
for (; n < b->used; n++) {
1078
*tmpb++ = 0;
1079
}
1080
}
1081
1082
/* copy used count and sign */
1083
b->used = a->used;
1084
b->sign = a->sign;
1085
return MP_OKAY;
1086
}
1087
1088
/* End: bn_mp_copy.c */
1089
1090
/* Start: bn_mp_count_bits.c */
1091
1092
/* returns the number of bits in an int */
1093
int mp_count_bits(const mp_int *a)
1094
{
1095
int r;
1096
mp_digit q;
1097
1098
/* shortcut */
1099
if (a->used == 0) {
1100
return 0;
1101
}
1102
1103
/* get number of digits and add that */
1104
r = (a->used - 1) * DIGIT_BIT;
1105
1106
/* take the last digit and count the bits in it */
1107
q = a->dp[a->used - 1];
1108
while (q > (mp_digit)0) {
1109
++r;
1110
q >>= (mp_digit)1;
1111
}
1112
return r;
1113
}
1114
1115
/* End: bn_mp_count_bits.c */
1116
1117
/* Start: bn_mp_div.c */
1118
1119
/* integer signed division.
1120
* c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1121
* HAC pp.598 Algorithm 14.20
1122
*
1123
* Note that the description in HAC is horribly
1124
* incomplete. For example, it doesn't consider
1125
* the case where digits are removed from 'x' in
1126
* the inner loop. It also doesn't consider the
1127
* case that y has fewer than three digits, etc..
1128
*
1129
* The overall algorithm is as described as
1130
* 14.20 from HAC but fixed to treat these cases.
1131
*/
1132
int mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d)
1133
{
1134
mp_int q, x, y, t1, t2;
1135
int res, n, t, i, norm, neg;
1136
1137
/* is divisor zero ? */
1138
if (mp_iszero(b) == MP_YES) {
1139
return MP_VAL;
1140
}
1141
1142
/* if a < b then q=0, r = a */
1143
if (mp_cmp_mag(a, b) == MP_LT) {
1144
if (d != NULL) {
1145
res = mp_copy(a, d);
1146
} else {
1147
res = MP_OKAY;
1148
}
1149
if (c != NULL) {
1150
mp_zero(c);
1151
}
1152
return res;
1153
}
1154
1155
if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) {
1156
return res;
1157
}
1158
q.used = a->used + 2;
1159
1160
if ((res = mp_init(&t1)) != MP_OKAY) {
1161
goto LBL_Q;
1162
}
1163
1164
if ((res = mp_init(&t2)) != MP_OKAY) {
1165
goto LBL_T1;
1166
}
1167
1168
if ((res = mp_init_copy(&x, a)) != MP_OKAY) {
1169
goto LBL_T2;
1170
}
1171
1172
if ((res = mp_init_copy(&y, b)) != MP_OKAY) {
1173
goto LBL_X;
1174
}
1175
1176
/* fix the sign */
1177
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1178
x.sign = y.sign = MP_ZPOS;
1179
1180
/* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1181
norm = mp_count_bits(&y) % DIGIT_BIT;
1182
if (norm < (DIGIT_BIT - 1)) {
1183
norm = (DIGIT_BIT - 1) - norm;
1184
if ((res = mp_mul_2d(&x, norm, &x)) != MP_OKAY) {
1185
goto LBL_Y;
1186
}
1187
if ((res = mp_mul_2d(&y, norm, &y)) != MP_OKAY) {
1188
goto LBL_Y;
1189
}
1190
} else {
1191
norm = 0;
1192
}
1193
1194
/* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1195
n = x.used - 1;
1196
t = y.used - 1;
1197
1198
/* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1199
if ((res = mp_lshd(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1200
goto LBL_Y;
1201
}
1202
1203
while (mp_cmp(&x, &y) != MP_LT) {
1204
++(q.dp[n - t]);
1205
if ((res = mp_sub(&x, &y, &x)) != MP_OKAY) {
1206
goto LBL_Y;
1207
}
1208
}
1209
1210
/* reset y by shifting it back down */
1211
mp_rshd(&y, n - t);
1212
1213
/* step 3. for i from n down to (t + 1) */
1214
for (i = n; i >= (t + 1); i--) {
1215
if (i > x.used) {
1216
continue;
1217
}
1218
1219
/* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1220
* otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1221
if (x.dp[i] == y.dp[t]) {
1222
q.dp[(i - t) - 1] = ((mp_digit)1 << (mp_digit)DIGIT_BIT) - (mp_digit)1;
1223
} else {
1224
mp_word tmp;
1225
tmp = (mp_word)x.dp[i] << (mp_word)DIGIT_BIT;
1226
tmp |= (mp_word)x.dp[i - 1];
1227
tmp /= (mp_word)y.dp[t];
1228
if (tmp > (mp_word)MP_MASK) {
1229
tmp = MP_MASK;
1230
}
1231
q.dp[(i - t) - 1] = (mp_digit)(tmp & (mp_word)MP_MASK);
1232
}
1233
1234
/* while (q{i-t-1} * (yt * b + y{t-1})) >
1235
xi * b**2 + xi-1 * b + xi-2
1236
1237
do q{i-t-1} -= 1;
1238
*/
1239
q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] + 1uL) & (mp_digit)MP_MASK;
1240
do {
1241
q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & (mp_digit)MP_MASK;
1242
1243
/* find left hand */
1244
mp_zero(&t1);
1245
t1.dp[0] = ((t - 1) < 0) ? 0u : y.dp[t - 1];
1246
t1.dp[1] = y.dp[t];
1247
t1.used = 2;
1248
if ((res = mp_mul_d(&t1, q.dp[(i - t) - 1], &t1)) != MP_OKAY) {
1249
goto LBL_Y;
1250
}
1251
1252
/* find right hand */
1253
t2.dp[0] = ((i - 2) < 0) ? 0u : x.dp[i - 2];
1254
t2.dp[1] = ((i - 1) < 0) ? 0u : x.dp[i - 1];
1255
t2.dp[2] = x.dp[i];
1256
t2.used = 3;
1257
} while (mp_cmp_mag(&t1, &t2) == MP_GT);
1258
1259
/* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1260
if ((res = mp_mul_d(&y, q.dp[(i - t) - 1], &t1)) != MP_OKAY) {
1261
goto LBL_Y;
1262
}
1263
1264
if ((res = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) {
1265
goto LBL_Y;
1266
}
1267
1268
if ((res = mp_sub(&x, &t1, &x)) != MP_OKAY) {
1269
goto LBL_Y;
1270
}
1271
1272
/* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1273
if (x.sign == MP_NEG) {
1274
if ((res = mp_copy(&y, &t1)) != MP_OKAY) {
1275
goto LBL_Y;
1276
}
1277
if ((res = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) {
1278
goto LBL_Y;
1279
}
1280
if ((res = mp_add(&x, &t1, &x)) != MP_OKAY) {
1281
goto LBL_Y;
1282
}
1283
1284
q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & MP_MASK;
1285
}
1286
}
1287
1288
/* now q is the quotient and x is the remainder
1289
* [which we have to normalize]
1290
*/
1291
1292
/* get sign before writing to c */
1293
x.sign = (x.used == 0) ? MP_ZPOS : a->sign;
1294
1295
if (c != NULL) {
1296
mp_clamp(&q);
1297
mp_exch(&q, c);
1298
c->sign = neg;
1299
}
1300
1301
if (d != NULL) {
1302
if ((res = mp_div_2d(&x, norm, &x, NULL)) != MP_OKAY) {
1303
goto LBL_Y;
1304
}
1305
mp_exch(&x, d);
1306
}
1307
1308
res = MP_OKAY;
1309
1310
LBL_Y:
1311
mp_clear(&y);
1312
LBL_X:
1313
mp_clear(&x);
1314
LBL_T2:
1315
mp_clear(&t2);
1316
LBL_T1:
1317
mp_clear(&t1);
1318
LBL_Q:
1319
mp_clear(&q);
1320
return res;
1321
}
1322
1323
/* End: bn_mp_div.c */
1324
1325
/* Start: bn_mp_div_2.c */
1326
1327
/* b = a/2 */
1328
int mp_div_2(const mp_int *a, mp_int *b)
1329
{
1330
int x, res, oldused;
1331
1332
/* copy */
1333
if (b->alloc < a->used) {
1334
if ((res = mp_grow(b, a->used)) != MP_OKAY) {
1335
return res;
1336
}
1337
}
1338
1339
oldused = b->used;
1340
b->used = a->used;
1341
{
1342
mp_digit r, rr, *tmpa, *tmpb;
1343
1344
/* source alias */
1345
tmpa = a->dp + b->used - 1;
1346
1347
/* dest alias */
1348
tmpb = b->dp + b->used - 1;
1349
1350
/* carry */
1351
r = 0;
1352
for (x = b->used - 1; x >= 0; x--) {
1353
/* get the carry for the next iteration */
1354
rr = *tmpa & 1u;
1355
1356
/* shift the current digit, add in carry and store */
1357
*tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1358
1359
/* forward carry to next iteration */
1360
r = rr;
1361
}
1362
1363
/* zero excess digits */
1364
tmpb = b->dp + b->used;
1365
for (x = b->used; x < oldused; x++) {
1366
*tmpb++ = 0;
1367
}
1368
}
1369
b->sign = a->sign;
1370
mp_clamp(b);
1371
return MP_OKAY;
1372
}
1373
1374
/* End: bn_mp_div_2.c */
1375
1376
/* Start: bn_mp_div_2d.c */
1377
1378
/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1379
int mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d)
1380
{
1381
mp_digit D, r, rr;
1382
int x, res;
1383
1384
/* if the shift count is <= 0 then we do no work */
1385
if (b <= 0) {
1386
res = mp_copy(a, c);
1387
if (d != NULL) {
1388
mp_zero(d);
1389
}
1390
return res;
1391
}
1392
1393
/* copy */
1394
if ((res = mp_copy(a, c)) != MP_OKAY) {
1395
return res;
1396
}
1397
/* 'a' should not be used after here - it might be the same as d */
1398
1399
/* get the remainder */
1400
if (d != NULL) {
1401
if ((res = mp_mod_2d(a, b, d)) != MP_OKAY) {
1402
return res;
1403
}
1404
}
1405
1406
/* shift by as many digits in the bit count */
1407
if (b >= DIGIT_BIT) {
1408
mp_rshd(c, b / DIGIT_BIT);
1409
}
1410
1411
/* shift any bit count < DIGIT_BIT */
1412
D = (mp_digit)(b % DIGIT_BIT);
1413
if (D != 0u) {
1414
mp_digit *tmpc, mask, shift;
1415
1416
/* mask */
1417
mask = ((mp_digit)1 << D) - 1uL;
1418
1419
/* shift for lsb */
1420
shift = (mp_digit)DIGIT_BIT - D;
1421
1422
/* alias */
1423
tmpc = c->dp + (c->used - 1);
1424
1425
/* carry */
1426
r = 0;
1427
for (x = c->used - 1; x >= 0; x--) {
1428
/* get the lower bits of this word in a temp */
1429
rr = *tmpc & mask;
1430
1431
/* shift the current word and mix in the carry bits from the previous word */
1432
*tmpc = (*tmpc >> D) | (r << shift);
1433
--tmpc;
1434
1435
/* set the carry to the carry bits of the current word found above */
1436
r = rr;
1437
}
1438
}
1439
mp_clamp(c);
1440
return MP_OKAY;
1441
}
1442
1443
/* End: bn_mp_div_2d.c */
1444
1445
/* Start: bn_mp_div_3.c */
1446
1447
/* divide by three (based on routine from MPI and the GMP manual) */
1448
int mp_div_3(const mp_int *a, mp_int *c, mp_digit *d)
1449
{
1450
mp_int q;
1451
mp_word w, t;
1452
mp_digit b;
1453
int res, ix;
1454
1455
/* b = 2**DIGIT_BIT / 3 */
1456
b = ((mp_word)1 << (mp_word)DIGIT_BIT) / (mp_word)3;
1457
1458
if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1459
return res;
1460
}
1461
1462
q.used = a->used;
1463
q.sign = a->sign;
1464
w = 0;
1465
for (ix = a->used - 1; ix >= 0; ix--) {
1466
w = (w << (mp_word)DIGIT_BIT) | (mp_word)a->dp[ix];
1467
1468
if (w >= 3u) {
1469
/* multiply w by [1/3] */
1470
t = (w * (mp_word)b) >> (mp_word)DIGIT_BIT;
1471
1472
/* now subtract 3 * [w/3] from w, to get the remainder */
1473
w -= t+t+t;
1474
1475
/* fixup the remainder as required since
1476
* the optimization is not exact.
1477
*/
1478
while (w >= 3u) {
1479
t += 1u;
1480
w -= 3u;
1481
}
1482
} else {
1483
t = 0;
1484
}
1485
q.dp[ix] = (mp_digit)t;
1486
}
1487
1488
/* [optional] store the remainder */
1489
if (d != NULL) {
1490
*d = (mp_digit)w;
1491
}
1492
1493
/* [optional] store the quotient */
1494
if (c != NULL) {
1495
mp_clamp(&q);
1496
mp_exch(&q, c);
1497
}
1498
mp_clear(&q);
1499
1500
return res;
1501
}
1502
1503
/* End: bn_mp_div_3.c */
1504
1505
/* Start: bn_mp_div_d.c */
1506
1507
/* returns 1 and stores the exponent in *p when b == 2**x for some
 * 0 <= x < DIGIT_BIT, otherwise returns 0 and leaves *p untouched
 */
static int s_is_power_of_two(mp_digit b, int *p)
{
   int bit = 0;

   /* zero has no bit set at all */
   if (b == 0u) {
      return 0;
   }

   /* more than one bit set: not a power of two */
   if ((b & (b - 1u)) != 0u) {
      return 0;
   }

   /* find which single bit it is */
   while (bit < DIGIT_BIT) {
      if (((mp_digit)1 << (mp_digit)bit) == b) {
         *p = bit;
         return 1;
      }
      bit++;
   }
   return 0;
}
1524
1525
/* single digit division (based on routine from MPI) */
1526
int mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
1527
{
1528
mp_int q;
1529
mp_word w;
1530
mp_digit t;
1531
int res, ix;
1532
1533
/* cannot divide by zero */
1534
if (b == 0u) {
1535
return MP_VAL;
1536
}
1537
1538
/* quick outs */
1539
if ((b == 1u) || (mp_iszero(a) == MP_YES)) {
1540
if (d != NULL) {
1541
*d = 0;
1542
}
1543
if (c != NULL) {
1544
return mp_copy(a, c);
1545
}
1546
return MP_OKAY;
1547
}
1548
1549
/* power of two ? */
1550
if (s_is_power_of_two(b, &ix) == 1) {
1551
if (d != NULL) {
1552
*d = a->dp[0] & (((mp_digit)1<<(mp_digit)ix) - 1uL);
1553
}
1554
if (c != NULL) {
1555
return mp_div_2d(a, ix, c, NULL);
1556
}
1557
return MP_OKAY;
1558
}
1559
1560
/* three? */
1561
if (b == 3u) {
1562
return mp_div_3(a, c, d);
1563
}
1564
1565
/* no easy answer [c'est la vie]. Just division */
1566
if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1567
return res;
1568
}
1569
1570
q.used = a->used;
1571
q.sign = a->sign;
1572
w = 0;
1573
for (ix = a->used - 1; ix >= 0; ix--) {
1574
w = (w << (mp_word)DIGIT_BIT) | (mp_word)a->dp[ix];
1575
1576
if (w >= b) {
1577
t = (mp_digit)(w / b);
1578
w -= (mp_word)t * (mp_word)b;
1579
} else {
1580
t = 0;
1581
}
1582
q.dp[ix] = t;
1583
}
1584
1585
if (d != NULL) {
1586
*d = (mp_digit)w;
1587
}
1588
1589
if (c != NULL) {
1590
mp_clamp(&q);
1591
mp_exch(&q, c);
1592
}
1593
mp_clear(&q);
1594
1595
return res;
1596
}
1597
1598
/* End: bn_mp_div_d.c */
1599
1600
/* Start: bn_mp_dr_is_modulus.c */
1601
1602
/* determines if a number is a valid DR modulus */
1603
int mp_dr_is_modulus(const mp_int *a)
1604
{
1605
int ix;
1606
1607
/* must be at least two digits */
1608
if (a->used < 2) {
1609
return 0;
1610
}
1611
1612
/* must be of the form b**k - a [a <= b] so all
1613
* but the first digit must be equal to -1 (mod b).
1614
*/
1615
for (ix = 1; ix < a->used; ix++) {
1616
if (a->dp[ix] != MP_MASK) {
1617
return 0;
1618
}
1619
}
1620
return 1;
1621
}
1622
1623
/* End: bn_mp_dr_is_modulus.c */
1624
1625
/* Start: bn_mp_dr_reduce.c */
1626
1627
/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1628
*
1629
* Based on algorithm from the paper
1630
*
1631
* "Generating Efficient Primes for Discrete Log Cryptosystems"
1632
* Chae Hoon Lim, Pil Joong Lee,
1633
* POSTECH Information Research Laboratories
1634
*
1635
* The modulus must be of a special format [see manual]
1636
*
1637
* Has been modified to use algorithm 7.10 from the LTM book instead
1638
*
1639
* Input x must be in the range 0 <= x <= (n-1)**2
1640
*/
1641
int mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k)
1642
{
1643
int err, i, m;
1644
mp_word r;
1645
mp_digit mu, *tmpx1, *tmpx2;
1646
1647
/* m = digits in modulus */
1648
m = n->used;
1649
1650
/* ensure that "x" has at least 2m digits */
1651
if (x->alloc < (m + m)) {
1652
if ((err = mp_grow(x, m + m)) != MP_OKAY) {
1653
return err;
1654
}
1655
}
1656
1657
/* top of loop, this is where the code resumes if
1658
* another reduction pass is required.
1659
*/
1660
top:
1661
/* aliases for digits */
1662
/* alias for lower half of x */
1663
tmpx1 = x->dp;
1664
1665
/* alias for upper half of x, or x/B**m */
1666
tmpx2 = x->dp + m;
1667
1668
/* set carry to zero */
1669
mu = 0;
1670
1671
/* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1672
for (i = 0; i < m; i++) {
1673
r = ((mp_word)*tmpx2++ * (mp_word)k) + *tmpx1 + mu;
1674
*tmpx1++ = (mp_digit)(r & MP_MASK);
1675
mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1676
}
1677
1678
/* set final carry */
1679
*tmpx1++ = mu;
1680
1681
/* zero words above m */
1682
for (i = m + 1; i < x->used; i++) {
1683
*tmpx1++ = 0;
1684
}
1685
1686
/* clamp, sub and return */
1687
mp_clamp(x);
1688
1689
/* if x >= n then subtract and reduce again
1690
* Each successive "recursion" makes the input smaller and smaller.
1691
*/
1692
if (mp_cmp_mag(x, n) != MP_LT) {
1693
if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
1694
return err;
1695
}
1696
goto top;
1697
}
1698
return MP_OKAY;
1699
}
1700
1701
/* End: bn_mp_dr_reduce.c */
1702
1703
/* Start: bn_mp_dr_setup.c */
1704
#include "tommath_private.h"
1705
1706
/* determines the setup value */
1707
void mp_dr_setup(const mp_int *a, mp_digit *d)
1708
{
1709
/* the casts are required if DIGIT_BIT is one less than
1710
* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1711
*/
1712
*d = (mp_digit)(((mp_word)1 << (mp_word)DIGIT_BIT) - (mp_word)a->dp[0]);
1713
}
1714
1715
/* End: bn_mp_dr_setup.c */
1716
1717
/* Start: bn_mp_exch.c */
1718
1719
/* swap the elements of two integers, for cases where you can't simply swap the
1720
* mp_int pointers around
1721
*/
1722
void mp_exch(mp_int *a, mp_int *b)
1723
{
1724
mp_int t;
1725
1726
t = *a;
1727
*a = *b;
1728
*b = t;
1729
}
1730
1731
/* End: bn_mp_exch.c */
1732
1733
/* Start: bn_mp_export.c */
1734
1735
/* based on gmp's mpz_export.
1736
* see http://gmplib.org/manual/Integer-Import-and-Export.html
1737
*/
1738
int mp_export(void *rop, size_t *countp, int order, size_t size,
1739
int endian, size_t nails, const mp_int *op)
1740
{
1741
int result;
1742
size_t odd_nails, nail_bytes, i, j, bits, count;
1743
unsigned char odd_nail_mask;
1744
1745
mp_int t;
1746
1747
if ((result = mp_init_copy(&t, op)) != MP_OKAY) {
1748
return result;
1749
}
1750
1751
if (endian == 0) {
1752
union {
1753
unsigned int i;
1754
char c[4];
1755
} lint;
1756
lint.i = 0x01020304;
1757
1758
endian = (lint.c[0] == '\x04') ? -1 : 1;
1759
}
1760
1761
odd_nails = (nails % 8u);
1762
odd_nail_mask = 0xff;
1763
for (i = 0; i < odd_nails; ++i) {
1764
odd_nail_mask ^= (unsigned char)(1u << (7u - i));
1765
}
1766
nail_bytes = nails / 8u;
1767
1768
bits = (size_t)mp_count_bits(&t);
1769
count = (bits / ((size * 8u) - nails)) + (((bits % ((size * 8u) - nails)) != 0u) ? 1u : 0u);
1770
1771
for (i = 0; i < count; ++i) {
1772
for (j = 0; j < size; ++j) {
1773
unsigned char *byte = (unsigned char *)rop +
1774
(((order == -1) ? i : ((count - 1u) - i)) * size) +
1775
((endian == -1) ? j : ((size - 1u) - j));
1776
1777
if (j >= (size - nail_bytes)) {
1778
*byte = 0;
1779
continue;
1780
}
1781
1782
*byte = (unsigned char)((j == ((size - nail_bytes) - 1u)) ? (t.dp[0] & odd_nail_mask) : (t.dp[0] & 0xFFuL));
1783
1784
if ((result = mp_div_2d(&t, (j == ((size - nail_bytes) - 1u)) ? (int)(8u - odd_nails) : 8, &t, NULL)) != MP_OKAY) {
1785
mp_clear(&t);
1786
return result;
1787
}
1788
}
1789
}
1790
1791
mp_clear(&t);
1792
1793
if (countp != NULL) {
1794
*countp = count;
1795
}
1796
1797
return MP_OKAY;
1798
}
1799
1800
/* End: bn_mp_export.c */
1801
1802
/* Start: bn_mp_expt_d.c */
1803
1804
/* wrapper function for mp_expt_d_ex() */
1805
int mp_expt_d(const mp_int *a, mp_digit b, mp_int *c)
1806
{
1807
return mp_expt_d_ex(a, b, c, 0);
1808
}
1809
1810
/* End: bn_mp_expt_d.c */
1811
1812
/* Start: bn_mp_expt_d_ex.c */
1813
1814
/* calculate c = a**b using a square-multiply algorithm */
1815
int mp_expt_d_ex(const mp_int *a, mp_digit b, mp_int *c, int fast)
1816
{
1817
int res;
1818
unsigned int x;
1819
1820
mp_int g;
1821
1822
if ((res = mp_init_copy(&g, a)) != MP_OKAY) {
1823
return res;
1824
}
1825
1826
/* set initial result */
1827
mp_set(c, 1uL);
1828
1829
if (fast != 0) {
1830
while (b > 0u) {
1831
/* if the bit is set multiply */
1832
if ((b & 1u) != 0u) {
1833
if ((res = mp_mul(c, &g, c)) != MP_OKAY) {
1834
mp_clear(&g);
1835
return res;
1836
}
1837
}
1838
1839
/* square */
1840
if (b > 1u) {
1841
if ((res = mp_sqr(&g, &g)) != MP_OKAY) {
1842
mp_clear(&g);
1843
return res;
1844
}
1845
}
1846
1847
/* shift to next bit */
1848
b >>= 1;
1849
}
1850
} else {
1851
for (x = 0; x < (unsigned)DIGIT_BIT; x++) {
1852
/* square */
1853
if ((res = mp_sqr(c, c)) != MP_OKAY) {
1854
mp_clear(&g);
1855
return res;
1856
}
1857
1858
/* if the bit is set multiply */
1859
if ((b & ((mp_digit)1 << (DIGIT_BIT - 1))) != 0u) {
1860
if ((res = mp_mul(c, &g, c)) != MP_OKAY) {
1861
mp_clear(&g);
1862
return res;
1863
}
1864
}
1865
1866
/* shift to next bit */
1867
b <<= 1;
1868
}
1869
} /* if ... else */
1870
1871
mp_clear(&g);
1872
return MP_OKAY;
1873
}
1874
1875
/* End: bn_mp_expt_d_ex.c */
1876
1877
/* Start: bn_mp_exptmod.c */
1878
1879
/* this is a shell function that calls either the normal or Montgomery
1880
* exptmod functions. Originally the call to the montgomery code was
1881
* embedded in the normal function but that wasted alot of stack space
1882
* for nothing (since 99% of the time the Montgomery code would be called)
1883
*/
1884
int mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y)
1885
{
1886
int dr;
1887
1888
/* modulus P must be positive */
1889
if (P->sign == MP_NEG) {
1890
return MP_VAL;
1891
}
1892
1893
/* if exponent X is negative we have to recurse */
1894
if (X->sign == MP_NEG) {
1895
mp_int tmpG, tmpX;
1896
int err;
1897
1898
/* first compute 1/G mod P */
1899
if ((err = mp_init(&tmpG)) != MP_OKAY) {
1900
return err;
1901
}
1902
if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1903
mp_clear(&tmpG);
1904
return err;
1905
}
1906
1907
/* now get |X| */
1908
if ((err = mp_init(&tmpX)) != MP_OKAY) {
1909
mp_clear(&tmpG);
1910
return err;
1911
}
1912
if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1913
mp_clear_multi(&tmpG, &tmpX, NULL);
1914
return err;
1915
}
1916
1917
/* and now compute (1/G)**|X| instead of G**X [X < 0] */
1918
err = mp_exptmod(&tmpG, &tmpX, P, Y);
1919
mp_clear_multi(&tmpG, &tmpX, NULL);
1920
return err;
1921
}
1922
1923
/* modified diminished radix reduction */
1924
if (mp_reduce_is_2k_l(P) == MP_YES) {
1925
return s_mp_exptmod(G, X, P, Y, 1);
1926
}
1927
1928
/* is it a DR modulus? */
1929
dr = mp_dr_is_modulus(P);
1930
1931
/* if not, is it a unrestricted DR modulus? */
1932
if (dr == 0) {
1933
dr = mp_reduce_is_2k(P) << 1;
1934
}
1935
1936
/* if the modulus is odd or dr != 0 use the montgomery method */
1937
if ((mp_isodd(P) == MP_YES) || (dr != 0)) {
1938
return mp_exptmod_fast(G, X, P, Y, dr);
1939
} else {
1940
/* otherwise use the generic Barrett reduction technique */
1941
return s_mp_exptmod(G, X, P, Y, 0);
1942
}
1943
}
1944
1945
/* End: bn_mp_exptmod.c */
1946
1947
/* Start: bn_mp_exptmod_fast.c */
1948
1949
/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1950
*
1951
* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1952
* The value of k changes based on the size of the exponent.
1953
*
1954
* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1955
*/
1956
1957
#ifdef MP_LOW_MEM
1958
# define TAB_SIZE 32
1959
#else
1960
# define TAB_SIZE 256
1961
#endif
1962
1963
int mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)
1964
{
1965
mp_int M[TAB_SIZE], res;
1966
mp_digit buf, mp;
1967
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1968
1969
/* use a pointer to the reduction algorithm. This allows us to use
1970
* one of many reduction algorithms without modding the guts of
1971
* the code with if statements everywhere.
1972
*/
1973
int (*redux)(mp_int *x, const mp_int *n, mp_digit rho);
1974
1975
/* find window size */
1976
x = mp_count_bits(X);
1977
if (x <= 7) {
1978
winsize = 2;
1979
} else if (x <= 36) {
1980
winsize = 3;
1981
} else if (x <= 140) {
1982
winsize = 4;
1983
} else if (x <= 450) {
1984
winsize = 5;
1985
} else if (x <= 1303) {
1986
winsize = 6;
1987
} else if (x <= 3529) {
1988
winsize = 7;
1989
} else {
1990
winsize = 8;
1991
}
1992
1993
#ifdef MP_LOW_MEM
1994
if (winsize > 5) {
1995
winsize = 5;
1996
}
1997
#endif
1998
1999
/* init M array */
2000
/* init first cell */
2001
if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
2002
return err;
2003
}
2004
2005
/* now init the second half of the array */
2006
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2007
if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
2008
for (y = 1<<(winsize-1); y < x; y++) {
2009
mp_clear(&M[y]);
2010
}
2011
mp_clear(&M[1]);
2012
return err;
2013
}
2014
}
2015
2016
/* determine and setup reduction code */
2017
if (redmode == 0) {
2018
/* now setup montgomery */
2019
if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
2020
goto LBL_M;
2021
}
2022
2023
/* automatically pick the comba one if available (saves quite a few calls/ifs) */
2024
if ((((P->used * 2) + 1) < (int)MP_WARRAY) &&
2025
(P->used < (1 << ((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))))) {
2026
redux = fast_mp_montgomery_reduce;
2027
} else
2028
{
2029
/* use slower baseline Montgomery method */
2030
redux = mp_montgomery_reduce;
2031
}
2032
} else if (redmode == 1) {
2033
/* setup DR reduction for moduli of the form B**k - b */
2034
mp_dr_setup(P, &mp);
2035
redux = mp_dr_reduce;
2036
} else {
2037
/* setup DR reduction for moduli of the form 2**k - b */
2038
if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
2039
goto LBL_M;
2040
}
2041
redux = mp_reduce_2k;
2042
}
2043
2044
/* setup result */
2045
if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY) {
2046
goto LBL_M;
2047
}
2048
2049
/* create M table
2050
*
2051
2052
*
2053
* The first half of the table is not computed though accept for M[0] and M[1]
2054
*/
2055
2056
if (redmode == 0) {
2057
/* now we need R mod m */
2058
if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) {
2059
goto LBL_RES;
2060
}
2061
2062
/* now set M[1] to G * R mod m */
2063
if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY) {
2064
goto LBL_RES;
2065
}
2066
} else {
2067
mp_set(&res, 1uL);
2068
if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2069
goto LBL_RES;
2070
}
2071
}
2072
2073
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
2074
if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
2075
goto LBL_RES;
2076
}
2077
2078
for (x = 0; x < (winsize - 1); x++) {
2079
if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
2080
goto LBL_RES;
2081
}
2082
if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) {
2083
goto LBL_RES;
2084
}
2085
}
2086
2087
/* create upper table */
2088
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2089
if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2090
goto LBL_RES;
2091
}
2092
if ((err = redux(&M[x], P, mp)) != MP_OKAY) {
2093
goto LBL_RES;
2094
}
2095
}
2096
2097
/* set initial mode and bit cnt */
2098
mode = 0;
2099
bitcnt = 1;
2100
buf = 0;
2101
digidx = X->used - 1;
2102
bitcpy = 0;
2103
bitbuf = 0;
2104
2105
for (;;) {
2106
/* grab next digit as required */
2107
if (--bitcnt == 0) {
2108
/* if digidx == -1 we are out of digits so break */
2109
if (digidx == -1) {
2110
break;
2111
}
2112
/* read next digit and reset bitcnt */
2113
buf = X->dp[digidx--];
2114
bitcnt = (int)DIGIT_BIT;
2115
}
2116
2117
/* grab the next msb from the exponent */
2118
y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
2119
buf <<= (mp_digit)1;
2120
2121
/* if the bit is zero and mode == 0 then we ignore it
2122
* These represent the leading zero bits before the first 1 bit
2123
* in the exponent. Technically this opt is not required but it
2124
* does lower the # of trivial squaring/reductions used
2125
*/
2126
if ((mode == 0) && (y == 0)) {
2127
continue;
2128
}
2129
2130
/* if the bit is zero and mode == 1 then we square */
2131
if ((mode == 1) && (y == 0)) {
2132
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
2133
goto LBL_RES;
2134
}
2135
if ((err = redux(&res, P, mp)) != MP_OKAY) {
2136
goto LBL_RES;
2137
}
2138
continue;
2139
}
2140
2141
/* else we add it to the window */
2142
bitbuf |= (y << (winsize - ++bitcpy));
2143
mode = 2;
2144
2145
if (bitcpy == winsize) {
2146
/* ok window is filled so square as required and multiply */
2147
/* square first */
2148
for (x = 0; x < winsize; x++) {
2149
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
2150
goto LBL_RES;
2151
}
2152
if ((err = redux(&res, P, mp)) != MP_OKAY) {
2153
goto LBL_RES;
2154
}
2155
}
2156
2157
/* then multiply */
2158
if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {
2159
goto LBL_RES;
2160
}
2161
if ((err = redux(&res, P, mp)) != MP_OKAY) {
2162
goto LBL_RES;
2163
}
2164
2165
/* empty window and reset */
2166
bitcpy = 0;
2167
bitbuf = 0;
2168
mode = 1;
2169
}
2170
}
2171
2172
/* if bits remain then square/multiply */
2173
if ((mode == 2) && (bitcpy > 0)) {
2174
/* square then multiply if the bit is set */
2175
for (x = 0; x < bitcpy; x++) {
2176
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
2177
goto LBL_RES;
2178
}
2179
if ((err = redux(&res, P, mp)) != MP_OKAY) {
2180
goto LBL_RES;
2181
}
2182
2183
/* get next bit of the window */
2184
bitbuf <<= 1;
2185
if ((bitbuf & (1 << winsize)) != 0) {
2186
/* then multiply */
2187
if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
2188
goto LBL_RES;
2189
}
2190
if ((err = redux(&res, P, mp)) != MP_OKAY) {
2191
goto LBL_RES;
2192
}
2193
}
2194
}
2195
}
2196
2197
if (redmode == 0) {
2198
/* fixup result if Montgomery reduction is used
2199
* recall that any value in a Montgomery system is
2200
* actually multiplied by R mod n. So we have
2201
* to reduce one more time to cancel out the factor
2202
* of R.
2203
*/
2204
if ((err = redux(&res, P, mp)) != MP_OKAY) {
2205
goto LBL_RES;
2206
}
2207
}
2208
2209
/* swap res with Y */
2210
mp_exch(&res, Y);
2211
err = MP_OKAY;
2212
LBL_RES:
2213
mp_clear(&res);
2214
LBL_M:
2215
mp_clear(&M[1]);
2216
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2217
mp_clear(&M[x]);
2218
}
2219
return err;
2220
}
2221
2222
/* End: bn_mp_exptmod_fast.c */
2223
2224
/* Start: bn_mp_exteuclid.c */
2225
2226
/* Extended euclidean algorithm of (a, b) produces
2227
a*u1 + b*u2 = u3
2228
*/
2229
int mp_exteuclid(const mp_int *a, const mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3)
2230
{
2231
mp_int u1, u2, u3, v1, v2, v3, t1, t2, t3, q, tmp;
2232
int err;
2233
2234
if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) {
2235
return err;
2236
}
2237
2238
/* initialize, (u1,u2,u3) = (1,0,a) */
2239
mp_set(&u1, 1uL);
2240
if ((err = mp_copy(a, &u3)) != MP_OKAY) {
2241
goto LBL_ERR;
2242
}
2243
2244
/* initialize, (v1,v2,v3) = (0,1,b) */
2245
mp_set(&v2, 1uL);
2246
if ((err = mp_copy(b, &v3)) != MP_OKAY) {
2247
goto LBL_ERR;
2248
}
2249
2250
/* loop while v3 != 0 */
2251
while (mp_iszero(&v3) == MP_NO) {
2252
/* q = u3/v3 */
2253
if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) {
2254
goto LBL_ERR;
2255
}
2256
2257
/* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */
2258
if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) {
2259
goto LBL_ERR;
2260
}
2261
if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) {
2262
goto LBL_ERR;
2263
}
2264
if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) {
2265
goto LBL_ERR;
2266
}
2267
if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) {
2268
goto LBL_ERR;
2269
}
2270
if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) {
2271
goto LBL_ERR;
2272
}
2273
if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) {
2274
goto LBL_ERR;
2275
}
2276
2277
/* (u1,u2,u3) = (v1,v2,v3) */
2278
if ((err = mp_copy(&v1, &u1)) != MP_OKAY) {
2279
goto LBL_ERR;
2280
}
2281
if ((err = mp_copy(&v2, &u2)) != MP_OKAY) {
2282
goto LBL_ERR;
2283
}
2284
if ((err = mp_copy(&v3, &u3)) != MP_OKAY) {
2285
goto LBL_ERR;
2286
}
2287
2288
/* (v1,v2,v3) = (t1,t2,t3) */
2289
if ((err = mp_copy(&t1, &v1)) != MP_OKAY) {
2290
goto LBL_ERR;
2291
}
2292
if ((err = mp_copy(&t2, &v2)) != MP_OKAY) {
2293
goto LBL_ERR;
2294
}
2295
if ((err = mp_copy(&t3, &v3)) != MP_OKAY) {
2296
goto LBL_ERR;
2297
}
2298
}
2299
2300
/* make sure U3 >= 0 */
2301
if (u3.sign == MP_NEG) {
2302
if ((err = mp_neg(&u1, &u1)) != MP_OKAY) {
2303
goto LBL_ERR;
2304
}
2305
if ((err = mp_neg(&u2, &u2)) != MP_OKAY) {
2306
goto LBL_ERR;
2307
}
2308
if ((err = mp_neg(&u3, &u3)) != MP_OKAY) {
2309
goto LBL_ERR;
2310
}
2311
}
2312
2313
/* copy result out */
2314
if (U1 != NULL) {
2315
mp_exch(U1, &u1);
2316
}
2317
if (U2 != NULL) {
2318
mp_exch(U2, &u2);
2319
}
2320
if (U3 != NULL) {
2321
mp_exch(U3, &u3);
2322
}
2323
2324
err = MP_OKAY;
2325
LBL_ERR:
2326
mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL);
2327
return err;
2328
}
2329
2330
/* End: bn_mp_exteuclid.c */
2331
2332
/* Start: bn_mp_gcd.c */
2333
2334
/* Greatest Common Divisor using the binary method */
2335
int mp_gcd(const mp_int *a, const mp_int *b, mp_int *c)
2336
{
2337
mp_int u, v;
2338
int k, u_lsb, v_lsb, res;
2339
2340
/* either zero than gcd is the largest */
2341
if (mp_iszero(a) == MP_YES) {
2342
return mp_abs(b, c);
2343
}
2344
if (mp_iszero(b) == MP_YES) {
2345
return mp_abs(a, c);
2346
}
2347
2348
/* get copies of a and b we can modify */
2349
if ((res = mp_init_copy(&u, a)) != MP_OKAY) {
2350
return res;
2351
}
2352
2353
if ((res = mp_init_copy(&v, b)) != MP_OKAY) {
2354
goto LBL_U;
2355
}
2356
2357
/* must be positive for the remainder of the algorithm */
2358
u.sign = v.sign = MP_ZPOS;
2359
2360
/* B1. Find the common power of two for u and v */
2361
u_lsb = mp_cnt_lsb(&u);
2362
v_lsb = mp_cnt_lsb(&v);
2363
k = MIN(u_lsb, v_lsb);
2364
2365
if (k > 0) {
2366
/* divide the power of two out */
2367
if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
2368
goto LBL_V;
2369
}
2370
2371
if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
2372
goto LBL_V;
2373
}
2374
}
2375
2376
/* divide any remaining factors of two out */
2377
if (u_lsb != k) {
2378
if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
2379
goto LBL_V;
2380
}
2381
}
2382
2383
if (v_lsb != k) {
2384
if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
2385
goto LBL_V;
2386
}
2387
}
2388
2389
while (mp_iszero(&v) == MP_NO) {
2390
/* make sure v is the largest */
2391
if (mp_cmp_mag(&u, &v) == MP_GT) {
2392
/* swap u and v to make sure v is >= u */
2393
mp_exch(&u, &v);
2394
}
2395
2396
/* subtract smallest from largest */
2397
if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
2398
goto LBL_V;
2399
}
2400
2401
/* Divide out all factors of two */
2402
if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
2403
goto LBL_V;
2404
}
2405
}
2406
2407
/* multiply by 2**k which we divided out at the beginning */
2408
if ((res = mp_mul_2d(&u, k, c)) != MP_OKAY) {
2409
goto LBL_V;
2410
}
2411
c->sign = MP_ZPOS;
2412
res = MP_OKAY;
2413
LBL_V:
2414
mp_clear(&u);
2415
LBL_U:
2416
mp_clear(&v);
2417
return res;
2418
}
2419
2420
/* End: bn_mp_gcd.c */
2421
2422
/* Start: bn_mp_get_bit.c */
2423
2424
/* Checks the bit at position b and returns MP_YES
2425
if the bit is 1, MP_NO if it is 0 and MP_VAL
2426
in case of error */
2427
int mp_get_bit(const mp_int *a, int b)
2428
{
2429
int limb;
2430
mp_digit bit, isset;
2431
2432
if (b < 0) {
2433
return MP_VAL;
2434
}
2435
2436
limb = b / DIGIT_BIT;
2437
2438
/*
2439
* Zero is a special value with the member "used" set to zero.
2440
* Needs to be tested before the check for the upper boundary
2441
* otherwise (limb >= a->used) would be true for a = 0
2442
*/
2443
2444
if (mp_iszero(a) != MP_NO) {
2445
return MP_NO;
2446
}
2447
2448
if (limb >= a->used) {
2449
return MP_VAL;
2450
}
2451
2452
bit = (mp_digit)(1) << (b % DIGIT_BIT);
2453
2454
isset = a->dp[limb] & bit;
2455
return (isset != 0u) ? MP_YES : MP_NO;
2456
}
2457
2458
/* End: bn_mp_get_bit.c */
2459
2460
/* Start: bn_mp_get_double.c */
2461
2462
double mp_get_double(const mp_int *a)
2463
{
2464
int i;
2465
double d = 0.0, fac = 1.0;
2466
for (i = 0; i < DIGIT_BIT; ++i) {
2467
fac *= 2.0;
2468
}
2469
for (i = USED(a); i --> 0;) {
2470
d = (d * fac) + (double)DIGIT(a, i);
2471
}
2472
return (mp_isneg(a) != MP_NO) ? -d : d;
2473
}
2474
2475
/* End: bn_mp_get_double.c */
2476
2477
/* Start: bn_mp_get_int.c */
2478
2479
/* get the lower 32-bits of an mp_int */
2480
unsigned long mp_get_int(const mp_int *a)
2481
{
2482
int i;
2483
mp_min_u32 res;
2484
2485
if (a->used == 0) {
2486
return 0;
2487
}
2488
2489
/* get number of digits of the lsb we have to read */
2490
i = MIN(a->used, ((((int)sizeof(unsigned long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;
2491
2492
/* get most significant digit of result */
2493
res = DIGIT(a, i);
2494
2495
while (--i >= 0) {
2496
res = (res << DIGIT_BIT) | DIGIT(a, i);
2497
}
2498
2499
/* force result to 32-bits always so it is consistent on non 32-bit platforms */
2500
return res & 0xFFFFFFFFUL;
2501
}
2502
2503
/* End: bn_mp_get_int.c */
2504
2505
/* Start: bn_mp_get_long.c */
2506
2507
/* get the lower unsigned long of an mp_int, platform dependent */
2508
unsigned long mp_get_long(const mp_int *a)
2509
{
2510
int i;
2511
unsigned long res;
2512
2513
if (a->used == 0) {
2514
return 0;
2515
}
2516
2517
/* get number of digits of the lsb we have to read */
2518
i = MIN(a->used, ((((int)sizeof(unsigned long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;
2519
2520
/* get most significant digit of result */
2521
res = DIGIT(a, i);
2522
2523
#if (ULONG_MAX != 0xffffffffuL) || (DIGIT_BIT < 32)
2524
while (--i >= 0) {
2525
res = (res << DIGIT_BIT) | DIGIT(a, i);
2526
}
2527
#endif
2528
return res;
2529
}
2530
2531
/* End: bn_mp_get_long.c */
2532
2533
/* Start: bn_mp_get_long_long.c */
2534
2535
/* get the lower unsigned long long of an mp_int, platform dependent */
2536
unsigned long long mp_get_long_long(const mp_int *a)
2537
{
2538
int i;
2539
unsigned long long res;
2540
2541
if (a->used == 0) {
2542
return 0;
2543
}
2544
2545
/* get number of digits of the lsb we have to read */
2546
i = MIN(a->used, ((((int)sizeof(unsigned long long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;
2547
2548
/* get most significant digit of result */
2549
res = DIGIT(a, i);
2550
2551
#if DIGIT_BIT < 64
2552
while (--i >= 0) {
2553
res = (res << DIGIT_BIT) | DIGIT(a, i);
2554
}
2555
#endif
2556
return res;
2557
}
2558
2559
/* End: bn_mp_get_long_long.c */
2560
2561
/* Start: bn_mp_grow.c */
2562
2563
/* grow as required */
2564
int mp_grow(mp_int *a, int size)
2565
{
2566
int i;
2567
mp_digit *tmp;
2568
2569
/* if the alloc size is smaller alloc more ram */
2570
if (a->alloc < size) {
2571
/* ensure there are always at least MP_PREC digits extra on top */
2572
size += (MP_PREC * 2) - (size % MP_PREC);
2573
2574
/* reallocate the array a->dp
2575
*
2576
* We store the return in a temporary variable
2577
* in case the operation failed we don't want
2578
* to overwrite the dp member of a.
2579
*/
2580
tmp = OPT_CAST(mp_digit) XREALLOC(a->dp, sizeof(mp_digit) * (size_t)size);
2581
if (tmp == NULL) {
2582
/* reallocation failed but "a" is still valid [can be freed] */
2583
return MP_MEM;
2584
}
2585
2586
/* reallocation succeeded so set a->dp */
2587
a->dp = tmp;
2588
2589
/* zero excess digits */
2590
i = a->alloc;
2591
a->alloc = size;
2592
for (; i < a->alloc; i++) {
2593
a->dp[i] = 0;
2594
}
2595
}
2596
return MP_OKAY;
2597
}
2598
2599
/* End: bn_mp_grow.c */
2600
2601
/* Start: bn_mp_import.c */
2602
2603
/* based on gmp's mpz_import.
2604
* see http://gmplib.org/manual/Integer-Import-and-Export.html
2605
*/
2606
int mp_import(mp_int *rop, size_t count, int order, size_t size,
2607
int endian, size_t nails, const void *op)
2608
{
2609
int result;
2610
size_t odd_nails, nail_bytes, i, j;
2611
unsigned char odd_nail_mask;
2612
2613
mp_zero(rop);
2614
2615
if (endian == 0) {
2616
union {
2617
unsigned int i;
2618
char c[4];
2619
} lint;
2620
lint.i = 0x01020304;
2621
2622
endian = (lint.c[0] == '\x04') ? -1 : 1;
2623
}
2624
2625
odd_nails = (nails % 8u);
2626
odd_nail_mask = 0xff;
2627
for (i = 0; i < odd_nails; ++i) {
2628
odd_nail_mask ^= (unsigned char)(1u << (7u - i));
2629
}
2630
nail_bytes = nails / 8u;
2631
2632
for (i = 0; i < count; ++i) {
2633
for (j = 0; j < (size - nail_bytes); ++j) {
2634
unsigned char byte = *((unsigned char *)op +
2635
(((order == 1) ? i : ((count - 1u) - i)) * size) +
2636
((endian == 1) ? (j + nail_bytes) : (((size - 1u) - j) - nail_bytes)));
2637
2638
if ((result = mp_mul_2d(rop, (j == 0u) ? (int)(8u - odd_nails) : 8, rop)) != MP_OKAY) {
2639
return result;
2640
}
2641
2642
rop->dp[0] |= (j == 0u) ? (mp_digit)(byte & odd_nail_mask) : (mp_digit)byte;
2643
rop->used += 1;
2644
}
2645
}
2646
2647
mp_clamp(rop);
2648
2649
return MP_OKAY;
2650
}
2651
2652
/* End: bn_mp_import.c */
2653
2654
/* Start: bn_mp_init.c */
2655
2656
/* init a new mp_int */
2657
int mp_init(mp_int *a)
2658
{
2659
int i;
2660
2661
/* allocate memory required and clear it */
2662
a->dp = OPT_CAST(mp_digit) XMALLOC(sizeof(mp_digit) * (size_t)MP_PREC);
2663
if (a->dp == NULL) {
2664
return MP_MEM;
2665
}
2666
2667
/* set the digits to zero */
2668
for (i = 0; i < MP_PREC; i++) {
2669
a->dp[i] = 0;
2670
}
2671
2672
/* set the used to zero, allocated digits to the default precision
2673
* and sign to positive */
2674
a->used = 0;
2675
a->alloc = MP_PREC;
2676
a->sign = MP_ZPOS;
2677
2678
return MP_OKAY;
2679
}
2680
2681
/* End: bn_mp_init.c */
2682
2683
/* Start: bn_mp_init_copy.c */
2684
2685
/* creates "a" then copies b into it */
2686
int mp_init_copy(mp_int *a, const mp_int *b)
2687
{
2688
int res;
2689
2690
if ((res = mp_init_size(a, b->used)) != MP_OKAY) {
2691
return res;
2692
}
2693
2694
if ((res = mp_copy(b, a)) != MP_OKAY) {
2695
mp_clear(a);
2696
}
2697
2698
return res;
2699
}
2700
2701
/* End: bn_mp_init_copy.c */
2702
2703
/* Start: bn_mp_init_multi.c */
2704
2705
/* Initialise every mp_int in a NULL-terminated argument list.
 * If one mp_init call fails, the integers initialised before it are
 * cleared again and MP_MEM is returned; otherwise MP_OKAY. */
int mp_init_multi(mp_int *mp, ...)
{
   mp_err status = MP_OKAY;   /* stays MP_OKAY unless an init fails */
   int done = 0;              /* how many integers were initialised */
   mp_int *item;
   va_list ap;

   va_start(ap, mp);
   for (item = mp; item != NULL; item = va_arg(ap, mp_int *)) {
      if (mp_init(item) == MP_OKAY) {
         ++done;
         continue;
      }

      /* roll back: walk the list from the start again and clear
       * exactly the entries that were initialised successfully */
      {
         va_list undo;
         mp_int *victim = mp;

         va_start(undo, mp);
         for (; done > 0; --done) {
            mp_clear(victim);
            victim = va_arg(undo, mp_int *);
         }
         va_end(undo);
      }
      status = MP_MEM;
      break;
   }
   va_end(ap);

   return status;
}
2737
2738
/* End: bn_mp_init_multi.c */
2739
2740
/* Start: bn_mp_init_set.c */
2741
2742
/* initialize and set a digit */
2743
int mp_init_set(mp_int *a, mp_digit b)
2744
{
2745
int err;
2746
if ((err = mp_init(a)) != MP_OKAY) {
2747
return err;
2748
}
2749
mp_set(a, b);
2750
return err;
2751
}
2752
2753
/* End: bn_mp_init_set.c */
2754
2755
/* Start: bn_mp_init_set_int.c */
2756
2757
/* initialize and set a digit */
2758
int mp_init_set_int(mp_int *a, unsigned long b)
2759
{
2760
int err;
2761
if ((err = mp_init(a)) != MP_OKAY) {
2762
return err;
2763
}
2764
return mp_set_int(a, b);
2765
}
2766
2767
/* End: bn_mp_init_set_int.c */
2768
2769
/* Start: bn_mp_init_size.c */
2770
2771
/* init an mp_init for a given size */
2772
int mp_init_size(mp_int *a, int size)
2773
{
2774
int x;
2775
2776
/* pad size so there are always extra digits */
2777
size += (MP_PREC * 2) - (size % MP_PREC);
2778
2779
/* alloc mem */
2780
a->dp = OPT_CAST(mp_digit) XMALLOC(sizeof(mp_digit) * (size_t)size);
2781
if (a->dp == NULL) {
2782
return MP_MEM;
2783
}
2784
2785
/* set the members */
2786
a->used = 0;
2787
a->alloc = size;
2788
a->sign = MP_ZPOS;
2789
2790
/* zero the digits */
2791
for (x = 0; x < size; x++) {
2792
a->dp[x] = 0;
2793
}
2794
2795
return MP_OKAY;
2796
}
2797
2798
/* End: bn_mp_init_size.c */
2799
2800
/* Start: bn_mp_invmod.c */
2801
2802
/* hac 14.61, pp608 */
2803
int mp_invmod(const mp_int *a, const mp_int *b, mp_int *c)
2804
{
2805
/* b cannot be negative and has to be >1 */
2806
if ((b->sign == MP_NEG) || (mp_cmp_d(b, 1uL) != MP_GT)) {
2807
return MP_VAL;
2808
}
2809
2810
/* if the modulus is odd we can use a faster routine instead */
2811
if ((mp_isodd(b) == MP_YES)) {
2812
return fast_mp_invmod(a, b, c);
2813
}
2814
2815
return mp_invmod_slow(a, b, c);
2816
}
2817
2818
/* End: bn_mp_invmod.c */
2819
2820
/* Start: bn_mp_invmod_slow.c */
2821
2822
/* hac 14.61, pp608 */
2823
int mp_invmod_slow(const mp_int *a, const mp_int *b, mp_int *c)
2824
{
2825
mp_int x, y, u, v, A, B, C, D;
2826
int res;
2827
2828
/* b cannot be negative */
2829
if ((b->sign == MP_NEG) || (mp_iszero(b) == MP_YES)) {
2830
return MP_VAL;
2831
}
2832
2833
/* init temps */
2834
if ((res = mp_init_multi(&x, &y, &u, &v,
2835
&A, &B, &C, &D, NULL)) != MP_OKAY) {
2836
return res;
2837
}
2838
2839
/* x = a, y = b */
2840
if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
2841
goto LBL_ERR;
2842
}
2843
if ((res = mp_copy(b, &y)) != MP_OKAY) {
2844
goto LBL_ERR;
2845
}
2846
2847
/* 2. [modified] if x,y are both even then return an error! */
2848
if ((mp_iseven(&x) == MP_YES) && (mp_iseven(&y) == MP_YES)) {
2849
res = MP_VAL;
2850
goto LBL_ERR;
2851
}
2852
2853
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2854
if ((res = mp_copy(&x, &u)) != MP_OKAY) {
2855
goto LBL_ERR;
2856
}
2857
if ((res = mp_copy(&y, &v)) != MP_OKAY) {
2858
goto LBL_ERR;
2859
}
2860
mp_set(&A, 1uL);
2861
mp_set(&D, 1uL);
2862
2863
top:
2864
/* 4. while u is even do */
2865
while (mp_iseven(&u) == MP_YES) {
2866
/* 4.1 u = u/2 */
2867
if ((res = mp_div_2(&u, &u)) != MP_OKAY) {
2868
goto LBL_ERR;
2869
}
2870
/* 4.2 if A or B is odd then */
2871
if ((mp_isodd(&A) == MP_YES) || (mp_isodd(&B) == MP_YES)) {
2872
/* A = (A+y)/2, B = (B-x)/2 */
2873
if ((res = mp_add(&A, &y, &A)) != MP_OKAY) {
2874
goto LBL_ERR;
2875
}
2876
if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {
2877
goto LBL_ERR;
2878
}
2879
}
2880
/* A = A/2, B = B/2 */
2881
if ((res = mp_div_2(&A, &A)) != MP_OKAY) {
2882
goto LBL_ERR;
2883
}
2884
if ((res = mp_div_2(&B, &B)) != MP_OKAY) {
2885
goto LBL_ERR;
2886
}
2887
}
2888
2889
/* 5. while v is even do */
2890
while (mp_iseven(&v) == MP_YES) {
2891
/* 5.1 v = v/2 */
2892
if ((res = mp_div_2(&v, &v)) != MP_OKAY) {
2893
goto LBL_ERR;
2894
}
2895
/* 5.2 if C or D is odd then */
2896
if ((mp_isodd(&C) == MP_YES) || (mp_isodd(&D) == MP_YES)) {
2897
/* C = (C+y)/2, D = (D-x)/2 */
2898
if ((res = mp_add(&C, &y, &C)) != MP_OKAY) {
2899
goto LBL_ERR;
2900
}
2901
if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {
2902
goto LBL_ERR;
2903
}
2904
}
2905
/* C = C/2, D = D/2 */
2906
if ((res = mp_div_2(&C, &C)) != MP_OKAY) {
2907
goto LBL_ERR;
2908
}
2909
if ((res = mp_div_2(&D, &D)) != MP_OKAY) {
2910
goto LBL_ERR;
2911
}
2912
}
2913
2914
/* 6. if u >= v then */
2915
if (mp_cmp(&u, &v) != MP_LT) {
2916
/* u = u - v, A = A - C, B = B - D */
2917
if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {
2918
goto LBL_ERR;
2919
}
2920
2921
if ((res = mp_sub(&A, &C, &A)) != MP_OKAY) {
2922
goto LBL_ERR;
2923
}
2924
2925
if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {
2926
goto LBL_ERR;
2927
}
2928
} else {
2929
/* v - v - u, C = C - A, D = D - B */
2930
if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {
2931
goto LBL_ERR;
2932
}
2933
2934
if ((res = mp_sub(&C, &A, &C)) != MP_OKAY) {
2935
goto LBL_ERR;
2936
}
2937
2938
if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {
2939
goto LBL_ERR;
2940
}
2941
}
2942
2943
/* if not zero goto step 4 */
2944
if (mp_iszero(&u) == MP_NO)
2945
goto top;
2946
2947
/* now a = C, b = D, gcd == g*v */
2948
2949
/* if v != 1 then there is no inverse */
2950
if (mp_cmp_d(&v, 1uL) != MP_EQ) {
2951
res = MP_VAL;
2952
goto LBL_ERR;
2953
}
2954
2955
/* if its too low */
2956
while (mp_cmp_d(&C, 0uL) == MP_LT) {
2957
if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2958
goto LBL_ERR;
2959
}
2960
}
2961
2962
/* too big */
2963
while (mp_cmp_mag(&C, b) != MP_LT) {
2964
if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2965
goto LBL_ERR;
2966
}
2967
}
2968
2969
/* C is now the inverse */
2970
mp_exch(&C, c);
2971
res = MP_OKAY;
2972
LBL_ERR:
2973
mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2974
return res;
2975
}
2976
2977
/* End: bn_mp_invmod_slow.c */
2978
2979
/* Start: bn_mp_is_square.c */
2980
2981
/* Check if remainders are possible squares - fast exclude non-squares */
2982
static const char rem_128[128] = {
2983
0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2984
0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2985
1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2986
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2987
0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2988
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2989
1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2990
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1
2991
};
2992
2993
static const char rem_105[105] = {
2994
0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
2995
0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
2996
0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
2997
1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
2998
0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
2999
1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
3000
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1
3001
};
3002
3003
/* Store non-zero to ret if arg is square, and zero if not */
3004
int mp_is_square(const mp_int *arg, int *ret)
3005
{
3006
int res;
3007
mp_digit c;
3008
mp_int t;
3009
unsigned long r;
3010
3011
/* Default to Non-square :) */
3012
*ret = MP_NO;
3013
3014
if (arg->sign == MP_NEG) {
3015
return MP_VAL;
3016
}
3017
3018
/* digits used? (TSD) */
3019
if (arg->used == 0) {
3020
return MP_OKAY;
3021
}
3022
3023
/* First check mod 128 (suppose that DIGIT_BIT is at least 7) */
3024
if (rem_128[127u & DIGIT(arg, 0)] == (char)1) {
3025
return MP_OKAY;
3026
}
3027
3028
/* Next check mod 105 (3*5*7) */
3029
if ((res = mp_mod_d(arg, 105uL, &c)) != MP_OKAY) {
3030
return res;
3031
}
3032
if (rem_105[c] == (char)1) {
3033
return MP_OKAY;
3034
}
3035
3036
3037
if ((res = mp_init_set_int(&t, 11L*13L*17L*19L*23L*29L*31L)) != MP_OKAY) {
3038
return res;
3039
}
3040
if ((res = mp_mod(arg, &t, &t)) != MP_OKAY) {
3041
goto LBL_ERR;
3042
}
3043
r = mp_get_int(&t);
3044
/* Check for other prime modules, note it's not an ERROR but we must
3045
* free "t" so the easiest way is to goto LBL_ERR. We know that res
3046
* is already equal to MP_OKAY from the mp_mod call
3047
*/
3048
if (((1uL<<(r%11uL)) & 0x5C4uL) != 0uL) goto LBL_ERR;
3049
if (((1uL<<(r%13uL)) & 0x9E4uL) != 0uL) goto LBL_ERR;
3050
if (((1uL<<(r%17uL)) & 0x5CE8uL) != 0uL) goto LBL_ERR;
3051
if (((1uL<<(r%19uL)) & 0x4F50CuL) != 0uL) goto LBL_ERR;
3052
if (((1uL<<(r%23uL)) & 0x7ACCA0uL) != 0uL) goto LBL_ERR;
3053
if (((1uL<<(r%29uL)) & 0xC2EDD0CuL) != 0uL) goto LBL_ERR;
3054
if (((1uL<<(r%31uL)) & 0x6DE2B848uL) != 0uL) goto LBL_ERR;
3055
3056
/* Final check - is sqr(sqrt(arg)) == arg ? */
3057
if ((res = mp_sqrt(arg, &t)) != MP_OKAY) {
3058
goto LBL_ERR;
3059
}
3060
if ((res = mp_sqr(&t, &t)) != MP_OKAY) {
3061
goto LBL_ERR;
3062
}
3063
3064
*ret = (mp_cmp_mag(&t, arg) == MP_EQ) ? MP_YES : MP_NO;
3065
LBL_ERR:
3066
mp_clear(&t);
3067
return res;
3068
}
3069
3070
/* End: bn_mp_is_square.c */
3071
3072
/* Start: bn_mp_jacobi.c */
3073
3074
/* computes the jacobi c = (a | n) (or Legendre if n is prime)
3075
* Kept for legacy reasons, please use mp_kronecker() instead
3076
*/
3077
int mp_jacobi(const mp_int *a, const mp_int *n, int *c)
3078
{
3079
/* if a < 0 return MP_VAL */
3080
if (mp_isneg(a) == MP_YES) {
3081
return MP_VAL;
3082
}
3083
3084
/* if n <= 0 return MP_VAL */
3085
if (mp_cmp_d(n, 0uL) != MP_GT) {
3086
return MP_VAL;
3087
}
3088
3089
return mp_kronecker(a, n, c);
3090
}
3091
3092
/* End: bn_mp_jacobi.c */
3093
3094
/* Start: bn_mp_karatsuba_mul.c */
3095
3096
/* c = |a| * |b| using Karatsuba Multiplication using
3097
* three half size multiplications
3098
*
3099
* Let B represent the radix [e.g. 2**DIGIT_BIT] and
3100
* let n represent half of the number of digits in
3101
* the min(a,b)
3102
*
3103
* a = a1 * B**n + a0
3104
* b = b1 * B**n + b0
3105
*
3106
* Then, a * b =>
3107
a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
3108
*
3109
* Note that a1b1 and a0b0 are used twice and only need to be
3110
* computed once. So in total three half size (half # of
3111
* digit) multiplications are performed, a0b0, a1b1 and
3112
* (a1+b1)(a0+b0)
3113
*
3114
* Note that a multiplication of half the digits requires
3115
* 1/4th the number of single precision multiplications so in
3116
* total after one call 25% of the single precision multiplications
3117
* are saved. Note also that the call to mp_mul can end up back
3118
* in this function if the a0, a1, b0, or b1 are above the threshold.
3119
* This is known as divide-and-conquer and leads to the famous
3120
* O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
3121
* the standard O(N**2) that the baseline/comba methods use.
3122
* Generally though the overhead of this method doesn't pay off
3123
* until a certain size (N ~ 80) is reached.
3124
*/
3125
int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c)
3126
{
3127
mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
3128
int B, err;
3129
3130
/* default the return code to an error */
3131
err = MP_MEM;
3132
3133
/* min # of digits */
3134
B = MIN(a->used, b->used);
3135
3136
/* now divide in two */
3137
B = B >> 1;
3138
3139
/* init copy all the temps */
3140
if (mp_init_size(&x0, B) != MP_OKAY)
3141
goto LBL_ERR;
3142
if (mp_init_size(&x1, a->used - B) != MP_OKAY)
3143
goto X0;
3144
if (mp_init_size(&y0, B) != MP_OKAY)
3145
goto X1;
3146
if (mp_init_size(&y1, b->used - B) != MP_OKAY)
3147
goto Y0;
3148
3149
/* init temps */
3150
if (mp_init_size(&t1, B * 2) != MP_OKAY)
3151
goto Y1;
3152
if (mp_init_size(&x0y0, B * 2) != MP_OKAY)
3153
goto T1;
3154
if (mp_init_size(&x1y1, B * 2) != MP_OKAY)
3155
goto X0Y0;
3156
3157
/* now shift the digits */
3158
x0.used = y0.used = B;
3159
x1.used = a->used - B;
3160
y1.used = b->used - B;
3161
3162
{
3163
int x;
3164
mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
3165
3166
/* we copy the digits directly instead of using higher level functions
3167
* since we also need to shift the digits
3168
*/
3169
tmpa = a->dp;
3170
tmpb = b->dp;
3171
3172
tmpx = x0.dp;
3173
tmpy = y0.dp;
3174
for (x = 0; x < B; x++) {
3175
*tmpx++ = *tmpa++;
3176
*tmpy++ = *tmpb++;
3177
}
3178
3179
tmpx = x1.dp;
3180
for (x = B; x < a->used; x++) {
3181
*tmpx++ = *tmpa++;
3182
}
3183
3184
tmpy = y1.dp;
3185
for (x = B; x < b->used; x++) {
3186
*tmpy++ = *tmpb++;
3187
}
3188
}
3189
3190
/* only need to clamp the lower words since by definition the
3191
* upper words x1/y1 must have a known number of digits
3192
*/
3193
mp_clamp(&x0);
3194
mp_clamp(&y0);
3195
3196
/* now calc the products x0y0 and x1y1 */
3197
/* after this x0 is no longer required, free temp [x0==t2]! */
3198
if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY)
3199
goto X1Y1; /* x0y0 = x0*y0 */
3200
if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY)
3201
goto X1Y1; /* x1y1 = x1*y1 */
3202
3203
/* now calc x1+x0 and y1+y0 */
3204
if (s_mp_add(&x1, &x0, &t1) != MP_OKAY)
3205
goto X1Y1; /* t1 = x1 - x0 */
3206
if (s_mp_add(&y1, &y0, &x0) != MP_OKAY)
3207
goto X1Y1; /* t2 = y1 - y0 */
3208
if (mp_mul(&t1, &x0, &t1) != MP_OKAY)
3209
goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */
3210
3211
/* add x0y0 */
3212
if (mp_add(&x0y0, &x1y1, &x0) != MP_OKAY)
3213
goto X1Y1; /* t2 = x0y0 + x1y1 */
3214
if (s_mp_sub(&t1, &x0, &t1) != MP_OKAY)
3215
goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
3216
3217
/* shift by B */
3218
if (mp_lshd(&t1, B) != MP_OKAY)
3219
goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
3220
if (mp_lshd(&x1y1, B * 2) != MP_OKAY)
3221
goto X1Y1; /* x1y1 = x1y1 << 2*B */
3222
3223
if (mp_add(&x0y0, &t1, &t1) != MP_OKAY)
3224
goto X1Y1; /* t1 = x0y0 + t1 */
3225
if (mp_add(&t1, &x1y1, c) != MP_OKAY)
3226
goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
3227
3228
/* Algorithm succeeded set the return code to MP_OKAY */
3229
err = MP_OKAY;
3230
3231
X1Y1:
3232
mp_clear(&x1y1);
3233
X0Y0:
3234
mp_clear(&x0y0);
3235
T1:
3236
mp_clear(&t1);
3237
Y1:
3238
mp_clear(&y1);
3239
Y0:
3240
mp_clear(&y0);
3241
X1:
3242
mp_clear(&x1);
3243
X0:
3244
mp_clear(&x0);
3245
LBL_ERR:
3246
return err;
3247
}
3248
3249
/* End: bn_mp_karatsuba_mul.c */
3250
3251
/* Start: bn_mp_karatsuba_sqr.c */
3252
3253
/* Karatsuba squaring, computes b = a*a using three
3254
* half size squarings
3255
*
3256
* See comments of karatsuba_mul for details. It
3257
* is essentially the same algorithm but merely
3258
* tuned to perform recursive squarings.
3259
*/
3260
int mp_karatsuba_sqr(const mp_int *a, mp_int *b)
3261
{
3262
mp_int x0, x1, t1, t2, x0x0, x1x1;
3263
int B, err;
3264
3265
err = MP_MEM;
3266
3267
/* min # of digits */
3268
B = a->used;
3269
3270
/* now divide in two */
3271
B = B >> 1;
3272
3273
/* init copy all the temps */
3274
if (mp_init_size(&x0, B) != MP_OKAY)
3275
goto LBL_ERR;
3276
if (mp_init_size(&x1, a->used - B) != MP_OKAY)
3277
goto X0;
3278
3279
/* init temps */
3280
if (mp_init_size(&t1, a->used * 2) != MP_OKAY)
3281
goto X1;
3282
if (mp_init_size(&t2, a->used * 2) != MP_OKAY)
3283
goto T1;
3284
if (mp_init_size(&x0x0, B * 2) != MP_OKAY)
3285
goto T2;
3286
if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY)
3287
goto X0X0;
3288
3289
{
3290
int x;
3291
mp_digit *dst, *src;
3292
3293
src = a->dp;
3294
3295
/* now shift the digits */
3296
dst = x0.dp;
3297
for (x = 0; x < B; x++) {
3298
*dst++ = *src++;
3299
}
3300
3301
dst = x1.dp;
3302
for (x = B; x < a->used; x++) {
3303
*dst++ = *src++;
3304
}
3305
}
3306
3307
x0.used = B;
3308
x1.used = a->used - B;
3309
3310
mp_clamp(&x0);
3311
3312
/* now calc the products x0*x0 and x1*x1 */
3313
if (mp_sqr(&x0, &x0x0) != MP_OKAY)
3314
goto X1X1; /* x0x0 = x0*x0 */
3315
if (mp_sqr(&x1, &x1x1) != MP_OKAY)
3316
goto X1X1; /* x1x1 = x1*x1 */
3317
3318
/* now calc (x1+x0)**2 */
3319
if (s_mp_add(&x1, &x0, &t1) != MP_OKAY)
3320
goto X1X1; /* t1 = x1 - x0 */
3321
if (mp_sqr(&t1, &t1) != MP_OKAY)
3322
goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
3323
3324
/* add x0y0 */
3325
if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY)
3326
goto X1X1; /* t2 = x0x0 + x1x1 */
3327
if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY)
3328
goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */
3329
3330
/* shift by B */
3331
if (mp_lshd(&t1, B) != MP_OKAY)
3332
goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
3333
if (mp_lshd(&x1x1, B * 2) != MP_OKAY)
3334
goto X1X1; /* x1x1 = x1x1 << 2*B */
3335
3336
if (mp_add(&x0x0, &t1, &t1) != MP_OKAY)
3337
goto X1X1; /* t1 = x0x0 + t1 */
3338
if (mp_add(&t1, &x1x1, b) != MP_OKAY)
3339
goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
3340
3341
err = MP_OKAY;
3342
3343
X1X1:
3344
mp_clear(&x1x1);
3345
X0X0:
3346
mp_clear(&x0x0);
3347
T2:
3348
mp_clear(&t2);
3349
T1:
3350
mp_clear(&t1);
3351
X1:
3352
mp_clear(&x1);
3353
X0:
3354
mp_clear(&x0);
3355
LBL_ERR:
3356
return err;
3357
}
3358
3359
/* End: bn_mp_karatsuba_sqr.c */
3360
3361
/* Start: bn_mp_kronecker.c */
3362
3363
/*
3364
Kronecker symbol (a|p)
3365
Straightforward implementation of algorithm 1.4.10 in
3366
Henri Cohen: "A Course in Computational Algebraic Number Theory"
3367
3368
@book{cohen2013course,
3369
title={A course in computational algebraic number theory},
3370
author={Cohen, Henri},
3371
volume={138},
3372
year={2013},
3373
publisher={Springer Science \& Business Media}
3374
}
3375
*/
3376
int mp_kronecker(const mp_int *a, const mp_int *p, int *c)
3377
{
3378
mp_int a1, p1, r;
3379
3380
int e = MP_OKAY;
3381
int v, k;
3382
3383
static const int table[8] = {0, 1, 0, -1, 0, -1, 0, 1};
3384
3385
if (mp_iszero(p) != MP_NO) {
3386
if ((a->used == 1) && (a->dp[0] == 1u)) {
3387
*c = 1;
3388
return e;
3389
} else {
3390
*c = 0;
3391
return e;
3392
}
3393
}
3394
3395
if ((mp_iseven(a) != MP_NO) && (mp_iseven(p) != MP_NO)) {
3396
*c = 0;
3397
return e;
3398
}
3399
3400
if ((e = mp_init_copy(&a1, a)) != MP_OKAY) {
3401
return e;
3402
}
3403
if ((e = mp_init_copy(&p1, p)) != MP_OKAY) {
3404
goto LBL_KRON_0;
3405
}
3406
3407
v = mp_cnt_lsb(&p1);
3408
if ((e = mp_div_2d(&p1, v, &p1, NULL)) != MP_OKAY) {
3409
goto LBL_KRON_1;
3410
}
3411
3412
if ((v & 0x1) == 0) {
3413
k = 1;
3414
} else {
3415
k = table[a->dp[0] & 7u];
3416
}
3417
3418
if (p1.sign == MP_NEG) {
3419
p1.sign = MP_ZPOS;
3420
if (a1.sign == MP_NEG) {
3421
k = -k;
3422
}
3423
}
3424
3425
if ((e = mp_init(&r)) != MP_OKAY) {
3426
goto LBL_KRON_1;
3427
}
3428
3429
for (;;) {
3430
if (mp_iszero(&a1) != MP_NO) {
3431
if (mp_cmp_d(&p1, 1uL) == MP_EQ) {
3432
*c = k;
3433
goto LBL_KRON;
3434
} else {
3435
*c = 0;
3436
goto LBL_KRON;
3437
}
3438
}
3439
3440
v = mp_cnt_lsb(&a1);
3441
if ((e = mp_div_2d(&a1, v, &a1, NULL)) != MP_OKAY) {
3442
goto LBL_KRON;
3443
}
3444
3445
if ((v & 0x1) == 1) {
3446
k = k * table[p1.dp[0] & 7u];
3447
}
3448
3449
if (a1.sign == MP_NEG) {
3450
/*
3451
* Compute k = (-1)^((a1)*(p1-1)/4) * k
3452
* a1.dp[0] + 1 cannot overflow because the MSB
3453
* of the type mp_digit is not set by definition
3454
*/
3455
if (((a1.dp[0] + 1u) & p1.dp[0] & 2u) != 0u) {
3456
k = -k;
3457
}
3458
} else {
3459
/* compute k = (-1)^((a1-1)*(p1-1)/4) * k */
3460
if ((a1.dp[0] & p1.dp[0] & 2u) != 0u) {
3461
k = -k;
3462
}
3463
}
3464
3465
if ((e = mp_copy(&a1, &r)) != MP_OKAY) {
3466
goto LBL_KRON;
3467
}
3468
r.sign = MP_ZPOS;
3469
if ((e = mp_mod(&p1, &r, &a1)) != MP_OKAY) {
3470
goto LBL_KRON;
3471
}
3472
if ((e = mp_copy(&r, &p1)) != MP_OKAY) {
3473
goto LBL_KRON;
3474
}
3475
}
3476
3477
LBL_KRON:
3478
mp_clear(&r);
3479
LBL_KRON_1:
3480
mp_clear(&p1);
3481
LBL_KRON_0:
3482
mp_clear(&a1);
3483
3484
return e;
3485
}
3486
3487
/* End: bn_mp_kronecker.c */
3488
3489
/* Start: bn_mp_lcm.c */
3490
3491
/* computes least common multiple as |a*b|/(a, b) */
3492
int mp_lcm(const mp_int *a, const mp_int *b, mp_int *c)
3493
{
3494
int res;
3495
mp_int t1, t2;
3496
3497
3498
if ((res = mp_init_multi(&t1, &t2, NULL)) != MP_OKAY) {
3499
return res;
3500
}
3501
3502
/* t1 = get the GCD of the two inputs */
3503
if ((res = mp_gcd(a, b, &t1)) != MP_OKAY) {
3504
goto LBL_T;
3505
}
3506
3507
/* divide the smallest by the GCD */
3508
if (mp_cmp_mag(a, b) == MP_LT) {
3509
/* store quotient in t2 such that t2 * b is the LCM */
3510
if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
3511
goto LBL_T;
3512
}
3513
res = mp_mul(b, &t2, c);
3514
} else {
3515
/* store quotient in t2 such that t2 * a is the LCM */
3516
if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
3517
goto LBL_T;
3518
}
3519
res = mp_mul(a, &t2, c);
3520
}
3521
3522
/* fix the sign to positive */
3523
c->sign = MP_ZPOS;
3524
3525
LBL_T:
3526
mp_clear_multi(&t1, &t2, NULL);
3527
return res;
3528
}
3529
3530
/* End: bn_mp_lcm.c */
3531
3532
/* Start: bn_mp_lshd.c */
3533
3534
/* shift left a certain amount of digits */
3535
int mp_lshd(mp_int *a, int b)
3536
{
3537
int x, res;
3538
3539
/* if its less than zero return */
3540
if (b <= 0) {
3541
return MP_OKAY;
3542
}
3543
/* no need to shift 0 around */
3544
if (mp_iszero(a) == MP_YES) {
3545
return MP_OKAY;
3546
}
3547
3548
/* grow to fit the new digits */
3549
if (a->alloc < (a->used + b)) {
3550
if ((res = mp_grow(a, a->used + b)) != MP_OKAY) {
3551
return res;
3552
}
3553
}
3554
3555
{
3556
mp_digit *top, *bottom;
3557
3558
/* increment the used by the shift amount then copy upwards */
3559
a->used += b;
3560
3561
/* top */
3562
top = a->dp + a->used - 1;
3563
3564
/* base */
3565
bottom = (a->dp + a->used - 1) - b;
3566
3567
/* much like mp_rshd this is implemented using a sliding window
3568
* except the window goes the otherway around. Copying from
3569
* the bottom to the top. see bn_mp_rshd.c for more info.
3570
*/
3571
for (x = a->used - 1; x >= b; x--) {
3572
*top-- = *bottom--;
3573
}
3574
3575
/* zero the lower digits */
3576
top = a->dp;
3577
for (x = 0; x < b; x++) {
3578
*top++ = 0;
3579
}
3580
}
3581
return MP_OKAY;
3582
}
3583
3584
/* End: bn_mp_lshd.c */
3585
3586
/* Start: bn_mp_mod.c */
3587
3588
/* c = a mod b, 0 <= c < b if b > 0, b < c <= 0 if b < 0 */
3589
int mp_mod(const mp_int *a, const mp_int *b, mp_int *c)
3590
{
3591
mp_int t;
3592
int res;
3593
3594
if ((res = mp_init_size(&t, b->used)) != MP_OKAY) {
3595
return res;
3596
}
3597
3598
if ((res = mp_div(a, b, NULL, &t)) != MP_OKAY) {
3599
mp_clear(&t);
3600
return res;
3601
}
3602
3603
if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) {
3604
res = MP_OKAY;
3605
mp_exch(&t, c);
3606
} else {
3607
res = mp_add(b, &t, c);
3608
}
3609
3610
mp_clear(&t);
3611
return res;
3612
}
3613
3614
/* End: bn_mp_mod.c */
3615
3616
/* Start: bn_mp_mod_2d.c */
3617
3618
/* calc a value mod 2**b */
3619
int mp_mod_2d(const mp_int *a, int b, mp_int *c)
3620
{
3621
int x, res;
3622
3623
/* if b is <= 0 then zero the int */
3624
if (b <= 0) {
3625
mp_zero(c);
3626
return MP_OKAY;
3627
}
3628
3629
/* if the modulus is larger than the value than return */
3630
if (b >= (a->used * DIGIT_BIT)) {
3631
res = mp_copy(a, c);
3632
return res;
3633
}
3634
3635
/* copy */
3636
if ((res = mp_copy(a, c)) != MP_OKAY) {
3637
return res;
3638
}
3639
3640
/* zero digits above the last digit of the modulus */
3641
for (x = (b / DIGIT_BIT) + (((b % DIGIT_BIT) == 0) ? 0 : 1); x < c->used; x++) {
3642
c->dp[x] = 0;
3643
}
3644
/* clear the digit that is not completely outside/inside the modulus */
3645
c->dp[b / DIGIT_BIT] &=
3646
((mp_digit)1 << (mp_digit)(b % DIGIT_BIT)) - (mp_digit)1;
3647
mp_clamp(c);
3648
return MP_OKAY;
3649
}
3650
3651
/* End: bn_mp_mod_2d.c */
3652
3653
/* Start: bn_mp_mod_d.c */
3654
3655
/* c = a mod b for a single digit b: mp_div_d with the quotient
 * output left out (NULL).  Returns whatever mp_div_d returns. */
int mp_mod_d(const mp_int *a, mp_digit b, mp_digit *c)
{
   const int status = mp_div_d(a, b, NULL, c);

   return status;
}
3659
3660
/* End: bn_mp_mod_d.c */
3661
3662
/* Start: bn_mp_montgomery_calc_normalization.c */
3663
3664
/*
3665
* shifts with subtractions when the result is greater than b.
3666
*
3667
* The method is slightly modified to shift B unconditionally upto just under
3668
* the leading bit of b. This saves alot of multiple precision shifting.
3669
*/
3670
int mp_montgomery_calc_normalization(mp_int *a, const mp_int *b)
3671
{
3672
int x, bits, res;
3673
3674
/* how many bits of last digit does b use */
3675
bits = mp_count_bits(b) % DIGIT_BIT;
3676
3677
if (b->used > 1) {
3678
if ((res = mp_2expt(a, ((b->used - 1) * DIGIT_BIT) + bits - 1)) != MP_OKAY) {
3679
return res;
3680
}
3681
} else {
3682
mp_set(a, 1uL);
3683
bits = 1;
3684
}
3685
3686
3687
/* now compute C = A * B mod b */
3688
for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3689
if ((res = mp_mul_2(a, a)) != MP_OKAY) {
3690
return res;
3691
}
3692
if (mp_cmp_mag(a, b) != MP_LT) {
3693
if ((res = s_mp_sub(a, b, a)) != MP_OKAY) {
3694
return res;
3695
}
3696
}
3697
}
3698
3699
return MP_OKAY;
3700
}
3701
3702
/* End: bn_mp_montgomery_calc_normalization.c */
3703
3704
/* Start: bn_mp_montgomery_reduce.c */
3705
3706
/* computes xR**-1 == x (mod N) via Montgomery Reduction */
3707
int mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
3708
{
3709
int ix, res, digs;
3710
mp_digit mu;
3711
3712
/* can the fast reduction [comba] method be used?
3713
*
3714
* Note that unlike in mul you're safely allowed *less*
3715
* than the available columns [255 per default] since carries
3716
* are fixed up in the inner loop.
3717
*/
3718
digs = (n->used * 2) + 1;
3719
if ((digs < (int)MP_WARRAY) &&
3720
(x->used <= (int)MP_WARRAY) &&
3721
(n->used <
3722
(int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {
3723
return fast_mp_montgomery_reduce(x, n, rho);
3724
}
3725
3726
/* grow the input as required */
3727
if (x->alloc < digs) {
3728
if ((res = mp_grow(x, digs)) != MP_OKAY) {
3729
return res;
3730
}
3731
}
3732
x->used = digs;
3733
3734
for (ix = 0; ix < n->used; ix++) {
3735
/* mu = ai * rho mod b
3736
*
3737
* The value of rho must be precalculated via
3738
* montgomery_setup() such that
3739
* it equals -1/n0 mod b this allows the
3740
* following inner loop to reduce the
3741
* input one digit at a time
3742
*/
3743
mu = (mp_digit)(((mp_word)x->dp[ix] * (mp_word)rho) & MP_MASK);
3744
3745
/* a = a + mu * m * b**i */
3746
{
3747
int iy;
3748
mp_digit *tmpn, *tmpx, u;
3749
mp_word r;
3750
3751
/* alias for digits of the modulus */
3752
tmpn = n->dp;
3753
3754
/* alias for the digits of x [the input] */
3755
tmpx = x->dp + ix;
3756
3757
/* set the carry to zero */
3758
u = 0;
3759
3760
/* Multiply and add in place */
3761
for (iy = 0; iy < n->used; iy++) {
3762
/* compute product and sum */
3763
r = ((mp_word)mu * (mp_word)*tmpn++) +
3764
(mp_word)u + (mp_word)*tmpx;
3765
3766
/* get carry */
3767
u = (mp_digit)(r >> (mp_word)DIGIT_BIT);
3768
3769
/* fix digit */
3770
*tmpx++ = (mp_digit)(r & (mp_word)MP_MASK);
3771
}
3772
/* At this point the ix'th digit of x should be zero */
3773
3774
3775
/* propagate carries upwards as required*/
3776
while (u != 0u) {
3777
*tmpx += u;
3778
u = *tmpx >> DIGIT_BIT;
3779
*tmpx++ &= MP_MASK;
3780
}
3781
}
3782
}
3783
3784
/* at this point the n.used'th least
3785
* significant digits of x are all zero
3786
* which means we can shift x to the
3787
* right by n.used digits and the
3788
* residue is unchanged.
3789
*/
3790
3791
/* x = x/b**n.used */
3792
mp_clamp(x);
3793
mp_rshd(x, n->used);
3794
3795
/* if x >= n then x = x - n */
3796
if (mp_cmp_mag(x, n) != MP_LT) {
3797
return s_mp_sub(x, n, x);
3798
}
3799
3800
return MP_OKAY;
3801
}
3802
3803
/* End: bn_mp_montgomery_reduce.c */
3804
3805
/* Start: bn_mp_montgomery_setup.c */
3806
3807
/* setups the montgomery reduction stuff */
3808
int mp_montgomery_setup(const mp_int *n, mp_digit *rho)
3809
{
3810
mp_digit x, b;
3811
3812
/* fast inversion mod 2**k
3813
*
3814
* Based on the fact that
3815
*
3816
* XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
3817
* => 2*X*A - X*X*A*A = 1
3818
* => 2*(1) - (1) = 1
3819
*/
3820
b = n->dp[0];
3821
3822
if ((b & 1u) == 0u) {
3823
return MP_VAL;
3824
}
3825
3826
x = (((b + 2u) & 4u) << 1) + b; /* here x*a==1 mod 2**4 */
3827
x *= 2u - (b * x); /* here x*a==1 mod 2**8 */
3828
#if !defined(MP_8BIT)
3829
x *= 2u - (b * x); /* here x*a==1 mod 2**16 */
3830
#endif
3831
#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
3832
x *= 2u - (b * x); /* here x*a==1 mod 2**32 */
3833
#endif
3834
#ifdef MP_64BIT
3835
x *= 2u - (b * x); /* here x*a==1 mod 2**64 */
3836
#endif
3837
3838
/* rho = -1/m mod b */
3839
*rho = (mp_digit)(((mp_word)1 << (mp_word)DIGIT_BIT) - x) & MP_MASK;
3840
3841
return MP_OKAY;
3842
}
3843
3844
/* End: bn_mp_montgomery_setup.c */
3845
3846
/* Start: bn_mp_mul.c */
3847
3848
/* high level multiplication (handles sign) */
3849
int mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
3850
{
3851
int res, neg;
3852
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
3853
3854
/* use Toom-Cook? */
3855
if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) {
3856
res = mp_toom_mul(a, b, c);
3857
} else
3858
/* use Karatsuba? */
3859
if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
3860
res = mp_karatsuba_mul(a, b, c);
3861
} else
3862
{
3863
/* can we use the fast multiplier?
3864
*
3865
* The fast multiplier can be used if the output will
3866
* have less than MP_WARRAY digits and the number of
3867
* digits won't affect carry propagation
3868
*/
3869
int digs = a->used + b->used + 1;
3870
3871
if ((digs < (int)MP_WARRAY) &&
3872
(MIN(a->used, b->used) <=
3873
(int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {
3874
res = fast_s_mp_mul_digs(a, b, c, digs);
3875
} else
3876
{
3877
res = s_mp_mul(a, b, c); /* uses s_mp_mul_digs */
3878
}
3879
}
3880
c->sign = (c->used > 0) ? neg : MP_ZPOS;
3881
return res;
3882
}
3883
3884
/* End: bn_mp_mul.c */
3885
3886
/* Start: bn_mp_mul_2.c */
3887
3888
/* b = a*2 */
3889
int mp_mul_2(const mp_int *a, mp_int *b)
3890
{
3891
int x, res, oldused;
3892
3893
/* grow to accomodate result */
3894
if (b->alloc < (a->used + 1)) {
3895
if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) {
3896
return res;
3897
}
3898
}
3899
3900
oldused = b->used;
3901
b->used = a->used;
3902
3903
{
3904
mp_digit r, rr, *tmpa, *tmpb;
3905
3906
/* alias for source */
3907
tmpa = a->dp;
3908
3909
/* alias for dest */
3910
tmpb = b->dp;
3911
3912
/* carry */
3913
r = 0;
3914
for (x = 0; x < a->used; x++) {
3915
3916
/* get what will be the *next* carry bit from the
3917
* MSB of the current digit
3918
*/
3919
rr = *tmpa >> (mp_digit)(DIGIT_BIT - 1);
3920
3921
/* now shift up this digit, add in the carry [from the previous] */
3922
*tmpb++ = ((*tmpa++ << 1uL) | r) & MP_MASK;
3923
3924
/* copy the carry that would be from the source
3925
* digit into the next iteration
3926
*/
3927
r = rr;
3928
}
3929
3930
/* new leading digit? */
3931
if (r != 0u) {
3932
/* add a MSB which is always 1 at this point */
3933
*tmpb = 1;
3934
++(b->used);
3935
}
3936
3937
/* now zero any excess digits on the destination
3938
* that we didn't write to
3939
*/
3940
tmpb = b->dp + b->used;
3941
for (x = b->used; x < oldused; x++) {
3942
*tmpb++ = 0;
3943
}
3944
}
3945
b->sign = a->sign;
3946
return MP_OKAY;
3947
}
3948
3949
/* End: bn_mp_mul_2.c */
3950
3951
/* Start: bn_mp_mul_2d.c */
3952
3953
/* shift left by a certain bit count */
3954
int mp_mul_2d(const mp_int *a, int b, mp_int *c)
3955
{
3956
mp_digit d;
3957
int res;
3958
3959
/* copy */
3960
if (a != c) {
3961
if ((res = mp_copy(a, c)) != MP_OKAY) {
3962
return res;
3963
}
3964
}
3965
3966
if (c->alloc < (c->used + (b / DIGIT_BIT) + 1)) {
3967
if ((res = mp_grow(c, c->used + (b / DIGIT_BIT) + 1)) != MP_OKAY) {
3968
return res;
3969
}
3970
}
3971
3972
/* shift by as many digits in the bit count */
3973
if (b >= DIGIT_BIT) {
3974
if ((res = mp_lshd(c, b / DIGIT_BIT)) != MP_OKAY) {
3975
return res;
3976
}
3977
}
3978
3979
/* shift any bit count < DIGIT_BIT */
3980
d = (mp_digit)(b % DIGIT_BIT);
3981
if (d != 0u) {
3982
mp_digit *tmpc, shift, mask, r, rr;
3983
int x;
3984
3985
/* bitmask for carries */
3986
mask = ((mp_digit)1 << d) - (mp_digit)1;
3987
3988
/* shift for msbs */
3989
shift = (mp_digit)DIGIT_BIT - d;
3990
3991
/* alias */
3992
tmpc = c->dp;
3993
3994
/* carry */
3995
r = 0;
3996
for (x = 0; x < c->used; x++) {
3997
/* get the higher bits of the current word */
3998
rr = (*tmpc >> shift) & mask;
3999
4000
/* shift the current word and OR in the carry */
4001
*tmpc = ((*tmpc << d) | r) & MP_MASK;
4002
++tmpc;
4003
4004
/* set the carry to the carry bits of the current word */
4005
r = rr;
4006
}
4007
4008
/* set final carry */
4009
if (r != 0u) {
4010
c->dp[(c->used)++] = r;
4011
}
4012
}
4013
mp_clamp(c);
4014
return MP_OKAY;
4015
}
4016
4017
/* End: bn_mp_mul_2d.c */
4018
4019
/* Start: bn_mp_mul_d.c */
4020
4021
/* multiply by a digit */
4022
int mp_mul_d(const mp_int *a, mp_digit b, mp_int *c)
4023
{
4024
mp_digit u, *tmpa, *tmpc;
4025
mp_word r;
4026
int ix, res, olduse;
4027
4028
/* make sure c is big enough to hold a*b */
4029
if (c->alloc < (a->used + 1)) {
4030
if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4031
return res;
4032
}
4033
}
4034
4035
/* get the original destinations used count */
4036
olduse = c->used;
4037
4038
/* set the sign */
4039
c->sign = a->sign;
4040
4041
/* alias for a->dp [source] */
4042
tmpa = a->dp;
4043
4044
/* alias for c->dp [dest] */
4045
tmpc = c->dp;
4046
4047
/* zero carry */
4048
u = 0;
4049
4050
/* compute columns */
4051
for (ix = 0; ix < a->used; ix++) {
4052
/* compute product and carry sum for this term */
4053
r = (mp_word)u + ((mp_word)*tmpa++ * (mp_word)b);
4054
4055
/* mask off higher bits to get a single digit */
4056
*tmpc++ = (mp_digit)(r & (mp_word)MP_MASK);
4057
4058
/* send carry into next iteration */
4059
u = (mp_digit)(r >> (mp_word)DIGIT_BIT);
4060
}
4061
4062
/* store final carry [if any] and increment ix offset */
4063
*tmpc++ = u;
4064
++ix;
4065
4066
/* now zero digits above the top */
4067
while (ix++ < olduse) {
4068
*tmpc++ = 0;
4069
}
4070
4071
/* set used count */
4072
c->used = a->used + 1;
4073
mp_clamp(c);
4074
4075
return MP_OKAY;
4076
}
4077
4078
/* End: bn_mp_mul_d.c */
4079
4080
/* Start: bn_mp_mulmod.c */
4081
4082
/* d = a * b (mod c) */
4083
int mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)
4084
{
4085
int res;
4086
mp_int t;
4087
4088
if ((res = mp_init_size(&t, c->used)) != MP_OKAY) {
4089
return res;
4090
}
4091
4092
if ((res = mp_mul(a, b, &t)) != MP_OKAY) {
4093
mp_clear(&t);
4094
return res;
4095
}
4096
res = mp_mod(&t, c, d);
4097
mp_clear(&t);
4098
return res;
4099
}
4100
4101
/* End: bn_mp_mulmod.c */
4102
4103
/* Start: bn_mp_n_root.c */
4104
4105
/* wrapper function for mp_n_root_ex()
4106
* computes c = (a)**(1/b) such that (c)**b <= a and (c+1)**b > a
4107
*/
4108
int mp_n_root(const mp_int *a, mp_digit b, mp_int *c)
4109
{
4110
return mp_n_root_ex(a, b, c, 0);
4111
}
4112
4113
/* End: bn_mp_n_root.c */
4114
4115
/* Start: bn_mp_n_root_ex.c */
4116
4117
/* find the n'th root of an integer
4118
*
4119
* Result found such that (c)**b <= a and (c+1)**b > a
4120
*
4121
* This algorithm uses Newton's approximation
4122
* x[i+1] = x[i] - f(x[i])/f'(x[i])
4123
* which will find the root in log(N) time where
4124
* each step involves a fair bit. This is not meant to
4125
* find huge roots [square and cube, etc].
4126
*/
4127
int mp_n_root_ex(const mp_int *a, mp_digit b, mp_int *c, int fast)
4128
{
4129
mp_int t1, t2, t3, a_;
4130
int res;
4131
4132
/* input must be positive if b is even */
4133
if (((b & 1u) == 0u) && (a->sign == MP_NEG)) {
4134
return MP_VAL;
4135
}
4136
4137
if ((res = mp_init(&t1)) != MP_OKAY) {
4138
return res;
4139
}
4140
4141
if ((res = mp_init(&t2)) != MP_OKAY) {
4142
goto LBL_T1;
4143
}
4144
4145
if ((res = mp_init(&t3)) != MP_OKAY) {
4146
goto LBL_T2;
4147
}
4148
4149
/* if a is negative fudge the sign but keep track */
4150
a_ = *a;
4151
a_.sign = MP_ZPOS;
4152
4153
/* t2 = 2 */
4154
mp_set(&t2, 2uL);
4155
4156
do {
4157
/* t1 = t2 */
4158
if ((res = mp_copy(&t2, &t1)) != MP_OKAY) {
4159
goto LBL_T3;
4160
}
4161
4162
/* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
4163
4164
/* t3 = t1**(b-1) */
4165
if ((res = mp_expt_d_ex(&t1, b - 1u, &t3, fast)) != MP_OKAY) {
4166
goto LBL_T3;
4167
}
4168
4169
/* numerator */
4170
/* t2 = t1**b */
4171
if ((res = mp_mul(&t3, &t1, &t2)) != MP_OKAY) {
4172
goto LBL_T3;
4173
}
4174
4175
/* t2 = t1**b - a */
4176
if ((res = mp_sub(&t2, &a_, &t2)) != MP_OKAY) {
4177
goto LBL_T3;
4178
}
4179
4180
/* denominator */
4181
/* t3 = t1**(b-1) * b */
4182
if ((res = mp_mul_d(&t3, b, &t3)) != MP_OKAY) {
4183
goto LBL_T3;
4184
}
4185
4186
/* t3 = (t1**b - a)/(b * t1**(b-1)) */
4187
if ((res = mp_div(&t2, &t3, &t3, NULL)) != MP_OKAY) {
4188
goto LBL_T3;
4189
}
4190
4191
if ((res = mp_sub(&t1, &t3, &t2)) != MP_OKAY) {
4192
goto LBL_T3;
4193
}
4194
} while (mp_cmp(&t1, &t2) != MP_EQ);
4195
4196
/* result can be off by a few so check */
4197
for (;;) {
4198
if ((res = mp_expt_d_ex(&t1, b, &t2, fast)) != MP_OKAY) {
4199
goto LBL_T3;
4200
}
4201
4202
if (mp_cmp(&t2, &a_) == MP_GT) {
4203
if ((res = mp_sub_d(&t1, 1uL, &t1)) != MP_OKAY) {
4204
goto LBL_T3;
4205
}
4206
} else {
4207
break;
4208
}
4209
}
4210
4211
/* set the result */
4212
mp_exch(&t1, c);
4213
4214
/* set the sign of the result */
4215
c->sign = a->sign;
4216
4217
res = MP_OKAY;
4218
4219
LBL_T3:
4220
mp_clear(&t3);
4221
LBL_T2:
4222
mp_clear(&t2);
4223
LBL_T1:
4224
mp_clear(&t1);
4225
return res;
4226
}
4227
4228
/* End: bn_mp_n_root_ex.c */
4229
4230
/* Start: bn_mp_neg.c */
4231
4232
/* b = -a */
4233
int mp_neg(const mp_int *a, mp_int *b)
4234
{
4235
int res;
4236
if (a != b) {
4237
if ((res = mp_copy(a, b)) != MP_OKAY) {
4238
return res;
4239
}
4240
}
4241
4242
if (mp_iszero(b) != MP_YES) {
4243
b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
4244
} else {
4245
b->sign = MP_ZPOS;
4246
}
4247
4248
return MP_OKAY;
4249
}
4250
4251
/* End: bn_mp_neg.c */
4252
4253
/* Start: bn_mp_or.c */
4254
4255
/* OR two ints together */
4256
int mp_or(const mp_int *a, const mp_int *b, mp_int *c)
4257
{
4258
int res, ix, px;
4259
mp_int t;
4260
const mp_int *x;
4261
4262
if (a->used > b->used) {
4263
if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
4264
return res;
4265
}
4266
px = b->used;
4267
x = b;
4268
} else {
4269
if ((res = mp_init_copy(&t, b)) != MP_OKAY) {
4270
return res;
4271
}
4272
px = a->used;
4273
x = a;
4274
}
4275
4276
for (ix = 0; ix < px; ix++) {
4277
t.dp[ix] |= x->dp[ix];
4278
}
4279
mp_clamp(&t);
4280
mp_exch(c, &t);
4281
mp_clear(&t);
4282
return MP_OKAY;
4283
}
4284
4285
/* End: bn_mp_or.c */
4286
4287
/* Start: bn_mp_prime_fermat.c */
4288
4289
/* performs one Fermat test.
4290
*
4291
* If "a" were prime then b**a == b (mod a) since the order of
4292
* the multiplicative sub-group would be phi(a) = a-1. That means
4293
* it would be the same as b**(a mod (a-1)) == b**1 == b (mod a).
4294
*
4295
* Sets result to 1 if the congruence holds, or zero otherwise.
4296
*/
4297
int mp_prime_fermat(const mp_int *a, const mp_int *b, int *result)
4298
{
4299
mp_int t;
4300
int err;
4301
4302
/* default to composite */
4303
*result = MP_NO;
4304
4305
/* ensure b > 1 */
4306
if (mp_cmp_d(b, 1uL) != MP_GT) {
4307
return MP_VAL;
4308
}
4309
4310
/* init t */
4311
if ((err = mp_init(&t)) != MP_OKAY) {
4312
return err;
4313
}
4314
4315
/* compute t = b**a mod a */
4316
if ((err = mp_exptmod(b, a, a, &t)) != MP_OKAY) {
4317
goto LBL_T;
4318
}
4319
4320
/* is it equal to b? */
4321
if (mp_cmp(&t, b) == MP_EQ) {
4322
*result = MP_YES;
4323
}
4324
4325
err = MP_OKAY;
4326
LBL_T:
4327
mp_clear(&t);
4328
return err;
4329
}
4330
4331
/* End: bn_mp_prime_fermat.c */
4332
4333
/* Start: bn_mp_prime_frobenius_underwood.c */
4334
4335
/*
4336
* See file bn_mp_prime_is_prime.c or the documentation in doc/bn.tex for the details
4337
*/
4338
#ifndef LTM_USE_FIPS_ONLY
4339
4340
#ifdef MP_8BIT
4341
/*
4342
* floor of positive solution of
4343
* (2^16)-1 = (a+4)*(2*a+5)
4344
* TODO: Both values are smaller than N^(1/4), would have to use a bigint
4345
* for a instead but any a biger than about 120 are already so rare that
4346
* it is possible to ignore them and still get enough pseudoprimes.
4347
* But it is still a restriction of the set of available pseudoprimes
4348
* which makes this implementation less secure if used stand-alone.
4349
*/
4350
#define LTM_FROBENIUS_UNDERWOOD_A 177
4351
#else
4352
#define LTM_FROBENIUS_UNDERWOOD_A 32764
4353
#endif
4354
int mp_prime_frobenius_underwood(const mp_int *N, int *result)
4355
{
4356
mp_int T1z, T2z, Np1z, sz, tz;
4357
4358
int a, ap2, length, i, j, isset;
4359
int e;
4360
4361
*result = MP_NO;
4362
4363
if ((e = mp_init_multi(&T1z, &T2z, &Np1z, &sz, &tz, NULL)) != MP_OKAY) {
4364
return e;
4365
}
4366
4367
for (a = 0; a < LTM_FROBENIUS_UNDERWOOD_A; a++) {
4368
/* TODO: That's ugly! No, really, it is! */
4369
if ((a==2) || (a==4) || (a==7) || (a==8) || (a==10) ||
4370
(a==14) || (a==18) || (a==23) || (a==26) || (a==28)) {
4371
continue;
4372
}
4373
/* (32764^2 - 4) < 2^31, no bigint for >MP_8BIT needed) */
4374
if ((e = mp_set_long(&T1z, (unsigned long)a)) != MP_OKAY) {
4375
goto LBL_FU_ERR;
4376
}
4377
4378
if ((e = mp_sqr(&T1z, &T1z)) != MP_OKAY) {
4379
goto LBL_FU_ERR;
4380
}
4381
4382
if ((e = mp_sub_d(&T1z, 4uL, &T1z)) != MP_OKAY) {
4383
goto LBL_FU_ERR;
4384
}
4385
4386
if ((e = mp_kronecker(&T1z, N, &j)) != MP_OKAY) {
4387
goto LBL_FU_ERR;
4388
}
4389
4390
if (j == -1) {
4391
break;
4392
}
4393
4394
if (j == 0) {
4395
/* composite */
4396
goto LBL_FU_ERR;
4397
}
4398
}
4399
/* Tell it a composite and set return value accordingly */
4400
if (a >= LTM_FROBENIUS_UNDERWOOD_A) {
4401
e = MP_ITER;
4402
goto LBL_FU_ERR;
4403
}
4404
/* Composite if N and (a+4)*(2*a+5) are not coprime */
4405
if ((e = mp_set_long(&T1z, (unsigned long)((a+4)*((2*a)+5)))) != MP_OKAY) {
4406
goto LBL_FU_ERR;
4407
}
4408
4409
if ((e = mp_gcd(N, &T1z, &T1z)) != MP_OKAY) {
4410
goto LBL_FU_ERR;
4411
}
4412
4413
if (!((T1z.used == 1) && (T1z.dp[0] == 1u))) {
4414
goto LBL_FU_ERR;
4415
}
4416
4417
ap2 = a + 2;
4418
if ((e = mp_add_d(N, 1uL, &Np1z)) != MP_OKAY) {
4419
goto LBL_FU_ERR;
4420
}
4421
4422
mp_set(&sz, 1uL);
4423
mp_set(&tz, 2uL);
4424
length = mp_count_bits(&Np1z);
4425
4426
for (i = length - 2; i >= 0; i--) {
4427
/*
4428
* temp = (sz*(a*sz+2*tz))%N;
4429
* tz = ((tz-sz)*(tz+sz))%N;
4430
* sz = temp;
4431
*/
4432
if ((e = mp_mul_2(&tz, &T2z)) != MP_OKAY) {
4433
goto LBL_FU_ERR;
4434
}
4435
4436
/* a = 0 at about 50% of the cases (non-square and odd input) */
4437
if (a != 0) {
4438
if ((e = mp_mul_d(&sz, (mp_digit)a, &T1z)) != MP_OKAY) {
4439
goto LBL_FU_ERR;
4440
}
4441
if ((e = mp_add(&T1z, &T2z, &T2z)) != MP_OKAY) {
4442
goto LBL_FU_ERR;
4443
}
4444
}
4445
4446
if ((e = mp_mul(&T2z, &sz, &T1z)) != MP_OKAY) {
4447
goto LBL_FU_ERR;
4448
}
4449
if ((e = mp_sub(&tz, &sz, &T2z)) != MP_OKAY) {
4450
goto LBL_FU_ERR;
4451
}
4452
if ((e = mp_add(&sz, &tz, &sz)) != MP_OKAY) {
4453
goto LBL_FU_ERR;
4454
}
4455
if ((e = mp_mul(&sz, &T2z, &tz)) != MP_OKAY) {
4456
goto LBL_FU_ERR;
4457
}
4458
if ((e = mp_mod(&tz, N, &tz)) != MP_OKAY) {
4459
goto LBL_FU_ERR;
4460
}
4461
if ((e = mp_mod(&T1z, N, &sz)) != MP_OKAY) {
4462
goto LBL_FU_ERR;
4463
}
4464
if ((isset = mp_get_bit(&Np1z, i)) == MP_VAL) {
4465
e = isset;
4466
goto LBL_FU_ERR;
4467
}
4468
if (isset == MP_YES) {
4469
/*
4470
* temp = (a+2) * sz + tz
4471
* tz = 2 * tz - sz
4472
* sz = temp
4473
*/
4474
if (a == 0) {
4475
if ((e = mp_mul_2(&sz, &T1z)) != MP_OKAY) {
4476
goto LBL_FU_ERR;
4477
}
4478
} else {
4479
if ((e = mp_mul_d(&sz, (mp_digit)ap2, &T1z)) != MP_OKAY) {
4480
goto LBL_FU_ERR;
4481
}
4482
}
4483
if ((e = mp_add(&T1z, &tz, &T1z)) != MP_OKAY) {
4484
goto LBL_FU_ERR;
4485
}
4486
if ((e = mp_mul_2(&tz, &T2z)) != MP_OKAY) {
4487
goto LBL_FU_ERR;
4488
}
4489
if ((e = mp_sub(&T2z, &sz, &tz)) != MP_OKAY) {
4490
goto LBL_FU_ERR;
4491
}
4492
mp_exch(&sz, &T1z);
4493
}
4494
}
4495
4496
if ((e = mp_set_long(&T1z, (unsigned long)((2 * a) + 5))) != MP_OKAY) {
4497
goto LBL_FU_ERR;
4498
}
4499
if ((e = mp_mod(&T1z, N, &T1z)) != MP_OKAY) {
4500
goto LBL_FU_ERR;
4501
}
4502
if ((mp_iszero(&sz) != MP_NO) && (mp_cmp(&tz, &T1z) == MP_EQ)) {
4503
*result = MP_YES;
4504
goto LBL_FU_ERR;
4505
}
4506
4507
LBL_FU_ERR:
4508
mp_clear_multi(&tz, &sz, &Np1z, &T2z, &T1z, NULL);
4509
return e;
4510
}
4511
4512
#endif
4513
4514
/* End: bn_mp_prime_frobenius_underwood.c */
4515
4516
/* Start: bn_mp_prime_is_divisible.c */
4517
4518
/* determines if an integers is divisible by one
4519
* of the first PRIME_SIZE primes or not
4520
*
4521
* sets result to 0 if not, 1 if yes
4522
*/
4523
int mp_prime_is_divisible(const mp_int *a, int *result)
4524
{
4525
int err, ix;
4526
mp_digit res;
4527
4528
/* default to not */
4529
*result = MP_NO;
4530
4531
for (ix = 0; ix < PRIME_SIZE; ix++) {
4532
/* what is a mod LBL_prime_tab[ix] */
4533
if ((err = mp_mod_d(a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4534
return err;
4535
}
4536
4537
/* is the residue zero? */
4538
if (res == 0u) {
4539
*result = MP_YES;
4540
return MP_OKAY;
4541
}
4542
}
4543
4544
return MP_OKAY;
4545
}
4546
4547
/* End: bn_mp_prime_is_divisible.c */
4548
4549
/* Start: bn_mp_prime_is_prime.c */
4550
4551
/* portable integer log of two with small footprint
 *
 * Returns floor(log2(value)) for value >= 1 and 0 for value <= 0.
 *
 * The shifting is done on an unsigned copy: right-shifting a negative
 * int is implementation-defined and, with an arithmetic shift, would
 * never reach zero, so the loop would not terminate.
 */
static unsigned int s_floor_ilog2(int value)
{
   unsigned int r = 0u;
   unsigned int v;

   if (value <= 0) {
      return 0u;
   }

   v = (unsigned int)value;
   while ((v >>= 1) != 0u) {
      r++;
   }
   return r;
}
4560
4561
4562
int mp_prime_is_prime(const mp_int *a, int t, int *result)
4563
{
4564
mp_int b;
4565
int ix, err, res, p_max = 0, size_a, len;
4566
unsigned int fips_rand, mask;
4567
4568
/* default to no */
4569
*result = MP_NO;
4570
4571
/* valid value of t? */
4572
if (t > PRIME_SIZE) {
4573
return MP_VAL;
4574
}
4575
4576
/* Some shortcuts */
4577
/* N > 3 */
4578
if (a->used == 1) {
4579
if ((a->dp[0] == 0u) || (a->dp[0] == 1u)) {
4580
*result = 0;
4581
return MP_OKAY;
4582
}
4583
if (a->dp[0] == 2u) {
4584
*result = 1;
4585
return MP_OKAY;
4586
}
4587
}
4588
4589
/* N must be odd */
4590
if (mp_iseven(a) == MP_YES) {
4591
return MP_OKAY;
4592
}
4593
/* N is not a perfect square: floor(sqrt(N))^2 != N */
4594
if ((err = mp_is_square(a, &res)) != MP_OKAY) {
4595
return err;
4596
}
4597
if (res != 0) {
4598
return MP_OKAY;
4599
}
4600
4601
/* is the input equal to one of the primes in the table? */
4602
for (ix = 0; ix < PRIME_SIZE; ix++) {
4603
if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4604
*result = MP_YES;
4605
return MP_OKAY;
4606
}
4607
}
4608
#ifdef MP_8BIT
4609
/* The search in the loop above was exhaustive in this case */
4610
if ((a->used == 1) && (PRIME_SIZE >= 31)) {
4611
return MP_OKAY;
4612
}
4613
#endif
4614
4615
/* first perform trial division */
4616
if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) {
4617
return err;
4618
}
4619
4620
/* return if it was trivially divisible */
4621
if (res == MP_YES) {
4622
return MP_OKAY;
4623
}
4624
4625
/*
4626
Run the Miller-Rabin test with base 2 for the BPSW test.
4627
*/
4628
if ((err = mp_init_set(&b, 2uL)) != MP_OKAY) {
4629
return err;
4630
}
4631
4632
if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4633
goto LBL_B;
4634
}
4635
if (res == MP_NO) {
4636
goto LBL_B;
4637
}
4638
/*
4639
Rumours have it that Mathematica does a second M-R test with base 3.
4640
Other rumours have it that their strong L-S test is slightly different.
4641
It does not hurt, though, beside a bit of extra runtime.
4642
*/
4643
b.dp[0]++;
4644
if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4645
goto LBL_B;
4646
}
4647
if (res == MP_NO) {
4648
goto LBL_B;
4649
}
4650
4651
/*
4652
* Both, the Frobenius-Underwood test and the the Lucas-Selfridge test are quite
4653
* slow so if speed is an issue, define LTM_USE_FIPS_ONLY to use M-R tests with
4654
* bases 2, 3 and t random bases.
4655
*/
4656
#ifndef LTM_USE_FIPS_ONLY
4657
if (t >= 0) {
4658
/*
4659
* Use a Frobenius-Underwood test instead of the Lucas-Selfridge test for
4660
* MP_8BIT (It is unknown if the Lucas-Selfridge test works with 16-bit
4661
* integers but the necesssary analysis is on the todo-list).
4662
*/
4663
#if defined (MP_8BIT) || defined (LTM_USE_FROBENIUS_TEST)
4664
err = mp_prime_frobenius_underwood(a, &res);
4665
if ((err != MP_OKAY) && (err != MP_ITER)) {
4666
goto LBL_B;
4667
}
4668
if (res == MP_NO) {
4669
goto LBL_B;
4670
}
4671
#else
4672
if ((err = mp_prime_strong_lucas_selfridge(a, &res)) != MP_OKAY) {
4673
goto LBL_B;
4674
}
4675
if (res == MP_NO) {
4676
goto LBL_B;
4677
}
4678
#endif
4679
}
4680
#endif
4681
4682
/* run at least one Miller-Rabin test with a random base */
4683
if (t == 0) {
4684
t = 1;
4685
}
4686
4687
/*
4688
abs(t) extra rounds of M-R to extend the range of primes it can find if t < 0.
4689
Only recommended if the input range is known to be < 3317044064679887385961981
4690
4691
It uses the bases for a deterministic M-R test if input < 3317044064679887385961981
4692
The caller has to check the size.
4693
4694
Not for cryptographic use because with known bases strong M-R pseudoprimes can
4695
be constructed. Use at least one M-R test with a random base (t >= 1).
4696
4697
The 1119 bit large number
4698
4699
80383745745363949125707961434194210813883768828755814583748891752229742737653\
4700
33652186502336163960045457915042023603208766569966760987284043965408232928738\
4701
79185086916685732826776177102938969773947016708230428687109997439976544144845\
4702
34115587245063340927902227529622941498423068816854043264575340183297861112989\
4703
60644845216191652872597534901
4704
4705
has been constructed by F. Arnault (F. Arnault, "Rabin-Miller primality test:
4706
composite numbers which pass it.", Mathematics of Computation, 1995, 64. Jg.,
4707
Nr. 209, S. 355-361), is a semiprime with the two factors
4708
4709
40095821663949960541830645208454685300518816604113250877450620473800321707011\
4710
96242716223191597219733582163165085358166969145233813917169287527980445796800\
4711
452592031836601
4712
4713
20047910831974980270915322604227342650259408302056625438725310236900160853505\
4714
98121358111595798609866791081582542679083484572616906958584643763990222898400\
4715
226296015918301
4716
4717
and it is a strong pseudoprime to all forty-six prime M-R bases up to 200
4718
4719
It does not fail the strong Bailley-PSP test as implemented here, it is just
4720
given as an example, if not the reason to use the BPSW-test instead of M-R-tests
4721
with a sequence of primes 2...n.
4722
4723
*/
4724
if (t < 0) {
4725
t = -t;
4726
/*
4727
Sorenson, Jonathan; Webster, Jonathan (2015).
4728
"Strong Pseudoprimes to Twelve Prime Bases".
4729
*/
4730
/* 0x437ae92817f9fc85b7e5 = 318665857834031151167461 */
4731
if ((err = mp_read_radix(&b, "437ae92817f9fc85b7e5", 16)) != MP_OKAY) {
4732
goto LBL_B;
4733
}
4734
4735
if (mp_cmp(a, &b) == MP_LT) {
4736
p_max = 12;
4737
} else {
4738
/* 0x2be6951adc5b22410a5fd = 3317044064679887385961981 */
4739
if ((err = mp_read_radix(&b, "2be6951adc5b22410a5fd", 16)) != MP_OKAY) {
4740
goto LBL_B;
4741
}
4742
4743
if (mp_cmp(a, &b) == MP_LT) {
4744
p_max = 13;
4745
} else {
4746
err = MP_VAL;
4747
goto LBL_B;
4748
}
4749
}
4750
4751
/* for compatibility with the current API (well, compatible within a sign's width) */
4752
if (p_max < t) {
4753
p_max = t;
4754
}
4755
4756
if (p_max > PRIME_SIZE) {
4757
err = MP_VAL;
4758
goto LBL_B;
4759
}
4760
/* we did bases 2 and 3 already, skip them */
4761
for (ix = 2; ix < p_max; ix++) {
4762
mp_set(&b, ltm_prime_tab[ix]);
4763
if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4764
goto LBL_B;
4765
}
4766
if (res == MP_NO) {
4767
goto LBL_B;
4768
}
4769
}
4770
}
4771
/*
4772
Do "t" M-R tests with random bases between 3 and "a".
4773
See Fips 186.4 p. 126ff
4774
*/
4775
else if (t > 0) {
4776
/*
4777
* The mp_digit's have a defined bit-size but the size of the
4778
* array a.dp is a simple 'int' and this library can not assume full
4779
* compliance to the current C-standard (ISO/IEC 9899:2011) because
4780
* it gets used for small embeded processors, too. Some of those MCUs
4781
* have compilers that one cannot call standard compliant by any means.
4782
* Hence the ugly type-fiddling in the following code.
4783
*/
4784
size_a = mp_count_bits(a);
4785
mask = (1u << s_floor_ilog2(size_a)) - 1u;
4786
/*
4787
Assuming the General Rieman hypothesis (never thought to write that in a
4788
comment) the upper bound can be lowered to 2*(log a)^2.
4789
E. Bach, "Explicit bounds for primality testing and related problems,"
4790
Math. Comp. 55 (1990), 355-380.
4791
4792
size_a = (size_a/10) * 7;
4793
len = 2 * (size_a * size_a);
4794
4795
E.g.: a number of size 2^2048 would be reduced to the upper limit
4796
4797
floor(2048/10)*7 = 1428
4798
2 * 1428^2 = 4078368
4799
4800
(would have been ~4030331.9962 with floats and natural log instead)
4801
That number is smaller than 2^28, the default bit-size of mp_digit.
4802
*/
4803
4804
/*
4805
How many tests, you might ask? Dana Jacobsen of Math::Prime::Util fame
4806
does exactly 1. In words: one. Look at the end of _GMP_is_prime() in
4807
Math-Prime-Util-GMP-0.50/primality.c if you do not believe it.
4808
4809
The function mp_rand() goes to some length to use a cryptographically
4810
good PRNG. That also means that the chance to always get the same base
4811
in the loop is non-zero, although very low.
4812
If the BPSW test and/or the addtional Frobenious test have been
4813
performed instead of just the Miller-Rabin test with the bases 2 and 3,
4814
a single extra test should suffice, so such a very unlikely event
4815
will not do much harm.
4816
4817
To preemptivly answer the dangling question: no, a witness does not
4818
need to be prime.
4819
*/
4820
for (ix = 0; ix < t; ix++) {
4821
/* mp_rand() guarantees the first digit to be non-zero */
4822
if ((err = mp_rand(&b, 1)) != MP_OKAY) {
4823
goto LBL_B;
4824
}
4825
/*
4826
* Reduce digit before casting because mp_digit might be bigger than
4827
* an unsigned int and "mask" on the other side is most probably not.
4828
*/
4829
fips_rand = (unsigned int)(b.dp[0] & (mp_digit) mask);
4830
#ifdef MP_8BIT
4831
/*
4832
* One 8-bit digit is too small, so concatenate two if the size of
4833
* unsigned int allows for it.
4834
*/
4835
if (((sizeof(unsigned int) * CHAR_BIT)/2) >= (sizeof(mp_digit) * CHAR_BIT)) {
4836
if ((err = mp_rand(&b, 1)) != MP_OKAY) {
4837
goto LBL_B;
4838
}
4839
fips_rand <<= sizeof(mp_digit) * CHAR_BIT;
4840
fips_rand |= (unsigned int) b.dp[0];
4841
fips_rand &= mask;
4842
}
4843
#endif
4844
if (fips_rand > (unsigned int)(INT_MAX - DIGIT_BIT)) {
4845
len = INT_MAX / DIGIT_BIT;
4846
} else {
4847
len = (((int)fips_rand + DIGIT_BIT) / DIGIT_BIT);
4848
}
4849
/* Unlikely. */
4850
if (len < 0) {
4851
ix--;
4852
continue;
4853
}
4854
/*
4855
* As mentioned above, one 8-bit digit is too small and
4856
* although it can only happen in the unlikely case that
4857
* an "unsigned int" is smaller than 16 bit a simple test
4858
* is cheap and the correction even cheaper.
4859
*/
4860
#ifdef MP_8BIT
4861
/* All "a" < 2^8 have been caught before */
4862
if (len == 1) {
4863
len++;
4864
}
4865
#endif
4866
if ((err = mp_rand(&b, len)) != MP_OKAY) {
4867
goto LBL_B;
4868
}
4869
/*
4870
* That number might got too big and the witness has to be
4871
* smaller than or equal to "a"
4872
*/
4873
len = mp_count_bits(&b);
4874
if (len > size_a) {
4875
len = len - size_a;
4876
if ((err = mp_div_2d(&b, len, &b, NULL)) != MP_OKAY) {
4877
goto LBL_B;
4878
}
4879
}
4880
4881
/* Although the chance for b <= 3 is miniscule, try again. */
4882
if (mp_cmp_d(&b, 3uL) != MP_GT) {
4883
ix--;
4884
continue;
4885
}
4886
if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4887
goto LBL_B;
4888
}
4889
if (res == MP_NO) {
4890
goto LBL_B;
4891
}
4892
}
4893
}
4894
4895
/* passed the test */
4896
*result = MP_YES;
4897
LBL_B:
4898
mp_clear(&b);
4899
return err;
4900
}
4901
4902
/* End: bn_mp_prime_is_prime.c */
4903
4904
/* Start: bn_mp_prime_miller_rabin.c */
4905
4906
/* Miller-Rabin test of "a" to the base of "b" as described in
4907
* HAC pp. 139 Algorithm 4.24
4908
*
4909
* Sets result to 0 if definitely composite or 1 if probably prime.
4910
* Randomly the chance of error is no more than 1/4 and often
4911
* very much lower.
4912
*/
4913
int mp_prime_miller_rabin(const mp_int *a, const mp_int *b, int *result)
4914
{
4915
mp_int n1, y, r;
4916
int s, j, err;
4917
4918
/* default */
4919
*result = MP_NO;
4920
4921
/* ensure b > 1 */
4922
if (mp_cmp_d(b, 1uL) != MP_GT) {
4923
return MP_VAL;
4924
}
4925
4926
/* get n1 = a - 1 */
4927
if ((err = mp_init_copy(&n1, a)) != MP_OKAY) {
4928
return err;
4929
}
4930
if ((err = mp_sub_d(&n1, 1uL, &n1)) != MP_OKAY) {
4931
goto LBL_N1;
4932
}
4933
4934
/* set 2**s * r = n1 */
4935
if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) {
4936
goto LBL_N1;
4937
}
4938
4939
/* count the number of least significant bits
4940
* which are zero
4941
*/
4942
s = mp_cnt_lsb(&r);
4943
4944
/* now divide n - 1 by 2**s */
4945
if ((err = mp_div_2d(&r, s, &r, NULL)) != MP_OKAY) {
4946
goto LBL_R;
4947
}
4948
4949
/* compute y = b**r mod a */
4950
if ((err = mp_init(&y)) != MP_OKAY) {
4951
goto LBL_R;
4952
}
4953
if ((err = mp_exptmod(b, &r, a, &y)) != MP_OKAY) {
4954
goto LBL_Y;
4955
}
4956
4957
/* if y != 1 and y != n1 do */
4958
if ((mp_cmp_d(&y, 1uL) != MP_EQ) && (mp_cmp(&y, &n1) != MP_EQ)) {
4959
j = 1;
4960
/* while j <= s-1 and y != n1 */
4961
while ((j <= (s - 1)) && (mp_cmp(&y, &n1) != MP_EQ)) {
4962
if ((err = mp_sqrmod(&y, a, &y)) != MP_OKAY) {
4963
goto LBL_Y;
4964
}
4965
4966
/* if y == 1 then composite */
4967
if (mp_cmp_d(&y, 1uL) == MP_EQ) {
4968
goto LBL_Y;
4969
}
4970
4971
++j;
4972
}
4973
4974
/* if y != n1 then composite */
4975
if (mp_cmp(&y, &n1) != MP_EQ) {
4976
goto LBL_Y;
4977
}
4978
}
4979
4980
/* probably prime now */
4981
*result = MP_YES;
4982
LBL_Y:
4983
mp_clear(&y);
4984
LBL_R:
4985
mp_clear(&r);
4986
LBL_N1:
4987
mp_clear(&n1);
4988
return err;
4989
}
4990
4991
/* End: bn_mp_prime_miller_rabin.c */
4992
4993
/* Start: bn_mp_prime_next_prime.c */
4994
4995
/* finds the next prime after the number "a" using "t" trials
4996
* of Miller-Rabin.
4997
*
4998
* bbs_style = 1 means the prime must be congruent to 3 mod 4
4999
*/
5000
int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
5001
{
5002
int err, res = MP_NO, x, y;
5003
mp_digit res_tab[PRIME_SIZE], step, kstep;
5004
mp_int b;
5005
5006
/* force positive */
5007
a->sign = MP_ZPOS;
5008
5009
/* simple algo if a is less than the largest prime in the table */
5010
if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {
5011
/* find which prime it is bigger than */
5012
for (x = PRIME_SIZE - 2; x >= 0; x--) {
5013
if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {
5014
if (bbs_style == 1) {
5015
/* ok we found a prime smaller or
5016
* equal [so the next is larger]
5017
*
5018
* however, the prime must be
5019
* congruent to 3 mod 4
5020
*/
5021
if ((ltm_prime_tab[x + 1] & 3u) != 3u) {
5022
/* scan upwards for a prime congruent to 3 mod 4 */
5023
for (y = x + 1; y < PRIME_SIZE; y++) {
5024
if ((ltm_prime_tab[y] & 3u) == 3u) {
5025
mp_set(a, ltm_prime_tab[y]);
5026
return MP_OKAY;
5027
}
5028
}
5029
}
5030
} else {
5031
mp_set(a, ltm_prime_tab[x + 1]);
5032
return MP_OKAY;
5033
}
5034
}
5035
}
5036
/* at this point a maybe 1 */
5037
if (mp_cmp_d(a, 1uL) == MP_EQ) {
5038
mp_set(a, 2uL);
5039
return MP_OKAY;
5040
}
5041
/* fall through to the sieve */
5042
}
5043
5044
/* generate a prime congruent to 3 mod 4 or 1/3 mod 4? */
5045
if (bbs_style == 1) {
5046
kstep = 4;
5047
} else {
5048
kstep = 2;
5049
}
5050
5051
/* at this point we will use a combination of a sieve and Miller-Rabin */
5052
5053
if (bbs_style == 1) {
5054
/* if a mod 4 != 3 subtract the correct value to make it so */
5055
if ((a->dp[0] & 3u) != 3u) {
5056
if ((err = mp_sub_d(a, (a->dp[0] & 3u) + 1u, a)) != MP_OKAY) {
5057
return err;
5058
};
5059
}
5060
} else {
5061
if (mp_iseven(a) == MP_YES) {
5062
/* force odd */
5063
if ((err = mp_sub_d(a, 1uL, a)) != MP_OKAY) {
5064
return err;
5065
}
5066
}
5067
}
5068
5069
/* generate the restable */
5070
for (x = 1; x < PRIME_SIZE; x++) {
5071
if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {
5072
return err;
5073
}
5074
}
5075
5076
/* init temp used for Miller-Rabin Testing */
5077
if ((err = mp_init(&b)) != MP_OKAY) {
5078
return err;
5079
}
5080
5081
for (;;) {
5082
/* skip to the next non-trivially divisible candidate */
5083
step = 0;
5084
do {
5085
/* y == 1 if any residue was zero [e.g. cannot be prime] */
5086
y = 0;
5087
5088
/* increase step to next candidate */
5089
step += kstep;
5090
5091
/* compute the new residue without using division */
5092
for (x = 1; x < PRIME_SIZE; x++) {
5093
/* add the step to each residue */
5094
res_tab[x] += kstep;
5095
5096
/* subtract the modulus [instead of using division] */
5097
if (res_tab[x] >= ltm_prime_tab[x]) {
5098
res_tab[x] -= ltm_prime_tab[x];
5099
}
5100
5101
/* set flag if zero */
5102
if (res_tab[x] == 0u) {
5103
y = 1;
5104
}
5105
}
5106
} while ((y == 1) && (step < (((mp_digit)1 << DIGIT_BIT) - kstep)));
5107
5108
/* add the step */
5109
if ((err = mp_add_d(a, step, a)) != MP_OKAY) {
5110
goto LBL_ERR;
5111
}
5112
5113
/* if didn't pass sieve and step == MAX then skip test */
5114
if ((y == 1) && (step >= (((mp_digit)1 << DIGIT_BIT) - kstep))) {
5115
continue;
5116
}
5117
5118
if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {
5119
goto LBL_ERR;
5120
}
5121
if (res == MP_YES) {
5122
break;
5123
}
5124
}
5125
5126
err = MP_OKAY;
5127
LBL_ERR:
5128
mp_clear(&b);
5129
return err;
5130
}
5131
5132
/* End: bn_mp_prime_next_prime.c */
5133
5134
/* Start: bn_mp_prime_rabin_miller_trials.c */
5135
5136
/* Bit size "k" and the Miller-Rabin trial count "t" used from that
 * size upwards, ordered by ascending "k".
 */
static const struct {
   int k, t;
} sizes[] = {
   {    80,    -1 }, /* Use deterministic algorithm for size <= 80 bits */
   {    81,    39 },
   {    96,    37 },
   {   128,    32 },
   {   160,    27 },
   {   192,    21 },
   {   256,    16 },
   {   384,    10 },
   {   512,     7 },
   {   640,     6 },
   {   768,     5 },
   {   896,     4 },
   {  1024,     4 },
   {  2048,     2 },
   {  4096,     1 },
};

/* returns # of RM trials required for a given bit size and max. error of 2^(-96)
 *
 * An exact match of "size" returns that row's count.  A size between
 * two rows returns the count of the row below it, a size below the
 * first row returns the first row's count, and a size above the last
 * row returns the last row's count plus one.
 */
int mp_prime_rabin_miller_trials(int size)
{
   const int rows = (int)(sizeof(sizes)/(sizeof(sizes[0])));
   int row = 0;

   /* find the first row that is not below the requested size */
   while ((row < rows) && (sizes[row].k < size)) {
      row++;
   }

   /* larger than everything in the table */
   if (row == rows) {
      return sizes[rows - 1].t + 1;
   }

   /* exact hit */
   if (sizes[row].k == size) {
      return sizes[row].t;
   }

   /* in between two rows: use the smaller one, if there is one */
   return (row == 0) ? sizes[0].t : sizes[row - 1].t;
}
5170
5171
/* End: bn_mp_prime_rabin_miller_trials.c */
5172
5173
/* Start: bn_mp_prime_random_ex.c */
5174
5175
/* makes a truly random prime of a given size (bits),
5176
*
5177
* Flags are as follows:
5178
*
5179
* LTM_PRIME_BBS - make prime congruent to 3 mod 4
5180
* LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
5181
* LTM_PRIME_2MSB_ON - make the 2nd highest bit one
5182
*
5183
* You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
5184
* have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
5185
* so it can be NULL
5186
*
5187
*/
5188
5189
/* This is possibly the mother of all prime generation functions, muahahahahaha! */
5190
int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
5191
{
5192
unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
5193
int res, err, bsize, maskOR_msb_offset;
5194
5195
/* sanity check the input */
5196
if ((size <= 1) || (t <= 0)) {
5197
return MP_VAL;
5198
}
5199
5200
/* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
5201
if ((flags & LTM_PRIME_SAFE) != 0) {
5202
flags |= LTM_PRIME_BBS;
5203
}
5204
5205
/* calc the byte size */
5206
bsize = (size>>3) + ((size&7)?1:0);
5207
5208
/* we need a buffer of bsize bytes */
5209
tmp = OPT_CAST(unsigned char) XMALLOC((size_t)bsize);
5210
if (tmp == NULL) {
5211
return MP_MEM;
5212
}
5213
5214
/* calc the maskAND value for the MSbyte*/
5215
maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
5216
5217
/* calc the maskOR_msb */
5218
maskOR_msb = 0;
5219
maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
5220
if ((flags & LTM_PRIME_2MSB_ON) != 0) {
5221
maskOR_msb |= 0x80 >> ((9 - size) & 7);
5222
}
5223
5224
/* get the maskOR_lsb */
5225
maskOR_lsb = 1;
5226
if ((flags & LTM_PRIME_BBS) != 0) {
5227
maskOR_lsb |= 3;
5228
}
5229
5230
do {
5231
/* read the bytes */
5232
if (cb(tmp, bsize, dat) != bsize) {
5233
err = MP_VAL;
5234
goto error;
5235
}
5236
5237
/* work over the MSbyte */
5238
tmp[0] &= maskAND;
5239
tmp[0] |= 1 << ((size - 1) & 7);
5240
5241
/* mix in the maskORs */
5242
tmp[maskOR_msb_offset] |= maskOR_msb;
5243
tmp[bsize-1] |= maskOR_lsb;
5244
5245
/* read it in */
5246
if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) {
5247
goto error;
5248
}
5249
5250
/* is it prime? */
5251
if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {
5252
goto error;
5253
}
5254
if (res == MP_NO) {
5255
continue;
5256
}
5257
5258
if ((flags & LTM_PRIME_SAFE) != 0) {
5259
/* see if (a-1)/2 is prime */
5260
if ((err = mp_sub_d(a, 1uL, a)) != MP_OKAY) {
5261
goto error;
5262
}
5263
if ((err = mp_div_2(a, a)) != MP_OKAY) {
5264
goto error;
5265
}
5266
5267
/* is it prime? */
5268
if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {
5269
goto error;
5270
}
5271
}
5272
} while (res == MP_NO);
5273
5274
if ((flags & LTM_PRIME_SAFE) != 0) {
5275
/* restore a to the original value */
5276
if ((err = mp_mul_2(a, a)) != MP_OKAY) {
5277
goto error;
5278
}
5279
if ((err = mp_add_d(a, 1uL, a)) != MP_OKAY) {
5280
goto error;
5281
}
5282
}
5283
5284
err = MP_OKAY;
5285
error:
5286
XFREE(tmp);
5287
return err;
5288
}
5289
5290
/* End: bn_mp_prime_random_ex.c */
5291
5292
/* Start: bn_mp_prime_strong_lucas_selfridge.c */
5293
5294
/*
5295
* See file bn_mp_prime_is_prime.c or the documentation in doc/bn.tex for the details
5296
*/
5297
#ifndef LTM_USE_FIPS_ONLY
5298
5299
/*
5300
* 8-bit is just too small. You can try the Frobenius test
5301
* but that frobenius test can fail, too, for the same reason.
5302
*/
5303
#ifndef MP_8BIT
5304
5305
/*
5306
* multiply bigint a with int d and put the result in c
5307
* Like mp_mul_d() but with a signed long as the small input
5308
*/
5309
static int s_mp_mul_si(const mp_int *a, long d, mp_int *c)
5310
{
5311
mp_int t;
5312
int err, neg = 0;
5313
5314
if ((err = mp_init(&t)) != MP_OKAY) {
5315
return err;
5316
}
5317
if (d < 0) {
5318
neg = 1;
5319
d = -d;
5320
}
5321
5322
/*
5323
* mp_digit might be smaller than a long, which excludes
5324
* the use of mp_mul_d() here.
5325
*/
5326
if ((err = mp_set_long(&t, (unsigned long) d)) != MP_OKAY) {
5327
goto LBL_MPMULSI_ERR;
5328
}
5329
if ((err = mp_mul(a, &t, c)) != MP_OKAY) {
5330
goto LBL_MPMULSI_ERR;
5331
}
5332
if (neg == 1) {
5333
c->sign = (a->sign == MP_NEG) ? MP_ZPOS: MP_NEG;
5334
}
5335
LBL_MPMULSI_ERR:
5336
mp_clear(&t);
5337
return err;
5338
}
5339
/*
5340
Strong Lucas-Selfridge test.
5341
returns MP_YES if it is a strong L-S prime, MP_NO if it is composite
5342
5343
Code ported from Thomas Ray Nicely's implementation of the BPSW test
5344
at http://www.trnicely.net/misc/bpsw.html
5345
5346
Freeware copyright (C) 2016 Thomas R. Nicely <http://www.trnicely.net>.
5347
Released into the public domain by the author, who disclaims any legal
5348
liability arising from its use
5349
5350
The multi-line comments are made by Thomas R. Nicely and are copied verbatim.
5351
Additional comments marked "CZ" (without the quotes) are by the code-portist.
5352
5353
(If that name sounds familiar, he is the guy who found the fdiv bug in the
5354
Pentium (P5x, I think) Intel processor)
5355
*/
5356
int mp_prime_strong_lucas_selfridge(const mp_int *a, int *result)
5357
{
5358
/* CZ TODO: choose better variable names! */
5359
mp_int Dz, gcd, Np1, Uz, Vz, U2mz, V2mz, Qmz, Q2mz, Qkdz, T1z, T2z, T3z, T4z, Q2kdz;
5360
/* CZ TODO: Some of them need the full 32 bit, hence the (temporary) exclusion of MP_8BIT */
5361
int32_t D, Ds, J, sign, P, Q, r, s, u, Nbits;
5362
int e;
5363
int isset, oddness;
5364
5365
*result = MP_NO;
5366
/*
5367
Find the first element D in the sequence {5, -7, 9, -11, 13, ...}
5368
such that Jacobi(D,N) = -1 (Selfridge's algorithm). Theory
5369
indicates that, if N is not a perfect square, D will "nearly
5370
always" be "small." Just in case, an overflow trap for D is
5371
included.
5372
*/
5373
5374
if ((e = mp_init_multi(&Dz, &gcd, &Np1, &Uz, &Vz, &U2mz, &V2mz, &Qmz, &Q2mz, &Qkdz, &T1z, &T2z, &T3z, &T4z, &Q2kdz,
5375
NULL)) != MP_OKAY) {
5376
return e;
5377
}
5378
5379
D = 5;
5380
sign = 1;
5381
5382
for (;;) {
5383
Ds = sign * D;
5384
sign = -sign;
5385
if ((e = mp_set_long(&Dz, (unsigned long)D)) != MP_OKAY) {
5386
goto LBL_LS_ERR;
5387
}
5388
if ((e = mp_gcd(a, &Dz, &gcd)) != MP_OKAY) {
5389
goto LBL_LS_ERR;
5390
}
5391
/* if 1 < GCD < N then N is composite with factor "D", and
5392
Jacobi(D,N) is technically undefined (but often returned
5393
as zero). */
5394
if ((mp_cmp_d(&gcd, 1uL) == MP_GT) && (mp_cmp(&gcd, a) == MP_LT)) {
5395
goto LBL_LS_ERR;
5396
}
5397
if (Ds < 0) {
5398
Dz.sign = MP_NEG;
5399
}
5400
if ((e = mp_kronecker(&Dz, a, &J)) != MP_OKAY) {
5401
goto LBL_LS_ERR;
5402
}
5403
5404
if (J == -1) {
5405
break;
5406
}
5407
D += 2;
5408
5409
if (D > (INT_MAX - 2)) {
5410
e = MP_VAL;
5411
goto LBL_LS_ERR;
5412
}
5413
}
5414
5415
5416
5417
P = 1; /* Selfridge's choice */
5418
Q = (1 - Ds) / 4; /* Required so D = P*P - 4*Q */
5419
5420
/* NOTE: The conditions (a) N does not divide Q, and
5421
(b) D is square-free or not a perfect square, are included by
5422
some authors; e.g., "Prime numbers and computer methods for
5423
factorization," Hans Riesel (2nd ed., 1994, Birkhauser, Boston),
5424
p. 130. For this particular application of Lucas sequences,
5425
these conditions were found to be immaterial. */
5426
5427
/* Now calculate N - Jacobi(D,N) = N + 1 (even), and calculate the
5428
odd positive integer d and positive integer s for which
5429
N + 1 = 2^s*d (similar to the step for N - 1 in Miller's test).
5430
The strong Lucas-Selfridge test then returns N as a strong
5431
Lucas probable prime (slprp) if any of the following
5432
conditions is met: U_d=0, V_d=0, V_2d=0, V_4d=0, V_8d=0,
5433
V_16d=0, ..., etc., ending with V_{2^(s-1)*d}=V_{(N+1)/2}=0
5434
(all equalities mod N). Thus d is the highest index of U that
5435
must be computed (since V_2m is independent of U), compared
5436
to U_{N+1} for the standard Lucas-Selfridge test; and no
5437
index of V beyond (N+1)/2 is required, just as in the
5438
standard Lucas-Selfridge test. However, the quantity Q^d must
5439
be computed for use (if necessary) in the latter stages of
5440
the test. The result is that the strong Lucas-Selfridge test
5441
has a running time only slightly greater (order of 10 %) than
5442
that of the standard Lucas-Selfridge test, while producing
5443
only (roughly) 30 % as many pseudoprimes (and every strong
5444
Lucas pseudoprime is also a standard Lucas pseudoprime). Thus
5445
the evidence indicates that the strong Lucas-Selfridge test is
5446
more effective than the standard Lucas-Selfridge test, and a
5447
Baillie-PSW test based on the strong Lucas-Selfridge test
5448
should be more reliable. */
5449
5450
if ((e = mp_add_d(a, 1uL, &Np1)) != MP_OKAY) {
5451
goto LBL_LS_ERR;
5452
}
5453
s = mp_cnt_lsb(&Np1);
5454
5455
/* CZ
5456
* This should round towards zero because
5457
* Thomas R. Nicely used GMP's mpz_tdiv_q_2exp()
5458
* and mp_div_2d() is equivalent. Additionally:
5459
* dividing an even number by two does not produce
5460
* any leftovers.
5461
*/
5462
if ((e = mp_div_2d(&Np1, s, &Dz, NULL)) != MP_OKAY) {
5463
goto LBL_LS_ERR;
5464
}
5465
/* We must now compute U_d and V_d. Since d is odd, the accumulated
5466
values U and V are initialized to U_1 and V_1 (if the target
5467
index were even, U and V would be initialized instead to U_0=0
5468
and V_0=2). The values of U_2m and V_2m are also initialized to
5469
U_1 and V_1; the FOR loop calculates in succession U_2 and V_2,
5470
U_4 and V_4, U_8 and V_8, etc. If the corresponding bits
5471
(1, 2, 3, ...) of t are on (the zero bit having been accounted
5472
for in the initialization of U and V), these values are then
5473
combined with the previous totals for U and V, using the
5474
composition formulas for addition of indices. */
5475
5476
mp_set(&Uz, 1uL); /* U=U_1 */
5477
mp_set(&Vz, (mp_digit)P); /* V=V_1 */
5478
mp_set(&U2mz, 1uL); /* U_1 */
5479
mp_set(&V2mz, (mp_digit)P); /* V_1 */
5480
5481
if (Q < 0) {
5482
Q = -Q;
5483
if ((e = mp_set_long(&Qmz, (unsigned long)Q)) != MP_OKAY) {
5484
goto LBL_LS_ERR;
5485
}
5486
if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {
5487
goto LBL_LS_ERR;
5488
}
5489
/* Initializes calculation of Q^d */
5490
if ((e = mp_set_long(&Qkdz, (unsigned long)Q)) != MP_OKAY) {
5491
goto LBL_LS_ERR;
5492
}
5493
Qmz.sign = MP_NEG;
5494
Q2mz.sign = MP_NEG;
5495
Qkdz.sign = MP_NEG;
5496
Q = -Q;
5497
} else {
5498
if ((e = mp_set_long(&Qmz, (unsigned long)Q)) != MP_OKAY) {
5499
goto LBL_LS_ERR;
5500
}
5501
if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {
5502
goto LBL_LS_ERR;
5503
}
5504
/* Initializes calculation of Q^d */
5505
if ((e = mp_set_long(&Qkdz, (unsigned long)Q)) != MP_OKAY) {
5506
goto LBL_LS_ERR;
5507
}
5508
}
5509
5510
Nbits = mp_count_bits(&Dz);
5511
5512
for (u = 1; u < Nbits; u++) { /* zero bit off, already accounted for */
5513
/* Formulas for doubling of indices (carried out mod N). Note that
5514
* the indices denoted as "2m" are actually powers of 2, specifically
5515
* 2^(ul-1) beginning each loop and 2^ul ending each loop.
5516
*
5517
* U_2m = U_m*V_m
5518
* V_2m = V_m*V_m - 2*Q^m
5519
*/
5520
5521
if ((e = mp_mul(&U2mz, &V2mz, &U2mz)) != MP_OKAY) {
5522
goto LBL_LS_ERR;
5523
}
5524
if ((e = mp_mod(&U2mz, a, &U2mz)) != MP_OKAY) {
5525
goto LBL_LS_ERR;
5526
}
5527
if ((e = mp_sqr(&V2mz, &V2mz)) != MP_OKAY) {
5528
goto LBL_LS_ERR;
5529
}
5530
if ((e = mp_sub(&V2mz, &Q2mz, &V2mz)) != MP_OKAY) {
5531
goto LBL_LS_ERR;
5532
}
5533
if ((e = mp_mod(&V2mz, a, &V2mz)) != MP_OKAY) {
5534
goto LBL_LS_ERR;
5535
}
5536
/* Must calculate powers of Q for use in V_2m, also for Q^d later */
5537
if ((e = mp_sqr(&Qmz, &Qmz)) != MP_OKAY) {
5538
goto LBL_LS_ERR;
5539
}
5540
/* prevents overflow */ /* CZ still necessary without a fixed prealloc'd mem.? */
5541
if ((e = mp_mod(&Qmz, a, &Qmz)) != MP_OKAY) {
5542
goto LBL_LS_ERR;
5543
}
5544
if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {
5545
goto LBL_LS_ERR;
5546
}
5547
if ((isset = mp_get_bit(&Dz, u)) == MP_VAL) {
5548
e = isset;
5549
goto LBL_LS_ERR;
5550
}
5551
if (isset == MP_YES) {
5552
/* Formulas for addition of indices (carried out mod N);
5553
*
5554
* U_(m+n) = (U_m*V_n + U_n*V_m)/2
5555
* V_(m+n) = (V_m*V_n + D*U_m*U_n)/2
5556
*
5557
* Be careful with division by 2 (mod N)!
5558
*/
5559
if ((e = mp_mul(&U2mz, &Vz, &T1z)) != MP_OKAY) {
5560
goto LBL_LS_ERR;
5561
}
5562
if ((e = mp_mul(&Uz, &V2mz, &T2z)) != MP_OKAY) {
5563
goto LBL_LS_ERR;
5564
}
5565
if ((e = mp_mul(&V2mz, &Vz, &T3z)) != MP_OKAY) {
5566
goto LBL_LS_ERR;
5567
}
5568
if ((e = mp_mul(&U2mz, &Uz, &T4z)) != MP_OKAY) {
5569
goto LBL_LS_ERR;
5570
}
5571
if ((e = s_mp_mul_si(&T4z, (long)Ds, &T4z)) != MP_OKAY) {
5572
goto LBL_LS_ERR;
5573
}
5574
if ((e = mp_add(&T1z, &T2z, &Uz)) != MP_OKAY) {
5575
goto LBL_LS_ERR;
5576
}
5577
if (mp_isodd(&Uz) != MP_NO) {
5578
if ((e = mp_add(&Uz, a, &Uz)) != MP_OKAY) {
5579
goto LBL_LS_ERR;
5580
}
5581
}
5582
/* CZ
5583
* This should round towards negative infinity because
5584
* Thomas R. Nicely used GMP's mpz_fdiv_q_2exp().
5585
* But mp_div_2() does not do so, it is truncating instead.
5586
*/
5587
oddness = mp_isodd(&Uz);
5588
if ((e = mp_div_2(&Uz, &Uz)) != MP_OKAY) {
5589
goto LBL_LS_ERR;
5590
}
5591
if ((Uz.sign == MP_NEG) && (oddness != MP_NO)) {
5592
if ((e = mp_sub_d(&Uz, 1uL, &Uz)) != MP_OKAY) {
5593
goto LBL_LS_ERR;
5594
}
5595
}
5596
if ((e = mp_add(&T3z, &T4z, &Vz)) != MP_OKAY) {
5597
goto LBL_LS_ERR;
5598
}
5599
if (mp_isodd(&Vz) != MP_NO) {
5600
if ((e = mp_add(&Vz, a, &Vz)) != MP_OKAY) {
5601
goto LBL_LS_ERR;
5602
}
5603
}
5604
oddness = mp_isodd(&Vz);
5605
if ((e = mp_div_2(&Vz, &Vz)) != MP_OKAY) {
5606
goto LBL_LS_ERR;
5607
}
5608
if ((Vz.sign == MP_NEG) && (oddness != MP_NO)) {
5609
if ((e = mp_sub_d(&Vz, 1uL, &Vz)) != MP_OKAY) {
5610
goto LBL_LS_ERR;
5611
}
5612
}
5613
if ((e = mp_mod(&Uz, a, &Uz)) != MP_OKAY) {
5614
goto LBL_LS_ERR;
5615
}
5616
if ((e = mp_mod(&Vz, a, &Vz)) != MP_OKAY) {
5617
goto LBL_LS_ERR;
5618
}
5619
/* Calculating Q^d for later use */
5620
if ((e = mp_mul(&Qkdz, &Qmz, &Qkdz)) != MP_OKAY) {
5621
goto LBL_LS_ERR;
5622
}
5623
if ((e = mp_mod(&Qkdz, a, &Qkdz)) != MP_OKAY) {
5624
goto LBL_LS_ERR;
5625
}
5626
}
5627
}
5628
5629
/* If U_d or V_d is congruent to 0 mod N, then N is a prime or a
5630
strong Lucas pseudoprime. */
5631
if ((mp_iszero(&Uz) != MP_NO) || (mp_iszero(&Vz) != MP_NO)) {
5632
*result = MP_YES;
5633
goto LBL_LS_ERR;
5634
}
5635
5636
/* NOTE: Ribenboim ("The new book of prime number records," 3rd ed.,
5637
1995/6) omits the condition V0 on p.142, but includes it on
5638
p. 130. The condition is NECESSARY; otherwise the test will
5639
return false negatives---e.g., the primes 29 and 2000029 will be
5640
returned as composite. */
5641
5642
/* Otherwise, we must compute V_2d, V_4d, V_8d, ..., V_{2^(s-1)*d}
5643
by repeated use of the formula V_2m = V_m*V_m - 2*Q^m. If any of
5644
these are congruent to 0 mod N, then N is a prime or a strong
5645
Lucas pseudoprime. */
5646
5647
/* Initialize 2*Q^(d*2^r) for V_2m */
5648
if ((e = mp_mul_2(&Qkdz, &Q2kdz)) != MP_OKAY) {
5649
goto LBL_LS_ERR;
5650
}
5651
5652
for (r = 1; r < s; r++) {
5653
if ((e = mp_sqr(&Vz, &Vz)) != MP_OKAY) {
5654
goto LBL_LS_ERR;
5655
}
5656
if ((e = mp_sub(&Vz, &Q2kdz, &Vz)) != MP_OKAY) {
5657
goto LBL_LS_ERR;
5658
}
5659
if ((e = mp_mod(&Vz, a, &Vz)) != MP_OKAY) {
5660
goto LBL_LS_ERR;
5661
}
5662
if (mp_iszero(&Vz) != MP_NO) {
5663
*result = MP_YES;
5664
goto LBL_LS_ERR;
5665
}
5666
/* Calculate Q^{d*2^r} for next r (final iteration irrelevant). */
5667
if (r < (s - 1)) {
5668
if ((e = mp_sqr(&Qkdz, &Qkdz)) != MP_OKAY) {
5669
goto LBL_LS_ERR;
5670
}
5671
if ((e = mp_mod(&Qkdz, a, &Qkdz)) != MP_OKAY) {
5672
goto LBL_LS_ERR;
5673
}
5674
if ((e = mp_mul_2(&Qkdz, &Q2kdz)) != MP_OKAY) {
5675
goto LBL_LS_ERR;
5676
}
5677
}
5678
}
5679
LBL_LS_ERR:
5680
mp_clear_multi(&Q2kdz, &T4z, &T3z, &T2z, &T1z, &Qkdz, &Q2mz, &Qmz, &V2mz, &U2mz, &Vz, &Uz, &Np1, &gcd, &Dz, NULL);
5681
return e;
5682
}
5683
#endif
5684
#endif
5685
5686
/* End: bn_mp_prime_strong_lucas_selfridge.c */
5687
5688
/* Start: bn_mp_radix_size.c */
5689
5690
/* returns size of ASCII reprensentation */
5691
int mp_radix_size(const mp_int *a, int radix, int *size)
5692
{
5693
int res, digs;
5694
mp_int t;
5695
mp_digit d;
5696
5697
*size = 0;
5698
5699
/* make sure the radix is in range */
5700
if ((radix < 2) || (radix > 64)) {
5701
return MP_VAL;
5702
}
5703
5704
if (mp_iszero(a) == MP_YES) {
5705
*size = 2;
5706
return MP_OKAY;
5707
}
5708
5709
/* special case for binary */
5710
if (radix == 2) {
5711
*size = mp_count_bits(a) + ((a->sign == MP_NEG) ? 1 : 0) + 1;
5712
return MP_OKAY;
5713
}
5714
5715
/* digs is the digit count */
5716
digs = 0;
5717
5718
/* if it's negative add one for the sign */
5719
if (a->sign == MP_NEG) {
5720
++digs;
5721
}
5722
5723
/* init a copy of the input */
5724
if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
5725
return res;
5726
}
5727
5728
/* force temp to positive */
5729
t.sign = MP_ZPOS;
5730
5731
/* fetch out all of the digits */
5732
while (mp_iszero(&t) == MP_NO) {
5733
if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
5734
mp_clear(&t);
5735
return res;
5736
}
5737
++digs;
5738
}
5739
mp_clear(&t);
5740
5741
/* return digs + 1, the 1 is for the NULL byte that would be required. */
5742
*size = digs + 1;
5743
return MP_OKAY;
5744
}
5745
5746
/* End: bn_mp_radix_size.c */
5747
5748
/* Start: bn_mp_radix_smap.c */
5749
5750
/* chars used in radix conversions: the character at index i is digit i */
const char *const mp_s_rmap =
   "0123456789"
   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   "abcdefghijklmnopqrstuvwxyz"
   "+/";

/* Reverse map, indexed by (character - '('): the digit value of that
 * character, or 0xFF when the character is not a digit in any radix.
 */
const uint8_t mp_s_rmap_reverse[] = {
   /* ( ) * + , - . / */  0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xFF, 0xFF, 0x3F,
   /* 0 1 2 3 4 5 6 7 */  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
   /* 8 9 : ; < = > ? */  0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   /* @ A B C D E F G */  0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
   /* H I J K L M N O */  0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
   /* P Q R S T U V W */  0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
   /* X Y Z [ . ] ^ _ */  0x21, 0x22, 0x23, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   /* ` a b c d e f g */  0xFF, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A,
   /* h i j k l m n o */  0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32,
   /* p q r s t u v w */  0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A,
   /* x y z { | } ~ . */  0x3B, 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
const size_t mp_s_rmap_reverse_sz = sizeof(mp_s_rmap_reverse);
5766
5767
/* End: bn_mp_radix_smap.c */
5768
5769
/* Start: bn_mp_rand.c */
5770
5771
/* First the OS-specific special cases
5772
* - *BSD
5773
* - Windows
5774
*/
5775
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
5776
#define MP_ARC4RANDOM
5777
#define MP_GEN_RANDOM_MAX 0xffffffffu
5778
#define MP_GEN_RANDOM_SHIFT 32
5779
5780
/* Fills *p with random bits from arc4random(), calling it as often as is
 * needed to cover every bit of MP_MASK.  Always returns MP_OKAY.
 */
static int s_read_arc4random(mp_digit *p)
{
   mp_digit d = 0, msk = 0;
   do {
      /* The shift by MP_GEN_RANDOM_SHIFT is split in two halves: a single
       * shift by 32 is undefined behaviour when mp_digit is not wider than
       * 32 bits, and the first round always shifts (a zero value). */
      d = (mp_digit)((d << (MP_GEN_RANDOM_SHIFT / 2)) << (MP_GEN_RANDOM_SHIFT / 2));
      d |= ((mp_digit) arc4random());
      msk = (mp_digit)((msk << (MP_GEN_RANDOM_SHIFT / 2)) << (MP_GEN_RANDOM_SHIFT / 2));
      msk |= (MP_MASK & MP_GEN_RANDOM_MAX);
      /* msk tracks which digit bits have been filled so far */
   } while ((MP_MASK & msk) != MP_MASK);
   *p = d;
   return MP_OKAY;
}
5792
#endif
5793
5794
#if defined(_WIN32) || defined(_WIN32_WCE)
5795
#define MP_WIN_CSP
5796
5797
#ifndef _WIN32_WINNT
5798
#define _WIN32_WINNT 0x0400
5799
#endif
5800
#ifdef _WIN32_WCE
5801
#define UNDER_CE
5802
#define ARM
5803
#endif
5804
5805
#define WIN32_LEAN_AND_MEAN
5806
#include <windows.h>
5807
#include <ntsecapi.h>
5808
5809
/* Fills *p via RtlGenRandom(); MP_OKAY when it reports TRUE, -1 otherwise. */
static int s_read_win_csp(mp_digit *p)
{
   return (RtlGenRandom(p, sizeof(*p)) == TRUE) ? MP_OKAY : -1;
}
5817
#endif /* WIN32 */
5818
5819
#if !defined(MP_WIN_CSP) && defined(__linux__) && defined(__GLIBC_PREREQ)
5820
#if __GLIBC_PREREQ(2, 25)
5821
#define MP_GETRANDOM
5822
#include <sys/random.h>
5823
#include <errno.h>
5824
5825
/* Fills *p via getrandom(), retrying while the call is interrupted (EINTR).
 * Returns MP_OKAY only if a full digit was delivered, -1 otherwise.
 */
static int s_read_getrandom(mp_digit *p)
{
   int ret;

   for (;;) {
      ret = getrandom(p, sizeof(*p), 0);
      if ((ret != -1) || (errno != EINTR)) {
         break;
      }
   }
   return (ret == sizeof(*p)) ? MP_OKAY : -1;
}
5834
#endif
5835
#endif
5836
5837
/* We assume all platforms besides windows provide "/dev/urandom".
5838
* In case yours doesn't, define MP_NO_DEV_URANDOM at compile-time.
5839
*/
5840
#if !defined(MP_WIN_CSP) && !defined(MP_NO_DEV_URANDOM)
5841
#ifndef MP_DEV_URANDOM
5842
#define MP_DEV_URANDOM "/dev/urandom"
5843
#endif
5844
#include <fcntl.h>
5845
#include <errno.h>
5846
#include <unistd.h>
5847
5848
/* Fills *p with bytes read from MP_DEV_URANDOM.  Both open() and read() are
 * retried while interrupted (EINTR); a short read counts as failure.
 * Returns MP_OKAY on success, -1 otherwise.
 */
static int s_read_dev_urandom(mp_digit *p)
{
   ssize_t got;
   int fd;

   while (((fd = open(MP_DEV_URANDOM, O_RDONLY)) == -1) && (errno == EINTR)) {
      /* interrupted, try again */
   }
   if (fd == -1) {
      return -1;
   }

   while (((got = read(fd, p, sizeof(*p))) == -1) && (errno == EINTR)) {
      /* interrupted, try again */
   }
   close(fd);

   return (got != sizeof(*p)) ? -1 : MP_OKAY;
}
5863
#endif
5864
5865
#if defined(MP_PRNG_ENABLE_LTM_RNG)
5866
unsigned long (*ltm_rng)(unsigned char *out, unsigned long outlen, void (*callback)(void));
5867
void (*ltm_rng_callback)(void);
5868
5869
static int s_read_ltm_rng(mp_digit *p)
5870
{
5871
unsigned long ret;
5872
if (ltm_rng == NULL) return -1;
5873
ret = ltm_rng((void *)p, sizeof(*p), ltm_rng_callback);
5874
if (ret != sizeof(*p)) return -1;
5875
return MP_OKAY;
5876
}
5877
#endif
5878
5879
/* Tries every random source compiled into this build, in a fixed order, and
 * stops at the first that succeeds.  Returns MP_OKAY on success, otherwise
 * the result of the last source tried (-1 if none is compiled in).
 */
static int s_rand_digit(mp_digit *p)
{
   int ret = -1;

#if defined(MP_ARC4RANDOM)
   if ((ret = s_read_arc4random(p)) == MP_OKAY) {
      return ret;
   }
#endif

#if defined(MP_WIN_CSP)
   if ((ret = s_read_win_csp(p)) == MP_OKAY) {
      return ret;
   }
#else

#if defined(MP_GETRANDOM)
   if ((ret = s_read_getrandom(p)) == MP_OKAY) {
      return ret;
   }
#endif
#if defined(MP_DEV_URANDOM)
   if ((ret = s_read_dev_urandom(p)) == MP_OKAY) {
      return ret;
   }
#endif

#endif /* MP_WIN_CSP */

#if defined(MP_PRNG_ENABLE_LTM_RNG)
   if ((ret = s_read_ltm_rng(p)) == MP_OKAY) {
      return ret;
   }
#endif

   return ret;
}
5911
5912
/* makes a pseudo-random int of a given size */
5913
int mp_rand_digit(mp_digit *r)
5914
{
5915
int ret = s_rand_digit(r);
5916
*r &= MP_MASK;
5917
return ret;
5918
}
5919
5920
/* makes a pseudo-random int of a given size (in digits)
 *
 * "a" is zeroed first; for digits <= 0 it stays zero and MP_OKAY is
 * returned.  The most significant digit is drawn until it is non-zero.
 * Returns MP_VAL if no random digit can be obtained, or the error of a
 * failing bignum call.
 */
int mp_rand(mp_int *a, int digits)
{
   mp_digit d;
   int res, remaining;

   mp_zero(a);
   if (digits <= 0) {
      return MP_OKAY;
   }

   /* first place a random non-zero digit */
   for (;;) {
      if (mp_rand_digit(&d) != MP_OKAY) {
         return MP_VAL;
      }
      if (d != 0u) {
         break;
      }
   }
   if ((res = mp_add_d(a, d, a)) != MP_OKAY) {
      return res;
   }

   /* shift up by one digit and add a fresh one for each remaining digit */
   for (remaining = digits - 1; remaining > 0; remaining--) {
      if ((res = mp_lshd(a, 1)) != MP_OKAY) {
         return res;
      }
      if (mp_rand_digit(&d) != MP_OKAY) {
         return MP_VAL;
      }
      if ((res = mp_add_d(a, d, a)) != MP_OKAY) {
         return res;
      }
   }

   return MP_OKAY;
}
5956
5957
/* Start: bn_mp_read_radix.c */
5958
5959
/* read a string [ASCII] in a given radix */
5960
int mp_read_radix(mp_int *a, const char *str, int radix)
5961
{
5962
int y, res, neg;
5963
unsigned pos;
5964
char ch;
5965
5966
/* zero the digit bignum */
5967
mp_zero(a);
5968
5969
/* make sure the radix is ok */
5970
if ((radix < 2) || (radix > 64)) {
5971
return MP_VAL;
5972
}
5973
5974
/* if the leading digit is a
5975
* minus set the sign to negative.
5976
*/
5977
if (*str == '-') {
5978
++str;
5979
neg = MP_NEG;
5980
} else {
5981
neg = MP_ZPOS;
5982
}
5983
5984
/* set the integer to the default of zero */
5985
mp_zero(a);
5986
5987
/* process each digit of the string */
5988
while (*str != '\0') {
5989
/* if the radix <= 36 the conversion is case insensitive
5990
* this allows numbers like 1AB and 1ab to represent the same value
5991
* [e.g. in hex]
5992
*/
5993
ch = (radix <= 36) ? (char)toupper((int)*str) : *str;
5994
pos = (unsigned)(ch - '(');
5995
if (mp_s_rmap_reverse_sz < pos) {
5996
break;
5997
}
5998
y = (int)mp_s_rmap_reverse[pos];
5999
6000
/* if the char was found in the map
6001
* and is less than the given radix add it
6002
* to the number, otherwise exit the loop.
6003
*/
6004
if ((y == 0xff) || (y >= radix)) {
6005
break;
6006
}
6007
if ((res = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) {
6008
return res;
6009
}
6010
if ((res = mp_add_d(a, (mp_digit)y, a)) != MP_OKAY) {
6011
return res;
6012
}
6013
++str;
6014
}
6015
6016
/* if an illegal character was found, fail. */
6017
if (!((*str == '\0') || (*str == '\r') || (*str == '\n'))) {
6018
mp_zero(a);
6019
return MP_VAL;
6020
}
6021
6022
/* set the sign only if a != 0 */
6023
if (mp_iszero(a) != MP_YES) {
6024
a->sign = neg;
6025
}
6026
return MP_OKAY;
6027
}
6028
6029
/* End: bn_mp_read_radix.c */
6030
6031
/* Start: bn_mp_read_signed_bin.c */
6032
6033
/* read signed bin, big endian, first byte is 0==positive or 1==negative */
6034
int mp_read_signed_bin(mp_int *a, const unsigned char *b, int c)
6035
{
6036
int res;
6037
6038
/* read magnitude */
6039
if ((res = mp_read_unsigned_bin(a, b + 1, c - 1)) != MP_OKAY) {
6040
return res;
6041
}
6042
6043
/* first byte is 0 for positive, non-zero for negative */
6044
if (b[0] == (unsigned char)0) {
6045
a->sign = MP_ZPOS;
6046
} else {
6047
a->sign = MP_NEG;
6048
}
6049
6050
return MP_OKAY;
6051
}
6052
6053
/* End: bn_mp_read_signed_bin.c */
6054
6055
/* Start: bn_mp_read_unsigned_bin.c */
6056
6057
/* reads a unsigned char array, assumes the msb is stored first [big endian] */
6058
int mp_read_unsigned_bin(mp_int *a, const unsigned char *b, int c)
6059
{
6060
int res;
6061
6062
/* make sure there are at least two digits */
6063
if (a->alloc < 2) {
6064
if ((res = mp_grow(a, 2)) != MP_OKAY) {
6065
return res;
6066
}
6067
}
6068
6069
/* zero the int */
6070
mp_zero(a);
6071
6072
/* read the bytes in */
6073
while (c-- > 0) {
6074
if ((res = mp_mul_2d(a, 8, a)) != MP_OKAY) {
6075
return res;
6076
}
6077
6078
#ifndef MP_8BIT
6079
a->dp[0] |= *b++;
6080
a->used += 1;
6081
#else
6082
a->dp[0] = (*b & MP_MASK);
6083
a->dp[1] |= ((*b++ >> 7) & 1u);
6084
a->used += 2;
6085
#endif
6086
}
6087
mp_clamp(a);
6088
return MP_OKAY;
6089
}
6090
6091
/* End: bn_mp_read_unsigned_bin.c */
6092
6093
/* Start: bn_mp_reduce.c */
6094
6095
/* reduces x mod m, assumes 0 < x < m**2, mu is
6096
* precomputed via mp_reduce_setup.
6097
* From HAC pp.604 Algorithm 14.42
6098
*/
6099
int mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
6100
{
6101
mp_int q;
6102
int res, um = m->used;
6103
6104
/* q = x */
6105
if ((res = mp_init_copy(&q, x)) != MP_OKAY) {
6106
return res;
6107
}
6108
6109
/* q1 = x / b**(k-1) */
6110
mp_rshd(&q, um - 1);
6111
6112
/* according to HAC this optimization is ok */
6113
if ((mp_digit)um > ((mp_digit)1 << (DIGIT_BIT - 1))) {
6114
if ((res = mp_mul(&q, mu, &q)) != MP_OKAY) {
6115
goto CLEANUP;
6116
}
6117
} else {
6118
if ((res = s_mp_mul_high_digs(&q, mu, &q, um)) != MP_OKAY) {
6119
goto CLEANUP;
6120
}
6121
}
6122
6123
/* q3 = q2 / b**(k+1) */
6124
mp_rshd(&q, um + 1);
6125
6126
/* x = x mod b**(k+1), quick (no division) */
6127
if ((res = mp_mod_2d(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
6128
goto CLEANUP;
6129
}
6130
6131
/* q = q * m mod b**(k+1), quick (no division) */
6132
if ((res = s_mp_mul_digs(&q, m, &q, um + 1)) != MP_OKAY) {
6133
goto CLEANUP;
6134
}
6135
6136
/* x = x - q */
6137
if ((res = mp_sub(x, &q, x)) != MP_OKAY) {
6138
goto CLEANUP;
6139
}
6140
6141
/* If x < 0, add b**(k+1) to it */
6142
if (mp_cmp_d(x, 0uL) == MP_LT) {
6143
mp_set(&q, 1uL);
6144
if ((res = mp_lshd(&q, um + 1)) != MP_OKAY)
6145
goto CLEANUP;
6146
if ((res = mp_add(x, &q, x)) != MP_OKAY)
6147
goto CLEANUP;
6148
}
6149
6150
/* Back off if it's too big */
6151
while (mp_cmp(x, m) != MP_LT) {
6152
if ((res = s_mp_sub(x, m, x)) != MP_OKAY) {
6153
goto CLEANUP;
6154
}
6155
}
6156
6157
CLEANUP:
6158
mp_clear(&q);
6159
6160
return res;
6161
}
6162
6163
/* End: bn_mp_reduce.c */
6164
6165
/* Start: bn_mp_reduce_2k.c */
6166
6167
/* reduces a modulo n where n is of the form 2**p - d */
6168
int mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
6169
{
6170
mp_int q;
6171
int p, res;
6172
6173
if ((res = mp_init(&q)) != MP_OKAY) {
6174
return res;
6175
}
6176
6177
p = mp_count_bits(n);
6178
top:
6179
/* q = a/2**p, a = a mod 2**p */
6180
if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
6181
goto LBL_ERR;
6182
}
6183
6184
if (d != 1u) {
6185
/* q = q * d */
6186
if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
6187
goto LBL_ERR;
6188
}
6189
}
6190
6191
/* a = a + q */
6192
if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
6193
goto LBL_ERR;
6194
}
6195
6196
if (mp_cmp_mag(a, n) != MP_LT) {
6197
if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
6198
goto LBL_ERR;
6199
}
6200
goto top;
6201
}
6202
6203
LBL_ERR:
6204
mp_clear(&q);
6205
return res;
6206
}
6207
6208
/* End: bn_mp_reduce_2k.c */
6209
6210
/* Start: bn_mp_reduce_2k_l.c */
6211
6212
/* reduces a modulo n where n is of the form 2**p - d
6213
This differs from reduce_2k since "d" can be larger
6214
than a single digit.
6215
*/
6216
int mp_reduce_2k_l(mp_int *a, const mp_int *n, const mp_int *d)
6217
{
6218
mp_int q;
6219
int p, res;
6220
6221
if ((res = mp_init(&q)) != MP_OKAY) {
6222
return res;
6223
}
6224
6225
p = mp_count_bits(n);
6226
top:
6227
/* q = a/2**p, a = a mod 2**p */
6228
if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
6229
goto LBL_ERR;
6230
}
6231
6232
/* q = q * d */
6233
if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
6234
goto LBL_ERR;
6235
}
6236
6237
/* a = a + q */
6238
if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
6239
goto LBL_ERR;
6240
}
6241
6242
if (mp_cmp_mag(a, n) != MP_LT) {
6243
if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
6244
goto LBL_ERR;
6245
}
6246
goto top;
6247
}
6248
6249
LBL_ERR:
6250
mp_clear(&q);
6251
return res;
6252
}
6253
6254
/* End: bn_mp_reduce_2k_l.c */
6255
6256
/* Start: bn_mp_reduce_2k_setup.c */
6257
6258
/* determines the setup value */
6259
int mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
6260
{
6261
int res, p;
6262
mp_int tmp;
6263
6264
if ((res = mp_init(&tmp)) != MP_OKAY) {
6265
return res;
6266
}
6267
6268
p = mp_count_bits(a);
6269
if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
6270
mp_clear(&tmp);
6271
return res;
6272
}
6273
6274
if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
6275
mp_clear(&tmp);
6276
return res;
6277
}
6278
6279
*d = tmp.dp[0];
6280
mp_clear(&tmp);
6281
return MP_OKAY;
6282
}
6283
6284
/* End: bn_mp_reduce_2k_setup.c */
6285
6286
/* Start: bn_mp_reduce_2k_setup_l.c */
6287
6288
/* determines the setup value */
6289
int mp_reduce_2k_setup_l(const mp_int *a, mp_int *d)
6290
{
6291
int res;
6292
mp_int tmp;
6293
6294
if ((res = mp_init(&tmp)) != MP_OKAY) {
6295
return res;
6296
}
6297
6298
if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
6299
goto LBL_ERR;
6300
}
6301
6302
if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
6303
goto LBL_ERR;
6304
}
6305
6306
LBL_ERR:
6307
mp_clear(&tmp);
6308
return res;
6309
}
6310
6311
/* End: bn_mp_reduce_2k_setup_l.c */
6312
6313
/* Start: bn_mp_reduce_is_2k.c */
6314
6315
/* determines if mp_reduce_2k can be used */
6316
int mp_reduce_is_2k(const mp_int *a)
6317
{
6318
int ix, iy, iw;
6319
mp_digit iz;
6320
6321
if (a->used == 0) {
6322
return MP_NO;
6323
} else if (a->used == 1) {
6324
return MP_YES;
6325
} else if (a->used > 1) {
6326
iy = mp_count_bits(a);
6327
iz = 1;
6328
iw = 1;
6329
6330
/* Test every bit from the second digit up, must be 1 */
6331
for (ix = DIGIT_BIT; ix < iy; ix++) {
6332
if ((a->dp[iw] & iz) == 0u) {
6333
return MP_NO;
6334
}
6335
iz <<= 1;
6336
if (iz > (mp_digit)MP_MASK) {
6337
++iw;
6338
iz = 1;
6339
}
6340
}
6341
}
6342
return MP_YES;
6343
}
6344
6345
/* End: bn_mp_reduce_is_2k.c */
6346
6347
/* Start: bn_mp_reduce_is_2k_l.c */
6348
6349
/* determines if reduce_2k_l can be used */
6350
int mp_reduce_is_2k_l(const mp_int *a)
6351
{
6352
int ix, iy;
6353
6354
if (a->used == 0) {
6355
return MP_NO;
6356
} else if (a->used == 1) {
6357
return MP_YES;
6358
} else if (a->used > 1) {
6359
/* if more than half of the digits are -1 we're sold */
6360
for (iy = ix = 0; ix < a->used; ix++) {
6361
if (a->dp[ix] == MP_MASK) {
6362
++iy;
6363
}
6364
}
6365
return (iy >= (a->used/2)) ? MP_YES : MP_NO;
6366
6367
}
6368
return MP_NO;
6369
}
6370
6371
/* End: bn_mp_reduce_is_2k_l.c */
6372
6373
/* Start: bn_mp_reduce_setup.c */
6374
6375
/* pre-calculate the value required for Barrett reduction
6376
* For a given modulus "b" it calulates the value required in "a"
6377
*/
6378
int mp_reduce_setup(mp_int *a, const mp_int *b)
6379
{
6380
int res;
6381
6382
if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
6383
return res;
6384
}
6385
return mp_div(a, b, a, NULL);
6386
}
6387
6388
/* End: bn_mp_reduce_setup.c */
6389
6390
/* Start: bn_mp_rshd.c */
6391
6392
/* shift right a certain amount of digits */
6393
void mp_rshd(mp_int *a, int b)
6394
{
6395
int x;
6396
6397
/* if b <= 0 then ignore it */
6398
if (b <= 0) {
6399
return;
6400
}
6401
6402
/* if b > used then simply zero it and return */
6403
if (a->used <= b) {
6404
mp_zero(a);
6405
return;
6406
}
6407
6408
{
6409
mp_digit *bottom, *top;
6410
6411
/* shift the digits down */
6412
6413
/* bottom */
6414
bottom = a->dp;
6415
6416
/* top [offset into digits] */
6417
top = a->dp + b;
6418
6419
/* this is implemented as a sliding window where
6420
* the window is b-digits long and digits from
6421
* the top of the window are copied to the bottom
6422
*
6423
* e.g.
6424
6425
b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
6426
/\ | ---->
6427
\-------------------/ ---->
6428
*/
6429
for (x = 0; x < (a->used - b); x++) {
6430
*bottom++ = *top++;
6431
}
6432
6433
/* zero the top digits */
6434
for (; x < a->used; x++) {
6435
*bottom++ = 0;
6436
}
6437
}
6438
6439
/* remove excess digits */
6440
a->used -= b;
6441
}
6442
6443
/* End: bn_mp_rshd.c */
6444
6445
/* Start: bn_mp_set.c */
6446
6447
/* set to a digit */
6448
void mp_set(mp_int *a, mp_digit b)
6449
{
6450
mp_zero(a);
6451
a->dp[0] = b & MP_MASK;
6452
a->used = (a->dp[0] != 0u) ? 1 : 0;
6453
}
6454
6455
/* End: bn_mp_set.c */
6456
6457
/* Start: bn_mp_set_int.c */
6458
6459
/* set a 32-bit const */
6460
int mp_set_int(mp_int *a, unsigned long b)
6461
{
6462
int x, res;
6463
6464
mp_zero(a);
6465
6466
/* set four bits at a time */
6467
for (x = 0; x < 8; x++) {
6468
/* shift the number up four bits */
6469
if ((res = mp_mul_2d(a, 4, a)) != MP_OKAY) {
6470
return res;
6471
}
6472
6473
/* OR in the top four bits of the source */
6474
a->dp[0] |= (mp_digit)(b >> 28) & 15uL;
6475
6476
/* shift the source up to the next four bits */
6477
b <<= 4;
6478
6479
/* ensure that digits are not clamped off */
6480
a->used += 1;
6481
}
6482
mp_clamp(a);
6483
return MP_OKAY;
6484
}
6485
6486
/* End: bn_mp_set_int.c */
6487
6488
/* Start: bn_mp_set_long.c */
6489
6490
/* set a platform dependent unsigned long int */
6491
/* Instantiates the MP_SET_XLONG helper macro with the function name
 * mp_set_long and the argument type unsigned long.  The macro itself is
 * defined outside this part of the file (presumably tommath_private.h --
 * TODO confirm), so the generated body is not visible here.
 */
MP_SET_XLONG(mp_set_long, unsigned long)
6492
6493
/* End: bn_mp_set_long.c */
6494
6495
/* Start: bn_mp_set_long_long.c */
6496
6497
/* set a platform dependent unsigned long long int */
6498
/* Instantiates the MP_SET_XLONG helper macro with the function name
 * mp_set_long_long and the argument type unsigned long long.  The macro
 * itself is defined outside this part of the file (presumably
 * tommath_private.h -- TODO confirm), so the generated body is not visible
 * here.
 */
MP_SET_XLONG(mp_set_long_long, unsigned long long)
6499
6500
/* End: bn_mp_set_long_long.c */
6501
6502
/* Start: bn_mp_shrink.c */
6503
6504
/* shrink a bignum */
6505
int mp_shrink(mp_int *a)
6506
{
6507
mp_digit *tmp;
6508
int used = 1;
6509
6510
if (a->used > 0) {
6511
used = a->used;
6512
}
6513
6514
if (a->alloc != used) {
6515
if ((tmp = OPT_CAST(mp_digit) XREALLOC(a->dp, sizeof(mp_digit) * (size_t)used)) == NULL) {
6516
return MP_MEM;
6517
}
6518
a->dp = tmp;
6519
a->alloc = used;
6520
}
6521
return MP_OKAY;
6522
}
6523
6524
/* End: bn_mp_shrink.c */
6525
6526
/* Start: bn_mp_signed_bin_size.c */
6527
6528
/* get the size for an signed equivalent */
6529
int mp_signed_bin_size(const mp_int *a)
6530
{
6531
return 1 + mp_unsigned_bin_size(a);
6532
}
6533
6534
/* End: bn_mp_signed_bin_size.c */
6535
6536
/* Start: bn_mp_sqr.c */
6537
6538
/* computes b = a*a */
6539
int mp_sqr(const mp_int *a, mp_int *b)
6540
{
6541
int res;
6542
6543
/* use Toom-Cook? */
6544
if (a->used >= TOOM_SQR_CUTOFF) {
6545
res = mp_toom_sqr(a, b);
6546
/* Karatsuba? */
6547
} else
6548
if (a->used >= KARATSUBA_SQR_CUTOFF) {
6549
res = mp_karatsuba_sqr(a, b);
6550
} else
6551
{
6552
/* can we use the fast comba multiplier? */
6553
if ((((a->used * 2) + 1) < (int)MP_WARRAY) &&
6554
(a->used <
6555
(int)(1u << (((sizeof(mp_word) * (size_t)CHAR_BIT) - (2u * (size_t)DIGIT_BIT)) - 1u)))) {
6556
res = fast_s_mp_sqr(a, b);
6557
} else
6558
{
6559
res = s_mp_sqr(a, b);
6560
}
6561
}
6562
b->sign = MP_ZPOS;
6563
return res;
6564
}
6565
6566
/* End: bn_mp_sqr.c */
6567
6568
/* Start: bn_mp_sqrmod.c */
6569
6570
/* c = a * a (mod b) */
6571
int mp_sqrmod(const mp_int *a, const mp_int *b, mp_int *c)
6572
{
6573
int res;
6574
mp_int t;
6575
6576
if ((res = mp_init(&t)) != MP_OKAY) {
6577
return res;
6578
}
6579
6580
if ((res = mp_sqr(a, &t)) != MP_OKAY) {
6581
mp_clear(&t);
6582
return res;
6583
}
6584
res = mp_mod(&t, b, c);
6585
mp_clear(&t);
6586
return res;
6587
}
6588
6589
/* End: bn_mp_sqrmod.c */
6590
6591
/* Start: bn_mp_sqrt.c */
6592
6593
/* this function is less generic than mp_n_root, simpler and faster */
6594
int mp_sqrt(const mp_int *arg, mp_int *ret)
6595
{
6596
int res;
6597
mp_int t1, t2;
6598
6599
/* must be positive */
6600
if (arg->sign == MP_NEG) {
6601
return MP_VAL;
6602
}
6603
6604
/* easy out */
6605
if (mp_iszero(arg) == MP_YES) {
6606
mp_zero(ret);
6607
return MP_OKAY;
6608
}
6609
6610
if ((res = mp_init_copy(&t1, arg)) != MP_OKAY) {
6611
return res;
6612
}
6613
6614
if ((res = mp_init(&t2)) != MP_OKAY) {
6615
goto E2;
6616
}
6617
6618
/* First approx. (not very bad for large arg) */
6619
mp_rshd(&t1, t1.used/2);
6620
6621
/* t1 > 0 */
6622
if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {
6623
goto E1;
6624
}
6625
if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {
6626
goto E1;
6627
}
6628
if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {
6629
goto E1;
6630
}
6631
/* And now t1 > sqrt(arg) */
6632
do {
6633
if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {
6634
goto E1;
6635
}
6636
if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {
6637
goto E1;
6638
}
6639
if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {
6640
goto E1;
6641
}
6642
/* t1 >= sqrt(arg) >= t2 at this point */
6643
} while (mp_cmp_mag(&t1, &t2) == MP_GT);
6644
6645
mp_exch(&t1, ret);
6646
6647
E1:
6648
mp_clear(&t2);
6649
E2:
6650
mp_clear(&t1);
6651
return res;
6652
}
6653
6654
/* End: bn_mp_sqrt.c */
6655
6656
/* Start: bn_mp_sqrtmod_prime.c */
6657
6658
/* Tonelli-Shanks algorithm
6659
* https://en.wikipedia.org/wiki/Tonelli%E2%80%93Shanks_algorithm
6660
* https://gmplib.org/list-archives/gmp-discuss/2013-April/005300.html
6661
*
6662
*/
6663
6664
int mp_sqrtmod_prime(const mp_int *n, const mp_int *prime, mp_int *ret)
6665
{
6666
int res, legendre;
6667
mp_int t1, C, Q, S, Z, M, T, R, two;
6668
mp_digit i;
6669
6670
/* first handle the simple cases */
6671
if (mp_cmp_d(n, 0uL) == MP_EQ) {
6672
mp_zero(ret);
6673
return MP_OKAY;
6674
}
6675
if (mp_cmp_d(prime, 2uL) == MP_EQ) return MP_VAL; /* prime must be odd */
6676
if ((res = mp_jacobi(n, prime, &legendre)) != MP_OKAY) return res;
6677
if (legendre == -1) return MP_VAL; /* quadratic non-residue mod prime */
6678
6679
if ((res = mp_init_multi(&t1, &C, &Q, &S, &Z, &M, &T, &R, &two, NULL)) != MP_OKAY) {
6680
return res;
6681
}
6682
6683
/* SPECIAL CASE: if prime mod 4 == 3
6684
* compute directly: res = n^(prime+1)/4 mod prime
6685
* Handbook of Applied Cryptography algorithm 3.36
6686
*/
6687
if ((res = mp_mod_d(prime, 4uL, &i)) != MP_OKAY) goto cleanup;
6688
if (i == 3u) {
6689
if ((res = mp_add_d(prime, 1uL, &t1)) != MP_OKAY) goto cleanup;
6690
if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) goto cleanup;
6691
if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) goto cleanup;
6692
if ((res = mp_exptmod(n, &t1, prime, ret)) != MP_OKAY) goto cleanup;
6693
res = MP_OKAY;
6694
goto cleanup;
6695
}
6696
6697
/* NOW: Tonelli-Shanks algorithm */
6698
6699
/* factor out powers of 2 from prime-1, defining Q and S as: prime-1 = Q*2^S */
6700
if ((res = mp_copy(prime, &Q)) != MP_OKAY) goto cleanup;
6701
if ((res = mp_sub_d(&Q, 1uL, &Q)) != MP_OKAY) goto cleanup;
6702
/* Q = prime - 1 */
6703
mp_zero(&S);
6704
/* S = 0 */
6705
while (mp_iseven(&Q) != MP_NO) {
6706
if ((res = mp_div_2(&Q, &Q)) != MP_OKAY) goto cleanup;
6707
/* Q = Q / 2 */
6708
if ((res = mp_add_d(&S, 1uL, &S)) != MP_OKAY) goto cleanup;
6709
/* S = S + 1 */
6710
}
6711
6712
/* find a Z such that the Legendre symbol (Z|prime) == -1 */
6713
if ((res = mp_set_int(&Z, 2uL)) != MP_OKAY) goto cleanup;
6714
/* Z = 2 */
6715
while (1) {
6716
if ((res = mp_jacobi(&Z, prime, &legendre)) != MP_OKAY) goto cleanup;
6717
if (legendre == -1) break;
6718
if ((res = mp_add_d(&Z, 1uL, &Z)) != MP_OKAY) goto cleanup;
6719
/* Z = Z + 1 */
6720
}
6721
6722
if ((res = mp_exptmod(&Z, &Q, prime, &C)) != MP_OKAY) goto cleanup;
6723
/* C = Z ^ Q mod prime */
6724
if ((res = mp_add_d(&Q, 1uL, &t1)) != MP_OKAY) goto cleanup;
6725
if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) goto cleanup;
6726
/* t1 = (Q + 1) / 2 */
6727
if ((res = mp_exptmod(n, &t1, prime, &R)) != MP_OKAY) goto cleanup;
6728
/* R = n ^ ((Q + 1) / 2) mod prime */
6729
if ((res = mp_exptmod(n, &Q, prime, &T)) != MP_OKAY) goto cleanup;
6730
/* T = n ^ Q mod prime */
6731
if ((res = mp_copy(&S, &M)) != MP_OKAY) goto cleanup;
6732
/* M = S */
6733
if ((res = mp_set_int(&two, 2uL)) != MP_OKAY) goto cleanup;
6734
6735
res = MP_VAL;
6736
while (1) {
6737
if ((res = mp_copy(&T, &t1)) != MP_OKAY) goto cleanup;
6738
i = 0;
6739
while (1) {
6740
if (mp_cmp_d(&t1, 1uL) == MP_EQ) break;
6741
if ((res = mp_exptmod(&t1, &two, prime, &t1)) != MP_OKAY) goto cleanup;
6742
i++;
6743
}
6744
if (i == 0u) {
6745
if ((res = mp_copy(&R, ret)) != MP_OKAY) goto cleanup;
6746
res = MP_OKAY;
6747
goto cleanup;
6748
}
6749
if ((res = mp_sub_d(&M, i, &t1)) != MP_OKAY) goto cleanup;
6750
if ((res = mp_sub_d(&t1, 1uL, &t1)) != MP_OKAY) goto cleanup;
6751
if ((res = mp_exptmod(&two, &t1, prime, &t1)) != MP_OKAY) goto cleanup;
6752
/* t1 = 2 ^ (M - i - 1) */
6753
if ((res = mp_exptmod(&C, &t1, prime, &t1)) != MP_OKAY) goto cleanup;
6754
/* t1 = C ^ (2 ^ (M - i - 1)) mod prime */
6755
if ((res = mp_sqrmod(&t1, prime, &C)) != MP_OKAY) goto cleanup;
6756
/* C = (t1 * t1) mod prime */
6757
if ((res = mp_mulmod(&R, &t1, prime, &R)) != MP_OKAY) goto cleanup;
6758
/* R = (R * t1) mod prime */
6759
if ((res = mp_mulmod(&T, &C, prime, &T)) != MP_OKAY) goto cleanup;
6760
/* T = (T * C) mod prime */
6761
mp_set(&M, i);
6762
/* M = i */
6763
}
6764
6765
cleanup:
6766
mp_clear_multi(&t1, &C, &Q, &S, &Z, &M, &T, &R, &two, NULL);
6767
return res;
6768
}
6769
6770
/* End: bn_mp_sqrtmod_prime.c */
6771
6772
/* Start: bn_mp_sub.c */
6773
6774
/* high level subtraction (handles signs) */
6775
int mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
6776
{
6777
int sa, sb, res;
6778
6779
sa = a->sign;
6780
sb = b->sign;
6781
6782
if (sa != sb) {
6783
/* subtract a negative from a positive, OR */
6784
/* subtract a positive from a negative. */
6785
/* In either case, ADD their magnitudes, */
6786
/* and use the sign of the first number. */
6787
c->sign = sa;
6788
res = s_mp_add(a, b, c);
6789
} else {
6790
/* subtract a positive from a positive, OR */
6791
/* subtract a negative from a negative. */
6792
/* First, take the difference between their */
6793
/* magnitudes, then... */
6794
if (mp_cmp_mag(a, b) != MP_LT) {
6795
/* Copy the sign from the first */
6796
c->sign = sa;
6797
/* The first has a larger or equal magnitude */
6798
res = s_mp_sub(a, b, c);
6799
} else {
6800
/* The result has the *opposite* sign from */
6801
/* the first number. */
6802
c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
6803
/* The second has a larger magnitude */
6804
res = s_mp_sub(b, a, c);
6805
}
6806
}
6807
return res;
6808
}
6809
6810
/* End: bn_mp_sub.c */
6811
6812
/* Start: bn_mp_sub_d.c */
6813
6814
/* single digit subtraction */
6815
int mp_sub_d(const mp_int *a, mp_digit b, mp_int *c)
6816
{
6817
mp_digit *tmpa, *tmpc, mu;
6818
int res, ix, oldused;
6819
6820
/* grow c as required */
6821
if (c->alloc < (a->used + 1)) {
6822
if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
6823
return res;
6824
}
6825
}
6826
6827
/* if a is negative just do an unsigned
6828
* addition [with fudged signs]
6829
*/
6830
if (a->sign == MP_NEG) {
6831
mp_int a_ = *a;
6832
a_.sign = MP_ZPOS;
6833
res = mp_add_d(&a_, b, c);
6834
c->sign = MP_NEG;
6835
6836
/* clamp */
6837
mp_clamp(c);
6838
6839
return res;
6840
}
6841
6842
/* setup regs */
6843
oldused = c->used;
6844
tmpa = a->dp;
6845
tmpc = c->dp;
6846
6847
/* if a <= b simply fix the single digit */
6848
if (((a->used == 1) && (a->dp[0] <= b)) || (a->used == 0)) {
6849
if (a->used == 1) {
6850
*tmpc++ = b - *tmpa;
6851
} else {
6852
*tmpc++ = b;
6853
}
6854
ix = 1;
6855
6856
/* negative/1digit */
6857
c->sign = MP_NEG;
6858
c->used = 1;
6859
} else {
6860
/* positive/size */
6861
c->sign = MP_ZPOS;
6862
c->used = a->used;
6863
6864
/* subtract first digit */
6865
*tmpc = *tmpa++ - b;
6866
mu = *tmpc >> ((sizeof(mp_digit) * (size_t)CHAR_BIT) - 1u);
6867
*tmpc++ &= MP_MASK;
6868
6869
/* handle rest of the digits */
6870
for (ix = 1; ix < a->used; ix++) {
6871
*tmpc = *tmpa++ - mu;
6872
mu = *tmpc >> ((sizeof(mp_digit) * (size_t)CHAR_BIT) - 1u);
6873
*tmpc++ &= MP_MASK;
6874
}
6875
}
6876
6877
/* zero excess digits */
6878
while (ix++ < oldused) {
6879
*tmpc++ = 0;
6880
}
6881
mp_clamp(c);
6882
return MP_OKAY;
6883
}
6884
6885
/* End: bn_mp_sub_d.c */
6886
6887
/* Start: bn_mp_submod.c */
6888
6889
/* d = a - b (mod c) */
6890
int mp_submod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)
6891
{
6892
int res;
6893
mp_int t;
6894
6895
6896
if ((res = mp_init(&t)) != MP_OKAY) {
6897
return res;
6898
}
6899
6900
if ((res = mp_sub(a, b, &t)) != MP_OKAY) {
6901
mp_clear(&t);
6902
return res;
6903
}
6904
res = mp_mod(&t, c, d);
6905
mp_clear(&t);
6906
return res;
6907
}
6908
6909
/* End: bn_mp_submod.c */
6910
6911
/* Start: bn_mp_tc_and.c */
6912
6913
/* two complement and */
6914
int mp_tc_and(const mp_int *a, const mp_int *b, mp_int *c)
6915
{
6916
int res = MP_OKAY, bits, abits, bbits;
6917
int as = mp_isneg(a), bs = mp_isneg(b);
6918
mp_int *mx = NULL, _mx, acpy, bcpy;
6919
6920
if ((as != MP_NO) || (bs != MP_NO)) {
6921
abits = mp_count_bits(a);
6922
bbits = mp_count_bits(b);
6923
bits = MAX(abits, bbits);
6924
res = mp_init_set_int(&_mx, 1uL);
6925
if (res != MP_OKAY) {
6926
goto end;
6927
}
6928
6929
mx = &_mx;
6930
res = mp_mul_2d(mx, bits + 1, mx);
6931
if (res != MP_OKAY) {
6932
goto end;
6933
}
6934
6935
if (as != MP_NO) {
6936
res = mp_init(&acpy);
6937
if (res != MP_OKAY) {
6938
goto end;
6939
}
6940
6941
res = mp_add(mx, a, &acpy);
6942
if (res != MP_OKAY) {
6943
mp_clear(&acpy);
6944
goto end;
6945
}
6946
a = &acpy;
6947
}
6948
if (bs != MP_NO) {
6949
res = mp_init(&bcpy);
6950
if (res != MP_OKAY) {
6951
goto end;
6952
}
6953
6954
res = mp_add(mx, b, &bcpy);
6955
if (res != MP_OKAY) {
6956
mp_clear(&bcpy);
6957
goto end;
6958
}
6959
b = &bcpy;
6960
}
6961
}
6962
6963
res = mp_and(a, b, c);
6964
6965
if ((as != MP_NO) && (bs != MP_NO) && (res == MP_OKAY)) {
6966
res = mp_sub(c, mx, c);
6967
}
6968
6969
end:
6970
if (a == &acpy) {
6971
mp_clear(&acpy);
6972
}
6973
6974
if (b == &bcpy) {
6975
mp_clear(&bcpy);
6976
}
6977
6978
if (mx == &_mx) {
6979
mp_clear(mx);
6980
}
6981
6982
return res;
6983
}
6984
6985
/* End: bn_mp_tc_and.c */
6986
6987
/* Start: bn_mp_tc_div_2d.c */
6988
6989
/* two complement right shift */
6990
int mp_tc_div_2d(const mp_int *a, int b, mp_int *c)
6991
{
6992
int res;
6993
if (mp_isneg(a) == MP_NO) {
6994
return mp_div_2d(a, b, c, NULL);
6995
}
6996
6997
res = mp_add_d(a, 1uL, c);
6998
if (res != MP_OKAY) {
6999
return res;
7000
}
7001
7002
res = mp_div_2d(c, b, c, NULL);
7003
return (res == MP_OKAY) ? mp_sub_d(c, 1uL, c) : res;
7004
}
7005
7006
/* End: bn_mp_tc_div_2d.c */
7007
7008
/* Start: bn_mp_tc_or.c */
7009
7010
/* two complement or */
7011
int mp_tc_or(const mp_int *a, const mp_int *b, mp_int *c)
7012
{
7013
int res = MP_OKAY, bits, abits, bbits;
7014
int as = mp_isneg(a), bs = mp_isneg(b);
7015
mp_int *mx = NULL, _mx, acpy, bcpy;
7016
7017
if ((as != MP_NO) || (bs != MP_NO)) {
7018
abits = mp_count_bits(a);
7019
bbits = mp_count_bits(b);
7020
bits = MAX(abits, bbits);
7021
res = mp_init_set_int(&_mx, 1uL);
7022
if (res != MP_OKAY) {
7023
goto end;
7024
}
7025
7026
mx = &_mx;
7027
res = mp_mul_2d(mx, bits + 1, mx);
7028
if (res != MP_OKAY) {
7029
goto end;
7030
}
7031
7032
if (as != MP_NO) {
7033
res = mp_init(&acpy);
7034
if (res != MP_OKAY) {
7035
goto end;
7036
}
7037
7038
res = mp_add(mx, a, &acpy);
7039
if (res != MP_OKAY) {
7040
mp_clear(&acpy);
7041
goto end;
7042
}
7043
a = &acpy;
7044
}
7045
if (bs != MP_NO) {
7046
res = mp_init(&bcpy);
7047
if (res != MP_OKAY) {
7048
goto end;
7049
}
7050
7051
res = mp_add(mx, b, &bcpy);
7052
if (res != MP_OKAY) {
7053
mp_clear(&bcpy);
7054
goto end;
7055
}
7056
b = &bcpy;
7057
}
7058
}
7059
7060
res = mp_or(a, b, c);
7061
7062
if (((as != MP_NO) || (bs != MP_NO)) && (res == MP_OKAY)) {
7063
res = mp_sub(c, mx, c);
7064
}
7065
7066
end:
7067
if (a == &acpy) {
7068
mp_clear(&acpy);
7069
}
7070
7071
if (b == &bcpy) {
7072
mp_clear(&bcpy);
7073
}
7074
7075
if (mx == &_mx) {
7076
mp_clear(mx);
7077
}
7078
7079
return res;
7080
}
7081
7082
/* End: bn_mp_tc_or.c */
7083
7084
/* Start: bn_mp_tc_xor.c */
7085
7086
/* two complement xor */
7087
int mp_tc_xor(const mp_int *a, const mp_int *b, mp_int *c)
7088
{
7089
int res = MP_OKAY, bits, abits, bbits;
7090
int as = mp_isneg(a), bs = mp_isneg(b);
7091
mp_int *mx = NULL, _mx, acpy, bcpy;
7092
7093
if ((as != MP_NO) || (bs != MP_NO)) {
7094
abits = mp_count_bits(a);
7095
bbits = mp_count_bits(b);
7096
bits = MAX(abits, bbits);
7097
res = mp_init_set_int(&_mx, 1uL);
7098
if (res != MP_OKAY) {
7099
goto end;
7100
}
7101
7102
mx = &_mx;
7103
res = mp_mul_2d(mx, bits + 1, mx);
7104
if (res != MP_OKAY) {
7105
goto end;
7106
}
7107
7108
if (as != MP_NO) {
7109
res = mp_init(&acpy);
7110
if (res != MP_OKAY) {
7111
goto end;
7112
}
7113
7114
res = mp_add(mx, a, &acpy);
7115
if (res != MP_OKAY) {
7116
mp_clear(&acpy);
7117
goto end;
7118
}
7119
a = &acpy;
7120
}
7121
if (bs != MP_NO) {
7122
res = mp_init(&bcpy);
7123
if (res != MP_OKAY) {
7124
goto end;
7125
}
7126
7127
res = mp_add(mx, b, &bcpy);
7128
if (res != MP_OKAY) {
7129
mp_clear(&bcpy);
7130
goto end;
7131
}
7132
b = &bcpy;
7133
}
7134
}
7135
7136
res = mp_xor(a, b, c);
7137
7138
if ((as != bs) && (res == MP_OKAY)) {
7139
res = mp_sub(c, mx, c);
7140
}
7141
7142
end:
7143
if (a == &acpy) {
7144
mp_clear(&acpy);
7145
}
7146
7147
if (b == &bcpy) {
7148
mp_clear(&bcpy);
7149
}
7150
7151
if (mx == &_mx) {
7152
mp_clear(mx);
7153
}
7154
7155
return res;
7156
}
7157
7158
/* End: bn_mp_tc_xor.c */
7159
7160
/* Start: bn_mp_to_signed_bin.c */
7161
7162
/* store in signed [big endian] format */
7163
int mp_to_signed_bin(const mp_int *a, unsigned char *b)
7164
{
7165
int res;
7166
7167
if ((res = mp_to_unsigned_bin(a, b + 1)) != MP_OKAY) {
7168
return res;
7169
}
7170
b[0] = (a->sign == MP_ZPOS) ? (unsigned char)0 : (unsigned char)1;
7171
return MP_OKAY;
7172
}
7173
7174
/* End: bn_mp_to_signed_bin.c */
7175
7176
/* Start: bn_mp_to_signed_bin_n.c */
7177
7178
/* store in signed [big endian] format */
7179
int mp_to_signed_bin_n(const mp_int *a, unsigned char *b, unsigned long *outlen)
7180
{
7181
if (*outlen < (unsigned long)mp_signed_bin_size(a)) {
7182
return MP_VAL;
7183
}
7184
*outlen = (unsigned long)mp_signed_bin_size(a);
7185
return mp_to_signed_bin(a, b);
7186
}
7187
7188
/* End: bn_mp_to_signed_bin_n.c */
7189
7190
/* Start: bn_mp_to_unsigned_bin.c */
7191
7192
/* store in unsigned [big endian] format */
7193
int mp_to_unsigned_bin(const mp_int *a, unsigned char *b)
7194
{
7195
int x, res;
7196
mp_int t;
7197
7198
if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
7199
return res;
7200
}
7201
7202
x = 0;
7203
while (mp_iszero(&t) == MP_NO) {
7204
#ifndef MP_8BIT
7205
b[x++] = (unsigned char)(t.dp[0] & 255u);
7206
#else
7207
b[x++] = (unsigned char)(t.dp[0] | ((t.dp[1] & 1u) << 7));
7208
#endif
7209
if ((res = mp_div_2d(&t, 8, &t, NULL)) != MP_OKAY) {
7210
mp_clear(&t);
7211
return res;
7212
}
7213
}
7214
bn_reverse(b, x);
7215
mp_clear(&t);
7216
return MP_OKAY;
7217
}
7218
7219
/* End: bn_mp_to_unsigned_bin.c */
7220
7221
/* Start: bn_mp_to_unsigned_bin_n.c */
7222
7223
/* store in unsigned [big endian] format */
7224
int mp_to_unsigned_bin_n(const mp_int *a, unsigned char *b, unsigned long *outlen)
7225
{
7226
if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) {
7227
return MP_VAL;
7228
}
7229
*outlen = (unsigned long)mp_unsigned_bin_size(a);
7230
return mp_to_unsigned_bin(a, b);
7231
}
7232
7233
/* End: bn_mp_to_unsigned_bin_n.c */
7234
7235
/* Start: bn_mp_toom_mul.c */
7236
7237
/* multiplication using the Toom-Cook 3-way algorithm
7238
*
7239
* Much more complicated than Karatsuba but has a lower
7240
* asymptotic running time of O(N**1.464). This algorithm is
7241
* only particularly useful on VERY large inputs
7242
* (we're talking 1000s of digits here...).
7243
*/
7244
int mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c)
7245
{
7246
mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
7247
int res, B;
7248
7249
/* init temps */
7250
if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
7251
&a0, &a1, &a2, &b0, &b1,
7252
&b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
7253
return res;
7254
}
7255
7256
/* B */
7257
B = MIN(a->used, b->used) / 3;
7258
7259
/* a = a2 * B**2 + a1 * B + a0 */
7260
if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
7261
goto LBL_ERR;
7262
}
7263
7264
if ((res = mp_copy(a, &a1)) != MP_OKAY) {
7265
goto LBL_ERR;
7266
}
7267
mp_rshd(&a1, B);
7268
if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) {
7269
goto LBL_ERR;
7270
}
7271
7272
if ((res = mp_copy(a, &a2)) != MP_OKAY) {
7273
goto LBL_ERR;
7274
}
7275
mp_rshd(&a2, B*2);
7276
7277
/* b = b2 * B**2 + b1 * B + b0 */
7278
if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
7279
goto LBL_ERR;
7280
}
7281
7282
if ((res = mp_copy(b, &b1)) != MP_OKAY) {
7283
goto LBL_ERR;
7284
}
7285
mp_rshd(&b1, B);
7286
(void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
7287
7288
if ((res = mp_copy(b, &b2)) != MP_OKAY) {
7289
goto LBL_ERR;
7290
}
7291
mp_rshd(&b2, B*2);
7292
7293
/* w0 = a0*b0 */
7294
if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
7295
goto LBL_ERR;
7296
}
7297
7298
/* w4 = a2 * b2 */
7299
if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
7300
goto LBL_ERR;
7301
}
7302
7303
/* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
7304
if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
7305
goto LBL_ERR;
7306
}
7307
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7308
goto LBL_ERR;
7309
}
7310
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7311
goto LBL_ERR;
7312
}
7313
if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
7314
goto LBL_ERR;
7315
}
7316
7317
if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
7318
goto LBL_ERR;
7319
}
7320
if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
7321
goto LBL_ERR;
7322
}
7323
if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
7324
goto LBL_ERR;
7325
}
7326
if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
7327
goto LBL_ERR;
7328
}
7329
7330
if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
7331
goto LBL_ERR;
7332
}
7333
7334
/* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
7335
if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
7336
goto LBL_ERR;
7337
}
7338
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7339
goto LBL_ERR;
7340
}
7341
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7342
goto LBL_ERR;
7343
}
7344
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7345
goto LBL_ERR;
7346
}
7347
7348
if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
7349
goto LBL_ERR;
7350
}
7351
if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
7352
goto LBL_ERR;
7353
}
7354
if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
7355
goto LBL_ERR;
7356
}
7357
if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
7358
goto LBL_ERR;
7359
}
7360
7361
if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
7362
goto LBL_ERR;
7363
}
7364
7365
7366
/* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
7367
if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
7368
goto LBL_ERR;
7369
}
7370
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7371
goto LBL_ERR;
7372
}
7373
if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
7374
goto LBL_ERR;
7375
}
7376
if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
7377
goto LBL_ERR;
7378
}
7379
if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
7380
goto LBL_ERR;
7381
}
7382
7383
/* now solve the matrix
7384
7385
0 0 0 0 1
7386
1 2 4 8 16
7387
1 1 1 1 1
7388
16 8 4 2 1
7389
1 0 0 0 0
7390
7391
using 12 subtractions, 4 shifts,
7392
2 small divisions and 1 small multiplication
7393
*/
7394
7395
/* r1 - r4 */
7396
if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
7397
goto LBL_ERR;
7398
}
7399
/* r3 - r0 */
7400
if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
7401
goto LBL_ERR;
7402
}
7403
/* r1/2 */
7404
if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
7405
goto LBL_ERR;
7406
}
7407
/* r3/2 */
7408
if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
7409
goto LBL_ERR;
7410
}
7411
/* r2 - r0 - r4 */
7412
if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
7413
goto LBL_ERR;
7414
}
7415
if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
7416
goto LBL_ERR;
7417
}
7418
/* r1 - r2 */
7419
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
7420
goto LBL_ERR;
7421
}
7422
/* r3 - r2 */
7423
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
7424
goto LBL_ERR;
7425
}
7426
/* r1 - 8r0 */
7427
if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
7428
goto LBL_ERR;
7429
}
7430
if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
7431
goto LBL_ERR;
7432
}
7433
/* r3 - 8r4 */
7434
if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
7435
goto LBL_ERR;
7436
}
7437
if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
7438
goto LBL_ERR;
7439
}
7440
/* 3r2 - r1 - r3 */
7441
if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) {
7442
goto LBL_ERR;
7443
}
7444
if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
7445
goto LBL_ERR;
7446
}
7447
if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
7448
goto LBL_ERR;
7449
}
7450
/* r1 - r2 */
7451
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
7452
goto LBL_ERR;
7453
}
7454
/* r3 - r2 */
7455
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
7456
goto LBL_ERR;
7457
}
7458
/* r1/3 */
7459
if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
7460
goto LBL_ERR;
7461
}
7462
/* r3/3 */
7463
if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
7464
goto LBL_ERR;
7465
}
7466
7467
/* at this point shift W[n] by B*n */
7468
if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
7469
goto LBL_ERR;
7470
}
7471
if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
7472
goto LBL_ERR;
7473
}
7474
if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
7475
goto LBL_ERR;
7476
}
7477
if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
7478
goto LBL_ERR;
7479
}
7480
7481
if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
7482
goto LBL_ERR;
7483
}
7484
if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
7485
goto LBL_ERR;
7486
}
7487
if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
7488
goto LBL_ERR;
7489
}
7490
if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
7491
goto LBL_ERR;
7492
}
7493
7494
LBL_ERR:
7495
mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
7496
&a0, &a1, &a2, &b0, &b1,
7497
&b2, &tmp1, &tmp2, NULL);
7498
return res;
7499
}
7500
7501
/* End: bn_mp_toom_mul.c */
7502
7503
/* Start: bn_mp_toom_sqr.c */
7504
7505
/* squaring using Toom-Cook 3-way algorithm */
7506
int mp_toom_sqr(const mp_int *a, mp_int *b)
7507
{
7508
mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
7509
int res, B;
7510
7511
/* init temps */
7512
if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
7513
return res;
7514
}
7515
7516
/* B */
7517
B = a->used / 3;
7518
7519
/* a = a2 * B**2 + a1 * B + a0 */
7520
if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
7521
goto LBL_ERR;
7522
}
7523
7524
if ((res = mp_copy(a, &a1)) != MP_OKAY) {
7525
goto LBL_ERR;
7526
}
7527
mp_rshd(&a1, B);
7528
if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) {
7529
goto LBL_ERR;
7530
}
7531
7532
if ((res = mp_copy(a, &a2)) != MP_OKAY) {
7533
goto LBL_ERR;
7534
}
7535
mp_rshd(&a2, B*2);
7536
7537
/* w0 = a0*a0 */
7538
if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
7539
goto LBL_ERR;
7540
}
7541
7542
/* w4 = a2 * a2 */
7543
if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
7544
goto LBL_ERR;
7545
}
7546
7547
/* w1 = (a2 + 2(a1 + 2a0))**2 */
7548
if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
7549
goto LBL_ERR;
7550
}
7551
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7552
goto LBL_ERR;
7553
}
7554
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7555
goto LBL_ERR;
7556
}
7557
if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
7558
goto LBL_ERR;
7559
}
7560
7561
if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
7562
goto LBL_ERR;
7563
}
7564
7565
/* w3 = (a0 + 2(a1 + 2a2))**2 */
7566
if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
7567
goto LBL_ERR;
7568
}
7569
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7570
goto LBL_ERR;
7571
}
7572
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7573
goto LBL_ERR;
7574
}
7575
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7576
goto LBL_ERR;
7577
}
7578
7579
if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
7580
goto LBL_ERR;
7581
}
7582
7583
7584
/* w2 = (a2 + a1 + a0)**2 */
7585
if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
7586
goto LBL_ERR;
7587
}
7588
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7589
goto LBL_ERR;
7590
}
7591
if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
7592
goto LBL_ERR;
7593
}
7594
7595
/* now solve the matrix
7596
7597
0 0 0 0 1
7598
1 2 4 8 16
7599
1 1 1 1 1
7600
16 8 4 2 1
7601
1 0 0 0 0
7602
7603
using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
7604
*/
7605
7606
/* r1 - r4 */
7607
if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
7608
goto LBL_ERR;
7609
}
7610
/* r3 - r0 */
7611
if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
7612
goto LBL_ERR;
7613
}
7614
/* r1/2 */
7615
if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
7616
goto LBL_ERR;
7617
}
7618
/* r3/2 */
7619
if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
7620
goto LBL_ERR;
7621
}
7622
/* r2 - r0 - r4 */
7623
if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
7624
goto LBL_ERR;
7625
}
7626
if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
7627
goto LBL_ERR;
7628
}
7629
/* r1 - r2 */
7630
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
7631
goto LBL_ERR;
7632
}
7633
/* r3 - r2 */
7634
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
7635
goto LBL_ERR;
7636
}
7637
/* r1 - 8r0 */
7638
if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
7639
goto LBL_ERR;
7640
}
7641
if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
7642
goto LBL_ERR;
7643
}
7644
/* r3 - 8r4 */
7645
if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
7646
goto LBL_ERR;
7647
}
7648
if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
7649
goto LBL_ERR;
7650
}
7651
/* 3r2 - r1 - r3 */
7652
if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) {
7653
goto LBL_ERR;
7654
}
7655
if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
7656
goto LBL_ERR;
7657
}
7658
if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
7659
goto LBL_ERR;
7660
}
7661
/* r1 - r2 */
7662
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
7663
goto LBL_ERR;
7664
}
7665
/* r3 - r2 */
7666
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
7667
goto LBL_ERR;
7668
}
7669
/* r1/3 */
7670
if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
7671
goto LBL_ERR;
7672
}
7673
/* r3/3 */
7674
if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
7675
goto LBL_ERR;
7676
}
7677
7678
/* at this point shift W[n] by B*n */
7679
if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
7680
goto LBL_ERR;
7681
}
7682
if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
7683
goto LBL_ERR;
7684
}
7685
if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
7686
goto LBL_ERR;
7687
}
7688
if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
7689
goto LBL_ERR;
7690
}
7691
7692
if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
7693
goto LBL_ERR;
7694
}
7695
if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
7696
goto LBL_ERR;
7697
}
7698
if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
7699
goto LBL_ERR;
7700
}
7701
if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
7702
goto LBL_ERR;
7703
}
7704
7705
LBL_ERR:
7706
mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
7707
return res;
7708
}
7709
7710
/* End: bn_mp_toom_sqr.c */
7711
7712
/* Start: bn_mp_toradix.c */
7713
7714
/* stores a bignum as a ASCII string in a given radix (2..64) */
7715
int mp_toradix(const mp_int *a, char *str, int radix)
7716
{
7717
int res, digs;
7718
mp_int t;
7719
mp_digit d;
7720
char *_s = str;
7721
7722
/* check range of the radix */
7723
if ((radix < 2) || (radix > 64)) {
7724
return MP_VAL;
7725
}
7726
7727
/* quick out if its zero */
7728
if (mp_iszero(a) == MP_YES) {
7729
*str++ = '0';
7730
*str = '\0';
7731
return MP_OKAY;
7732
}
7733
7734
if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
7735
return res;
7736
}
7737
7738
/* if it is negative output a - */
7739
if (t.sign == MP_NEG) {
7740
++_s;
7741
*str++ = '-';
7742
t.sign = MP_ZPOS;
7743
}
7744
7745
digs = 0;
7746
while (mp_iszero(&t) == MP_NO) {
7747
if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
7748
mp_clear(&t);
7749
return res;
7750
}
7751
*str++ = mp_s_rmap[d];
7752
++digs;
7753
}
7754
7755
/* reverse the digits of the string. In this case _s points
7756
* to the first digit [exluding the sign] of the number]
7757
*/
7758
bn_reverse((unsigned char *)_s, digs);
7759
7760
/* append a NULL so the string is properly terminated */
7761
*str = '\0';
7762
7763
mp_clear(&t);
7764
return MP_OKAY;
7765
}
7766
7767
/* End: bn_mp_toradix.c */
7768
7769
/* Start: bn_mp_toradix_n.c */
7770
7771
/* stores a bignum as a ASCII string in a given radix (2..64)
7772
*
7773
* Stores upto maxlen-1 chars and always a NULL byte
7774
*/
7775
int mp_toradix_n(const mp_int *a, char *str, int radix, int maxlen)
7776
{
7777
int res, digs;
7778
mp_int t;
7779
mp_digit d;
7780
char *_s = str;
7781
7782
/* check range of the maxlen, radix */
7783
if ((maxlen < 2) || (radix < 2) || (radix > 64)) {
7784
return MP_VAL;
7785
}
7786
7787
/* quick out if its zero */
7788
if (mp_iszero(a) == MP_YES) {
7789
*str++ = '0';
7790
*str = '\0';
7791
return MP_OKAY;
7792
}
7793
7794
if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
7795
return res;
7796
}
7797
7798
/* if it is negative output a - */
7799
if (t.sign == MP_NEG) {
7800
/* we have to reverse our digits later... but not the - sign!! */
7801
++_s;
7802
7803
/* store the flag and mark the number as positive */
7804
*str++ = '-';
7805
t.sign = MP_ZPOS;
7806
7807
/* subtract a char */
7808
--maxlen;
7809
}
7810
7811
digs = 0;
7812
while (mp_iszero(&t) == MP_NO) {
7813
if (--maxlen < 1) {
7814
/* no more room */
7815
break;
7816
}
7817
if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
7818
mp_clear(&t);
7819
return res;
7820
}
7821
*str++ = mp_s_rmap[d];
7822
++digs;
7823
}
7824
7825
/* reverse the digits of the string. In this case _s points
7826
* to the first digit [exluding the sign] of the number
7827
*/
7828
bn_reverse((unsigned char *)_s, digs);
7829
7830
/* append a NULL so the string is properly terminated */
7831
*str = '\0';
7832
7833
mp_clear(&t);
7834
return MP_OKAY;
7835
}
7836
7837
/* End: bn_mp_toradix_n.c */
7838
7839
/* Start: bn_mp_unsigned_bin_size.c */
7840
7841
/* get the size for an unsigned equivalent */
7842
int mp_unsigned_bin_size(const mp_int *a)
7843
{
7844
int size = mp_count_bits(a);
7845
return (size / 8) + ((((unsigned)size & 7u) != 0u) ? 1 : 0);
7846
}
7847
7848
/* End: bn_mp_unsigned_bin_size.c */
7849
7850
/* Start: bn_mp_xor.c */
7851
7852
/* XOR two ints together */
7853
int mp_xor(const mp_int *a, const mp_int *b, mp_int *c)
7854
{
7855
int res, ix, px;
7856
mp_int t;
7857
const mp_int *x;
7858
7859
if (a->used > b->used) {
7860
if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
7861
return res;
7862
}
7863
px = b->used;
7864
x = b;
7865
} else {
7866
if ((res = mp_init_copy(&t, b)) != MP_OKAY) {
7867
return res;
7868
}
7869
px = a->used;
7870
x = a;
7871
}
7872
7873
for (ix = 0; ix < px; ix++) {
7874
t.dp[ix] ^= x->dp[ix];
7875
}
7876
mp_clamp(&t);
7877
mp_exch(c, &t);
7878
mp_clear(&t);
7879
return MP_OKAY;
7880
}
7881
7882
/* End: bn_mp_xor.c */
7883
7884
/* Start: bn_mp_zero.c */
7885
7886
/* set to zero */
7887
void mp_zero(mp_int *a)
7888
{
7889
int n;
7890
mp_digit *tmp;
7891
7892
a->sign = MP_ZPOS;
7893
a->used = 0;
7894
7895
tmp = a->dp;
7896
for (n = 0; n < a->alloc; n++) {
7897
*tmp++ = 0;
7898
}
7899
}
7900
7901
/* End: bn_mp_zero.c */
7902
7903
/* Start: bn_prime_tab.c */
7904
7905
/* Table of small primes in ascending order.
 *
 * With MP_8BIT defined only the first 31 entries (0x0002 .. 0x007F) are
 * compiled in; otherwise the table holds 256 entries ending at 0x0653.
 */
const mp_digit ltm_prime_tab[] = {
   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
#ifndef MP_8BIT
   0x0083,
   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,

   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,

   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,

   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
#endif
};
7945
7946
/* End: bn_prime_tab.c */
7947
7948
/* Start: bn_reverse.c */
7949
7950
/* reverse an array in place, used for radix code
 *
 * s   - first byte of the range to reverse
 * len - number of bytes in the range; a len of 0 or 1 (or a negative len)
 *       leaves the buffer untouched because the two indices never cross
 */
void bn_reverse(unsigned char *s, int len)
{
   int           ix, iy;
   unsigned char t;

   /* walk inwards from both ends, swapping as we go */
   ix = 0;
   iy = len - 1;
   while (ix < iy) {
      t     = s[ix];
      s[ix] = s[iy];
      s[iy] = t;
      ++ix;
      --iy;
   }
}
7966
7967
/* End: bn_reverse.c */
7968
7969
/* Start: bn_s_mp_add.c */
7970
7971
/* low level addition, based on HAC pp.594, Algorithm 14.7 */
7972
int s_mp_add(const mp_int *a, const mp_int *b, mp_int *c)
7973
{
7974
const mp_int *x;
7975
int olduse, res, min, max;
7976
7977
/* find sizes, we let |a| <= |b| which means we have to sort
7978
* them. "x" will point to the input with the most digits
7979
*/
7980
if (a->used > b->used) {
7981
min = b->used;
7982
max = a->used;
7983
x = a;
7984
} else {
7985
min = a->used;
7986
max = b->used;
7987
x = b;
7988
}
7989
7990
/* init result */
7991
if (c->alloc < (max + 1)) {
7992
if ((res = mp_grow(c, max + 1)) != MP_OKAY) {
7993
return res;
7994
}
7995
}
7996
7997
/* get old used digit count and set new one */
7998
olduse = c->used;
7999
c->used = max + 1;
8000
8001
{
8002
mp_digit u, *tmpa, *tmpb, *tmpc;
8003
int i;
8004
8005
/* alias for digit pointers */
8006
8007
/* first input */
8008
tmpa = a->dp;
8009
8010
/* second input */
8011
tmpb = b->dp;
8012
8013
/* destination */
8014
tmpc = c->dp;
8015
8016
/* zero the carry */
8017
u = 0;
8018
for (i = 0; i < min; i++) {
8019
/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
8020
*tmpc = *tmpa++ + *tmpb++ + u;
8021
8022
/* U = carry bit of T[i] */
8023
u = *tmpc >> (mp_digit)DIGIT_BIT;
8024
8025
/* take away carry bit from T[i] */
8026
*tmpc++ &= MP_MASK;
8027
}
8028
8029
/* now copy higher words if any, that is in A+B
8030
* if A or B has more digits add those in
8031
*/
8032
if (min != max) {
8033
for (; i < max; i++) {
8034
/* T[i] = X[i] + U */
8035
*tmpc = x->dp[i] + u;
8036
8037
/* U = carry bit of T[i] */
8038
u = *tmpc >> (mp_digit)DIGIT_BIT;
8039
8040
/* take away carry bit from T[i] */
8041
*tmpc++ &= MP_MASK;
8042
}
8043
}
8044
8045
/* add carry */
8046
*tmpc++ = u;
8047
8048
/* clear digits above oldused */
8049
for (i = c->used; i < olduse; i++) {
8050
*tmpc++ = 0;
8051
}
8052
}
8053
8054
mp_clamp(c);
8055
return MP_OKAY;
8056
}
8057
8058
/* End: bn_s_mp_add.c */
8059
8060
/* Start: bn_s_mp_exptmod.c */
8061
8062
#ifdef MP_LOW_MEM
8063
# define TAB_SIZE 32
8064
#else
8065
# define TAB_SIZE 256
8066
#endif
8067
8068
int s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)
8069
{
8070
mp_int M[TAB_SIZE], res, mu;
8071
mp_digit buf;
8072
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
8073
int (*redux)(mp_int *x, const mp_int *m, const mp_int *mu);
8074
8075
/* find window size */
8076
x = mp_count_bits(X);
8077
if (x <= 7) {
8078
winsize = 2;
8079
} else if (x <= 36) {
8080
winsize = 3;
8081
} else if (x <= 140) {
8082
winsize = 4;
8083
} else if (x <= 450) {
8084
winsize = 5;
8085
} else if (x <= 1303) {
8086
winsize = 6;
8087
} else if (x <= 3529) {
8088
winsize = 7;
8089
} else {
8090
winsize = 8;
8091
}
8092
8093
#ifdef MP_LOW_MEM
8094
if (winsize > 5) {
8095
winsize = 5;
8096
}
8097
#endif
8098
8099
/* init M array */
8100
/* init first cell */
8101
if ((err = mp_init(&M[1])) != MP_OKAY) {
8102
return err;
8103
}
8104
8105
/* now init the second half of the array */
8106
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
8107
if ((err = mp_init(&M[x])) != MP_OKAY) {
8108
for (y = 1<<(winsize-1); y < x; y++) {
8109
mp_clear(&M[y]);
8110
}
8111
mp_clear(&M[1]);
8112
return err;
8113
}
8114
}
8115
8116
/* create mu, used for Barrett reduction */
8117
if ((err = mp_init(&mu)) != MP_OKAY) {
8118
goto LBL_M;
8119
}
8120
8121
if (redmode == 0) {
8122
if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) {
8123
goto LBL_MU;
8124
}
8125
redux = mp_reduce;
8126
} else {
8127
if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) {
8128
goto LBL_MU;
8129
}
8130
redux = mp_reduce_2k_l;
8131
}
8132
8133
/* create M table
8134
*
8135
* The M table contains powers of the base,
8136
* e.g. M[x] = G**x mod P
8137
*
8138
* The first half of the table is not
8139
* computed though accept for M[0] and M[1]
8140
*/
8141
if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
8142
goto LBL_MU;
8143
}
8144
8145
/* compute the value at M[1<<(winsize-1)] by squaring
8146
* M[1] (winsize-1) times
8147
*/
8148
if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
8149
goto LBL_MU;
8150
}
8151
8152
for (x = 0; x < (winsize - 1); x++) {
8153
/* square it */
8154
if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)],
8155
&M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
8156
goto LBL_MU;
8157
}
8158
8159
/* reduce modulo P */
8160
if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
8161
goto LBL_MU;
8162
}
8163
}
8164
8165
/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
8166
* for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
8167
*/
8168
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
8169
if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
8170
goto LBL_MU;
8171
}
8172
if ((err = redux(&M[x], P, &mu)) != MP_OKAY) {
8173
goto LBL_MU;
8174
}
8175
}
8176
8177
/* setup result */
8178
if ((err = mp_init(&res)) != MP_OKAY) {
8179
goto LBL_MU;
8180
}
8181
mp_set(&res, 1uL);
8182
8183
/* set initial mode and bit cnt */
8184
mode = 0;
8185
bitcnt = 1;
8186
buf = 0;
8187
digidx = X->used - 1;
8188
bitcpy = 0;
8189
bitbuf = 0;
8190
8191
for (;;) {
8192
/* grab next digit as required */
8193
if (--bitcnt == 0) {
8194
/* if digidx == -1 we are out of digits */
8195
if (digidx == -1) {
8196
break;
8197
}
8198
/* read next digit and reset the bitcnt */
8199
buf = X->dp[digidx--];
8200
bitcnt = (int)DIGIT_BIT;
8201
}
8202
8203
/* grab the next msb from the exponent */
8204
y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
8205
buf <<= (mp_digit)1;
8206
8207
/* if the bit is zero and mode == 0 then we ignore it
8208
* These represent the leading zero bits before the first 1 bit
8209
* in the exponent. Technically this opt is not required but it
8210
* does lower the # of trivial squaring/reductions used
8211
*/
8212
if ((mode == 0) && (y == 0)) {
8213
continue;
8214
}
8215
8216
/* if the bit is zero and mode == 1 then we square */
8217
if ((mode == 1) && (y == 0)) {
8218
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
8219
goto LBL_RES;
8220
}
8221
if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8222
goto LBL_RES;
8223
}
8224
continue;
8225
}
8226
8227
/* else we add it to the window */
8228
bitbuf |= (y << (winsize - ++bitcpy));
8229
mode = 2;
8230
8231
if (bitcpy == winsize) {
8232
/* ok window is filled so square as required and multiply */
8233
/* square first */
8234
for (x = 0; x < winsize; x++) {
8235
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
8236
goto LBL_RES;
8237
}
8238
if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8239
goto LBL_RES;
8240
}
8241
}
8242
8243
/* then multiply */
8244
if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {
8245
goto LBL_RES;
8246
}
8247
if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8248
goto LBL_RES;
8249
}
8250
8251
/* empty window and reset */
8252
bitcpy = 0;
8253
bitbuf = 0;
8254
mode = 1;
8255
}
8256
}
8257
8258
/* if bits remain then square/multiply */
8259
if ((mode == 2) && (bitcpy > 0)) {
8260
/* square then multiply if the bit is set */
8261
for (x = 0; x < bitcpy; x++) {
8262
if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
8263
goto LBL_RES;
8264
}
8265
if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8266
goto LBL_RES;
8267
}
8268
8269
bitbuf <<= 1;
8270
if ((bitbuf & (1 << winsize)) != 0) {
8271
/* then multiply */
8272
if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
8273
goto LBL_RES;
8274
}
8275
if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8276
goto LBL_RES;
8277
}
8278
}
8279
}
8280
}
8281
8282
mp_exch(&res, Y);
8283
err = MP_OKAY;
8284
LBL_RES:
8285
mp_clear(&res);
8286
LBL_MU:
8287
mp_clear(&mu);
8288
LBL_M:
8289
mp_clear(&M[1]);
8290
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
8291
mp_clear(&M[x]);
8292
}
8293
return err;
8294
}
8295
8296
/* End: bn_s_mp_exptmod.c */
8297
8298
/* Start: bn_s_mp_mul_digs.c */
8299
8300
/* multiplies |a| * |b| and only computes upto digs digits of result
8301
* HAC pp. 595, Algorithm 14.12 Modified so you can control how
8302
* many digits of output are created.
8303
*/
8304
int s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
8305
{
8306
mp_int t;
8307
int res, pa, pb, ix, iy;
8308
mp_digit u;
8309
mp_word r;
8310
mp_digit tmpx, *tmpt, *tmpy;
8311
8312
/* can we use the fast multiplier? */
8313
if ((digs < (int)MP_WARRAY) &&
8314
(MIN(a->used, b->used) <
8315
(int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {
8316
return fast_s_mp_mul_digs(a, b, c, digs);
8317
}
8318
8319
if ((res = mp_init_size(&t, digs)) != MP_OKAY) {
8320
return res;
8321
}
8322
t.used = digs;
8323
8324
/* compute the digits of the product directly */
8325
pa = a->used;
8326
for (ix = 0; ix < pa; ix++) {
8327
/* set the carry to zero */
8328
u = 0;
8329
8330
/* limit ourselves to making digs digits of output */
8331
pb = MIN(b->used, digs - ix);
8332
8333
/* setup some aliases */
8334
/* copy of the digit from a used within the nested loop */
8335
tmpx = a->dp[ix];
8336
8337
/* an alias for the destination shifted ix places */
8338
tmpt = t.dp + ix;
8339
8340
/* an alias for the digits of b */
8341
tmpy = b->dp;
8342
8343
/* compute the columns of the output and propagate the carry */
8344
for (iy = 0; iy < pb; iy++) {
8345
/* compute the column as a mp_word */
8346
r = (mp_word)*tmpt +
8347
((mp_word)tmpx * (mp_word)*tmpy++) +
8348
(mp_word)u;
8349
8350
/* the new column is the lower part of the result */
8351
*tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);
8352
8353
/* get the carry word from the result */
8354
u = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8355
}
8356
/* set carry if it is placed below digs */
8357
if ((ix + iy) < digs) {
8358
*tmpt = u;
8359
}
8360
}
8361
8362
mp_clamp(&t);
8363
mp_exch(&t, c);
8364
8365
mp_clear(&t);
8366
return MP_OKAY;
8367
}
8368
8369
/* End: bn_s_mp_mul_digs.c */
8370
8371
/* Start: bn_s_mp_mul_high_digs.c */
8372
8373
/* multiplies |a| * |b| and does not compute the lower digs digits
8374
* [meant to get the higher part of the product]
8375
*/
8376
int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
8377
{
8378
mp_int t;
8379
int res, pa, pb, ix, iy;
8380
mp_digit u;
8381
mp_word r;
8382
mp_digit tmpx, *tmpt, *tmpy;
8383
8384
/* can we use the fast multiplier? */
8385
if (((a->used + b->used + 1) < (int)MP_WARRAY)
8386
&& (MIN(a->used, b->used) < (int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {
8387
return fast_s_mp_mul_high_digs(a, b, c, digs);
8388
}
8389
8390
if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {
8391
return res;
8392
}
8393
t.used = a->used + b->used + 1;
8394
8395
pa = a->used;
8396
pb = b->used;
8397
for (ix = 0; ix < pa; ix++) {
8398
/* clear the carry */
8399
u = 0;
8400
8401
/* left hand side of A[ix] * B[iy] */
8402
tmpx = a->dp[ix];
8403
8404
/* alias to the address of where the digits will be stored */
8405
tmpt = &(t.dp[digs]);
8406
8407
/* alias for where to read the right hand side from */
8408
tmpy = b->dp + (digs - ix);
8409
8410
for (iy = digs - ix; iy < pb; iy++) {
8411
/* calculate the double precision result */
8412
r = (mp_word)*tmpt +
8413
((mp_word)tmpx * (mp_word)*tmpy++) +
8414
(mp_word)u;
8415
8416
/* get the lower part */
8417
*tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);
8418
8419
/* carry the carry */
8420
u = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8421
}
8422
*tmpt = u;
8423
}
8424
mp_clamp(&t);
8425
mp_exch(&t, c);
8426
mp_clear(&t);
8427
return MP_OKAY;
8428
}
8429
8430
/* End: bn_s_mp_mul_high_digs.c */
8431
8432
/* Start: bn_s_mp_sqr.c */
8433
8434
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
8435
int s_mp_sqr(const mp_int *a, mp_int *b)
8436
{
8437
mp_int t;
8438
int res, ix, iy, pa;
8439
mp_word r;
8440
mp_digit u, tmpx, *tmpt;
8441
8442
pa = a->used;
8443
if ((res = mp_init_size(&t, (2 * pa) + 1)) != MP_OKAY) {
8444
return res;
8445
}
8446
8447
/* default used is maximum possible size */
8448
t.used = (2 * pa) + 1;
8449
8450
for (ix = 0; ix < pa; ix++) {
8451
/* first calculate the digit at 2*ix */
8452
/* calculate double precision result */
8453
r = (mp_word)t.dp[2*ix] +
8454
((mp_word)a->dp[ix] * (mp_word)a->dp[ix]);
8455
8456
/* store lower part in result */
8457
t.dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK);
8458
8459
/* get the carry */
8460
u = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8461
8462
/* left hand side of A[ix] * A[iy] */
8463
tmpx = a->dp[ix];
8464
8465
/* alias for where to store the results */
8466
tmpt = t.dp + ((2 * ix) + 1);
8467
8468
for (iy = ix + 1; iy < pa; iy++) {
8469
/* first calculate the product */
8470
r = (mp_word)tmpx * (mp_word)a->dp[iy];
8471
8472
/* now calculate the double precision result, note we use
8473
* addition instead of *2 since it's easier to optimize
8474
*/
8475
r = (mp_word)*tmpt + r + r + (mp_word)u;
8476
8477
/* store lower part */
8478
*tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);
8479
8480
/* get carry */
8481
u = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8482
}
8483
/* propagate upwards */
8484
while (u != 0uL) {
8485
r = (mp_word)*tmpt + (mp_word)u;
8486
*tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);
8487
u = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8488
}
8489
}
8490
8491
mp_clamp(&t);
8492
mp_exch(&t, b);
8493
mp_clear(&t);
8494
return MP_OKAY;
8495
}
8496
8497
/* End: bn_s_mp_sqr.c */
8498
8499
/* Start: bn_s_mp_sub.c */
8500
8501
/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
8502
int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
8503
{
8504
int olduse, res, min, max;
8505
8506
/* find sizes */
8507
min = b->used;
8508
max = a->used;
8509
8510
/* init result */
8511
if (c->alloc < max) {
8512
if ((res = mp_grow(c, max)) != MP_OKAY) {
8513
return res;
8514
}
8515
}
8516
olduse = c->used;
8517
c->used = max;
8518
8519
{
8520
mp_digit u, *tmpa, *tmpb, *tmpc;
8521
int i;
8522
8523
/* alias for digit pointers */
8524
tmpa = a->dp;
8525
tmpb = b->dp;
8526
tmpc = c->dp;
8527
8528
/* set carry to zero */
8529
u = 0;
8530
for (i = 0; i < min; i++) {
8531
/* T[i] = A[i] - B[i] - U */
8532
*tmpc = (*tmpa++ - *tmpb++) - u;
8533
8534
/* U = carry bit of T[i]
8535
* Note this saves performing an AND operation since
8536
* if a carry does occur it will propagate all the way to the
8537
* MSB. As a result a single shift is enough to get the carry
8538
*/
8539
u = *tmpc >> (((size_t)CHAR_BIT * sizeof(mp_digit)) - 1u);
8540
8541
/* Clear carry from T[i] */
8542
*tmpc++ &= MP_MASK;
8543
}
8544
8545
/* now copy higher words if any, e.g. if A has more digits than B */
8546
for (; i < max; i++) {
8547
/* T[i] = A[i] - U */
8548
*tmpc = *tmpa++ - u;
8549
8550
/* U = carry bit of T[i] */
8551
u = *tmpc >> (((size_t)CHAR_BIT * sizeof(mp_digit)) - 1u);
8552
8553
/* Clear carry from T[i] */
8554
*tmpc++ &= MP_MASK;
8555
}
8556
8557
/* clear digits above used (since we may not have grown result above) */
8558
for (i = c->used; i < olduse; i++) {
8559
*tmpc++ = 0;
8560
}
8561
}
8562
8563
mp_clamp(c);
8564
return MP_OKAY;
8565
}
8566
8567
/* End: bn_s_mp_sub.c */
8568
8569
/* Start: bncore.c */
8570
8571
/* Known optimal configurations

 CPU                    /Compiler     /MUL CUTOFF/SQR CUTOFF
-------------------------------------------------------------
 Intel P4 Northwood     /GCC v3.4.1   /        88/       128/LTM 0.32 ;-)
 AMD Athlon64           /GCC v3.4.4   /        80/       120/LTM 0.35

*/

/* Tunable thresholds, in digits.  They are ordinary (non-const) globals. */
int     KARATSUBA_MUL_CUTOFF = 80,      /* Min. number of digits before Karatsuba multiplication is used. */
        KARATSUBA_SQR_CUTOFF = 120,     /* Min. number of digits before Karatsuba squaring is used. */

        TOOM_MUL_CUTOFF      = 350,     /* no optimal values of these are known yet so set em high */
        TOOM_SQR_CUTOFF      = 400;
8585
8586
/* End: bncore.c */
8587
8588