CoCalc -- mpi.c

GitHub Repository: wine-mirror/wine
Path: blob/master/libs/tomcrypt/mpi.c
⁴³⁹³ views
1
/* LibTomMath, multiple-precision integer library -- Tom St Denis
2
 *
3
 * LibTomMath is a library that provides multiple-precision
4
 * integer arithmetic as well as number theoretic functionality.
5
 *
6
 * The library was designed directly after the MPI library by
7
 * Michael Fromberger but has been written from scratch with
8
 * additional optimizations in place.
9
 *
10
 * SPDX-License-Identifier: Unlicense
11
 */
12

13
#include <stdarg.h>
14
#include "tommath_private.h"
15

16
/* Start: bn_fast_mp_invmod.c */
17

18
/* computes the modular inverse via binary extended euclidean algorithm,
19
 * that is c = 1/a mod b
20
 *
21
 * Based on slow invmod except this is optimized for the case where b is
22
 * odd as per HAC Note 14.64 on pp. 610
23
 */
24
int fast_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c)
25
{
26
   mp_int  x, y, u, v, B, D;
27
   int     res, neg;
28

29
   /* 2. [modified] b must be odd   */
30
   if (mp_iseven(b) == MP_YES) {
31
      return MP_VAL;
32
   }
33

34
   /* init all our temps */
35
   if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
36
      return res;
37
   }
38

39
   /* x == modulus, y == value to invert */
40
   if ((res = mp_copy(b, &x)) != MP_OKAY) {
41
      goto LBL_ERR;
42
   }
43

44
   /* we need y = |a| */
45
   if ((res = mp_mod(a, b, &y)) != MP_OKAY) {
46
      goto LBL_ERR;
47
   }
48

49
   /* if one of x,y is zero return an error! */
50
   if ((mp_iszero(&x) == MP_YES) || (mp_iszero(&y) == MP_YES)) {
51
      res = MP_VAL;
52
      goto LBL_ERR;
53
   }
54

55
   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
56
   if ((res = mp_copy(&x, &u)) != MP_OKAY) {
57
      goto LBL_ERR;
58
   }
59
   if ((res = mp_copy(&y, &v)) != MP_OKAY) {
60
      goto LBL_ERR;
61
   }
62
   mp_set(&D, 1uL);
63

64
top:
65
   /* 4.  while u is even do */
66
   while (mp_iseven(&u) == MP_YES) {
67
      /* 4.1 u = u/2 */
68
      if ((res = mp_div_2(&u, &u)) != MP_OKAY) {
69
         goto LBL_ERR;
70
      }
71
      /* 4.2 if B is odd then */
72
      if (mp_isodd(&B) == MP_YES) {
73
         if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {
74
            goto LBL_ERR;
75
         }
76
      }
77
      /* B = B/2 */
78
      if ((res = mp_div_2(&B, &B)) != MP_OKAY) {
79
         goto LBL_ERR;
80
      }
81
   }
82

83
   /* 5.  while v is even do */
84
   while (mp_iseven(&v) == MP_YES) {
85
      /* 5.1 v = v/2 */
86
      if ((res = mp_div_2(&v, &v)) != MP_OKAY) {
87
         goto LBL_ERR;
88
      }
89
      /* 5.2 if D is odd then */
90
      if (mp_isodd(&D) == MP_YES) {
91
         /* D = (D-x)/2 */
92
         if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {
93
            goto LBL_ERR;
94
         }
95
      }
96
      /* D = D/2 */
97
      if ((res = mp_div_2(&D, &D)) != MP_OKAY) {
98
         goto LBL_ERR;
99
      }
100
   }
101

102
   /* 6.  if u >= v then */
103
   if (mp_cmp(&u, &v) != MP_LT) {
104
      /* u = u - v, B = B - D */
105
      if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {
106
         goto LBL_ERR;
107
      }
108

109
      if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {
110
         goto LBL_ERR;
111
      }
112
   } else {
113
      /* v - v - u, D = D - B */
114
      if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {
115
         goto LBL_ERR;
116
      }
117

118
      if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {
119
         goto LBL_ERR;
120
      }
121
   }
122

123
   /* if not zero goto step 4 */
124
   if (mp_iszero(&u) == MP_NO) {
125
      goto top;
126
   }
127

128
   /* now a = C, b = D, gcd == g*v */
129

130
   /* if v != 1 then there is no inverse */
131
   if (mp_cmp_d(&v, 1uL) != MP_EQ) {
132
      res = MP_VAL;
133
      goto LBL_ERR;
134
   }
135

136
   /* b is now the inverse */
137
   neg = a->sign;
138
   while (D.sign == MP_NEG) {
139
      if ((res = mp_add(&D, b, &D)) != MP_OKAY) {
140
         goto LBL_ERR;
141
      }
142
   }
143

144
   /* too big */
145
   while (mp_cmp_mag(&D, b) != MP_LT) {
146
      if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
147
         goto LBL_ERR;
148
      }
149
   }
150

151
   mp_exch(&D, c);
152
   c->sign = neg;
153
   res = MP_OKAY;
154

155
LBL_ERR:
156
   mp_clear_multi(&x, &y, &u, &v, &B, &D, NULL);
157
   return res;
158
}
159

160
/* End: bn_fast_mp_invmod.c */
161

162
/* Start: bn_fast_mp_montgomery_reduce.c */
163

164
/* computes xR**-1 == x (mod N) via Montgomery Reduction
165
 *
166
 * This is an optimized implementation of montgomery_reduce
167
 * which uses the comba method to quickly calculate the columns of the
168
 * reduction.
169
 *
170
 * Based on Algorithm 14.32 on pp.601 of HAC.
171
*/
172
int fast_mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
173
{
174
   int     ix, res, olduse;
175
   mp_word W[MP_WARRAY];
176

177
   if (x->used > (int)MP_WARRAY) {
178
      return MP_VAL;
179
   }
180

181
   /* get old used count */
182
   olduse = x->used;
183

184
   /* grow a as required */
185
   if (x->alloc < (n->used + 1)) {
186
      if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) {
187
         return res;
188
      }
189
   }
190

191
   /* first we have to get the digits of the input into
192
    * an array of double precision words W[...]
193
    */
194
   {
195
      mp_word *_W;
196
      mp_digit *tmpx;
197

198
      /* alias for the W[] array */
199
      _W   = W;
200

201
      /* alias for the digits of  x*/
202
      tmpx = x->dp;
203

204
      /* copy the digits of a into W[0..a->used-1] */
205
      for (ix = 0; ix < x->used; ix++) {
206
         *_W++ = *tmpx++;
207
      }
208

209
      /* zero the high words of W[a->used..m->used*2] */
210
      for (; ix < ((n->used * 2) + 1); ix++) {
211
         *_W++ = 0;
212
      }
213
   }
214

215
   /* now we proceed to zero successive digits
216
    * from the least significant upwards
217
    */
218
   for (ix = 0; ix < n->used; ix++) {
219
      /* mu = ai * m' mod b
220
       *
221
       * We avoid a double precision multiplication (which isn't required)
222
       * by casting the value down to a mp_digit.  Note this requires
223
       * that W[ix-1] have  the carry cleared (see after the inner loop)
224
       */
225
      mp_digit mu;
226
      mu = ((W[ix] & MP_MASK) * rho) & MP_MASK;
227

228
      /* a = a + mu * m * b**i
229
       *
230
       * This is computed in place and on the fly.  The multiplication
231
       * by b**i is handled by offseting which columns the results
232
       * are added to.
233
       *
234
       * Note the comba method normally doesn't handle carries in the
235
       * inner loop In this case we fix the carry from the previous
236
       * column since the Montgomery reduction requires digits of the
237
       * result (so far) [see above] to work.  This is
238
       * handled by fixing up one carry after the inner loop.  The
239
       * carry fixups are done in order so after these loops the
240
       * first m->used words of W[] have the carries fixed
241
       */
242
      {
243
         int iy;
244
         mp_digit *tmpn;
245
         mp_word *_W;
246

247
         /* alias for the digits of the modulus */
248
         tmpn = n->dp;
249

250
         /* Alias for the columns set by an offset of ix */
251
         _W = W + ix;
252

253
         /* inner loop */
254
         for (iy = 0; iy < n->used; iy++) {
255
            *_W++ += (mp_word)mu * (mp_word)*tmpn++;
256
         }
257
      }
258

259
      /* now fix carry for next digit, W[ix+1] */
260
      W[ix + 1] += W[ix] >> (mp_word)DIGIT_BIT;
261
   }
262

263
   /* now we have to propagate the carries and
264
    * shift the words downward [all those least
265
    * significant digits we zeroed].
266
    */
267
   {
268
      mp_digit *tmpx;
269
      mp_word *_W, *_W1;
270

271
      /* nox fix rest of carries */
272

273
      /* alias for current word */
274
      _W1 = W + ix;
275

276
      /* alias for next word, where the carry goes */
277
      _W = W + ++ix;
278

279
      for (; ix <= ((n->used * 2) + 1); ix++) {
280
         *_W++ += *_W1++ >> (mp_word)DIGIT_BIT;
281
      }
282

283
      /* copy out, A = A/b**n
284
       *
285
       * The result is A/b**n but instead of converting from an
286
       * array of mp_word to mp_digit than calling mp_rshd
287
       * we just copy them in the right order
288
       */
289

290
      /* alias for destination word */
291
      tmpx = x->dp;
292

293
      /* alias for shifted double precision result */
294
      _W = W + n->used;
295

296
      for (ix = 0; ix < (n->used + 1); ix++) {
297
         *tmpx++ = *_W++ & (mp_word)MP_MASK;
298
      }
299

300
      /* zero oldused digits, if the input a was larger than
301
       * m->used+1 we'll have to clear the digits
302
       */
303
      for (; ix < olduse; ix++) {
304
         *tmpx++ = 0;
305
      }
306
   }
307

308
   /* set the max used and clamp */
309
   x->used = n->used + 1;
310
   mp_clamp(x);
311

312
   /* if A >= m then A = A - m */
313
   if (mp_cmp_mag(x, n) != MP_LT) {
314
      return s_mp_sub(x, n, x);
315
   }
316
   return MP_OKAY;
317
}
318

319
/* End: bn_fast_mp_montgomery_reduce.c */
320

321
/* Start: bn_fast_s_mp_mul_digs.c */
322

323
/* Fast (comba) multiplier
324
 *
325
 * This is the fast column-array [comba] multiplier.  It is
326
 * designed to compute the columns of the product first
327
 * then handle the carries afterwards.  This has the effect
328
 * of making the nested loops that compute the columns very
329
 * simple and schedulable on super-scalar processors.
330
 *
331
 * This has been modified to produce a variable number of
332
 * digits of output so if say only a half-product is required
333
 * you don't have to compute the upper half (a feature
334
 * required for fast Barrett reduction).
335
 *
336
 * Based on Algorithm 14.12 on pp.595 of HAC.
337
 *
338
 */
339
int fast_s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
340
{
341
   int     olduse, res, pa, ix, iz;
342
   mp_digit W[MP_WARRAY];
343
   mp_word  _W;
344

345
   /* grow the destination as required */
346
   if (c->alloc < digs) {
347
      if ((res = mp_grow(c, digs)) != MP_OKAY) {
348
         return res;
349
      }
350
   }
351

352
   /* number of output digits to produce */
353
   pa = MIN(digs, a->used + b->used);
354

355
   /* clear the carry */
356
   _W = 0;
357
   for (ix = 0; ix < pa; ix++) {
358
      int      tx, ty;
359
      int      iy;
360
      mp_digit *tmpx, *tmpy;
361

362
      /* get offsets into the two bignums */
363
      ty = MIN(b->used-1, ix);
364
      tx = ix - ty;
365

366
      /* setup temp aliases */
367
      tmpx = a->dp + tx;
368
      tmpy = b->dp + ty;
369

370
      /* this is the number of times the loop will iterrate, essentially
371
         while (tx++ < a->used && ty-- >= 0) { ... }
372
       */
373
      iy = MIN(a->used-tx, ty+1);
374

375
      /* execute loop */
376
      for (iz = 0; iz < iy; ++iz) {
377
         _W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
378

379
      }
380

381
      /* store term */
382
      W[ix] = (mp_digit)_W & MP_MASK;
383

384
      /* make next carry */
385
      _W = _W >> (mp_word)DIGIT_BIT;
386
   }
387

388
   /* setup dest */
389
   olduse  = c->used;
390
   c->used = pa;
391

392
   {
393
      mp_digit *tmpc;
394
      tmpc = c->dp;
395
      for (ix = 0; ix < pa; ix++) {
396
         /* now extract the previous digit [below the carry] */
397
         *tmpc++ = W[ix];
398
      }
399

400
      /* clear unused digits [that existed in the old copy of c] */
401
      for (; ix < olduse; ix++) {
402
         *tmpc++ = 0;
403
      }
404
   }
405
   mp_clamp(c);
406
   return MP_OKAY;
407
}
408

409
/* End: bn_fast_s_mp_mul_digs.c */
410

411
/* Start: bn_fast_s_mp_mul_high_digs.c */
412

413
/* this is a modified version of fast_s_mul_digs that only produces
414
 * output digits *above* digs.  See the comments for fast_s_mul_digs
415
 * to see how it works.
416
 *
417
 * This is used in the Barrett reduction since for one of the multiplications
418
 * only the higher digits were needed.  This essentially halves the work.
419
 *
420
 * Based on Algorithm 14.12 on pp.595 of HAC.
421
 */
422
int fast_s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
423
{
424
   int     olduse, res, pa, ix, iz;
425
   mp_digit W[MP_WARRAY];
426
   mp_word  _W;
427

428
   /* grow the destination as required */
429
   pa = a->used + b->used;
430
   if (c->alloc < pa) {
431
      if ((res = mp_grow(c, pa)) != MP_OKAY) {
432
         return res;
433
      }
434
   }
435

436
   /* number of output digits to produce */
437
   pa = a->used + b->used;
438
   _W = 0;
439
   for (ix = digs; ix < pa; ix++) {
440
      int      tx, ty, iy;
441
      mp_digit *tmpx, *tmpy;
442

443
      /* get offsets into the two bignums */
444
      ty = MIN(b->used-1, ix);
445
      tx = ix - ty;
446

447
      /* setup temp aliases */
448
      tmpx = a->dp + tx;
449
      tmpy = b->dp + ty;
450

451
      /* this is the number of times the loop will iterrate, essentially its
452
         while (tx++ < a->used && ty-- >= 0) { ... }
453
       */
454
      iy = MIN(a->used-tx, ty+1);
455

456
      /* execute loop */
457
      for (iz = 0; iz < iy; iz++) {
458
         _W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
459
      }
460

461
      /* store term */
462
      W[ix] = (mp_digit)_W & MP_MASK;
463

464
      /* make next carry */
465
      _W = _W >> (mp_word)DIGIT_BIT;
466
   }
467

468
   /* setup dest */
469
   olduse  = c->used;
470
   c->used = pa;
471

472
   {
473
      mp_digit *tmpc;
474

475
      tmpc = c->dp + digs;
476
      for (ix = digs; ix < pa; ix++) {
477
         /* now extract the previous digit [below the carry] */
478
         *tmpc++ = W[ix];
479
      }
480

481
      /* clear unused digits [that existed in the old copy of c] */
482
      for (; ix < olduse; ix++) {
483
         *tmpc++ = 0;
484
      }
485
   }
486
   mp_clamp(c);
487
   return MP_OKAY;
488
}
489

490
/* End: bn_fast_s_mp_mul_high_digs.c */
491

492
/* Start: bn_fast_s_mp_sqr.c */
493

494
/* the jist of squaring...
495
 * you do like mult except the offset of the tmpx [one that
496
 * starts closer to zero] can't equal the offset of tmpy.
497
 * So basically you set up iy like before then you min it with
498
 * (ty-tx) so that it never happens.  You double all those
499
 * you add in the inner loop
500

501
After that loop you do the squares and add them in.
502
*/
503

504
int fast_s_mp_sqr(const mp_int *a, mp_int *b)
505
{
506
   int       olduse, res, pa, ix, iz;
507
   mp_digit   W[MP_WARRAY], *tmpx;
508
   mp_word   W1;
509

510
   /* grow the destination as required */
511
   pa = a->used + a->used;
512
   if (b->alloc < pa) {
513
      if ((res = mp_grow(b, pa)) != MP_OKAY) {
514
         return res;
515
      }
516
   }
517

518
   /* number of output digits to produce */
519
   W1 = 0;
520
   for (ix = 0; ix < pa; ix++) {
521
      int      tx, ty, iy;
522
      mp_word  _W;
523
      mp_digit *tmpy;
524

525
      /* clear counter */
526
      _W = 0;
527

528
      /* get offsets into the two bignums */
529
      ty = MIN(a->used-1, ix);
530
      tx = ix - ty;
531

532
      /* setup temp aliases */
533
      tmpx = a->dp + tx;
534
      tmpy = a->dp + ty;
535

536
      /* this is the number of times the loop will iterrate, essentially
537
         while (tx++ < a->used && ty-- >= 0) { ... }
538
       */
539
      iy = MIN(a->used-tx, ty+1);
540

541
      /* now for squaring tx can never equal ty
542
       * we halve the distance since they approach at a rate of 2x
543
       * and we have to round because odd cases need to be executed
544
       */
545
      iy = MIN(iy, ((ty-tx)+1)>>1);
546

547
      /* execute loop */
548
      for (iz = 0; iz < iy; iz++) {
549
         _W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
550
      }
551

552
      /* double the inner product and add carry */
553
      _W = _W + _W + W1;
554

555
      /* even columns have the square term in them */
556
      if (((unsigned)ix & 1u) == 0u) {
557
         _W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1];
558
      }
559

560
      /* store it */
561
      W[ix] = _W & MP_MASK;
562

563
      /* make next carry */
564
      W1 = _W >> (mp_word)DIGIT_BIT;
565
   }
566

567
   /* setup dest */
568
   olduse  = b->used;
569
   b->used = a->used+a->used;
570

571
   {
572
      mp_digit *tmpb;
573
      tmpb = b->dp;
574
      for (ix = 0; ix < pa; ix++) {
575
         *tmpb++ = W[ix] & MP_MASK;
576
      }
577

578
      /* clear unused digits [that existed in the old copy of c] */
579
      for (; ix < olduse; ix++) {
580
         *tmpb++ = 0;
581
      }
582
   }
583
   mp_clamp(b);
584
   return MP_OKAY;
585
}
586

587
/* End: bn_fast_s_mp_sqr.c */
588

589
/* Start: bn_mp_2expt.c */
590

591
/* computes a = 2**b
592
 *
593
 * Simple algorithm which zeroes the int, grows it then just sets one bit
594
 * as required.
595
 */
596
int mp_2expt(mp_int *a, int b)
597
{
598
   int     res;
599

600
   /* zero a as per default */
601
   mp_zero(a);
602

603
   /* grow a to accomodate the single bit */
604
   if ((res = mp_grow(a, (b / DIGIT_BIT) + 1)) != MP_OKAY) {
605
      return res;
606
   }
607

608
   /* set the used count of where the bit will go */
609
   a->used = (b / DIGIT_BIT) + 1;
610

611
   /* put the single bit in its place */
612
   a->dp[b / DIGIT_BIT] = (mp_digit)1 << (mp_digit)(b % DIGIT_BIT);
613

614
   return MP_OKAY;
615
}
616

617
/* End: bn_mp_2expt.c */
618

619
/* Start: bn_mp_abs.c */
620

621
/* b = |a|
622
 *
623
 * Simple function copies the input and fixes the sign to positive
624
 */
625
int mp_abs(const mp_int *a, mp_int *b)
626
{
627
   int     res;
628

629
   /* copy a to b */
630
   if (a != b) {
631
      if ((res = mp_copy(a, b)) != MP_OKAY) {
632
         return res;
633
      }
634
   }
635

636
   /* force the sign of b to positive */
637
   b->sign = MP_ZPOS;
638

639
   return MP_OKAY;
640
}
641

642
/* End: bn_mp_abs.c */
643

644
/* Start: bn_mp_add.c */
645

646
/* high level addition (handles signs) */
647
int mp_add(const mp_int *a, const mp_int *b, mp_int *c)
648
{
649
   int     sa, sb, res;
650

651
   /* get sign of both inputs */
652
   sa = a->sign;
653
   sb = b->sign;
654

655
   /* handle two cases, not four */
656
   if (sa == sb) {
657
      /* both positive or both negative */
658
      /* add their magnitudes, copy the sign */
659
      c->sign = sa;
660
      res = s_mp_add(a, b, c);
661
   } else {
662
      /* one positive, the other negative */
663
      /* subtract the one with the greater magnitude from */
664
      /* the one of the lesser magnitude.  The result gets */
665
      /* the sign of the one with the greater magnitude. */
666
      if (mp_cmp_mag(a, b) == MP_LT) {
667
         c->sign = sb;
668
         res = s_mp_sub(b, a, c);
669
      } else {
670
         c->sign = sa;
671
         res = s_mp_sub(a, b, c);
672
      }
673
   }
674
   return res;
675
}
676

677
/* End: bn_mp_add.c */
678

679
/* Start: bn_mp_add_d.c */
680

681
/* single digit addition */
682
int mp_add_d(const mp_int *a, mp_digit b, mp_int *c)
683
{
684
   int     res, ix, oldused;
685
   mp_digit *tmpa, *tmpc, mu;
686

687
   /* grow c as required */
688
   if (c->alloc < (a->used + 1)) {
689
      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
690
         return res;
691
      }
692
   }
693

694
   /* if a is negative and |a| >= b, call c = |a| - b */
695
   if ((a->sign == MP_NEG) && ((a->used > 1) || (a->dp[0] >= b))) {
696
      mp_int a_ = *a;
697
      /* temporarily fix sign of a */
698
      a_.sign = MP_ZPOS;
699

700
      /* c = |a| - b */
701
      res = mp_sub_d(&a_, b, c);
702

703
      /* fix sign  */
704
      c->sign = MP_NEG;
705

706
      /* clamp */
707
      mp_clamp(c);
708

709
      return res;
710
   }
711

712
   /* old number of used digits in c */
713
   oldused = c->used;
714

715
   /* source alias */
716
   tmpa    = a->dp;
717

718
   /* destination alias */
719
   tmpc    = c->dp;
720

721
   /* if a is positive */
722
   if (a->sign == MP_ZPOS) {
723
      /* add digit, after this we're propagating
724
       * the carry.
725
       */
726
      *tmpc   = *tmpa++ + b;
727
      mu      = *tmpc >> DIGIT_BIT;
728
      *tmpc++ &= MP_MASK;
729

730
      /* now handle rest of the digits */
731
      for (ix = 1; ix < a->used; ix++) {
732
         *tmpc   = *tmpa++ + mu;
733
         mu      = *tmpc >> DIGIT_BIT;
734
         *tmpc++ &= MP_MASK;
735
      }
736
      /* set final carry */
737
      ix++;
738
      *tmpc++  = mu;
739

740
      /* setup size */
741
      c->used = a->used + 1;
742
   } else {
743
      /* a was negative and |a| < b */
744
      c->used  = 1;
745

746
      /* the result is a single digit */
747
      if (a->used == 1) {
748
         *tmpc++  =  b - a->dp[0];
749
      } else {
750
         *tmpc++  =  b;
751
      }
752

753
      /* setup count so the clearing of oldused
754
       * can fall through correctly
755
       */
756
      ix       = 1;
757
   }
758

759
   /* sign always positive */
760
   c->sign = MP_ZPOS;
761

762
   /* now zero to oldused */
763
   while (ix++ < oldused) {
764
      *tmpc++ = 0;
765
   }
766
   mp_clamp(c);
767

768
   return MP_OKAY;
769
}
770

771
/* End: bn_mp_add_d.c */
772

773
/* Start: bn_mp_addmod.c */
774

775
/* d = a + b (mod c) */
776
int mp_addmod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)
777
{
778
   int     res;
779
   mp_int  t;
780

781
   if ((res = mp_init(&t)) != MP_OKAY) {
782
      return res;
783
   }
784

785
   if ((res = mp_add(a, b, &t)) != MP_OKAY) {
786
      mp_clear(&t);
787
      return res;
788
   }
789
   res = mp_mod(&t, c, d);
790
   mp_clear(&t);
791
   return res;
792
}
793

794
/* End: bn_mp_addmod.c */
795

796
/* Start: bn_mp_and.c */
797

798
/* AND two ints together */
799
int mp_and(const mp_int *a, const mp_int *b, mp_int *c)
800
{
801
   int     res, ix, px;
802
   mp_int  t;
803
   const mp_int *x;
804

805
   if (a->used > b->used) {
806
      if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
807
         return res;
808
      }
809
      px = b->used;
810
      x = b;
811
   } else {
812
      if ((res = mp_init_copy(&t, b)) != MP_OKAY) {
813
         return res;
814
      }
815
      px = a->used;
816
      x = a;
817
   }
818

819
   for (ix = 0; ix < px; ix++) {
820
      t.dp[ix] &= x->dp[ix];
821
   }
822

823
   /* zero digits above the last from the smallest mp_int */
824
   for (; ix < t.used; ix++) {
825
      t.dp[ix] = 0;
826
   }
827

828
   mp_clamp(&t);
829
   mp_exch(c, &t);
830
   mp_clear(&t);
831
   return MP_OKAY;
832
}
833

834
/* End: bn_mp_and.c */
835

836
/* Start: bn_mp_clamp.c */
837

838
/* trim unused digits
839
 *
840
 * This is used to ensure that leading zero digits are
841
 * trimed and the leading "used" digit will be non-zero
842
 * Typically very fast.  Also fixes the sign if there
843
 * are no more leading digits
844
 */
845
void mp_clamp(mp_int *a)
846
{
847
   /* decrease used while the most significant digit is
848
    * zero.
849
    */
850
   while ((a->used > 0) && (a->dp[a->used - 1] == 0u)) {
851
      --(a->used);
852
   }
853

854
   /* reset the sign flag if used == 0 */
855
   if (a->used == 0) {
856
      a->sign = MP_ZPOS;
857
   }
858
}
859

860
/* End: bn_mp_clamp.c */
861

862
/* Start: bn_mp_clear.c */
863

864
/* clear one (frees)  */
865
void mp_clear(mp_int *a)
866
{
867
   int i;
868

869
   /* only do anything if a hasn't been freed previously */
870
   if (a->dp != NULL) {
871
      /* first zero the digits */
872
      for (i = 0; i < a->used; i++) {
873
         a->dp[i] = 0;
874
      }
875

876
      /* free ram */
877
      XFREE(a->dp);
878

879
      /* reset members to make debugging easier */
880
      a->dp    = NULL;
881
      a->alloc = a->used = 0;
882
      a->sign  = MP_ZPOS;
883
   }
884
}
885

886
/* End: bn_mp_clear.c */
887

888
/* Start: bn_mp_clear_multi.c */
889

890
/* Clear a NULL terminated list of mp_int pointers.
 *
 * mp_clear is called on mp and on every following argument until the
 * first NULL pointer is met.
 */
void mp_clear_multi(mp_int *mp, ...)
{
   mp_int *cur;
   va_list ap;

   va_start(ap, mp);
   for (cur = mp; cur != NULL; cur = va_arg(ap, mp_int *)) {
      mp_clear(cur);
   }
   va_end(ap);
}
901

902
/* End: bn_mp_clear_multi.c */
903

904
/* Start: bn_mp_cmp.c */
905

906
/* compare two ints (signed)*/
907
int mp_cmp(const mp_int *a, const mp_int *b)
908
{
909
   /* compare based on sign */
910
   if (a->sign != b->sign) {
911
      if (a->sign == MP_NEG) {
912
         return MP_LT;
913
      } else {
914
         return MP_GT;
915
      }
916
   }
917

918
   /* compare digits */
919
   if (a->sign == MP_NEG) {
920
      /* if negative compare opposite direction */
921
      return mp_cmp_mag(b, a);
922
   } else {
923
      return mp_cmp_mag(a, b);
924
   }
925
}
926

927
/* End: bn_mp_cmp.c */
928

929
/* Start: bn_mp_cmp_d.c */
930

931
/* compare a digit */
932
int mp_cmp_d(const mp_int *a, mp_digit b)
933
{
934
   /* compare based on sign */
935
   if (a->sign == MP_NEG) {
936
      return MP_LT;
937
   }
938

939
   /* compare based on magnitude */
940
   if (a->used > 1) {
941
      return MP_GT;
942
   }
943

944
   /* compare the only digit of a to b */
945
   if (a->dp[0] > b) {
946
      return MP_GT;
947
   } else if (a->dp[0] < b) {
948
      return MP_LT;
949
   } else {
950
      return MP_EQ;
951
   }
952
}
953

954
/* End: bn_mp_cmp_d.c */
955

956
/* Start: bn_mp_cmp_mag.c */
957

958
/* compare maginitude of two ints (unsigned) */
959
int mp_cmp_mag(const mp_int *a, const mp_int *b)
960
{
961
   int     n;
962
   mp_digit *tmpa, *tmpb;
963

964
   /* compare based on # of non-zero digits */
965
   if (a->used > b->used) {
966
      return MP_GT;
967
   }
968

969
   if (a->used < b->used) {
970
      return MP_LT;
971
   }
972

973
   /* alias for a */
974
   tmpa = a->dp + (a->used - 1);
975

976
   /* alias for b */
977
   tmpb = b->dp + (a->used - 1);
978

979
   /* compare based on digits  */
980
   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
981
      if (*tmpa > *tmpb) {
982
         return MP_GT;
983
      }
984

985
      if (*tmpa < *tmpb) {
986
         return MP_LT;
987
      }
988
   }
989
   return MP_EQ;
990
}
991

992
/* End: bn_mp_cmp_mag.c */
993

994
/* Start: bn_mp_cnt_lsb.c */
995

996
static const int lnz[16] = {
997
   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
998
};
999

1000
/* Counts the number of lsbs which are zero before the first zero bit */
1001
int mp_cnt_lsb(const mp_int *a)
1002
{
1003
   int x;
1004
   mp_digit q, qq;
1005

1006
   /* easy out */
1007
   if (mp_iszero(a) == MP_YES) {
1008
      return 0;
1009
   }
1010

1011
   /* scan lower digits until non-zero */
1012
   for (x = 0; (x < a->used) && (a->dp[x] == 0u); x++) {}
1013
   q = a->dp[x];
1014
   x *= DIGIT_BIT;
1015

1016
   /* now scan this digit until a 1 is found */
1017
   if ((q & 1u) == 0u) {
1018
      do {
1019
         qq  = q & 15u;
1020
         x  += lnz[qq];
1021
         q >>= 4;
1022
      } while (qq == 0u);
1023
   }
1024
   return x;
1025
}
1026

1027
/* End: bn_mp_cnt_lsb.c */
1028

1029
/* Start: bn_mp_complement.c */
1030

1031
/* b = ~a */
1032
int mp_complement(const mp_int *a, mp_int *b)
1033
{
1034
   int res = mp_neg(a, b);
1035
   return (res == MP_OKAY) ? mp_sub_d(b, 1uL, b) : res;
1036
}
1037

1038
/* End: bn_mp_complement.c */
1039

1040
/* Start: bn_mp_copy.c */
1041

1042
/* copy, b = a */
1043
int mp_copy(const mp_int *a, mp_int *b)
1044
{
1045
   int     res, n;
1046

1047
   /* if dst == src do nothing */
1048
   if (a == b) {
1049
      return MP_OKAY;
1050
   }
1051

1052
   /* grow dest */
1053
   if (b->alloc < a->used) {
1054
      if ((res = mp_grow(b, a->used)) != MP_OKAY) {
1055
         return res;
1056
      }
1057
   }
1058

1059
   /* zero b and copy the parameters over */
1060
   {
1061
      mp_digit *tmpa, *tmpb;
1062

1063
      /* pointer aliases */
1064

1065
      /* source */
1066
      tmpa = a->dp;
1067

1068
      /* destination */
1069
      tmpb = b->dp;
1070

1071
      /* copy all the digits */
1072
      for (n = 0; n < a->used; n++) {
1073
         *tmpb++ = *tmpa++;
1074
      }
1075

1076
      /* clear high digits */
1077
      for (; n < b->used; n++) {
1078
         *tmpb++ = 0;
1079
      }
1080
   }
1081

1082
   /* copy used count and sign */
1083
   b->used = a->used;
1084
   b->sign = a->sign;
1085
   return MP_OKAY;
1086
}
1087

1088
/* End: bn_mp_copy.c */
1089

1090
/* Start: bn_mp_count_bits.c */
1091

1092
/* returns the number of bits in an int */
1093
int mp_count_bits(const mp_int *a)
1094
{
1095
   int     r;
1096
   mp_digit q;
1097

1098
   /* shortcut */
1099
   if (a->used == 0) {
1100
      return 0;
1101
   }
1102

1103
   /* get number of digits and add that */
1104
   r = (a->used - 1) * DIGIT_BIT;
1105

1106
   /* take the last digit and count the bits in it */
1107
   q = a->dp[a->used - 1];
1108
   while (q > (mp_digit)0) {
1109
      ++r;
1110
      q >>= (mp_digit)1;
1111
   }
1112
   return r;
1113
}
1114

1115
/* End: bn_mp_count_bits.c */
1116

1117
/* Start: bn_mp_div.c */
1118

1119
/* integer signed division.
1120
 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1121
 * HAC pp.598 Algorithm 14.20
1122
 *
1123
 * Note that the description in HAC is horribly
1124
 * incomplete.  For example, it doesn't consider
1125
 * the case where digits are removed from 'x' in
1126
 * the inner loop.  It also doesn't consider the
1127
 * case that y has fewer than three digits, etc..
1128
 *
1129
 * The overall algorithm is as described as
1130
 * 14.20 from HAC but fixed to treat these cases.
1131
*/
1132
int mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d)
1133
{
1134
   mp_int  q, x, y, t1, t2;
1135
   int     res, n, t, i, norm, neg;
1136

1137
   /* is divisor zero ? */
1138
   if (mp_iszero(b) == MP_YES) {
1139
      return MP_VAL;
1140
   }
1141

1142
   /* if a < b then q=0, r = a */
1143
   if (mp_cmp_mag(a, b) == MP_LT) {
1144
      if (d != NULL) {
1145
         res = mp_copy(a, d);
1146
      } else {
1147
         res = MP_OKAY;
1148
      }
1149
      if (c != NULL) {
1150
         mp_zero(c);
1151
      }
1152
      return res;
1153
   }
1154

1155
   if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) {
1156
      return res;
1157
   }
1158
   q.used = a->used + 2;
1159

1160
   if ((res = mp_init(&t1)) != MP_OKAY) {
1161
      goto LBL_Q;
1162
   }
1163

1164
   if ((res = mp_init(&t2)) != MP_OKAY) {
1165
      goto LBL_T1;
1166
   }
1167

1168
   if ((res = mp_init_copy(&x, a)) != MP_OKAY) {
1169
      goto LBL_T2;
1170
   }
1171

1172
   if ((res = mp_init_copy(&y, b)) != MP_OKAY) {
1173
      goto LBL_X;
1174
   }
1175

1176
   /* fix the sign */
1177
   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1178
   x.sign = y.sign = MP_ZPOS;
1179

1180
   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1181
   norm = mp_count_bits(&y) % DIGIT_BIT;
1182
   if (norm < (DIGIT_BIT - 1)) {
1183
      norm = (DIGIT_BIT - 1) - norm;
1184
      if ((res = mp_mul_2d(&x, norm, &x)) != MP_OKAY) {
1185
         goto LBL_Y;
1186
      }
1187
      if ((res = mp_mul_2d(&y, norm, &y)) != MP_OKAY) {
1188
         goto LBL_Y;
1189
      }
1190
   } else {
1191
      norm = 0;
1192
   }
1193

1194
   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1195
   n = x.used - 1;
1196
   t = y.used - 1;
1197

1198
   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1199
   if ((res = mp_lshd(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1200
      goto LBL_Y;
1201
   }
1202

1203
   while (mp_cmp(&x, &y) != MP_LT) {
1204
      ++(q.dp[n - t]);
1205
      if ((res = mp_sub(&x, &y, &x)) != MP_OKAY) {
1206
         goto LBL_Y;
1207
      }
1208
   }
1209

1210
   /* reset y by shifting it back down */
1211
   mp_rshd(&y, n - t);
1212

1213
   /* step 3. for i from n down to (t + 1) */
1214
   for (i = n; i >= (t + 1); i--) {
1215
      if (i > x.used) {
1216
         continue;
1217
      }
1218

1219
      /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1220
       * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1221
      if (x.dp[i] == y.dp[t]) {
1222
         q.dp[(i - t) - 1] = ((mp_digit)1 << (mp_digit)DIGIT_BIT) - (mp_digit)1;
1223
      } else {
1224
         mp_word tmp;
1225
         tmp = (mp_word)x.dp[i] << (mp_word)DIGIT_BIT;
1226
         tmp |= (mp_word)x.dp[i - 1];
1227
         tmp /= (mp_word)y.dp[t];
1228
         if (tmp > (mp_word)MP_MASK) {
1229
            tmp = MP_MASK;
1230
         }
1231
         q.dp[(i - t) - 1] = (mp_digit)(tmp & (mp_word)MP_MASK);
1232
      }
1233

1234
      /* while (q{i-t-1} * (yt * b + y{t-1})) >
1235
               xi * b**2 + xi-1 * b + xi-2
1236

1237
         do q{i-t-1} -= 1;
1238
      */
1239
      q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] + 1uL) & (mp_digit)MP_MASK;
1240
      do {
1241
         q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & (mp_digit)MP_MASK;
1242

1243
         /* find left hand */
1244
         mp_zero(&t1);
1245
         t1.dp[0] = ((t - 1) < 0) ? 0u : y.dp[t - 1];
1246
         t1.dp[1] = y.dp[t];
1247
         t1.used = 2;
1248
         if ((res = mp_mul_d(&t1, q.dp[(i - t) - 1], &t1)) != MP_OKAY) {
1249
            goto LBL_Y;
1250
         }
1251

1252
         /* find right hand */
1253
         t2.dp[0] = ((i - 2) < 0) ? 0u : x.dp[i - 2];
1254
         t2.dp[1] = ((i - 1) < 0) ? 0u : x.dp[i - 1];
1255
         t2.dp[2] = x.dp[i];
1256
         t2.used = 3;
1257
      } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1258

1259
      /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1260
      if ((res = mp_mul_d(&y, q.dp[(i - t) - 1], &t1)) != MP_OKAY) {
1261
         goto LBL_Y;
1262
      }
1263

1264
      if ((res = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) {
1265
         goto LBL_Y;
1266
      }
1267

1268
      if ((res = mp_sub(&x, &t1, &x)) != MP_OKAY) {
1269
         goto LBL_Y;
1270
      }
1271

1272
      /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1273
      if (x.sign == MP_NEG) {
1274
         if ((res = mp_copy(&y, &t1)) != MP_OKAY) {
1275
            goto LBL_Y;
1276
         }
1277
         if ((res = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) {
1278
            goto LBL_Y;
1279
         }
1280
         if ((res = mp_add(&x, &t1, &x)) != MP_OKAY) {
1281
            goto LBL_Y;
1282
         }
1283

1284
         q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & MP_MASK;
1285
      }
1286
   }
1287

1288
   /* now q is the quotient and x is the remainder
1289
    * [which we have to normalize]
1290
    */
1291

1292
   /* get sign before writing to c */
1293
   x.sign = (x.used == 0) ? MP_ZPOS : a->sign;
1294

1295
   if (c != NULL) {
1296
      mp_clamp(&q);
1297
      mp_exch(&q, c);
1298
      c->sign = neg;
1299
   }
1300

1301
   if (d != NULL) {
1302
      if ((res = mp_div_2d(&x, norm, &x, NULL)) != MP_OKAY) {
1303
         goto LBL_Y;
1304
      }
1305
      mp_exch(&x, d);
1306
   }
1307

1308
   res = MP_OKAY;
1309

1310
LBL_Y:
1311
   mp_clear(&y);
1312
LBL_X:
1313
   mp_clear(&x);
1314
LBL_T2:
1315
   mp_clear(&t2);
1316
LBL_T1:
1317
   mp_clear(&t1);
1318
LBL_Q:
1319
   mp_clear(&q);
1320
   return res;
1321
}
1322

1323
/* End: bn_mp_div.c */
1324

1325
/* Start: bn_mp_div_2.c */
1326

1327
/* b = a/2 */
1328
int mp_div_2(const mp_int *a, mp_int *b)
1329
{
1330
   int     x, res, oldused;
1331

1332
   /* copy */
1333
   if (b->alloc < a->used) {
1334
      if ((res = mp_grow(b, a->used)) != MP_OKAY) {
1335
         return res;
1336
      }
1337
   }
1338

1339
   oldused = b->used;
1340
   b->used = a->used;
1341
   {
1342
      mp_digit r, rr, *tmpa, *tmpb;
1343

1344
      /* source alias */
1345
      tmpa = a->dp + b->used - 1;
1346

1347
      /* dest alias */
1348
      tmpb = b->dp + b->used - 1;
1349

1350
      /* carry */
1351
      r = 0;
1352
      for (x = b->used - 1; x >= 0; x--) {
1353
         /* get the carry for the next iteration */
1354
         rr = *tmpa & 1u;
1355

1356
         /* shift the current digit, add in carry and store */
1357
         *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1358

1359
         /* forward carry to next iteration */
1360
         r = rr;
1361
      }
1362

1363
      /* zero excess digits */
1364
      tmpb = b->dp + b->used;
1365
      for (x = b->used; x < oldused; x++) {
1366
         *tmpb++ = 0;
1367
      }
1368
   }
1369
   b->sign = a->sign;
1370
   mp_clamp(b);
1371
   return MP_OKAY;
1372
}
1373

1374
/* End: bn_mp_div_2.c */
1375

1376
/* Start: bn_mp_div_2d.c */
1377

1378
/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1379
int mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d)
1380
{
1381
   mp_digit D, r, rr;
1382
   int     x, res;
1383

1384
   /* if the shift count is <= 0 then we do no work */
1385
   if (b <= 0) {
1386
      res = mp_copy(a, c);
1387
      if (d != NULL) {
1388
         mp_zero(d);
1389
      }
1390
      return res;
1391
   }
1392

1393
   /* copy */
1394
   if ((res = mp_copy(a, c)) != MP_OKAY) {
1395
      return res;
1396
   }
1397
   /* 'a' should not be used after here - it might be the same as d */
1398

1399
   /* get the remainder */
1400
   if (d != NULL) {
1401
      if ((res = mp_mod_2d(a, b, d)) != MP_OKAY) {
1402
         return res;
1403
      }
1404
   }
1405

1406
   /* shift by as many digits in the bit count */
1407
   if (b >= DIGIT_BIT) {
1408
      mp_rshd(c, b / DIGIT_BIT);
1409
   }
1410

1411
   /* shift any bit count < DIGIT_BIT */
1412
   D = (mp_digit)(b % DIGIT_BIT);
1413
   if (D != 0u) {
1414
      mp_digit *tmpc, mask, shift;
1415

1416
      /* mask */
1417
      mask = ((mp_digit)1 << D) - 1uL;
1418

1419
      /* shift for lsb */
1420
      shift = (mp_digit)DIGIT_BIT - D;
1421

1422
      /* alias */
1423
      tmpc = c->dp + (c->used - 1);
1424

1425
      /* carry */
1426
      r = 0;
1427
      for (x = c->used - 1; x >= 0; x--) {
1428
         /* get the lower  bits of this word in a temp */
1429
         rr = *tmpc & mask;
1430

1431
         /* shift the current word and mix in the carry bits from the previous word */
1432
         *tmpc = (*tmpc >> D) | (r << shift);
1433
         --tmpc;
1434

1435
         /* set the carry to the carry bits of the current word found above */
1436
         r = rr;
1437
      }
1438
   }
1439
   mp_clamp(c);
1440
   return MP_OKAY;
1441
}
1442

1443
/* End: bn_mp_div_2d.c */
1444

1445
/* Start: bn_mp_div_3.c */
1446

1447
/* divide by three (based on routine from MPI and the GMP manual) */
1448
int mp_div_3(const mp_int *a, mp_int *c, mp_digit *d)
1449
{
1450
   mp_int   q;
1451
   mp_word  w, t;
1452
   mp_digit b;
1453
   int      res, ix;
1454

1455
   /* b = 2**DIGIT_BIT / 3 */
1456
   b = ((mp_word)1 << (mp_word)DIGIT_BIT) / (mp_word)3;
1457

1458
   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1459
      return res;
1460
   }
1461

1462
   q.used = a->used;
1463
   q.sign = a->sign;
1464
   w = 0;
1465
   for (ix = a->used - 1; ix >= 0; ix--) {
1466
      w = (w << (mp_word)DIGIT_BIT) | (mp_word)a->dp[ix];
1467

1468
      if (w >= 3u) {
1469
         /* multiply w by [1/3] */
1470
         t = (w * (mp_word)b) >> (mp_word)DIGIT_BIT;
1471

1472
         /* now subtract 3 * [w/3] from w, to get the remainder */
1473
         w -= t+t+t;
1474

1475
         /* fixup the remainder as required since
1476
          * the optimization is not exact.
1477
          */
1478
         while (w >= 3u) {
1479
            t += 1u;
1480
            w -= 3u;
1481
         }
1482
      } else {
1483
         t = 0;
1484
      }
1485
      q.dp[ix] = (mp_digit)t;
1486
   }
1487

1488
   /* [optional] store the remainder */
1489
   if (d != NULL) {
1490
      *d = (mp_digit)w;
1491
   }
1492

1493
   /* [optional] store the quotient */
1494
   if (c != NULL) {
1495
      mp_clamp(&q);
1496
      mp_exch(&q, c);
1497
   }
1498
   mp_clear(&q);
1499

1500
   return res;
1501
}
1502

1503
/* End: bn_mp_div_3.c */
1504

1505
/* Start: bn_mp_div_d.c */
1506

1507
/* Returns 1 and stores the exponent in *p when b == 2**x for some
 * 0 <= x < DIGIT_BIT; returns 0 (leaving *p untouched) otherwise.
 */
static int s_is_power_of_two(mp_digit b, int *p)
{
   int      pos;
   mp_digit probe;

   /* zero, or more than one bit set: not a power of two */
   if ((b == 0u) || ((b & (b-1u)) != 0u)) {
      return 0;
   }

   /* slide a single bit upwards until it lines up with b */
   probe = (mp_digit)1;
   for (pos = 0; pos < DIGIT_BIT; pos++) {
      if (probe == b) {
         *p = pos;
         return 1;
      }
      probe <<= 1;
   }
   return 0;
}
1524

1525
/* single digit division (based on routine from MPI) */
1526
int mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
1527
{
1528
   mp_int  q;
1529
   mp_word w;
1530
   mp_digit t;
1531
   int     res, ix;
1532

1533
   /* cannot divide by zero */
1534
   if (b == 0u) {
1535
      return MP_VAL;
1536
   }
1537

1538
   /* quick outs */
1539
   if ((b == 1u) || (mp_iszero(a) == MP_YES)) {
1540
      if (d != NULL) {
1541
         *d = 0;
1542
      }
1543
      if (c != NULL) {
1544
         return mp_copy(a, c);
1545
      }
1546
      return MP_OKAY;
1547
   }
1548

1549
   /* power of two ? */
1550
   if (s_is_power_of_two(b, &ix) == 1) {
1551
      if (d != NULL) {
1552
         *d = a->dp[0] & (((mp_digit)1<<(mp_digit)ix) - 1uL);
1553
      }
1554
      if (c != NULL) {
1555
         return mp_div_2d(a, ix, c, NULL);
1556
      }
1557
      return MP_OKAY;
1558
   }
1559

1560
   /* three? */
1561
   if (b == 3u) {
1562
      return mp_div_3(a, c, d);
1563
   }
1564

1565
   /* no easy answer [c'est la vie].  Just division */
1566
   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1567
      return res;
1568
   }
1569

1570
   q.used = a->used;
1571
   q.sign = a->sign;
1572
   w = 0;
1573
   for (ix = a->used - 1; ix >= 0; ix--) {
1574
      w = (w << (mp_word)DIGIT_BIT) | (mp_word)a->dp[ix];
1575

1576
      if (w >= b) {
1577
         t = (mp_digit)(w / b);
1578
         w -= (mp_word)t * (mp_word)b;
1579
      } else {
1580
         t = 0;
1581
      }
1582
      q.dp[ix] = t;
1583
   }
1584

1585
   if (d != NULL) {
1586
      *d = (mp_digit)w;
1587
   }
1588

1589
   if (c != NULL) {
1590
      mp_clamp(&q);
1591
      mp_exch(&q, c);
1592
   }
1593
   mp_clear(&q);
1594

1595
   return res;
1596
}
1597

1598
/* End: bn_mp_div_d.c */
1599

1600
/* Start: bn_mp_dr_is_modulus.c */
1601

1602
/* determines if a number is a valid DR modulus */
1603
int mp_dr_is_modulus(const mp_int *a)
1604
{
1605
   int ix;
1606

1607
   /* must be at least two digits */
1608
   if (a->used < 2) {
1609
      return 0;
1610
   }
1611

1612
   /* must be of the form b**k - a [a <= b] so all
1613
    * but the first digit must be equal to -1 (mod b).
1614
    */
1615
   for (ix = 1; ix < a->used; ix++) {
1616
      if (a->dp[ix] != MP_MASK) {
1617
         return 0;
1618
      }
1619
   }
1620
   return 1;
1621
}
1622

1623
/* End: bn_mp_dr_is_modulus.c */
1624

1625
/* Start: bn_mp_dr_reduce.c */
1626

1627
/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1628
 *
1629
 * Based on algorithm from the paper
1630
 *
1631
 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1632
 *                 Chae Hoon Lim, Pil Joong Lee,
1633
 *          POSTECH Information Research Laboratories
1634
 *
1635
 * The modulus must be of a special format [see manual]
1636
 *
1637
 * Has been modified to use algorithm 7.10 from the LTM book instead
1638
 *
1639
 * Input x must be in the range 0 <= x <= (n-1)**2
1640
 */
1641
int mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k)
1642
{
1643
   int      err, i, m;
1644
   mp_word  r;
1645
   mp_digit mu, *tmpx1, *tmpx2;
1646

1647
   /* m = digits in modulus */
1648
   m = n->used;
1649

1650
   /* ensure that "x" has at least 2m digits */
1651
   if (x->alloc < (m + m)) {
1652
      if ((err = mp_grow(x, m + m)) != MP_OKAY) {
1653
         return err;
1654
      }
1655
   }
1656

1657
   /* top of loop, this is where the code resumes if
1658
    * another reduction pass is required.
1659
    */
1660
top:
1661
   /* aliases for digits */
1662
   /* alias for lower half of x */
1663
   tmpx1 = x->dp;
1664

1665
   /* alias for upper half of x, or x/B**m */
1666
   tmpx2 = x->dp + m;
1667

1668
   /* set carry to zero */
1669
   mu = 0;
1670

1671
   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1672
   for (i = 0; i < m; i++) {
1673
      r         = ((mp_word)*tmpx2++ * (mp_word)k) + *tmpx1 + mu;
1674
      *tmpx1++  = (mp_digit)(r & MP_MASK);
1675
      mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1676
   }
1677

1678
   /* set final carry */
1679
   *tmpx1++ = mu;
1680

1681
   /* zero words above m */
1682
   for (i = m + 1; i < x->used; i++) {
1683
      *tmpx1++ = 0;
1684
   }
1685

1686
   /* clamp, sub and return */
1687
   mp_clamp(x);
1688

1689
   /* if x >= n then subtract and reduce again
1690
    * Each successive "recursion" makes the input smaller and smaller.
1691
    */
1692
   if (mp_cmp_mag(x, n) != MP_LT) {
1693
      if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
1694
         return err;
1695
      }
1696
      goto top;
1697
   }
1698
   return MP_OKAY;
1699
}
1700

1701
/* End: bn_mp_dr_reduce.c */
1702

1703
/* Start: bn_mp_dr_setup.c */
1704
#include "tommath_private.h"
1705

1706
/* determines the setup value */
1707
void mp_dr_setup(const mp_int *a, mp_digit *d)
1708
{
1709
   /* the casts are required if DIGIT_BIT is one less than
1710
    * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1711
    */
1712
   *d = (mp_digit)(((mp_word)1 << (mp_word)DIGIT_BIT) - (mp_word)a->dp[0]);
1713
}
1714

1715
/* End: bn_mp_dr_setup.c */
1716

1717
/* Start: bn_mp_exch.c */
1718

1719
/* swap the elements of two integers, for cases where you can't simply swap the
1720
 * mp_int pointers around
1721
 */
1722
void mp_exch(mp_int *a, mp_int *b)
1723
{
1724
   mp_int  t;
1725

1726
   t  = *a;
1727
   *a = *b;
1728
   *b = t;
1729
}
1730

1731
/* End: bn_mp_exch.c */
1732

1733
/* Start: bn_mp_export.c */
1734

1735
/* based on gmp's mpz_export.
1736
 * see http://gmplib.org/manual/Integer-Import-and-Export.html
1737
 */
1738
int mp_export(void *rop, size_t *countp, int order, size_t size,
1739
              int endian, size_t nails, const mp_int *op)
1740
{
1741
   int result;
1742
   size_t odd_nails, nail_bytes, i, j, bits, count;
1743
   unsigned char odd_nail_mask;
1744

1745
   mp_int t;
1746

1747
   if ((result = mp_init_copy(&t, op)) != MP_OKAY) {
1748
      return result;
1749
   }
1750

1751
   if (endian == 0) {
1752
      union {
1753
         unsigned int i;
1754
         char c[4];
1755
      } lint;
1756
      lint.i = 0x01020304;
1757

1758
      endian = (lint.c[0] == '\x04') ? -1 : 1;
1759
   }
1760

1761
   odd_nails = (nails % 8u);
1762
   odd_nail_mask = 0xff;
1763
   for (i = 0; i < odd_nails; ++i) {
1764
      odd_nail_mask ^= (unsigned char)(1u << (7u - i));
1765
   }
1766
   nail_bytes = nails / 8u;
1767

1768
   bits = (size_t)mp_count_bits(&t);
1769
   count = (bits / ((size * 8u) - nails)) + (((bits % ((size * 8u) - nails)) != 0u) ? 1u : 0u);
1770

1771
   for (i = 0; i < count; ++i) {
1772
      for (j = 0; j < size; ++j) {
1773
         unsigned char *byte = (unsigned char *)rop +
1774
                               (((order == -1) ? i : ((count - 1u) - i)) * size) +
1775
                               ((endian == -1) ? j : ((size - 1u) - j));
1776

1777
         if (j >= (size - nail_bytes)) {
1778
            *byte = 0;
1779
            continue;
1780
         }
1781

1782
         *byte = (unsigned char)((j == ((size - nail_bytes) - 1u)) ? (t.dp[0] & odd_nail_mask) : (t.dp[0] & 0xFFuL));
1783

1784
         if ((result = mp_div_2d(&t, (j == ((size - nail_bytes) - 1u)) ? (int)(8u - odd_nails) : 8, &t, NULL)) != MP_OKAY) {
1785
            mp_clear(&t);
1786
            return result;
1787
         }
1788
      }
1789
   }
1790

1791
   mp_clear(&t);
1792

1793
   if (countp != NULL) {
1794
      *countp = count;
1795
   }
1796

1797
   return MP_OKAY;
1798
}
1799

1800
/* End: bn_mp_export.c */
1801

1802
/* Start: bn_mp_expt_d.c */
1803

1804
/* wrapper function for mp_expt_d_ex() */
1805
int mp_expt_d(const mp_int *a, mp_digit b, mp_int *c)
1806
{
1807
   return mp_expt_d_ex(a, b, c, 0);
1808
}
1809

1810
/* End: bn_mp_expt_d.c */
1811

1812
/* Start: bn_mp_expt_d_ex.c */
1813

1814
/* calculate c = a**b  using a square-multiply algorithm */
1815
int mp_expt_d_ex(const mp_int *a, mp_digit b, mp_int *c, int fast)
1816
{
1817
   int     res;
1818
   unsigned int x;
1819

1820
   mp_int  g;
1821

1822
   if ((res = mp_init_copy(&g, a)) != MP_OKAY) {
1823
      return res;
1824
   }
1825

1826
   /* set initial result */
1827
   mp_set(c, 1uL);
1828

1829
   if (fast != 0) {
1830
      while (b > 0u) {
1831
         /* if the bit is set multiply */
1832
         if ((b & 1u) != 0u) {
1833
            if ((res = mp_mul(c, &g, c)) != MP_OKAY) {
1834
               mp_clear(&g);
1835
               return res;
1836
            }
1837
         }
1838

1839
         /* square */
1840
         if (b > 1u) {
1841
            if ((res = mp_sqr(&g, &g)) != MP_OKAY) {
1842
               mp_clear(&g);
1843
               return res;
1844
            }
1845
         }
1846

1847
         /* shift to next bit */
1848
         b >>= 1;
1849
      }
1850
   } else {
1851
      for (x = 0; x < (unsigned)DIGIT_BIT; x++) {
1852
         /* square */
1853
         if ((res = mp_sqr(c, c)) != MP_OKAY) {
1854
            mp_clear(&g);
1855
            return res;
1856
         }
1857

1858
         /* if the bit is set multiply */
1859
         if ((b & ((mp_digit)1 << (DIGIT_BIT - 1))) != 0u) {
1860
            if ((res = mp_mul(c, &g, c)) != MP_OKAY) {
1861
               mp_clear(&g);
1862
               return res;
1863
            }
1864
         }
1865

1866
         /* shift to next bit */
1867
         b <<= 1;
1868
      }
1869
   } /* if ... else */
1870

1871
   mp_clear(&g);
1872
   return MP_OKAY;
1873
}
1874

1875
/* End: bn_mp_expt_d_ex.c */
1876

1877
/* Start: bn_mp_exptmod.c */
1878

1879
/* this is a shell function that calls either the normal or Montgomery
1880
 * exptmod functions.  Originally the call to the montgomery code was
1881
 * embedded in the normal function but that wasted alot of stack space
1882
 * for nothing (since 99% of the time the Montgomery code would be called)
1883
 */
1884
int mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y)
1885
{
1886
   int dr;
1887

1888
   /* modulus P must be positive */
1889
   if (P->sign == MP_NEG) {
1890
      return MP_VAL;
1891
   }
1892

1893
   /* if exponent X is negative we have to recurse */
1894
   if (X->sign == MP_NEG) {
1895
      mp_int tmpG, tmpX;
1896
      int err;
1897

1898
      /* first compute 1/G mod P */
1899
      if ((err = mp_init(&tmpG)) != MP_OKAY) {
1900
         return err;
1901
      }
1902
      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1903
         mp_clear(&tmpG);
1904
         return err;
1905
      }
1906

1907
      /* now get |X| */
1908
      if ((err = mp_init(&tmpX)) != MP_OKAY) {
1909
         mp_clear(&tmpG);
1910
         return err;
1911
      }
1912
      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1913
         mp_clear_multi(&tmpG, &tmpX, NULL);
1914
         return err;
1915
      }
1916

1917
      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1918
      err = mp_exptmod(&tmpG, &tmpX, P, Y);
1919
      mp_clear_multi(&tmpG, &tmpX, NULL);
1920
      return err;
1921
   }
1922

1923
   /* modified diminished radix reduction */
1924
   if (mp_reduce_is_2k_l(P) == MP_YES) {
1925
      return s_mp_exptmod(G, X, P, Y, 1);
1926
   }
1927

1928
   /* is it a DR modulus? */
1929
   dr = mp_dr_is_modulus(P);
1930

1931
   /* if not, is it a unrestricted DR modulus? */
1932
   if (dr == 0) {
1933
      dr = mp_reduce_is_2k(P) << 1;
1934
   }
1935

1936
   /* if the modulus is odd or dr != 0 use the montgomery method */
1937
   if ((mp_isodd(P) == MP_YES) || (dr !=  0)) {
1938
      return mp_exptmod_fast(G, X, P, Y, dr);
1939
   } else {
1940
      /* otherwise use the generic Barrett reduction technique */
1941
      return s_mp_exptmod(G, X, P, Y, 0);
1942
   }
1943
}
1944

1945
/* End: bn_mp_exptmod.c */
1946

1947
/* Start: bn_mp_exptmod_fast.c */
1948

1949
/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1950
 *
1951
 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1952
 * The value of k changes based on the size of the exponent.
1953
 *
1954
 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1955
 */
1956

1957
#ifdef MP_LOW_MEM
1958
#   define TAB_SIZE 32
1959
#else
1960
#   define TAB_SIZE 256
1961
#endif
1962

1963
int mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)
1964
{
1965
   mp_int  M[TAB_SIZE], res;
1966
   mp_digit buf, mp;
1967
   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1968

1969
   /* use a pointer to the reduction algorithm.  This allows us to use
1970
    * one of many reduction algorithms without modding the guts of
1971
    * the code with if statements everywhere.
1972
    */
1973
   int (*redux)(mp_int *x, const mp_int *n, mp_digit rho);
1974

1975
   /* find window size */
1976
   x = mp_count_bits(X);
1977
   if (x <= 7) {
1978
      winsize = 2;
1979
   } else if (x <= 36) {
1980
      winsize = 3;
1981
   } else if (x <= 140) {
1982
      winsize = 4;
1983
   } else if (x <= 450) {
1984
      winsize = 5;
1985
   } else if (x <= 1303) {
1986
      winsize = 6;
1987
   } else if (x <= 3529) {
1988
      winsize = 7;
1989
   } else {
1990
      winsize = 8;
1991
   }
1992

1993
#ifdef MP_LOW_MEM
1994
   if (winsize > 5) {
1995
      winsize = 5;
1996
   }
1997
#endif
1998

1999
   /* init M array */
2000
   /* init first cell */
2001
   if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
2002
      return err;
2003
   }
2004

2005
   /* now init the second half of the array */
2006
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2007
      if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
2008
         for (y = 1<<(winsize-1); y < x; y++) {
2009
            mp_clear(&M[y]);
2010
         }
2011
         mp_clear(&M[1]);
2012
         return err;
2013
      }
2014
   }
2015

2016
   /* determine and setup reduction code */
2017
   if (redmode == 0) {
2018
      /* now setup montgomery  */
2019
      if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
2020
         goto LBL_M;
2021
      }
2022

2023
      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
2024
      if ((((P->used * 2) + 1) < (int)MP_WARRAY) &&
2025
          (P->used < (1 << ((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))))) {
2026
         redux = fast_mp_montgomery_reduce;
2027
      } else
2028
      {
2029
         /* use slower baseline Montgomery method */
2030
         redux = mp_montgomery_reduce;
2031
      }
2032
   } else if (redmode == 1) {
2033
      /* setup DR reduction for moduli of the form B**k - b */
2034
      mp_dr_setup(P, &mp);
2035
      redux = mp_dr_reduce;
2036
   } else {
2037
      /* setup DR reduction for moduli of the form 2**k - b */
2038
      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
2039
         goto LBL_M;
2040
      }
2041
      redux = mp_reduce_2k;
2042
   }
2043

2044
   /* setup result */
2045
   if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY) {
2046
      goto LBL_M;
2047
   }
2048

2049
   /* create M table
2050
    *
2051

2052
    *
2053
    * The first half of the table is not computed though accept for M[0] and M[1]
2054
    */
2055

2056
   if (redmode == 0) {
2057
      /* now we need R mod m */
2058
      if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) {
2059
         goto LBL_RES;
2060
      }
2061

2062
      /* now set M[1] to G * R mod m */
2063
      if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY) {
2064
         goto LBL_RES;
2065
      }
2066
   } else {
2067
      mp_set(&res, 1uL);
2068
      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2069
         goto LBL_RES;
2070
      }
2071
   }
2072

2073
   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
2074
   if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
2075
      goto LBL_RES;
2076
   }
2077

2078
   for (x = 0; x < (winsize - 1); x++) {
2079
      if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
2080
         goto LBL_RES;
2081
      }
2082
      if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) {
2083
         goto LBL_RES;
2084
      }
2085
   }
2086

2087
   /* create upper table */
2088
   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2089
      if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2090
         goto LBL_RES;
2091
      }
2092
      if ((err = redux(&M[x], P, mp)) != MP_OKAY) {
2093
         goto LBL_RES;
2094
      }
2095
   }
2096

2097
   /* set initial mode and bit cnt */
2098
   mode   = 0;
2099
   bitcnt = 1;
2100
   buf    = 0;
2101
   digidx = X->used - 1;
2102
   bitcpy = 0;
2103
   bitbuf = 0;
2104

2105
   for (;;) {
2106
      /* grab next digit as required */
2107
      if (--bitcnt == 0) {
2108
         /* if digidx == -1 we are out of digits so break */
2109
         if (digidx == -1) {
2110
            break;
2111
         }
2112
         /* read next digit and reset bitcnt */
2113
         buf    = X->dp[digidx--];
2114
         bitcnt = (int)DIGIT_BIT;
2115
      }
2116

2117
      /* grab the next msb from the exponent */
2118
      y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
2119
      buf <<= (mp_digit)1;
2120

2121
      /* if the bit is zero and mode == 0 then we ignore it
2122
       * These represent the leading zero bits before the first 1 bit
2123
       * in the exponent.  Technically this opt is not required but it
2124
       * does lower the # of trivial squaring/reductions used
2125
       */
2126
      if ((mode == 0) && (y == 0)) {
2127
         continue;
2128
      }
2129

2130
      /* if the bit is zero and mode == 1 then we square */
2131
      if ((mode == 1) && (y == 0)) {
2132
         if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
2133
            goto LBL_RES;
2134
         }
2135
         if ((err = redux(&res, P, mp)) != MP_OKAY) {
2136
            goto LBL_RES;
2137
         }
2138
         continue;
2139
      }
2140

2141
      /* else we add it to the window */
2142
      bitbuf |= (y << (winsize - ++bitcpy));
2143
      mode    = 2;
2144

2145
      if (bitcpy == winsize) {
2146
         /* ok window is filled so square as required and multiply  */
2147
         /* square first */
2148
         for (x = 0; x < winsize; x++) {
2149
            if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
2150
               goto LBL_RES;
2151
            }
2152
            if ((err = redux(&res, P, mp)) != MP_OKAY) {
2153
               goto LBL_RES;
2154
            }
2155
         }
2156

2157
         /* then multiply */
2158
         if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {
2159
            goto LBL_RES;
2160
         }
2161
         if ((err = redux(&res, P, mp)) != MP_OKAY) {
2162
            goto LBL_RES;
2163
         }
2164

2165
         /* empty window and reset */
2166
         bitcpy = 0;
2167
         bitbuf = 0;
2168
         mode   = 1;
2169
      }
2170
   }
2171

2172
   /* if bits remain then square/multiply */
2173
   if ((mode == 2) && (bitcpy > 0)) {
2174
      /* square then multiply if the bit is set */
2175
      for (x = 0; x < bitcpy; x++) {
2176
         if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
2177
            goto LBL_RES;
2178
         }
2179
         if ((err = redux(&res, P, mp)) != MP_OKAY) {
2180
            goto LBL_RES;
2181
         }
2182

2183
         /* get next bit of the window */
2184
         bitbuf <<= 1;
2185
         if ((bitbuf & (1 << winsize)) != 0) {
2186
            /* then multiply */
2187
            if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
2188
               goto LBL_RES;
2189
            }
2190
            if ((err = redux(&res, P, mp)) != MP_OKAY) {
2191
               goto LBL_RES;
2192
            }
2193
         }
2194
      }
2195
   }
2196

2197
   if (redmode == 0) {
2198
      /* fixup result if Montgomery reduction is used
2199
       * recall that any value in a Montgomery system is
2200
       * actually multiplied by R mod n.  So we have
2201
       * to reduce one more time to cancel out the factor
2202
       * of R.
2203
       */
2204
      if ((err = redux(&res, P, mp)) != MP_OKAY) {
2205
         goto LBL_RES;
2206
      }
2207
   }
2208

2209
   /* swap res with Y */
2210
   mp_exch(&res, Y);
2211
   err = MP_OKAY;
2212
LBL_RES:
2213
   mp_clear(&res);
2214
LBL_M:
2215
   mp_clear(&M[1]);
2216
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2217
      mp_clear(&M[x]);
2218
   }
2219
   return err;
2220
}
2221

2222
/* End: bn_mp_exptmod_fast.c */
2223

2224
/* Start: bn_mp_exteuclid.c */
2225

2226
/* Extended euclidean algorithm of (a, b) produces
2227
   a*u1 + b*u2 = u3
2228
 */
2229
int mp_exteuclid(const mp_int *a, const mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3)
2230
{
2231
   mp_int u1, u2, u3, v1, v2, v3, t1, t2, t3, q, tmp;
2232
   int err;
2233

2234
   if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) {
2235
      return err;
2236
   }
2237

2238
   /* initialize, (u1,u2,u3) = (1,0,a) */
2239
   mp_set(&u1, 1uL);
2240
   if ((err = mp_copy(a, &u3)) != MP_OKAY) {
2241
      goto LBL_ERR;
2242
   }
2243

2244
   /* initialize, (v1,v2,v3) = (0,1,b) */
2245
   mp_set(&v2, 1uL);
2246
   if ((err = mp_copy(b, &v3)) != MP_OKAY) {
2247
      goto LBL_ERR;
2248
   }
2249

2250
   /* loop while v3 != 0 */
2251
   while (mp_iszero(&v3) == MP_NO) {
2252
      /* q = u3/v3 */
2253
      if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) {
2254
         goto LBL_ERR;
2255
      }
2256

2257
      /* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */
2258
      if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) {
2259
         goto LBL_ERR;
2260
      }
2261
      if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) {
2262
         goto LBL_ERR;
2263
      }
2264
      if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) {
2265
         goto LBL_ERR;
2266
      }
2267
      if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) {
2268
         goto LBL_ERR;
2269
      }
2270
      if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) {
2271
         goto LBL_ERR;
2272
      }
2273
      if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) {
2274
         goto LBL_ERR;
2275
      }
2276

2277
      /* (u1,u2,u3) = (v1,v2,v3) */
2278
      if ((err = mp_copy(&v1, &u1)) != MP_OKAY) {
2279
         goto LBL_ERR;
2280
      }
2281
      if ((err = mp_copy(&v2, &u2)) != MP_OKAY) {
2282
         goto LBL_ERR;
2283
      }
2284
      if ((err = mp_copy(&v3, &u3)) != MP_OKAY) {
2285
         goto LBL_ERR;
2286
      }
2287

2288
      /* (v1,v2,v3) = (t1,t2,t3) */
2289
      if ((err = mp_copy(&t1, &v1)) != MP_OKAY) {
2290
         goto LBL_ERR;
2291
      }
2292
      if ((err = mp_copy(&t2, &v2)) != MP_OKAY) {
2293
         goto LBL_ERR;
2294
      }
2295
      if ((err = mp_copy(&t3, &v3)) != MP_OKAY) {
2296
         goto LBL_ERR;
2297
      }
2298
   }
2299

2300
   /* make sure U3 >= 0 */
2301
   if (u3.sign == MP_NEG) {
2302
      if ((err = mp_neg(&u1, &u1)) != MP_OKAY) {
2303
         goto LBL_ERR;
2304
      }
2305
      if ((err = mp_neg(&u2, &u2)) != MP_OKAY) {
2306
         goto LBL_ERR;
2307
      }
2308
      if ((err = mp_neg(&u3, &u3)) != MP_OKAY) {
2309
         goto LBL_ERR;
2310
      }
2311
   }
2312

2313
   /* copy result out */
2314
   if (U1 != NULL) {
2315
      mp_exch(U1, &u1);
2316
   }
2317
   if (U2 != NULL) {
2318
      mp_exch(U2, &u2);
2319
   }
2320
   if (U3 != NULL) {
2321
      mp_exch(U3, &u3);
2322
   }
2323

2324
   err = MP_OKAY;
2325
LBL_ERR:
2326
   mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL);
2327
   return err;
2328
}
2329

2330
/* End: bn_mp_exteuclid.c */
2331

2332
/* Start: bn_mp_gcd.c */
2333

2334
/* Greatest Common Divisor using the binary method */
2335
int mp_gcd(const mp_int *a, const mp_int *b, mp_int *c)
2336
{
2337
   mp_int  u, v;
2338
   int     k, u_lsb, v_lsb, res;
2339

2340
   /* either zero than gcd is the largest */
2341
   if (mp_iszero(a) == MP_YES) {
2342
      return mp_abs(b, c);
2343
   }
2344
   if (mp_iszero(b) == MP_YES) {
2345
      return mp_abs(a, c);
2346
   }
2347

2348
   /* get copies of a and b we can modify */
2349
   if ((res = mp_init_copy(&u, a)) != MP_OKAY) {
2350
      return res;
2351
   }
2352

2353
   if ((res = mp_init_copy(&v, b)) != MP_OKAY) {
2354
      goto LBL_U;
2355
   }
2356

2357
   /* must be positive for the remainder of the algorithm */
2358
   u.sign = v.sign = MP_ZPOS;
2359

2360
   /* B1.  Find the common power of two for u and v */
2361
   u_lsb = mp_cnt_lsb(&u);
2362
   v_lsb = mp_cnt_lsb(&v);
2363
   k     = MIN(u_lsb, v_lsb);
2364

2365
   if (k > 0) {
2366
      /* divide the power of two out */
2367
      if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
2368
         goto LBL_V;
2369
      }
2370

2371
      if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
2372
         goto LBL_V;
2373
      }
2374
   }
2375

2376
   /* divide any remaining factors of two out */
2377
   if (u_lsb != k) {
2378
      if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
2379
         goto LBL_V;
2380
      }
2381
   }
2382

2383
   if (v_lsb != k) {
2384
      if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
2385
         goto LBL_V;
2386
      }
2387
   }
2388

2389
   while (mp_iszero(&v) == MP_NO) {
2390
      /* make sure v is the largest */
2391
      if (mp_cmp_mag(&u, &v) == MP_GT) {
2392
         /* swap u and v to make sure v is >= u */
2393
         mp_exch(&u, &v);
2394
      }
2395

2396
      /* subtract smallest from largest */
2397
      if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
2398
         goto LBL_V;
2399
      }
2400

2401
      /* Divide out all factors of two */
2402
      if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
2403
         goto LBL_V;
2404
      }
2405
   }
2406

2407
   /* multiply by 2**k which we divided out at the beginning */
2408
   if ((res = mp_mul_2d(&u, k, c)) != MP_OKAY) {
2409
      goto LBL_V;
2410
   }
2411
   c->sign = MP_ZPOS;
2412
   res = MP_OKAY;
2413
LBL_V:
2414
   mp_clear(&u);
2415
LBL_U:
2416
   mp_clear(&v);
2417
   return res;
2418
}
2419

2420
/* End: bn_mp_gcd.c */
2421

2422
/* Start: bn_mp_get_bit.c */
2423

2424
/* Checks the bit at position b and returns MP_YES
2425
   if the bit is 1, MP_NO if it is 0 and MP_VAL
2426
   in case of error */
2427
int mp_get_bit(const mp_int *a, int b)
2428
{
2429
   int limb;
2430
   mp_digit bit, isset;
2431

2432
   if (b < 0) {
2433
      return MP_VAL;
2434
   }
2435

2436
   limb = b / DIGIT_BIT;
2437

2438
   /*
2439
    * Zero is a special value with the member "used" set to zero.
2440
    * Needs to be tested before the check for the upper boundary
2441
    * otherwise (limb >= a->used) would be true for a = 0
2442
    */
2443

2444
   if (mp_iszero(a) != MP_NO) {
2445
      return MP_NO;
2446
   }
2447

2448
   if (limb >= a->used) {
2449
      return MP_VAL;
2450
   }
2451

2452
   bit = (mp_digit)(1) << (b % DIGIT_BIT);
2453

2454
   isset = a->dp[limb] & bit;
2455
   return (isset != 0u) ? MP_YES : MP_NO;
2456
}
2457

2458
/* End: bn_mp_get_bit.c */
2459

2460
/* Start: bn_mp_get_double.c */
2461

2462
double mp_get_double(const mp_int *a)
2463
{
2464
   int i;
2465
   double d = 0.0, fac = 1.0;
2466
   for (i = 0; i < DIGIT_BIT; ++i) {
2467
      fac *= 2.0;
2468
   }
2469
   for (i = USED(a); i --> 0;) {
2470
      d = (d * fac) + (double)DIGIT(a, i);
2471
   }
2472
   return (mp_isneg(a) != MP_NO) ? -d : d;
2473
}
2474

2475
/* End: bn_mp_get_double.c */
2476

2477
/* Start: bn_mp_get_int.c */
2478

2479
/* get the lower 32-bits of an mp_int */
2480
unsigned long mp_get_int(const mp_int *a)
2481
{
2482
   int i;
2483
   mp_min_u32 res;
2484

2485
   if (a->used == 0) {
2486
      return 0;
2487
   }
2488

2489
   /* get number of digits of the lsb we have to read */
2490
   i = MIN(a->used, ((((int)sizeof(unsigned long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;
2491

2492
   /* get most significant digit of result */
2493
   res = DIGIT(a, i);
2494

2495
   while (--i >= 0) {
2496
      res = (res << DIGIT_BIT) | DIGIT(a, i);
2497
   }
2498

2499
   /* force result to 32-bits always so it is consistent on non 32-bit platforms */
2500
   return res & 0xFFFFFFFFUL;
2501
}
2502

2503
/* End: bn_mp_get_int.c */
2504

2505
/* Start: bn_mp_get_long.c */
2506

2507
/* get the lower unsigned long of an mp_int, platform dependent */
2508
unsigned long mp_get_long(const mp_int *a)
2509
{
2510
   int i;
2511
   unsigned long res;
2512

2513
   if (a->used == 0) {
2514
      return 0;
2515
   }
2516

2517
   /* get number of digits of the lsb we have to read */
2518
   i = MIN(a->used, ((((int)sizeof(unsigned long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;
2519

2520
   /* get most significant digit of result */
2521
   res = DIGIT(a, i);
2522

2523
#if (ULONG_MAX != 0xffffffffuL) || (DIGIT_BIT < 32)
2524
   while (--i >= 0) {
2525
      res = (res << DIGIT_BIT) | DIGIT(a, i);
2526
   }
2527
#endif
2528
   return res;
2529
}
2530

2531
/* End: bn_mp_get_long.c */
2532

2533
/* Start: bn_mp_get_long_long.c */
2534

2535
/* get the lower unsigned long long of an mp_int, platform dependent */
2536
unsigned long long mp_get_long_long(const mp_int *a)
2537
{
2538
   int i;
2539
   unsigned long long res;
2540

2541
   if (a->used == 0) {
2542
      return 0;
2543
   }
2544

2545
   /* get number of digits of the lsb we have to read */
2546
   i = MIN(a->used, ((((int)sizeof(unsigned long long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;
2547

2548
   /* get most significant digit of result */
2549
   res = DIGIT(a, i);
2550

2551
#if DIGIT_BIT < 64
2552
   while (--i >= 0) {
2553
      res = (res << DIGIT_BIT) | DIGIT(a, i);
2554
   }
2555
#endif
2556
   return res;
2557
}
2558

2559
/* End: bn_mp_get_long_long.c */
2560

2561
/* Start: bn_mp_grow.c */
2562

2563
/* grow as required */
2564
int mp_grow(mp_int *a, int size)
2565
{
2566
   int     i;
2567
   mp_digit *tmp;
2568

2569
   /* if the alloc size is smaller alloc more ram */
2570
   if (a->alloc < size) {
2571
      /* ensure there are always at least MP_PREC digits extra on top */
2572
      size += (MP_PREC * 2) - (size % MP_PREC);
2573

2574
      /* reallocate the array a->dp
2575
       *
2576
       * We store the return in a temporary variable
2577
       * in case the operation failed we don't want
2578
       * to overwrite the dp member of a.
2579
       */
2580
      tmp = OPT_CAST(mp_digit) XREALLOC(a->dp, sizeof(mp_digit) * (size_t)size);
2581
      if (tmp == NULL) {
2582
         /* reallocation failed but "a" is still valid [can be freed] */
2583
         return MP_MEM;
2584
      }
2585

2586
      /* reallocation succeeded so set a->dp */
2587
      a->dp = tmp;
2588

2589
      /* zero excess digits */
2590
      i        = a->alloc;
2591
      a->alloc = size;
2592
      for (; i < a->alloc; i++) {
2593
         a->dp[i] = 0;
2594
      }
2595
   }
2596
   return MP_OKAY;
2597
}
2598

2599
/* End: bn_mp_grow.c */
2600

2601
/* Start: bn_mp_import.c */
2602

2603
/* based on gmp's mpz_import.
2604
 * see http://gmplib.org/manual/Integer-Import-and-Export.html
2605
 */
2606
int mp_import(mp_int *rop, size_t count, int order, size_t size,
2607
              int endian, size_t nails, const void *op)
2608
{
2609
   int result;
2610
   size_t odd_nails, nail_bytes, i, j;
2611
   unsigned char odd_nail_mask;
2612

2613
   mp_zero(rop);
2614

2615
   if (endian == 0) {
2616
      union {
2617
         unsigned int i;
2618
         char c[4];
2619
      } lint;
2620
      lint.i = 0x01020304;
2621

2622
      endian = (lint.c[0] == '\x04') ? -1 : 1;
2623
   }
2624

2625
   odd_nails = (nails % 8u);
2626
   odd_nail_mask = 0xff;
2627
   for (i = 0; i < odd_nails; ++i) {
2628
      odd_nail_mask ^= (unsigned char)(1u << (7u - i));
2629
   }
2630
   nail_bytes = nails / 8u;
2631

2632
   for (i = 0; i < count; ++i) {
2633
      for (j = 0; j < (size - nail_bytes); ++j) {
2634
         unsigned char byte = *((unsigned char *)op +
2635
                                (((order == 1) ? i : ((count - 1u) - i)) * size) +
2636
                                ((endian == 1) ? (j + nail_bytes) : (((size - 1u) - j) - nail_bytes)));
2637

2638
         if ((result = mp_mul_2d(rop, (j == 0u) ? (int)(8u - odd_nails) : 8, rop)) != MP_OKAY) {
2639
            return result;
2640
         }
2641

2642
         rop->dp[0] |= (j == 0u) ? (mp_digit)(byte & odd_nail_mask) : (mp_digit)byte;
2643
         rop->used  += 1;
2644
      }
2645
   }
2646

2647
   mp_clamp(rop);
2648

2649
   return MP_OKAY;
2650
}
2651

2652
/* End: bn_mp_import.c */
2653

2654
/* Start: bn_mp_init.c */
2655

2656
/* init a new mp_int */
2657
int mp_init(mp_int *a)
2658
{
2659
   int i;
2660

2661
   /* allocate memory required and clear it */
2662
   a->dp = OPT_CAST(mp_digit) XMALLOC(sizeof(mp_digit) * (size_t)MP_PREC);
2663
   if (a->dp == NULL) {
2664
      return MP_MEM;
2665
   }
2666

2667
   /* set the digits to zero */
2668
   for (i = 0; i < MP_PREC; i++) {
2669
      a->dp[i] = 0;
2670
   }
2671

2672
   /* set the used to zero, allocated digits to the default precision
2673
    * and sign to positive */
2674
   a->used  = 0;
2675
   a->alloc = MP_PREC;
2676
   a->sign  = MP_ZPOS;
2677

2678
   return MP_OKAY;
2679
}
2680

2681
/* End: bn_mp_init.c */
2682

2683
/* Start: bn_mp_init_copy.c */
2684

2685
/* creates "a" then copies b into it */
2686
int mp_init_copy(mp_int *a, const mp_int *b)
2687
{
2688
   int     res;
2689

2690
   if ((res = mp_init_size(a, b->used)) != MP_OKAY) {
2691
      return res;
2692
   }
2693

2694
   if ((res = mp_copy(b, a)) != MP_OKAY) {
2695
      mp_clear(a);
2696
   }
2697

2698
   return res;
2699
}
2700

2701
/* End: bn_mp_init_copy.c */
2702

2703
/* Start: bn_mp_init_multi.c */
2704

2705
/* Initialises every mp_int in a NULL-terminated argument list.
 *
 * If one of the initialisations fails, all the ones that already
 * succeeded are cleared again and MP_MEM is returned; otherwise the
 * result is MP_OKAY.
 */
int mp_init_multi(mp_int *mp, ...)
{
   mp_err res = MP_OKAY;      /* stays MP_OKAY unless an init fails */
   int n = 0;                 /* how many inits succeeded so far */
   mp_int *cur_arg;
   va_list args;

   va_start(args, mp);
   for (cur_arg = mp; cur_arg != NULL; cur_arg = va_arg(args, mp_int *)) {
      if (mp_init(cur_arg) != MP_OKAY) {
         /* Walk the argument list a second time from the start and undo
          * the n inits that went through. */
         va_list clean_args;

         cur_arg = mp;
         va_start(clean_args, mp);
         for (; n != 0; n--) {
            mp_clear(cur_arg);
            cur_arg = va_arg(clean_args, mp_int *);
         }
         va_end(clean_args);
         res = MP_MEM;
         break;
      }
      n++;
   }
   va_end(args);

   return res;
}
2737

2738
/* End: bn_mp_init_multi.c */
2739

2740
/* Start: bn_mp_init_set.c */
2741

2742
/* initialize and set a digit */
2743
int mp_init_set(mp_int *a, mp_digit b)
2744
{
2745
   int err;
2746
   if ((err = mp_init(a)) != MP_OKAY) {
2747
      return err;
2748
   }
2749
   mp_set(a, b);
2750
   return err;
2751
}
2752

2753
/* End: bn_mp_init_set.c */
2754

2755
/* Start: bn_mp_init_set_int.c */
2756

2757
/* initialize and set a digit */
2758
int mp_init_set_int(mp_int *a, unsigned long b)
2759
{
2760
   int err;
2761
   if ((err = mp_init(a)) != MP_OKAY) {
2762
      return err;
2763
   }
2764
   return mp_set_int(a, b);
2765
}
2766

2767
/* End: bn_mp_init_set_int.c */
2768

2769
/* Start: bn_mp_init_size.c */
2770

2771
/* init an mp_init for a given size */
2772
int mp_init_size(mp_int *a, int size)
2773
{
2774
   int x;
2775

2776
   /* pad size so there are always extra digits */
2777
   size += (MP_PREC * 2) - (size % MP_PREC);
2778

2779
   /* alloc mem */
2780
   a->dp = OPT_CAST(mp_digit) XMALLOC(sizeof(mp_digit) * (size_t)size);
2781
   if (a->dp == NULL) {
2782
      return MP_MEM;
2783
   }
2784

2785
   /* set the members */
2786
   a->used  = 0;
2787
   a->alloc = size;
2788
   a->sign  = MP_ZPOS;
2789

2790
   /* zero the digits */
2791
   for (x = 0; x < size; x++) {
2792
      a->dp[x] = 0;
2793
   }
2794

2795
   return MP_OKAY;
2796
}
2797

2798
/* End: bn_mp_init_size.c */
2799

2800
/* Start: bn_mp_invmod.c */
2801

2802
/* hac 14.61, pp608 */
2803
int mp_invmod(const mp_int *a, const mp_int *b, mp_int *c)
2804
{
2805
   /* b cannot be negative and has to be >1 */
2806
   if ((b->sign == MP_NEG) || (mp_cmp_d(b, 1uL) != MP_GT)) {
2807
      return MP_VAL;
2808
   }
2809

2810
   /* if the modulus is odd we can use a faster routine instead */
2811
   if ((mp_isodd(b) == MP_YES)) {
2812
      return fast_mp_invmod(a, b, c);
2813
   }
2814

2815
   return mp_invmod_slow(a, b, c);
2816
}
2817

2818
/* End: bn_mp_invmod.c */
2819

2820
/* Start: bn_mp_invmod_slow.c */
2821

2822
/* hac 14.61, pp608 */
2823
int mp_invmod_slow(const mp_int *a, const mp_int *b, mp_int *c)
2824
{
2825
   mp_int  x, y, u, v, A, B, C, D;
2826
   int     res;
2827

2828
   /* b cannot be negative */
2829
   if ((b->sign == MP_NEG) || (mp_iszero(b) == MP_YES)) {
2830
      return MP_VAL;
2831
   }
2832

2833
   /* init temps */
2834
   if ((res = mp_init_multi(&x, &y, &u, &v,
2835
                            &A, &B, &C, &D, NULL)) != MP_OKAY) {
2836
      return res;
2837
   }
2838

2839
   /* x = a, y = b */
2840
   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
2841
      goto LBL_ERR;
2842
   }
2843
   if ((res = mp_copy(b, &y)) != MP_OKAY) {
2844
      goto LBL_ERR;
2845
   }
2846

2847
   /* 2. [modified] if x,y are both even then return an error! */
2848
   if ((mp_iseven(&x) == MP_YES) && (mp_iseven(&y) == MP_YES)) {
2849
      res = MP_VAL;
2850
      goto LBL_ERR;
2851
   }
2852

2853
   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2854
   if ((res = mp_copy(&x, &u)) != MP_OKAY) {
2855
      goto LBL_ERR;
2856
   }
2857
   if ((res = mp_copy(&y, &v)) != MP_OKAY) {
2858
      goto LBL_ERR;
2859
   }
2860
   mp_set(&A, 1uL);
2861
   mp_set(&D, 1uL);
2862

2863
top:
2864
   /* 4.  while u is even do */
2865
   while (mp_iseven(&u) == MP_YES) {
2866
      /* 4.1 u = u/2 */
2867
      if ((res = mp_div_2(&u, &u)) != MP_OKAY) {
2868
         goto LBL_ERR;
2869
      }
2870
      /* 4.2 if A or B is odd then */
2871
      if ((mp_isodd(&A) == MP_YES) || (mp_isodd(&B) == MP_YES)) {
2872
         /* A = (A+y)/2, B = (B-x)/2 */
2873
         if ((res = mp_add(&A, &y, &A)) != MP_OKAY) {
2874
            goto LBL_ERR;
2875
         }
2876
         if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {
2877
            goto LBL_ERR;
2878
         }
2879
      }
2880
      /* A = A/2, B = B/2 */
2881
      if ((res = mp_div_2(&A, &A)) != MP_OKAY) {
2882
         goto LBL_ERR;
2883
      }
2884
      if ((res = mp_div_2(&B, &B)) != MP_OKAY) {
2885
         goto LBL_ERR;
2886
      }
2887
   }
2888

2889
   /* 5.  while v is even do */
2890
   while (mp_iseven(&v) == MP_YES) {
2891
      /* 5.1 v = v/2 */
2892
      if ((res = mp_div_2(&v, &v)) != MP_OKAY) {
2893
         goto LBL_ERR;
2894
      }
2895
      /* 5.2 if C or D is odd then */
2896
      if ((mp_isodd(&C) == MP_YES) || (mp_isodd(&D) == MP_YES)) {
2897
         /* C = (C+y)/2, D = (D-x)/2 */
2898
         if ((res = mp_add(&C, &y, &C)) != MP_OKAY) {
2899
            goto LBL_ERR;
2900
         }
2901
         if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {
2902
            goto LBL_ERR;
2903
         }
2904
      }
2905
      /* C = C/2, D = D/2 */
2906
      if ((res = mp_div_2(&C, &C)) != MP_OKAY) {
2907
         goto LBL_ERR;
2908
      }
2909
      if ((res = mp_div_2(&D, &D)) != MP_OKAY) {
2910
         goto LBL_ERR;
2911
      }
2912
   }
2913

2914
   /* 6.  if u >= v then */
2915
   if (mp_cmp(&u, &v) != MP_LT) {
2916
      /* u = u - v, A = A - C, B = B - D */
2917
      if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {
2918
         goto LBL_ERR;
2919
      }
2920

2921
      if ((res = mp_sub(&A, &C, &A)) != MP_OKAY) {
2922
         goto LBL_ERR;
2923
      }
2924

2925
      if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {
2926
         goto LBL_ERR;
2927
      }
2928
   } else {
2929
      /* v - v - u, C = C - A, D = D - B */
2930
      if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {
2931
         goto LBL_ERR;
2932
      }
2933

2934
      if ((res = mp_sub(&C, &A, &C)) != MP_OKAY) {
2935
         goto LBL_ERR;
2936
      }
2937

2938
      if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {
2939
         goto LBL_ERR;
2940
      }
2941
   }
2942

2943
   /* if not zero goto step 4 */
2944
   if (mp_iszero(&u) == MP_NO)
2945
      goto top;
2946

2947
   /* now a = C, b = D, gcd == g*v */
2948

2949
   /* if v != 1 then there is no inverse */
2950
   if (mp_cmp_d(&v, 1uL) != MP_EQ) {
2951
      res = MP_VAL;
2952
      goto LBL_ERR;
2953
   }
2954

2955
   /* if its too low */
2956
   while (mp_cmp_d(&C, 0uL) == MP_LT) {
2957
      if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2958
         goto LBL_ERR;
2959
      }
2960
   }
2961

2962
   /* too big */
2963
   while (mp_cmp_mag(&C, b) != MP_LT) {
2964
      if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2965
         goto LBL_ERR;
2966
      }
2967
   }
2968

2969
   /* C is now the inverse */
2970
   mp_exch(&C, c);
2971
   res = MP_OKAY;
2972
LBL_ERR:
2973
   mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2974
   return res;
2975
}
2976

2977
/* End: bn_mp_invmod_slow.c */
2978

2979
/* Start: bn_mp_is_square.c */
2980

2981
/* Check if remainders are possible squares - fast exclude non-squares */
2982
static const char rem_128[128] = {
2983
   0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2984
   0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2985
   1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2986
   1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2987
   0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2988
   1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2989
   1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
2990
   1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1
2991
};
2992

2993
static const char rem_105[105] = {
2994
   0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
2995
   0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
2996
   0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
2997
   1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
2998
   0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
2999
   1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
3000
   1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1
3001
};
3002

3003
/* Store non-zero to ret if arg is square, and zero if not */
3004
int mp_is_square(const mp_int *arg, int *ret)
3005
{
3006
   int           res;
3007
   mp_digit      c;
3008
   mp_int        t;
3009
   unsigned long r;
3010

3011
   /* Default to Non-square :) */
3012
   *ret = MP_NO;
3013

3014
   if (arg->sign == MP_NEG) {
3015
      return MP_VAL;
3016
   }
3017

3018
   /* digits used?  (TSD) */
3019
   if (arg->used == 0) {
3020
      return MP_OKAY;
3021
   }
3022

3023
   /* First check mod 128 (suppose that DIGIT_BIT is at least 7) */
3024
   if (rem_128[127u & DIGIT(arg, 0)] == (char)1) {
3025
      return MP_OKAY;
3026
   }
3027

3028
   /* Next check mod 105 (3*5*7) */
3029
   if ((res = mp_mod_d(arg, 105uL, &c)) != MP_OKAY) {
3030
      return res;
3031
   }
3032
   if (rem_105[c] == (char)1) {
3033
      return MP_OKAY;
3034
   }
3035

3036

3037
   if ((res = mp_init_set_int(&t, 11L*13L*17L*19L*23L*29L*31L)) != MP_OKAY) {
3038
      return res;
3039
   }
3040
   if ((res = mp_mod(arg, &t, &t)) != MP_OKAY) {
3041
      goto LBL_ERR;
3042
   }
3043
   r = mp_get_int(&t);
3044
   /* Check for other prime modules, note it's not an ERROR but we must
3045
    * free "t" so the easiest way is to goto LBL_ERR.  We know that res
3046
    * is already equal to MP_OKAY from the mp_mod call
3047
    */
3048
   if (((1uL<<(r%11uL)) & 0x5C4uL) != 0uL)         goto LBL_ERR;
3049
   if (((1uL<<(r%13uL)) & 0x9E4uL) != 0uL)         goto LBL_ERR;
3050
   if (((1uL<<(r%17uL)) & 0x5CE8uL) != 0uL)        goto LBL_ERR;
3051
   if (((1uL<<(r%19uL)) & 0x4F50CuL) != 0uL)       goto LBL_ERR;
3052
   if (((1uL<<(r%23uL)) & 0x7ACCA0uL) != 0uL)      goto LBL_ERR;
3053
   if (((1uL<<(r%29uL)) & 0xC2EDD0CuL) != 0uL)     goto LBL_ERR;
3054
   if (((1uL<<(r%31uL)) & 0x6DE2B848uL) != 0uL)    goto LBL_ERR;
3055

3056
   /* Final check - is sqr(sqrt(arg)) == arg ? */
3057
   if ((res = mp_sqrt(arg, &t)) != MP_OKAY) {
3058
      goto LBL_ERR;
3059
   }
3060
   if ((res = mp_sqr(&t, &t)) != MP_OKAY) {
3061
      goto LBL_ERR;
3062
   }
3063

3064
   *ret = (mp_cmp_mag(&t, arg) == MP_EQ) ? MP_YES : MP_NO;
3065
LBL_ERR:
3066
   mp_clear(&t);
3067
   return res;
3068
}
3069

3070
/* End: bn_mp_is_square.c */
3071

3072
/* Start: bn_mp_jacobi.c */
3073

3074
/* computes the jacobi c = (a | n) (or Legendre if n is prime)
3075
 * Kept for legacy reasons, please use mp_kronecker() instead
3076
 */
3077
int mp_jacobi(const mp_int *a, const mp_int *n, int *c)
3078
{
3079
   /* if a < 0 return MP_VAL */
3080
   if (mp_isneg(a) == MP_YES) {
3081
      return MP_VAL;
3082
   }
3083

3084
   /* if n <= 0 return MP_VAL */
3085
   if (mp_cmp_d(n, 0uL) != MP_GT) {
3086
      return MP_VAL;
3087
   }
3088

3089
   return mp_kronecker(a, n, c);
3090
}
3091

3092
/* End: bn_mp_jacobi.c */
3093

3094
/* Start: bn_mp_karatsuba_mul.c */
3095

3096
/* c = |a| * |b| using Karatsuba Multiplication using
3097
 * three half size multiplications
3098
 *
3099
 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
3100
 * let n represent half of the number of digits in
3101
 * the min(a,b)
3102
 *
3103
 * a = a1 * B**n + a0
3104
 * b = b1 * B**n + b0
3105
 *
3106
 * Then, a * b =>
3107
   a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
3108
 *
3109
 * Note that a1b1 and a0b0 are used twice and only need to be
3110
 * computed once.  So in total three half size (half # of
3111
 * digit) multiplications are performed, a0b0, a1b1 and
3112
 * (a1+b1)(a0+b0)
3113
 *
3114
 * Note that a multiplication of half the digits requires
3115
 * 1/4th the number of single precision multiplications so in
3116
 * total after one call 25% of the single precision multiplications
3117
 * are saved.  Note also that the call to mp_mul can end up back
3118
 * in this function if the a0, a1, b0, or b1 are above the threshold.
3119
 * This is known as divide-and-conquer and leads to the famous
3120
 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
3121
 * the standard O(N**2) that the baseline/comba methods use.
3122
 * Generally though the overhead of this method doesn't pay off
3123
 * until a certain size (N ~ 80) is reached.
3124
 */
3125
int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c)
3126
{
3127
   mp_int  x0, x1, y0, y1, t1, x0y0, x1y1;
3128
   int     B, err;
3129

3130
   /* default the return code to an error */
3131
   err = MP_MEM;
3132

3133
   /* min # of digits */
3134
   B = MIN(a->used, b->used);
3135

3136
   /* now divide in two */
3137
   B = B >> 1;
3138

3139
   /* init copy all the temps */
3140
   if (mp_init_size(&x0, B) != MP_OKAY)
3141
      goto LBL_ERR;
3142
   if (mp_init_size(&x1, a->used - B) != MP_OKAY)
3143
      goto X0;
3144
   if (mp_init_size(&y0, B) != MP_OKAY)
3145
      goto X1;
3146
   if (mp_init_size(&y1, b->used - B) != MP_OKAY)
3147
      goto Y0;
3148

3149
   /* init temps */
3150
   if (mp_init_size(&t1, B * 2) != MP_OKAY)
3151
      goto Y1;
3152
   if (mp_init_size(&x0y0, B * 2) != MP_OKAY)
3153
      goto T1;
3154
   if (mp_init_size(&x1y1, B * 2) != MP_OKAY)
3155
      goto X0Y0;
3156

3157
   /* now shift the digits */
3158
   x0.used = y0.used = B;
3159
   x1.used = a->used - B;
3160
   y1.used = b->used - B;
3161

3162
   {
3163
      int x;
3164
      mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
3165

3166
      /* we copy the digits directly instead of using higher level functions
3167
       * since we also need to shift the digits
3168
       */
3169
      tmpa = a->dp;
3170
      tmpb = b->dp;
3171

3172
      tmpx = x0.dp;
3173
      tmpy = y0.dp;
3174
      for (x = 0; x < B; x++) {
3175
         *tmpx++ = *tmpa++;
3176
         *tmpy++ = *tmpb++;
3177
      }
3178

3179
      tmpx = x1.dp;
3180
      for (x = B; x < a->used; x++) {
3181
         *tmpx++ = *tmpa++;
3182
      }
3183

3184
      tmpy = y1.dp;
3185
      for (x = B; x < b->used; x++) {
3186
         *tmpy++ = *tmpb++;
3187
      }
3188
   }
3189

3190
   /* only need to clamp the lower words since by definition the
3191
    * upper words x1/y1 must have a known number of digits
3192
    */
3193
   mp_clamp(&x0);
3194
   mp_clamp(&y0);
3195

3196
   /* now calc the products x0y0 and x1y1 */
3197
   /* after this x0 is no longer required, free temp [x0==t2]! */
3198
   if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY)
3199
      goto X1Y1;          /* x0y0 = x0*y0 */
3200
   if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY)
3201
      goto X1Y1;          /* x1y1 = x1*y1 */
3202

3203
   /* now calc x1+x0 and y1+y0 */
3204
   if (s_mp_add(&x1, &x0, &t1) != MP_OKAY)
3205
      goto X1Y1;          /* t1 = x1 - x0 */
3206
   if (s_mp_add(&y1, &y0, &x0) != MP_OKAY)
3207
      goto X1Y1;          /* t2 = y1 - y0 */
3208
   if (mp_mul(&t1, &x0, &t1) != MP_OKAY)
3209
      goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */
3210

3211
   /* add x0y0 */
3212
   if (mp_add(&x0y0, &x1y1, &x0) != MP_OKAY)
3213
      goto X1Y1;          /* t2 = x0y0 + x1y1 */
3214
   if (s_mp_sub(&t1, &x0, &t1) != MP_OKAY)
3215
      goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
3216

3217
   /* shift by B */
3218
   if (mp_lshd(&t1, B) != MP_OKAY)
3219
      goto X1Y1;          /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
3220
   if (mp_lshd(&x1y1, B * 2) != MP_OKAY)
3221
      goto X1Y1;          /* x1y1 = x1y1 << 2*B */
3222

3223
   if (mp_add(&x0y0, &t1, &t1) != MP_OKAY)
3224
      goto X1Y1;          /* t1 = x0y0 + t1 */
3225
   if (mp_add(&t1, &x1y1, c) != MP_OKAY)
3226
      goto X1Y1;          /* t1 = x0y0 + t1 + x1y1 */
3227

3228
   /* Algorithm succeeded set the return code to MP_OKAY */
3229
   err = MP_OKAY;
3230

3231
X1Y1:
3232
   mp_clear(&x1y1);
3233
X0Y0:
3234
   mp_clear(&x0y0);
3235
T1:
3236
   mp_clear(&t1);
3237
Y1:
3238
   mp_clear(&y1);
3239
Y0:
3240
   mp_clear(&y0);
3241
X1:
3242
   mp_clear(&x1);
3243
X0:
3244
   mp_clear(&x0);
3245
LBL_ERR:
3246
   return err;
3247
}
3248

3249
/* End: bn_mp_karatsuba_mul.c */
3250

3251
/* Start: bn_mp_karatsuba_sqr.c */
3252

3253
/* Karatsuba squaring, computes b = a*a using three
3254
 * half size squarings
3255
 *
3256
 * See comments of karatsuba_mul for details.  It
3257
 * is essentially the same algorithm but merely
3258
 * tuned to perform recursive squarings.
3259
 */
3260
int mp_karatsuba_sqr(const mp_int *a, mp_int *b)
3261
{
3262
   mp_int  x0, x1, t1, t2, x0x0, x1x1;
3263
   int     B, err;
3264

3265
   err = MP_MEM;
3266

3267
   /* min # of digits */
3268
   B = a->used;
3269

3270
   /* now divide in two */
3271
   B = B >> 1;
3272

3273
   /* init copy all the temps */
3274
   if (mp_init_size(&x0, B) != MP_OKAY)
3275
      goto LBL_ERR;
3276
   if (mp_init_size(&x1, a->used - B) != MP_OKAY)
3277
      goto X0;
3278

3279
   /* init temps */
3280
   if (mp_init_size(&t1, a->used * 2) != MP_OKAY)
3281
      goto X1;
3282
   if (mp_init_size(&t2, a->used * 2) != MP_OKAY)
3283
      goto T1;
3284
   if (mp_init_size(&x0x0, B * 2) != MP_OKAY)
3285
      goto T2;
3286
   if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY)
3287
      goto X0X0;
3288

3289
   {
3290
      int x;
3291
      mp_digit *dst, *src;
3292

3293
      src = a->dp;
3294

3295
      /* now shift the digits */
3296
      dst = x0.dp;
3297
      for (x = 0; x < B; x++) {
3298
         *dst++ = *src++;
3299
      }
3300

3301
      dst = x1.dp;
3302
      for (x = B; x < a->used; x++) {
3303
         *dst++ = *src++;
3304
      }
3305
   }
3306

3307
   x0.used = B;
3308
   x1.used = a->used - B;
3309

3310
   mp_clamp(&x0);
3311

3312
   /* now calc the products x0*x0 and x1*x1 */
3313
   if (mp_sqr(&x0, &x0x0) != MP_OKAY)
3314
      goto X1X1;           /* x0x0 = x0*x0 */
3315
   if (mp_sqr(&x1, &x1x1) != MP_OKAY)
3316
      goto X1X1;           /* x1x1 = x1*x1 */
3317

3318
   /* now calc (x1+x0)**2 */
3319
   if (s_mp_add(&x1, &x0, &t1) != MP_OKAY)
3320
      goto X1X1;           /* t1 = x1 - x0 */
3321
   if (mp_sqr(&t1, &t1) != MP_OKAY)
3322
      goto X1X1;           /* t1 = (x1 - x0) * (x1 - x0) */
3323

3324
   /* add x0y0 */
3325
   if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY)
3326
      goto X1X1;           /* t2 = x0x0 + x1x1 */
3327
   if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY)
3328
      goto X1X1;           /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */
3329

3330
   /* shift by B */
3331
   if (mp_lshd(&t1, B) != MP_OKAY)
3332
      goto X1X1;           /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
3333
   if (mp_lshd(&x1x1, B * 2) != MP_OKAY)
3334
      goto X1X1;           /* x1x1 = x1x1 << 2*B */
3335

3336
   if (mp_add(&x0x0, &t1, &t1) != MP_OKAY)
3337
      goto X1X1;           /* t1 = x0x0 + t1 */
3338
   if (mp_add(&t1, &x1x1, b) != MP_OKAY)
3339
      goto X1X1;           /* t1 = x0x0 + t1 + x1x1 */
3340

3341
   err = MP_OKAY;
3342

3343
X1X1:
3344
   mp_clear(&x1x1);
3345
X0X0:
3346
   mp_clear(&x0x0);
3347
T2:
3348
   mp_clear(&t2);
3349
T1:
3350
   mp_clear(&t1);
3351
X1:
3352
   mp_clear(&x1);
3353
X0:
3354
   mp_clear(&x0);
3355
LBL_ERR:
3356
   return err;
3357
}
3358

3359
/* End: bn_mp_karatsuba_sqr.c */
3360

3361
/* Start: bn_mp_kronecker.c */
3362

3363
/*
3364
   Kronecker symbol (a|p)
3365
   Straightforward implementation of algorithm 1.4.10 in
3366
   Henri Cohen: "A Course in Computational Algebraic Number Theory"
3367

3368
   @book{cohen2013course,
3369
     title={A course in computational algebraic number theory},
3370
     author={Cohen, Henri},
3371
     volume={138},
3372
     year={2013},
3373
     publisher={Springer Science \& Business Media}
3374
    }
3375
 */
3376
int mp_kronecker(const mp_int *a, const mp_int *p, int *c)
3377
{
3378
   mp_int a1, p1, r;
3379

3380
   int e = MP_OKAY;
3381
   int v, k;
3382

3383
   static const int table[8] = {0, 1, 0, -1, 0, -1, 0, 1};
3384

3385
   if (mp_iszero(p) != MP_NO) {
3386
      if ((a->used == 1) && (a->dp[0] == 1u)) {
3387
         *c = 1;
3388
         return e;
3389
      } else {
3390
         *c = 0;
3391
         return e;
3392
      }
3393
   }
3394

3395
   if ((mp_iseven(a) != MP_NO) && (mp_iseven(p) != MP_NO)) {
3396
      *c = 0;
3397
      return e;
3398
   }
3399

3400
   if ((e = mp_init_copy(&a1, a)) != MP_OKAY) {
3401
      return e;
3402
   }
3403
   if ((e = mp_init_copy(&p1, p)) != MP_OKAY) {
3404
      goto LBL_KRON_0;
3405
   }
3406

3407
   v = mp_cnt_lsb(&p1);
3408
   if ((e = mp_div_2d(&p1, v, &p1, NULL)) != MP_OKAY) {
3409
      goto LBL_KRON_1;
3410
   }
3411

3412
   if ((v & 0x1) == 0) {
3413
      k = 1;
3414
   } else {
3415
      k = table[a->dp[0] & 7u];
3416
   }
3417

3418
   if (p1.sign == MP_NEG) {
3419
      p1.sign = MP_ZPOS;
3420
      if (a1.sign == MP_NEG) {
3421
         k = -k;
3422
      }
3423
   }
3424

3425
   if ((e = mp_init(&r)) != MP_OKAY) {
3426
      goto LBL_KRON_1;
3427
   }
3428

3429
   for (;;) {
3430
      if (mp_iszero(&a1) != MP_NO) {
3431
         if (mp_cmp_d(&p1, 1uL) == MP_EQ) {
3432
            *c = k;
3433
            goto LBL_KRON;
3434
         } else {
3435
            *c = 0;
3436
            goto LBL_KRON;
3437
         }
3438
      }
3439

3440
      v = mp_cnt_lsb(&a1);
3441
      if ((e = mp_div_2d(&a1, v, &a1, NULL)) != MP_OKAY) {
3442
         goto LBL_KRON;
3443
      }
3444

3445
      if ((v & 0x1) == 1) {
3446
         k = k * table[p1.dp[0] & 7u];
3447
      }
3448

3449
      if (a1.sign == MP_NEG) {
3450
         /*
3451
          * Compute k = (-1)^((a1)*(p1-1)/4) * k
3452
          * a1.dp[0] + 1 cannot overflow because the MSB
3453
          * of the type mp_digit is not set by definition
3454
          */
3455
         if (((a1.dp[0] + 1u) & p1.dp[0] & 2u) != 0u) {
3456
            k = -k;
3457
         }
3458
      } else {
3459
         /* compute k = (-1)^((a1-1)*(p1-1)/4) * k */
3460
         if ((a1.dp[0] & p1.dp[0] & 2u) != 0u) {
3461
            k = -k;
3462
         }
3463
      }
3464

3465
      if ((e = mp_copy(&a1, &r)) != MP_OKAY) {
3466
         goto LBL_KRON;
3467
      }
3468
      r.sign = MP_ZPOS;
3469
      if ((e = mp_mod(&p1, &r, &a1)) != MP_OKAY) {
3470
         goto LBL_KRON;
3471
      }
3472
      if ((e = mp_copy(&r, &p1)) != MP_OKAY) {
3473
         goto LBL_KRON;
3474
      }
3475
   }
3476

3477
LBL_KRON:
3478
   mp_clear(&r);
3479
LBL_KRON_1:
3480
   mp_clear(&p1);
3481
LBL_KRON_0:
3482
   mp_clear(&a1);
3483

3484
   return e;
3485
}
3486

3487
/* End: bn_mp_kronecker.c */
3488

3489
/* Start: bn_mp_lcm.c */
3490

3491
/* computes least common multiple as |a*b|/(a, b) */
3492
int mp_lcm(const mp_int *a, const mp_int *b, mp_int *c)
3493
{
3494
   int     res;
3495
   mp_int  t1, t2;
3496

3497

3498
   if ((res = mp_init_multi(&t1, &t2, NULL)) != MP_OKAY) {
3499
      return res;
3500
   }
3501

3502
   /* t1 = get the GCD of the two inputs */
3503
   if ((res = mp_gcd(a, b, &t1)) != MP_OKAY) {
3504
      goto LBL_T;
3505
   }
3506

3507
   /* divide the smallest by the GCD */
3508
   if (mp_cmp_mag(a, b) == MP_LT) {
3509
      /* store quotient in t2 such that t2 * b is the LCM */
3510
      if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
3511
         goto LBL_T;
3512
      }
3513
      res = mp_mul(b, &t2, c);
3514
   } else {
3515
      /* store quotient in t2 such that t2 * a is the LCM */
3516
      if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
3517
         goto LBL_T;
3518
      }
3519
      res = mp_mul(a, &t2, c);
3520
   }
3521

3522
   /* fix the sign to positive */
3523
   c->sign = MP_ZPOS;
3524

3525
LBL_T:
3526
   mp_clear_multi(&t1, &t2, NULL);
3527
   return res;
3528
}
3529

3530
/* End: bn_mp_lcm.c */
3531

3532
/* Start: bn_mp_lshd.c */
3533

3534
/* shift left a certain amount of digits */
3535
int mp_lshd(mp_int *a, int b)
3536
{
3537
   int     x, res;
3538

3539
   /* if its less than zero return */
3540
   if (b <= 0) {
3541
      return MP_OKAY;
3542
   }
3543
   /* no need to shift 0 around */
3544
   if (mp_iszero(a) == MP_YES) {
3545
      return MP_OKAY;
3546
   }
3547

3548
   /* grow to fit the new digits */
3549
   if (a->alloc < (a->used + b)) {
3550
      if ((res = mp_grow(a, a->used + b)) != MP_OKAY) {
3551
         return res;
3552
      }
3553
   }
3554

3555
   {
3556
      mp_digit *top, *bottom;
3557

3558
      /* increment the used by the shift amount then copy upwards */
3559
      a->used += b;
3560

3561
      /* top */
3562
      top = a->dp + a->used - 1;
3563

3564
      /* base */
3565
      bottom = (a->dp + a->used - 1) - b;
3566

3567
      /* much like mp_rshd this is implemented using a sliding window
3568
       * except the window goes the otherway around.  Copying from
3569
       * the bottom to the top.  see bn_mp_rshd.c for more info.
3570
       */
3571
      for (x = a->used - 1; x >= b; x--) {
3572
         *top-- = *bottom--;
3573
      }
3574

3575
      /* zero the lower digits */
3576
      top = a->dp;
3577
      for (x = 0; x < b; x++) {
3578
         *top++ = 0;
3579
      }
3580
   }
3581
   return MP_OKAY;
3582
}
3583

3584
/* End: bn_mp_lshd.c */
3585

3586
/* Start: bn_mp_mod.c */
3587

3588
/* c = a mod b, 0 <= c < b if b > 0, b < c <= 0 if b < 0 */
3589
int mp_mod(const mp_int *a, const mp_int *b, mp_int *c)
3590
{
3591
   mp_int  t;
3592
   int     res;
3593

3594
   if ((res = mp_init_size(&t, b->used)) != MP_OKAY) {
3595
      return res;
3596
   }
3597

3598
   if ((res = mp_div(a, b, NULL, &t)) != MP_OKAY) {
3599
      mp_clear(&t);
3600
      return res;
3601
   }
3602

3603
   if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) {
3604
      res = MP_OKAY;
3605
      mp_exch(&t, c);
3606
   } else {
3607
      res = mp_add(b, &t, c);
3608
   }
3609

3610
   mp_clear(&t);
3611
   return res;
3612
}
3613

3614
/* End: bn_mp_mod.c */
3615

3616
/* Start: bn_mp_mod_2d.c */
3617

3618
/* calc a value mod 2**b */
3619
int mp_mod_2d(const mp_int *a, int b, mp_int *c)
3620
{
3621
   int     x, res;
3622

3623
   /* if b is <= 0 then zero the int */
3624
   if (b <= 0) {
3625
      mp_zero(c);
3626
      return MP_OKAY;
3627
   }
3628

3629
   /* if the modulus is larger than the value than return */
3630
   if (b >= (a->used * DIGIT_BIT)) {
3631
      res = mp_copy(a, c);
3632
      return res;
3633
   }
3634

3635
   /* copy */
3636
   if ((res = mp_copy(a, c)) != MP_OKAY) {
3637
      return res;
3638
   }
3639

3640
   /* zero digits above the last digit of the modulus */
3641
   for (x = (b / DIGIT_BIT) + (((b % DIGIT_BIT) == 0) ? 0 : 1); x < c->used; x++) {
3642
      c->dp[x] = 0;
3643
   }
3644
   /* clear the digit that is not completely outside/inside the modulus */
3645
   c->dp[b / DIGIT_BIT] &=
3646
      ((mp_digit)1 << (mp_digit)(b % DIGIT_BIT)) - (mp_digit)1;
3647
   mp_clamp(c);
3648
   return MP_OKAY;
3649
}
3650

3651
/* End: bn_mp_mod_2d.c */
3652

3653
/* Start: bn_mp_mod_d.c */
3654

3655
/* c = a mod b for a single digit b; thin wrapper that asks mp_div_d()
 * for the remainder only (no quotient is requested)
 */
int mp_mod_d(const mp_int *a, mp_digit b, mp_digit *c)
{
   int res;

   res = mp_div_d(a, b, NULL, c);
   return res;
}
3659

3660
/* End: bn_mp_mod_d.c */
3661

3662
/* Start: bn_mp_montgomery_calc_normalization.c */
3663

3664
/*
3665
 * shifts with subtractions when the result is greater than b.
3666
 *
3667
 * The method is slightly modified to shift B unconditionally upto just under
3668
 * the leading bit of b.  This saves alot of multiple precision shifting.
3669
 */
3670
int mp_montgomery_calc_normalization(mp_int *a, const mp_int *b)
3671
{
3672
   int     x, bits, res;
3673

3674
   /* how many bits of last digit does b use */
3675
   bits = mp_count_bits(b) % DIGIT_BIT;
3676

3677
   if (b->used > 1) {
3678
      if ((res = mp_2expt(a, ((b->used - 1) * DIGIT_BIT) + bits - 1)) != MP_OKAY) {
3679
         return res;
3680
      }
3681
   } else {
3682
      mp_set(a, 1uL);
3683
      bits = 1;
3684
   }
3685

3686

3687
   /* now compute C = A * B mod b */
3688
   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3689
      if ((res = mp_mul_2(a, a)) != MP_OKAY) {
3690
         return res;
3691
      }
3692
      if (mp_cmp_mag(a, b) != MP_LT) {
3693
         if ((res = s_mp_sub(a, b, a)) != MP_OKAY) {
3694
            return res;
3695
         }
3696
      }
3697
   }
3698

3699
   return MP_OKAY;
3700
}
3701

3702
/* End: bn_mp_montgomery_calc_normalization.c */
3703

3704
/* Start: bn_mp_montgomery_reduce.c */
3705

3706
/* computes xR**-1 == x (mod N) via Montgomery Reduction */
3707
int mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
3708
{
3709
   int     ix, res, digs;
3710
   mp_digit mu;
3711

3712
   /* can the fast reduction [comba] method be used?
3713
    *
3714
    * Note that unlike in mul you're safely allowed *less*
3715
    * than the available columns [255 per default] since carries
3716
    * are fixed up in the inner loop.
3717
    */
3718
   digs = (n->used * 2) + 1;
3719
   if ((digs < (int)MP_WARRAY) &&
3720
       (x->used <= (int)MP_WARRAY) &&
3721
       (n->used <
3722
        (int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {
3723
      return fast_mp_montgomery_reduce(x, n, rho);
3724
   }
3725

3726
   /* grow the input as required */
3727
   if (x->alloc < digs) {
3728
      if ((res = mp_grow(x, digs)) != MP_OKAY) {
3729
         return res;
3730
      }
3731
   }
3732
   x->used = digs;
3733

3734
   for (ix = 0; ix < n->used; ix++) {
3735
      /* mu = ai * rho mod b
3736
       *
3737
       * The value of rho must be precalculated via
3738
       * montgomery_setup() such that
3739
       * it equals -1/n0 mod b this allows the
3740
       * following inner loop to reduce the
3741
       * input one digit at a time
3742
       */
3743
      mu = (mp_digit)(((mp_word)x->dp[ix] * (mp_word)rho) & MP_MASK);
3744

3745
      /* a = a + mu * m * b**i */
3746
      {
3747
         int iy;
3748
         mp_digit *tmpn, *tmpx, u;
3749
         mp_word r;
3750

3751
         /* alias for digits of the modulus */
3752
         tmpn = n->dp;
3753

3754
         /* alias for the digits of x [the input] */
3755
         tmpx = x->dp + ix;
3756

3757
         /* set the carry to zero */
3758
         u = 0;
3759

3760
         /* Multiply and add in place */
3761
         for (iy = 0; iy < n->used; iy++) {
3762
            /* compute product and sum */
3763
            r       = ((mp_word)mu * (mp_word)*tmpn++) +
3764
                      (mp_word)u + (mp_word)*tmpx;
3765

3766
            /* get carry */
3767
            u       = (mp_digit)(r >> (mp_word)DIGIT_BIT);
3768

3769
            /* fix digit */
3770
            *tmpx++ = (mp_digit)(r & (mp_word)MP_MASK);
3771
         }
3772
         /* At this point the ix'th digit of x should be zero */
3773

3774

3775
         /* propagate carries upwards as required*/
3776
         while (u != 0u) {
3777
            *tmpx   += u;
3778
            u        = *tmpx >> DIGIT_BIT;
3779
            *tmpx++ &= MP_MASK;
3780
         }
3781
      }
3782
   }
3783

3784
   /* at this point the n.used'th least
3785
    * significant digits of x are all zero
3786
    * which means we can shift x to the
3787
    * right by n.used digits and the
3788
    * residue is unchanged.
3789
    */
3790

3791
   /* x = x/b**n.used */
3792
   mp_clamp(x);
3793
   mp_rshd(x, n->used);
3794

3795
   /* if x >= n then x = x - n */
3796
   if (mp_cmp_mag(x, n) != MP_LT) {
3797
      return s_mp_sub(x, n, x);
3798
   }
3799

3800
   return MP_OKAY;
3801
}
3802

3803
/* End: bn_mp_montgomery_reduce.c */
3804

3805
/* Start: bn_mp_montgomery_setup.c */
3806

3807
/* setups the montgomery reduction stuff */
3808
int mp_montgomery_setup(const mp_int *n, mp_digit *rho)
3809
{
3810
   mp_digit x, b;
3811

3812
   /* fast inversion mod 2**k
3813
    *
3814
    * Based on the fact that
3815
    *
3816
    * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
3817
    *                    =>  2*X*A - X*X*A*A = 1
3818
    *                    =>  2*(1) - (1)     = 1
3819
    */
3820
   b = n->dp[0];
3821

3822
   if ((b & 1u) == 0u) {
3823
      return MP_VAL;
3824
   }
3825

3826
   x = (((b + 2u) & 4u) << 1) + b; /* here x*a==1 mod 2**4 */
3827
   x *= 2u - (b * x);              /* here x*a==1 mod 2**8 */
3828
#if !defined(MP_8BIT)
3829
   x *= 2u - (b * x);              /* here x*a==1 mod 2**16 */
3830
#endif
3831
#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
3832
   x *= 2u - (b * x);              /* here x*a==1 mod 2**32 */
3833
#endif
3834
#ifdef MP_64BIT
3835
   x *= 2u - (b * x);              /* here x*a==1 mod 2**64 */
3836
#endif
3837

3838
   /* rho = -1/m mod b */
3839
   *rho = (mp_digit)(((mp_word)1 << (mp_word)DIGIT_BIT) - x) & MP_MASK;
3840

3841
   return MP_OKAY;
3842
}
3843

3844
/* End: bn_mp_montgomery_setup.c */
3845

3846
/* Start: bn_mp_mul.c */
3847

3848
/* high level multiplication (handles sign) */
3849
int mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
3850
{
3851
   int     res, neg;
3852
   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
3853

3854
   /* use Toom-Cook? */
3855
   if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) {
3856
      res = mp_toom_mul(a, b, c);
3857
   } else
3858
      /* use Karatsuba? */
3859
      if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
3860
         res = mp_karatsuba_mul(a, b, c);
3861
      } else
3862
      {
3863
         /* can we use the fast multiplier?
3864
          *
3865
          * The fast multiplier can be used if the output will
3866
          * have less than MP_WARRAY digits and the number of
3867
          * digits won't affect carry propagation
3868
          */
3869
         int     digs = a->used + b->used + 1;
3870

3871
         if ((digs < (int)MP_WARRAY) &&
3872
             (MIN(a->used, b->used) <=
3873
              (int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {
3874
            res = fast_s_mp_mul_digs(a, b, c, digs);
3875
         } else
3876
         {
3877
            res = s_mp_mul(a, b, c); /* uses s_mp_mul_digs */
3878
         }
3879
      }
3880
   c->sign = (c->used > 0) ? neg : MP_ZPOS;
3881
   return res;
3882
}
3883

3884
/* End: bn_mp_mul.c */
3885

3886
/* Start: bn_mp_mul_2.c */
3887

3888
/* b = a*2 */
3889
int mp_mul_2(const mp_int *a, mp_int *b)
3890
{
3891
   int     x, res, oldused;
3892

3893
   /* grow to accomodate result */
3894
   if (b->alloc < (a->used + 1)) {
3895
      if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) {
3896
         return res;
3897
      }
3898
   }
3899

3900
   oldused = b->used;
3901
   b->used = a->used;
3902

3903
   {
3904
      mp_digit r, rr, *tmpa, *tmpb;
3905

3906
      /* alias for source */
3907
      tmpa = a->dp;
3908

3909
      /* alias for dest */
3910
      tmpb = b->dp;
3911

3912
      /* carry */
3913
      r = 0;
3914
      for (x = 0; x < a->used; x++) {
3915

3916
         /* get what will be the *next* carry bit from the
3917
          * MSB of the current digit
3918
          */
3919
         rr = *tmpa >> (mp_digit)(DIGIT_BIT - 1);
3920

3921
         /* now shift up this digit, add in the carry [from the previous] */
3922
         *tmpb++ = ((*tmpa++ << 1uL) | r) & MP_MASK;
3923

3924
         /* copy the carry that would be from the source
3925
          * digit into the next iteration
3926
          */
3927
         r = rr;
3928
      }
3929

3930
      /* new leading digit? */
3931
      if (r != 0u) {
3932
         /* add a MSB which is always 1 at this point */
3933
         *tmpb = 1;
3934
         ++(b->used);
3935
      }
3936

3937
      /* now zero any excess digits on the destination
3938
       * that we didn't write to
3939
       */
3940
      tmpb = b->dp + b->used;
3941
      for (x = b->used; x < oldused; x++) {
3942
         *tmpb++ = 0;
3943
      }
3944
   }
3945
   b->sign = a->sign;
3946
   return MP_OKAY;
3947
}
3948

3949
/* End: bn_mp_mul_2.c */
3950

3951
/* Start: bn_mp_mul_2d.c */
3952

3953
/* shift left by a certain bit count */
3954
int mp_mul_2d(const mp_int *a, int b, mp_int *c)
3955
{
3956
   mp_digit d;
3957
   int      res;
3958

3959
   /* copy */
3960
   if (a != c) {
3961
      if ((res = mp_copy(a, c)) != MP_OKAY) {
3962
         return res;
3963
      }
3964
   }
3965

3966
   if (c->alloc < (c->used + (b / DIGIT_BIT) + 1)) {
3967
      if ((res = mp_grow(c, c->used + (b / DIGIT_BIT) + 1)) != MP_OKAY) {
3968
         return res;
3969
      }
3970
   }
3971

3972
   /* shift by as many digits in the bit count */
3973
   if (b >= DIGIT_BIT) {
3974
      if ((res = mp_lshd(c, b / DIGIT_BIT)) != MP_OKAY) {
3975
         return res;
3976
      }
3977
   }
3978

3979
   /* shift any bit count < DIGIT_BIT */
3980
   d = (mp_digit)(b % DIGIT_BIT);
3981
   if (d != 0u) {
3982
      mp_digit *tmpc, shift, mask, r, rr;
3983
      int x;
3984

3985
      /* bitmask for carries */
3986
      mask = ((mp_digit)1 << d) - (mp_digit)1;
3987

3988
      /* shift for msbs */
3989
      shift = (mp_digit)DIGIT_BIT - d;
3990

3991
      /* alias */
3992
      tmpc = c->dp;
3993

3994
      /* carry */
3995
      r    = 0;
3996
      for (x = 0; x < c->used; x++) {
3997
         /* get the higher bits of the current word */
3998
         rr = (*tmpc >> shift) & mask;
3999

4000
         /* shift the current word and OR in the carry */
4001
         *tmpc = ((*tmpc << d) | r) & MP_MASK;
4002
         ++tmpc;
4003

4004
         /* set the carry to the carry bits of the current word */
4005
         r = rr;
4006
      }
4007

4008
      /* set final carry */
4009
      if (r != 0u) {
4010
         c->dp[(c->used)++] = r;
4011
      }
4012
   }
4013
   mp_clamp(c);
4014
   return MP_OKAY;
4015
}
4016

4017
/* End: bn_mp_mul_2d.c */
4018

4019
/* Start: bn_mp_mul_d.c */
4020

4021
/* multiply by a digit */
4022
int mp_mul_d(const mp_int *a, mp_digit b, mp_int *c)
4023
{
4024
   mp_digit u, *tmpa, *tmpc;
4025
   mp_word  r;
4026
   int      ix, res, olduse;
4027

4028
   /* make sure c is big enough to hold a*b */
4029
   if (c->alloc < (a->used + 1)) {
4030
      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4031
         return res;
4032
      }
4033
   }
4034

4035
   /* get the original destinations used count */
4036
   olduse = c->used;
4037

4038
   /* set the sign */
4039
   c->sign = a->sign;
4040

4041
   /* alias for a->dp [source] */
4042
   tmpa = a->dp;
4043

4044
   /* alias for c->dp [dest] */
4045
   tmpc = c->dp;
4046

4047
   /* zero carry */
4048
   u = 0;
4049

4050
   /* compute columns */
4051
   for (ix = 0; ix < a->used; ix++) {
4052
      /* compute product and carry sum for this term */
4053
      r       = (mp_word)u + ((mp_word)*tmpa++ * (mp_word)b);
4054

4055
      /* mask off higher bits to get a single digit */
4056
      *tmpc++ = (mp_digit)(r & (mp_word)MP_MASK);
4057

4058
      /* send carry into next iteration */
4059
      u       = (mp_digit)(r >> (mp_word)DIGIT_BIT);
4060
   }
4061

4062
   /* store final carry [if any] and increment ix offset  */
4063
   *tmpc++ = u;
4064
   ++ix;
4065

4066
   /* now zero digits above the top */
4067
   while (ix++ < olduse) {
4068
      *tmpc++ = 0;
4069
   }
4070

4071
   /* set used count */
4072
   c->used = a->used + 1;
4073
   mp_clamp(c);
4074

4075
   return MP_OKAY;
4076
}
4077

4078
/* End: bn_mp_mul_d.c */
4079

4080
/* Start: bn_mp_mulmod.c */
4081

4082
/* d = a * b (mod c) */
4083
int mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)
4084
{
4085
   int     res;
4086
   mp_int  t;
4087

4088
   if ((res = mp_init_size(&t, c->used)) != MP_OKAY) {
4089
      return res;
4090
   }
4091

4092
   if ((res = mp_mul(a, b, &t)) != MP_OKAY) {
4093
      mp_clear(&t);
4094
      return res;
4095
   }
4096
   res = mp_mod(&t, c, d);
4097
   mp_clear(&t);
4098
   return res;
4099
}
4100

4101
/* End: bn_mp_mulmod.c */
4102

4103
/* Start: bn_mp_n_root.c */
4104

4105
/* wrapper function for mp_n_root_ex()
4106
 * computes c = (a)**(1/b) such that (c)**b <= a and (c+1)**b > a
4107
 */
4108
int mp_n_root(const mp_int *a, mp_digit b, mp_int *c)
4109
{
4110
   return mp_n_root_ex(a, b, c, 0);
4111
}
4112

4113
/* End: bn_mp_n_root.c */
4114

4115
/* Start: bn_mp_n_root_ex.c */
4116

4117
/* find the n'th root of an integer
4118
 *
4119
 * Result found such that (c)**b <= a and (c+1)**b > a
4120
 *
4121
 * This algorithm uses Newton's approximation
4122
 * x[i+1] = x[i] - f(x[i])/f'(x[i])
4123
 * which will find the root in log(N) time where
4124
 * each step involves a fair bit.  This is not meant to
4125
 * find huge roots [square and cube, etc].
4126
 */
4127
int mp_n_root_ex(const mp_int *a, mp_digit b, mp_int *c, int fast)
4128
{
4129
   mp_int  t1, t2, t3, a_;
4130
   int     res;
4131

4132
   /* input must be positive if b is even */
4133
   if (((b & 1u) == 0u) && (a->sign == MP_NEG)) {
4134
      return MP_VAL;
4135
   }
4136

4137
   if ((res = mp_init(&t1)) != MP_OKAY) {
4138
      return res;
4139
   }
4140

4141
   if ((res = mp_init(&t2)) != MP_OKAY) {
4142
      goto LBL_T1;
4143
   }
4144

4145
   if ((res = mp_init(&t3)) != MP_OKAY) {
4146
      goto LBL_T2;
4147
   }
4148

4149
   /* if a is negative fudge the sign but keep track */
4150
   a_ = *a;
4151
   a_.sign = MP_ZPOS;
4152

4153
   /* t2 = 2 */
4154
   mp_set(&t2, 2uL);
4155

4156
   do {
4157
      /* t1 = t2 */
4158
      if ((res = mp_copy(&t2, &t1)) != MP_OKAY) {
4159
         goto LBL_T3;
4160
      }
4161

4162
      /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
4163

4164
      /* t3 = t1**(b-1) */
4165
      if ((res = mp_expt_d_ex(&t1, b - 1u, &t3, fast)) != MP_OKAY) {
4166
         goto LBL_T3;
4167
      }
4168

4169
      /* numerator */
4170
      /* t2 = t1**b */
4171
      if ((res = mp_mul(&t3, &t1, &t2)) != MP_OKAY) {
4172
         goto LBL_T3;
4173
      }
4174

4175
      /* t2 = t1**b - a */
4176
      if ((res = mp_sub(&t2, &a_, &t2)) != MP_OKAY) {
4177
         goto LBL_T3;
4178
      }
4179

4180
      /* denominator */
4181
      /* t3 = t1**(b-1) * b  */
4182
      if ((res = mp_mul_d(&t3, b, &t3)) != MP_OKAY) {
4183
         goto LBL_T3;
4184
      }
4185

4186
      /* t3 = (t1**b - a)/(b * t1**(b-1)) */
4187
      if ((res = mp_div(&t2, &t3, &t3, NULL)) != MP_OKAY) {
4188
         goto LBL_T3;
4189
      }
4190

4191
      if ((res = mp_sub(&t1, &t3, &t2)) != MP_OKAY) {
4192
         goto LBL_T3;
4193
      }
4194
   }  while (mp_cmp(&t1, &t2) != MP_EQ);
4195

4196
   /* result can be off by a few so check */
4197
   for (;;) {
4198
      if ((res = mp_expt_d_ex(&t1, b, &t2, fast)) != MP_OKAY) {
4199
         goto LBL_T3;
4200
      }
4201

4202
      if (mp_cmp(&t2, &a_) == MP_GT) {
4203
         if ((res = mp_sub_d(&t1, 1uL, &t1)) != MP_OKAY) {
4204
            goto LBL_T3;
4205
         }
4206
      } else {
4207
         break;
4208
      }
4209
   }
4210

4211
   /* set the result */
4212
   mp_exch(&t1, c);
4213

4214
   /* set the sign of the result */
4215
   c->sign = a->sign;
4216

4217
   res = MP_OKAY;
4218

4219
LBL_T3:
4220
   mp_clear(&t3);
4221
LBL_T2:
4222
   mp_clear(&t2);
4223
LBL_T1:
4224
   mp_clear(&t1);
4225
   return res;
4226
}
4227

4228
/* End: bn_mp_n_root_ex.c */
4229

4230
/* Start: bn_mp_neg.c */
4231

4232
/* b = -a */
4233
int mp_neg(const mp_int *a, mp_int *b)
4234
{
4235
   int     res;
4236
   if (a != b) {
4237
      if ((res = mp_copy(a, b)) != MP_OKAY) {
4238
         return res;
4239
      }
4240
   }
4241

4242
   if (mp_iszero(b) != MP_YES) {
4243
      b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
4244
   } else {
4245
      b->sign = MP_ZPOS;
4246
   }
4247

4248
   return MP_OKAY;
4249
}
4250

4251
/* End: bn_mp_neg.c */
4252

4253
/* Start: bn_mp_or.c */
4254

4255
/* OR two ints together */
4256
int mp_or(const mp_int *a, const mp_int *b, mp_int *c)
4257
{
4258
   int     res, ix, px;
4259
   mp_int  t;
4260
   const mp_int *x;
4261

4262
   if (a->used > b->used) {
4263
      if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
4264
         return res;
4265
      }
4266
      px = b->used;
4267
      x = b;
4268
   } else {
4269
      if ((res = mp_init_copy(&t, b)) != MP_OKAY) {
4270
         return res;
4271
      }
4272
      px = a->used;
4273
      x = a;
4274
   }
4275

4276
   for (ix = 0; ix < px; ix++) {
4277
      t.dp[ix] |= x->dp[ix];
4278
   }
4279
   mp_clamp(&t);
4280
   mp_exch(c, &t);
4281
   mp_clear(&t);
4282
   return MP_OKAY;
4283
}
4284

4285
/* End: bn_mp_or.c */
4286

4287
/* Start: bn_mp_prime_fermat.c */
4288

4289
/* performs one Fermat test.
4290
 *
4291
 * If "a" were prime then b**a == b (mod a) since the order of
4292
 * the multiplicative sub-group would be phi(a) = a-1.  That means
4293
 * it would be the same as b**(a mod (a-1)) == b**1 == b (mod a).
4294
 *
4295
 * Sets result to 1 if the congruence holds, or zero otherwise.
4296
 */
4297
int mp_prime_fermat(const mp_int *a, const mp_int *b, int *result)
4298
{
4299
   mp_int  t;
4300
   int     err;
4301

4302
   /* default to composite  */
4303
   *result = MP_NO;
4304

4305
   /* ensure b > 1 */
4306
   if (mp_cmp_d(b, 1uL) != MP_GT) {
4307
      return MP_VAL;
4308
   }
4309

4310
   /* init t */
4311
   if ((err = mp_init(&t)) != MP_OKAY) {
4312
      return err;
4313
   }
4314

4315
   /* compute t = b**a mod a */
4316
   if ((err = mp_exptmod(b, a, a, &t)) != MP_OKAY) {
4317
      goto LBL_T;
4318
   }
4319

4320
   /* is it equal to b? */
4321
   if (mp_cmp(&t, b) == MP_EQ) {
4322
      *result = MP_YES;
4323
   }
4324

4325
   err = MP_OKAY;
4326
LBL_T:
4327
   mp_clear(&t);
4328
   return err;
4329
}
4330

4331
/* End: bn_mp_prime_fermat.c */
4332

4333
/* Start: bn_mp_prime_frobenius_underwood.c */
4334

4335
/*
4336
 *  See file bn_mp_prime_is_prime.c or the documentation in doc/bn.tex for the details
4337
 */
4338
#ifndef LTM_USE_FIPS_ONLY
4339

4340
#ifdef MP_8BIT
4341
/*
4342
 * floor of positive solution of
4343
 * (2^16)-1 = (a+4)*(2*a+5)
4344
 * TODO: Both values are smaller than N^(1/4), would have to use a bigint
4345
 *       for a instead but any a biger than about 120 are already so rare that
4346
 *       it is possible to ignore them and still get enough pseudoprimes.
4347
 *       But it is still a restriction of the set of available pseudoprimes
4348
 *       which makes this implementation less secure if used stand-alone.
4349
 */
4350
#define LTM_FROBENIUS_UNDERWOOD_A 177
4351
#else
4352
#define LTM_FROBENIUS_UNDERWOOD_A 32764
4353
#endif
4354
int mp_prime_frobenius_underwood(const mp_int *N, int *result)
4355
{
4356
   mp_int T1z, T2z, Np1z, sz, tz;
4357

4358
   int a, ap2, length, i, j, isset;
4359
   int e;
4360

4361
   *result = MP_NO;
4362

4363
   if ((e = mp_init_multi(&T1z, &T2z, &Np1z, &sz, &tz, NULL)) != MP_OKAY) {
4364
      return e;
4365
   }
4366

4367
   for (a = 0; a < LTM_FROBENIUS_UNDERWOOD_A; a++) {
4368
      /* TODO: That's ugly! No, really, it is! */
4369
      if ((a==2) || (a==4) || (a==7) || (a==8) || (a==10) ||
4370
          (a==14) || (a==18) || (a==23) || (a==26) || (a==28)) {
4371
         continue;
4372
      }
4373
      /* (32764^2 - 4) < 2^31, no bigint for >MP_8BIT needed) */
4374
      if ((e = mp_set_long(&T1z, (unsigned long)a)) != MP_OKAY) {
4375
         goto LBL_FU_ERR;
4376
      }
4377

4378
      if ((e = mp_sqr(&T1z, &T1z)) != MP_OKAY) {
4379
         goto LBL_FU_ERR;
4380
      }
4381

4382
      if ((e = mp_sub_d(&T1z, 4uL, &T1z)) != MP_OKAY) {
4383
         goto LBL_FU_ERR;
4384
      }
4385

4386
      if ((e = mp_kronecker(&T1z, N, &j)) != MP_OKAY) {
4387
         goto LBL_FU_ERR;
4388
      }
4389

4390
      if (j == -1) {
4391
         break;
4392
      }
4393

4394
      if (j == 0) {
4395
         /* composite */
4396
         goto LBL_FU_ERR;
4397
      }
4398
   }
4399
   /* Tell it a composite and set return value accordingly */
4400
   if (a >= LTM_FROBENIUS_UNDERWOOD_A) {
4401
      e = MP_ITER;
4402
      goto LBL_FU_ERR;
4403
   }
4404
   /* Composite if N and (a+4)*(2*a+5) are not coprime */
4405
   if ((e = mp_set_long(&T1z, (unsigned long)((a+4)*((2*a)+5)))) != MP_OKAY) {
4406
      goto LBL_FU_ERR;
4407
   }
4408

4409
   if ((e = mp_gcd(N, &T1z, &T1z)) != MP_OKAY) {
4410
      goto LBL_FU_ERR;
4411
   }
4412

4413
   if (!((T1z.used == 1) && (T1z.dp[0] == 1u))) {
4414
      goto LBL_FU_ERR;
4415
   }
4416

4417
   ap2 = a + 2;
4418
   if ((e = mp_add_d(N, 1uL, &Np1z)) != MP_OKAY) {
4419
      goto LBL_FU_ERR;
4420
   }
4421

4422
   mp_set(&sz, 1uL);
4423
   mp_set(&tz, 2uL);
4424
   length = mp_count_bits(&Np1z);
4425

4426
   for (i = length - 2; i >= 0; i--) {
4427
      /*
4428
       * temp = (sz*(a*sz+2*tz))%N;
4429
       * tz   = ((tz-sz)*(tz+sz))%N;
4430
       * sz   = temp;
4431
       */
4432
      if ((e = mp_mul_2(&tz, &T2z)) != MP_OKAY) {
4433
         goto LBL_FU_ERR;
4434
      }
4435

4436
      /* a = 0 at about 50% of the cases (non-square and odd input) */
4437
      if (a != 0) {
4438
         if ((e = mp_mul_d(&sz, (mp_digit)a, &T1z)) != MP_OKAY) {
4439
            goto LBL_FU_ERR;
4440
         }
4441
         if ((e = mp_add(&T1z, &T2z, &T2z)) != MP_OKAY) {
4442
            goto LBL_FU_ERR;
4443
         }
4444
      }
4445

4446
      if ((e = mp_mul(&T2z, &sz, &T1z)) != MP_OKAY) {
4447
         goto LBL_FU_ERR;
4448
      }
4449
      if ((e = mp_sub(&tz, &sz, &T2z)) != MP_OKAY) {
4450
         goto LBL_FU_ERR;
4451
      }
4452
      if ((e = mp_add(&sz, &tz, &sz)) != MP_OKAY) {
4453
         goto LBL_FU_ERR;
4454
      }
4455
      if ((e = mp_mul(&sz, &T2z, &tz)) != MP_OKAY) {
4456
         goto LBL_FU_ERR;
4457
      }
4458
      if ((e = mp_mod(&tz, N, &tz)) != MP_OKAY) {
4459
         goto LBL_FU_ERR;
4460
      }
4461
      if ((e = mp_mod(&T1z, N, &sz)) != MP_OKAY) {
4462
         goto LBL_FU_ERR;
4463
      }
4464
      if ((isset = mp_get_bit(&Np1z, i)) == MP_VAL) {
4465
         e = isset;
4466
         goto LBL_FU_ERR;
4467
      }
4468
      if (isset == MP_YES) {
4469
         /*
4470
          *  temp = (a+2) * sz + tz
4471
          *  tz   = 2 * tz - sz
4472
          *  sz   = temp
4473
          */
4474
         if (a == 0) {
4475
            if ((e = mp_mul_2(&sz, &T1z)) != MP_OKAY) {
4476
               goto LBL_FU_ERR;
4477
            }
4478
         } else {
4479
            if ((e = mp_mul_d(&sz, (mp_digit)ap2, &T1z)) != MP_OKAY) {
4480
               goto LBL_FU_ERR;
4481
            }
4482
         }
4483
         if ((e = mp_add(&T1z, &tz, &T1z)) != MP_OKAY) {
4484
            goto LBL_FU_ERR;
4485
         }
4486
         if ((e = mp_mul_2(&tz, &T2z)) != MP_OKAY) {
4487
            goto LBL_FU_ERR;
4488
         }
4489
         if ((e = mp_sub(&T2z, &sz, &tz)) != MP_OKAY) {
4490
            goto LBL_FU_ERR;
4491
         }
4492
         mp_exch(&sz, &T1z);
4493
      }
4494
   }
4495

4496
   if ((e = mp_set_long(&T1z, (unsigned long)((2 * a) + 5))) != MP_OKAY) {
4497
      goto LBL_FU_ERR;
4498
   }
4499
   if ((e = mp_mod(&T1z, N, &T1z)) != MP_OKAY) {
4500
      goto LBL_FU_ERR;
4501
   }
4502
   if ((mp_iszero(&sz) != MP_NO) && (mp_cmp(&tz, &T1z) == MP_EQ)) {
4503
      *result = MP_YES;
4504
      goto LBL_FU_ERR;
4505
   }
4506

4507
LBL_FU_ERR:
4508
   mp_clear_multi(&tz, &sz, &Np1z, &T2z, &T1z, NULL);
4509
   return e;
4510
}
4511

4512
#endif
4513

4514
/* End: bn_mp_prime_frobenius_underwood.c */
4515

4516
/* Start: bn_mp_prime_is_divisible.c */
4517

4518
/* determines if an integers is divisible by one
4519
 * of the first PRIME_SIZE primes or not
4520
 *
4521
 * sets result to 0 if not, 1 if yes
4522
 */
4523
int mp_prime_is_divisible(const mp_int *a, int *result)
4524
{
4525
   int     err, ix;
4526
   mp_digit res;
4527

4528
   /* default to not */
4529
   *result = MP_NO;
4530

4531
   for (ix = 0; ix < PRIME_SIZE; ix++) {
4532
      /* what is a mod LBL_prime_tab[ix] */
4533
      if ((err = mp_mod_d(a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4534
         return err;
4535
      }
4536

4537
      /* is the residue zero? */
4538
      if (res == 0u) {
4539
         *result = MP_YES;
4540
         return MP_OKAY;
4541
      }
4542
   }
4543

4544
   return MP_OKAY;
4545
}
4546

4547
/* End: bn_mp_prime_is_divisible.c */
4548

4549
/* Start: bn_mp_prime_is_prime.c */
4550

4551
/* Portable integer log of two with small footprint.
 *
 * Returns floor(log2(value)), i.e. the zero-based index of the highest set
 * bit of "value".  Both 0 and 1 give 0.
 *
 * Non-positive input also gives 0: right-shifting a negative signed value is
 * implementation-defined and, with an arithmetic shift, never reaches zero,
 * so the shifting is done on an unsigned copy of a value known to be positive.
 */
static unsigned int s_floor_ilog2(int value)
{
   unsigned int bits, r = 0;

   if (value <= 0) {
      return 0;
   }

   bits = (unsigned int)value;
   while ((bits >>= 1) != 0u) {
      r++;
   }
   return r;
}
4560

4561

4562
int mp_prime_is_prime(const mp_int *a, int t, int *result)
4563
{
4564
   mp_int  b;
4565
   int     ix, err, res, p_max = 0, size_a, len;
4566
   unsigned int fips_rand, mask;
4567

4568
   /* default to no */
4569
   *result = MP_NO;
4570

4571
   /* valid value of t? */
4572
   if (t > PRIME_SIZE) {
4573
      return MP_VAL;
4574
   }
4575

4576
   /* Some shortcuts */
4577
   /* N > 3 */
4578
   if (a->used == 1) {
4579
      if ((a->dp[0] == 0u) || (a->dp[0] == 1u)) {
4580
         *result = 0;
4581
         return MP_OKAY;
4582
      }
4583
      if (a->dp[0] == 2u) {
4584
         *result = 1;
4585
         return MP_OKAY;
4586
      }
4587
   }
4588

4589
   /* N must be odd */
4590
   if (mp_iseven(a) == MP_YES) {
4591
      return MP_OKAY;
4592
   }
4593
   /* N is not a perfect square: floor(sqrt(N))^2 != N */
4594
   if ((err = mp_is_square(a, &res)) != MP_OKAY) {
4595
      return err;
4596
   }
4597
   if (res != 0) {
4598
      return MP_OKAY;
4599
   }
4600

4601
   /* is the input equal to one of the primes in the table? */
4602
   for (ix = 0; ix < PRIME_SIZE; ix++) {
4603
      if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4604
         *result = MP_YES;
4605
         return MP_OKAY;
4606
      }
4607
   }
4608
#ifdef MP_8BIT
4609
   /* The search in the loop above was exhaustive in this case */
4610
   if ((a->used == 1) && (PRIME_SIZE >= 31)) {
4611
      return MP_OKAY;
4612
   }
4613
#endif
4614

4615
   /* first perform trial division */
4616
   if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) {
4617
      return err;
4618
   }
4619

4620
   /* return if it was trivially divisible */
4621
   if (res == MP_YES) {
4622
      return MP_OKAY;
4623
   }
4624

4625
   /*
4626
       Run the Miller-Rabin test with base 2 for the BPSW test.
4627
    */
4628
   if ((err = mp_init_set(&b, 2uL)) != MP_OKAY) {
4629
      return err;
4630
   }
4631

4632
   if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4633
      goto LBL_B;
4634
   }
4635
   if (res == MP_NO) {
4636
      goto LBL_B;
4637
   }
4638
   /*
4639
      Rumours have it that Mathematica does a second M-R test with base 3.
4640
      Other rumours have it that their strong L-S test is slightly different.
4641
      It does not hurt, though, beside a bit of extra runtime.
4642
   */
4643
   b.dp[0]++;
4644
   if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4645
      goto LBL_B;
4646
   }
4647
   if (res == MP_NO) {
4648
      goto LBL_B;
4649
   }
4650

4651
   /*
4652
    * Both, the Frobenius-Underwood test and the the Lucas-Selfridge test are quite
4653
    * slow so if speed is an issue, define LTM_USE_FIPS_ONLY to use M-R tests with
4654
    * bases 2, 3 and t random bases.
4655
    */
4656
#ifndef LTM_USE_FIPS_ONLY
4657
   if (t >= 0) {
4658
      /*
4659
       * Use a Frobenius-Underwood test instead of the Lucas-Selfridge test for
4660
       * MP_8BIT (It is unknown if the Lucas-Selfridge test works with 16-bit
4661
       * integers but the necesssary analysis is on the todo-list).
4662
       */
4663
#if defined (MP_8BIT) || defined (LTM_USE_FROBENIUS_TEST)
4664
      err = mp_prime_frobenius_underwood(a, &res);
4665
      if ((err != MP_OKAY) && (err != MP_ITER)) {
4666
         goto LBL_B;
4667
      }
4668
      if (res == MP_NO) {
4669
         goto LBL_B;
4670
      }
4671
#else
4672
      if ((err = mp_prime_strong_lucas_selfridge(a, &res)) != MP_OKAY) {
4673
         goto LBL_B;
4674
      }
4675
      if (res == MP_NO) {
4676
         goto LBL_B;
4677
      }
4678
#endif
4679
   }
4680
#endif
4681

4682
   /* run at least one Miller-Rabin test with a random base */
4683
   if (t == 0) {
4684
      t = 1;
4685
   }
4686

4687
   /*
4688
      abs(t) extra rounds of M-R to extend the range of primes it can find if t < 0.
4689
      Only recommended if the input range is known to be < 3317044064679887385961981
4690

4691
      It uses the bases for a deterministic M-R test if input < 3317044064679887385961981
4692
      The caller has to check the size.
4693

4694
      Not for cryptographic use because with known bases strong M-R pseudoprimes can
4695
      be constructed. Use at least one M-R test with a random base (t >= 1).
4696

4697
      The 1119 bit large number
4698

4699
      80383745745363949125707961434194210813883768828755814583748891752229742737653\
4700
      33652186502336163960045457915042023603208766569966760987284043965408232928738\
4701
      79185086916685732826776177102938969773947016708230428687109997439976544144845\
4702
      34115587245063340927902227529622941498423068816854043264575340183297861112989\
4703
      60644845216191652872597534901
4704

4705
      has been constructed by F. Arnault (F. Arnault, "Rabin-Miller primality test:
4706
      composite numbers which pass it.",  Mathematics of Computation, 1995, 64. Jg.,
4707
      Nr. 209, S. 355-361), is a semiprime with the two factors
4708

4709
      40095821663949960541830645208454685300518816604113250877450620473800321707011\
4710
      96242716223191597219733582163165085358166969145233813917169287527980445796800\
4711
      452592031836601
4712

4713
      20047910831974980270915322604227342650259408302056625438725310236900160853505\
4714
      98121358111595798609866791081582542679083484572616906958584643763990222898400\
4715
      226296015918301
4716

4717
      and it is a strong pseudoprime to all forty-six prime M-R bases up to 200
4718

4719
      It does not fail the strong Bailley-PSP test as implemented here, it is just
4720
      given as an example, if not the reason to use the BPSW-test instead of M-R-tests
4721
      with a sequence of primes 2...n.
4722

4723
   */
4724
   if (t < 0) {
4725
      t = -t;
4726
      /*
4727
          Sorenson, Jonathan; Webster, Jonathan (2015).
4728
           "Strong Pseudoprimes to Twelve Prime Bases".
4729
       */
4730
      /* 0x437ae92817f9fc85b7e5 = 318665857834031151167461 */
4731
      if ((err =   mp_read_radix(&b, "437ae92817f9fc85b7e5", 16)) != MP_OKAY) {
4732
         goto LBL_B;
4733
      }
4734

4735
      if (mp_cmp(a, &b) == MP_LT) {
4736
         p_max = 12;
4737
      } else {
4738
         /* 0x2be6951adc5b22410a5fd = 3317044064679887385961981 */
4739
         if ((err = mp_read_radix(&b, "2be6951adc5b22410a5fd", 16)) != MP_OKAY) {
4740
            goto LBL_B;
4741
         }
4742

4743
         if (mp_cmp(a, &b) == MP_LT) {
4744
            p_max = 13;
4745
         } else {
4746
            err = MP_VAL;
4747
            goto LBL_B;
4748
         }
4749
      }
4750

4751
      /* for compatibility with the current API (well, compatible within a sign's width) */
4752
      if (p_max < t) {
4753
         p_max = t;
4754
      }
4755

4756
      if (p_max > PRIME_SIZE) {
4757
         err = MP_VAL;
4758
         goto LBL_B;
4759
      }
4760
      /* we did bases 2 and 3  already, skip them */
4761
      for (ix = 2; ix < p_max; ix++) {
4762
         mp_set(&b, ltm_prime_tab[ix]);
4763
         if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4764
            goto LBL_B;
4765
         }
4766
         if (res == MP_NO) {
4767
            goto LBL_B;
4768
         }
4769
      }
4770
   }
4771
   /*
4772
       Do "t" M-R tests with random bases between 3 and "a".
4773
       See Fips 186.4 p. 126ff
4774
   */
4775
   else if (t > 0) {
4776
      /*
4777
       * The mp_digit's have a defined bit-size but the size of the
4778
       * array a.dp is a simple 'int' and this library can not assume full
4779
       * compliance to the current C-standard (ISO/IEC 9899:2011) because
4780
       * it gets used for small embeded processors, too. Some of those MCUs
4781
       * have compilers that one cannot call standard compliant by any means.
4782
       * Hence the ugly type-fiddling in the following code.
4783
       */
4784
      size_a = mp_count_bits(a);
4785
      mask = (1u << s_floor_ilog2(size_a)) - 1u;
4786
      /*
4787
         Assuming the General Rieman hypothesis (never thought to write that in a
4788
         comment) the upper bound can be lowered to  2*(log a)^2.
4789
         E. Bach, "Explicit bounds for primality testing and related problems,"
4790
         Math. Comp. 55 (1990), 355-380.
4791

4792
            size_a = (size_a/10) * 7;
4793
            len = 2 * (size_a * size_a);
4794

4795
         E.g.: a number of size 2^2048 would be reduced to the upper limit
4796

4797
            floor(2048/10)*7 = 1428
4798
            2 * 1428^2       = 4078368
4799

4800
         (would have been ~4030331.9962 with floats and natural log instead)
4801
         That number is smaller than 2^28, the default bit-size of mp_digit.
4802
      */
4803

4804
      /*
4805
        How many tests, you might ask? Dana Jacobsen of Math::Prime::Util fame
4806
        does exactly 1. In words: one. Look at the end of _GMP_is_prime() in
4807
        Math-Prime-Util-GMP-0.50/primality.c if you do not believe it.
4808

4809
        The function mp_rand() goes to some length to use a cryptographically
4810
        good PRNG. That also means that the chance to always get the same base
4811
        in the loop is non-zero, although very low.
4812
        If the BPSW test and/or the addtional Frobenious test have been
4813
        performed instead of just the Miller-Rabin test with the bases 2 and 3,
4814
        a single extra test should suffice, so such a very unlikely event
4815
        will not do much harm.
4816

4817
        To preemptivly answer the dangling question: no, a witness does not
4818
        need to be prime.
4819
      */
4820
      for (ix = 0; ix < t; ix++) {
4821
         /* mp_rand() guarantees the first digit to be non-zero */
4822
         if ((err = mp_rand(&b, 1)) != MP_OKAY) {
4823
            goto LBL_B;
4824
         }
4825
         /*
4826
          * Reduce digit before casting because mp_digit might be bigger than
4827
          * an unsigned int and "mask" on the other side is most probably not.
4828
          */
4829
         fips_rand = (unsigned int)(b.dp[0] & (mp_digit) mask);
4830
#ifdef MP_8BIT
4831
         /*
4832
          * One 8-bit digit is too small, so concatenate two if the size of
4833
          * unsigned int allows for it.
4834
          */
4835
         if (((sizeof(unsigned int) * CHAR_BIT)/2) >= (sizeof(mp_digit) * CHAR_BIT)) {
4836
            if ((err = mp_rand(&b, 1)) != MP_OKAY) {
4837
               goto LBL_B;
4838
            }
4839
            fips_rand <<= sizeof(mp_digit) * CHAR_BIT;
4840
            fips_rand |= (unsigned int) b.dp[0];
4841
            fips_rand &= mask;
4842
         }
4843
#endif
4844
         if (fips_rand > (unsigned int)(INT_MAX - DIGIT_BIT)) {
4845
            len = INT_MAX / DIGIT_BIT;
4846
         } else {
4847
            len = (((int)fips_rand + DIGIT_BIT) / DIGIT_BIT);
4848
         }
4849
         /*  Unlikely. */
4850
         if (len < 0) {
4851
            ix--;
4852
            continue;
4853
         }
4854
         /*
4855
          * As mentioned above, one 8-bit digit is too small and
4856
          * although it can only happen in the unlikely case that
4857
          * an "unsigned int" is smaller than 16 bit a simple test
4858
          * is cheap and the correction even cheaper.
4859
          */
4860
#ifdef MP_8BIT
4861
         /* All "a" < 2^8 have been caught before */
4862
         if (len == 1) {
4863
            len++;
4864
         }
4865
#endif
4866
         if ((err = mp_rand(&b, len)) != MP_OKAY) {
4867
            goto LBL_B;
4868
         }
4869
         /*
4870
          * That number might got too big and the witness has to be
4871
          * smaller than or equal to "a"
4872
          */
4873
         len = mp_count_bits(&b);
4874
         if (len > size_a) {
4875
            len = len - size_a;
4876
            if ((err = mp_div_2d(&b, len, &b, NULL)) != MP_OKAY) {
4877
               goto LBL_B;
4878
            }
4879
         }
4880

4881
         /* Although the chance for b <= 3 is miniscule, try again. */
4882
         if (mp_cmp_d(&b, 3uL) != MP_GT) {
4883
            ix--;
4884
            continue;
4885
         }
4886
         if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4887
            goto LBL_B;
4888
         }
4889
         if (res == MP_NO) {
4890
            goto LBL_B;
4891
         }
4892
      }
4893
   }
4894

4895
   /* passed the test */
4896
   *result = MP_YES;
4897
LBL_B:
4898
   mp_clear(&b);
4899
   return err;
4900
}
4901

4902
/* End: bn_mp_prime_is_prime.c */
4903

4904
/* Start: bn_mp_prime_miller_rabin.c */
4905

4906
/* Miller-Rabin test of "a" to the base of "b" as described in
4907
 * HAC pp. 139 Algorithm 4.24
4908
 *
4909
 * Sets result to 0 if definitely composite or 1 if probably prime.
4910
 * Randomly the chance of error is no more than 1/4 and often
4911
 * very much lower.
4912
 */
4913
int mp_prime_miller_rabin(const mp_int *a, const mp_int *b, int *result)
4914
{
4915
   mp_int  n1, y, r;
4916
   int     s, j, err;
4917

4918
   /* default */
4919
   *result = MP_NO;
4920

4921
   /* ensure b > 1 */
4922
   if (mp_cmp_d(b, 1uL) != MP_GT) {
4923
      return MP_VAL;
4924
   }
4925

4926
   /* get n1 = a - 1 */
4927
   if ((err = mp_init_copy(&n1, a)) != MP_OKAY) {
4928
      return err;
4929
   }
4930
   if ((err = mp_sub_d(&n1, 1uL, &n1)) != MP_OKAY) {
4931
      goto LBL_N1;
4932
   }
4933

4934
   /* set 2**s * r = n1 */
4935
   if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) {
4936
      goto LBL_N1;
4937
   }
4938

4939
   /* count the number of least significant bits
4940
    * which are zero
4941
    */
4942
   s = mp_cnt_lsb(&r);
4943

4944
   /* now divide n - 1 by 2**s */
4945
   if ((err = mp_div_2d(&r, s, &r, NULL)) != MP_OKAY) {
4946
      goto LBL_R;
4947
   }
4948

4949
   /* compute y = b**r mod a */
4950
   if ((err = mp_init(&y)) != MP_OKAY) {
4951
      goto LBL_R;
4952
   }
4953
   if ((err = mp_exptmod(b, &r, a, &y)) != MP_OKAY) {
4954
      goto LBL_Y;
4955
   }
4956

4957
   /* if y != 1 and y != n1 do */
4958
   if ((mp_cmp_d(&y, 1uL) != MP_EQ) && (mp_cmp(&y, &n1) != MP_EQ)) {
4959
      j = 1;
4960
      /* while j <= s-1 and y != n1 */
4961
      while ((j <= (s - 1)) && (mp_cmp(&y, &n1) != MP_EQ)) {
4962
         if ((err = mp_sqrmod(&y, a, &y)) != MP_OKAY) {
4963
            goto LBL_Y;
4964
         }
4965

4966
         /* if y == 1 then composite */
4967
         if (mp_cmp_d(&y, 1uL) == MP_EQ) {
4968
            goto LBL_Y;
4969
         }
4970

4971
         ++j;
4972
      }
4973

4974
      /* if y != n1 then composite */
4975
      if (mp_cmp(&y, &n1) != MP_EQ) {
4976
         goto LBL_Y;
4977
      }
4978
   }
4979

4980
   /* probably prime now */
4981
   *result = MP_YES;
4982
LBL_Y:
4983
   mp_clear(&y);
4984
LBL_R:
4985
   mp_clear(&r);
4986
LBL_N1:
4987
   mp_clear(&n1);
4988
   return err;
4989
}
4990

4991
/* End: bn_mp_prime_miller_rabin.c */
4992

4993
/* Start: bn_mp_prime_next_prime.c */
4994

4995
/* finds the next prime after the number "a" using "t" trials
4996
 * of Miller-Rabin.
4997
 *
4998
 * bbs_style = 1 means the prime must be congruent to 3 mod 4
4999
 */
5000
int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
5001
{
5002
   int      err, res = MP_NO, x, y;
5003
   mp_digit res_tab[PRIME_SIZE], step, kstep;
5004
   mp_int   b;
5005

5006
   /* force positive */
5007
   a->sign = MP_ZPOS;
5008

5009
   /* simple algo if a is less than the largest prime in the table */
5010
   if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {
5011
      /* find which prime it is bigger than */
5012
      for (x = PRIME_SIZE - 2; x >= 0; x--) {
5013
         if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {
5014
            if (bbs_style == 1) {
5015
               /* ok we found a prime smaller or
5016
                * equal [so the next is larger]
5017
                *
5018
                * however, the prime must be
5019
                * congruent to 3 mod 4
5020
                */
5021
               if ((ltm_prime_tab[x + 1] & 3u) != 3u) {
5022
                  /* scan upwards for a prime congruent to 3 mod 4 */
5023
                  for (y = x + 1; y < PRIME_SIZE; y++) {
5024
                     if ((ltm_prime_tab[y] & 3u) == 3u) {
5025
                        mp_set(a, ltm_prime_tab[y]);
5026
                        return MP_OKAY;
5027
                     }
5028
                  }
5029
               }
5030
            } else {
5031
               mp_set(a, ltm_prime_tab[x + 1]);
5032
               return MP_OKAY;
5033
            }
5034
         }
5035
      }
5036
      /* at this point a maybe 1 */
5037
      if (mp_cmp_d(a, 1uL) == MP_EQ) {
5038
         mp_set(a, 2uL);
5039
         return MP_OKAY;
5040
      }
5041
      /* fall through to the sieve */
5042
   }
5043

5044
   /* generate a prime congruent to 3 mod 4 or 1/3 mod 4? */
5045
   if (bbs_style == 1) {
5046
      kstep   = 4;
5047
   } else {
5048
      kstep   = 2;
5049
   }
5050

5051
   /* at this point we will use a combination of a sieve and Miller-Rabin */
5052

5053
   if (bbs_style == 1) {
5054
      /* if a mod 4 != 3 subtract the correct value to make it so */
5055
      if ((a->dp[0] & 3u) != 3u) {
5056
         if ((err = mp_sub_d(a, (a->dp[0] & 3u) + 1u, a)) != MP_OKAY) {
5057
            return err;
5058
         };
5059
      }
5060
   } else {
5061
      if (mp_iseven(a) == MP_YES) {
5062
         /* force odd */
5063
         if ((err = mp_sub_d(a, 1uL, a)) != MP_OKAY) {
5064
            return err;
5065
         }
5066
      }
5067
   }
5068

5069
   /* generate the restable */
5070
   for (x = 1; x < PRIME_SIZE; x++) {
5071
      if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {
5072
         return err;
5073
      }
5074
   }
5075

5076
   /* init temp used for Miller-Rabin Testing */
5077
   if ((err = mp_init(&b)) != MP_OKAY) {
5078
      return err;
5079
   }
5080

5081
   for (;;) {
5082
      /* skip to the next non-trivially divisible candidate */
5083
      step = 0;
5084
      do {
5085
         /* y == 1 if any residue was zero [e.g. cannot be prime] */
5086
         y     =  0;
5087

5088
         /* increase step to next candidate */
5089
         step += kstep;
5090

5091
         /* compute the new residue without using division */
5092
         for (x = 1; x < PRIME_SIZE; x++) {
5093
            /* add the step to each residue */
5094
            res_tab[x] += kstep;
5095

5096
            /* subtract the modulus [instead of using division] */
5097
            if (res_tab[x] >= ltm_prime_tab[x]) {
5098
               res_tab[x]  -= ltm_prime_tab[x];
5099
            }
5100

5101
            /* set flag if zero */
5102
            if (res_tab[x] == 0u) {
5103
               y = 1;
5104
            }
5105
         }
5106
      } while ((y == 1) && (step < (((mp_digit)1 << DIGIT_BIT) - kstep)));
5107

5108
      /* add the step */
5109
      if ((err = mp_add_d(a, step, a)) != MP_OKAY) {
5110
         goto LBL_ERR;
5111
      }
5112

5113
      /* if didn't pass sieve and step == MAX then skip test */
5114
      if ((y == 1) && (step >= (((mp_digit)1 << DIGIT_BIT) - kstep))) {
5115
         continue;
5116
      }
5117

5118
      if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {
5119
         goto LBL_ERR;
5120
      }
5121
      if (res == MP_YES) {
5122
         break;
5123
      }
5124
   }
5125

5126
   err = MP_OKAY;
5127
LBL_ERR:
5128
   mp_clear(&b);
5129
   return err;
5130
}
5131

5132
/* End: bn_mp_prime_next_prime.c */
5133

5134
/* Start: bn_mp_prime_rabin_miller_trials.c */
5135

5136
/* Lookup table: k is a size in bits, t the trial count returned for it. */
static const struct {
   int k, t;
} sizes[] = {
   {    80,    -1 }, /* Use deterministic algorithm for size <= 80 bits */
   {    81,    39 },
   {    96,    37 },
   {   128,    32 },
   {   160,    27 },
   {   192,    21 },
   {   256,    16 },
   {   384,    10 },
   {   512,     7 },
   {   640,     6 },
   {   768,     5 },
   {   896,     4 },
   {  1024,     4 },
   {  2048,     2 },
   {  4096,     1 },
};

/* returns # of RM trials required for a given bit size and max. error of 2^(-96)
 *
 * An exact match returns the t of its row.  A size between two rows returns
 * the t of the row below it, a size below the first row returns the t of the
 * first row, and a size above the last row returns the last t plus one.
 */
int mp_prime_rabin_miller_trials(int size)
{
   const int rows = (int)(sizeof(sizes)/(sizeof(sizes[0])));
   int       idx  = 0;

   /* skip every row whose bit size is strictly below the requested one */
   while ((idx < rows) && (sizes[idx].k < size)) {
      idx++;
   }

   /* larger than everything in the table */
   if (idx == rows) {
      return sizes[rows - 1].t + 1;
   }

   /* exact hit */
   if (sizes[idx].k == size) {
      return sizes[idx].t;
   }

   /* in between two rows (or below the first one) */
   return (idx == 0) ? sizes[0].t : sizes[idx - 1].t;
}
5170

5171
/* End: bn_mp_prime_rabin_miller_trials.c */
5172

5173
/* Start: bn_mp_prime_random_ex.c */
5174

5175
/* makes a truly random prime of a given size (bits),
5176
 *
5177
 * Flags are as follows:
5178
 *
5179
 *   LTM_PRIME_BBS      - make prime congruent to 3 mod 4
5180
 *   LTM_PRIME_SAFE     - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
5181
 *   LTM_PRIME_2MSB_ON  - make the 2nd highest bit one
5182
 *
5183
 * You have to supply a callback which fills in a buffer with random bytes.  "dat" is a parameter you can
5184
 * have passed to the callback (e.g. a state or something).  This function doesn't use "dat" itself
5185
 * so it can be NULL
5186
 *
5187
 */
5188

5189
/* This is possibly the mother of all prime generation functions, muahahahahaha! */
5190
int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
5191
{
5192
   unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
5193
   int res, err, bsize, maskOR_msb_offset;
5194

5195
   /* sanity check the input */
5196
   if ((size <= 1) || (t <= 0)) {
5197
      return MP_VAL;
5198
   }
5199

5200
   /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
5201
   if ((flags & LTM_PRIME_SAFE) != 0) {
5202
      flags |= LTM_PRIME_BBS;
5203
   }
5204

5205
   /* calc the byte size */
5206
   bsize = (size>>3) + ((size&7)?1:0);
5207

5208
   /* we need a buffer of bsize bytes */
5209
   tmp = OPT_CAST(unsigned char) XMALLOC((size_t)bsize);
5210
   if (tmp == NULL) {
5211
      return MP_MEM;
5212
   }
5213

5214
   /* calc the maskAND value for the MSbyte*/
5215
   maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
5216

5217
   /* calc the maskOR_msb */
5218
   maskOR_msb        = 0;
5219
   maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
5220
   if ((flags & LTM_PRIME_2MSB_ON) != 0) {
5221
      maskOR_msb       |= 0x80 >> ((9 - size) & 7);
5222
   }
5223

5224
   /* get the maskOR_lsb */
5225
   maskOR_lsb         = 1;
5226
   if ((flags & LTM_PRIME_BBS) != 0) {
5227
      maskOR_lsb     |= 3;
5228
   }
5229

5230
   do {
5231
      /* read the bytes */
5232
      if (cb(tmp, bsize, dat) != bsize) {
5233
         err = MP_VAL;
5234
         goto error;
5235
      }
5236

5237
      /* work over the MSbyte */
5238
      tmp[0]    &= maskAND;
5239
      tmp[0]    |= 1 << ((size - 1) & 7);
5240

5241
      /* mix in the maskORs */
5242
      tmp[maskOR_msb_offset]   |= maskOR_msb;
5243
      tmp[bsize-1]             |= maskOR_lsb;
5244

5245
      /* read it in */
5246
      if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) {
5247
         goto error;
5248
      }
5249

5250
      /* is it prime? */
5251
      if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {
5252
         goto error;
5253
      }
5254
      if (res == MP_NO) {
5255
         continue;
5256
      }
5257

5258
      if ((flags & LTM_PRIME_SAFE) != 0) {
5259
         /* see if (a-1)/2 is prime */
5260
         if ((err = mp_sub_d(a, 1uL, a)) != MP_OKAY) {
5261
            goto error;
5262
         }
5263
         if ((err = mp_div_2(a, a)) != MP_OKAY) {
5264
            goto error;
5265
         }
5266

5267
         /* is it prime? */
5268
         if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {
5269
            goto error;
5270
         }
5271
      }
5272
   } while (res == MP_NO);
5273

5274
   if ((flags & LTM_PRIME_SAFE) != 0) {
5275
      /* restore a to the original value */
5276
      if ((err = mp_mul_2(a, a)) != MP_OKAY) {
5277
         goto error;
5278
      }
5279
      if ((err = mp_add_d(a, 1uL, a)) != MP_OKAY) {
5280
         goto error;
5281
      }
5282
   }
5283

5284
   err = MP_OKAY;
5285
error:
5286
   XFREE(tmp);
5287
   return err;
5288
}
5289

5290
/* End: bn_mp_prime_random_ex.c */
5291

5292
/* Start: bn_mp_prime_strong_lucas_selfridge.c */
5293

5294
/*
5295
 *  See file bn_mp_prime_is_prime.c or the documentation in doc/bn.tex for the details
5296
 */
5297
#ifndef LTM_USE_FIPS_ONLY
5298

5299
/*
5300
 *  8-bit is just too small. You can try the Frobenius test
5301
 *  but that frobenius test can fail, too, for the same reason.
5302
 */
5303
#ifndef MP_8BIT
5304

5305
/*
5306
 * multiply bigint a with int d and put the result in c
5307
 * Like mp_mul_d() but with a signed long as the small input
5308
 */
5309
static int s_mp_mul_si(const mp_int *a, long d, mp_int *c)
5310
{
5311
   mp_int t;
5312
   int err, neg = 0;
5313

5314
   if ((err = mp_init(&t)) != MP_OKAY) {
5315
      return err;
5316
   }
5317
   if (d < 0) {
5318
      neg = 1;
5319
      d = -d;
5320
   }
5321

5322
   /*
5323
    * mp_digit might be smaller than a long, which excludes
5324
    * the use of mp_mul_d() here.
5325
    */
5326
   if ((err = mp_set_long(&t, (unsigned long) d)) != MP_OKAY) {
5327
      goto LBL_MPMULSI_ERR;
5328
   }
5329
   if ((err = mp_mul(a, &t, c)) != MP_OKAY) {
5330
      goto LBL_MPMULSI_ERR;
5331
   }
5332
   if (neg ==  1) {
5333
      c->sign = (a->sign == MP_NEG) ? MP_ZPOS: MP_NEG;
5334
   }
5335
LBL_MPMULSI_ERR:
5336
   mp_clear(&t);
5337
   return err;
5338
}
5339
/*
5340
    Strong Lucas-Selfridge test.
5341
    returns MP_YES if it is a strong L-S prime, MP_NO if it is composite
5342

5343
    Code ported from  Thomas Ray Nicely's implementation of the BPSW test
5344
    at http://www.trnicely.net/misc/bpsw.html
5345

5346
    Freeware copyright (C) 2016 Thomas R. Nicely <http://www.trnicely.net>.
5347
    Released into the public domain by the author, who disclaims any legal
5348
    liability arising from its use
5349

5350
    The multi-line comments are made by Thomas R. Nicely and are copied verbatim.
5351
    Additional comments marked "CZ" (without the quotes) are by the code-portist.
5352

5353
    (If that name sounds familiar, he is the guy who found the fdiv bug in the
5354
     Pentium (P5x, I think) Intel processor)
5355
*/
5356
int mp_prime_strong_lucas_selfridge(const mp_int *a, int *result)
5357
{
5358
   /* CZ TODO: choose better variable names! */
5359
   mp_int Dz, gcd, Np1, Uz, Vz, U2mz, V2mz, Qmz, Q2mz, Qkdz, T1z, T2z, T3z, T4z, Q2kdz;
5360
   /* CZ TODO: Some of them need the full 32 bit, hence the (temporary) exclusion of MP_8BIT */
5361
   int32_t D, Ds, J, sign, P, Q, r, s, u, Nbits;
5362
   int e;
5363
   int isset, oddness;
5364

5365
   *result = MP_NO;
5366
   /*
5367
   Find the first element D in the sequence {5, -7, 9, -11, 13, ...}
5368
   such that Jacobi(D,N) = -1 (Selfridge's algorithm). Theory
5369
   indicates that, if N is not a perfect square, D will "nearly
5370
   always" be "small." Just in case, an overflow trap for D is
5371
   included.
5372
   */
5373

5374
   if ((e = mp_init_multi(&Dz, &gcd, &Np1, &Uz, &Vz, &U2mz, &V2mz, &Qmz, &Q2mz, &Qkdz, &T1z, &T2z, &T3z, &T4z, &Q2kdz,
5375
                          NULL)) != MP_OKAY) {
5376
      return e;
5377
   }
5378

5379
   D = 5;
5380
   sign = 1;
5381

5382
   for (;;) {
5383
      Ds   = sign * D;
5384
      sign = -sign;
5385
      if ((e = mp_set_long(&Dz, (unsigned long)D)) != MP_OKAY) {
5386
         goto LBL_LS_ERR;
5387
      }
5388
      if ((e = mp_gcd(a, &Dz, &gcd)) != MP_OKAY) {
5389
         goto LBL_LS_ERR;
5390
      }
5391
      /* if 1 < GCD < N then N is composite with factor "D", and
5392
         Jacobi(D,N) is technically undefined (but often returned
5393
         as zero). */
5394
      if ((mp_cmp_d(&gcd, 1uL) == MP_GT) && (mp_cmp(&gcd, a) == MP_LT)) {
5395
         goto LBL_LS_ERR;
5396
      }
5397
      if (Ds < 0) {
5398
         Dz.sign = MP_NEG;
5399
      }
5400
      if ((e = mp_kronecker(&Dz, a, &J)) != MP_OKAY) {
5401
         goto LBL_LS_ERR;
5402
      }
5403

5404
      if (J == -1) {
5405
         break;
5406
      }
5407
      D += 2;
5408

5409
      if (D > (INT_MAX - 2)) {
5410
         e = MP_VAL;
5411
         goto LBL_LS_ERR;
5412
      }
5413
   }
5414

5415

5416

5417
   P = 1;              /* Selfridge's choice */
5418
   Q = (1 - Ds) / 4;   /* Required so D = P*P - 4*Q */
5419

5420
   /* NOTE: The conditions (a) N does not divide Q, and
5421
      (b) D is square-free or not a perfect square, are included by
5422
      some authors; e.g., "Prime numbers and computer methods for
5423
      factorization," Hans Riesel (2nd ed., 1994, Birkhauser, Boston),
5424
      p. 130. For this particular application of Lucas sequences,
5425
      these conditions were found to be immaterial. */
5426

5427
   /* Now calculate N - Jacobi(D,N) = N + 1 (even), and calculate the
5428
      odd positive integer d and positive integer s for which
5429
      N + 1 = 2^s*d (similar to the step for N - 1 in Miller's test).
5430
      The strong Lucas-Selfridge test then returns N as a strong
5431
      Lucas probable prime (slprp) if any of the following
5432
      conditions is met: U_d=0, V_d=0, V_2d=0, V_4d=0, V_8d=0,
5433
      V_16d=0, ..., etc., ending with V_{2^(s-1)*d}=V_{(N+1)/2}=0
5434
      (all equalities mod N). Thus d is the highest index of U that
5435
      must be computed (since V_2m is independent of U), compared
5436
      to U_{N+1} for the standard Lucas-Selfridge test; and no
5437
      index of V beyond (N+1)/2 is required, just as in the
5438
      standard Lucas-Selfridge test. However, the quantity Q^d must
5439
      be computed for use (if necessary) in the latter stages of
5440
      the test. The result is that the strong Lucas-Selfridge test
5441
      has a running time only slightly greater (order of 10 %) than
5442
      that of the standard Lucas-Selfridge test, while producing
5443
      only (roughly) 30 % as many pseudoprimes (and every strong
5444
      Lucas pseudoprime is also a standard Lucas pseudoprime). Thus
5445
      the evidence indicates that the strong Lucas-Selfridge test is
5446
      more effective than the standard Lucas-Selfridge test, and a
5447
      Baillie-PSW test based on the strong Lucas-Selfridge test
5448
      should be more reliable. */
5449

5450
   if ((e = mp_add_d(a, 1uL, &Np1)) != MP_OKAY) {
5451
      goto LBL_LS_ERR;
5452
   }
5453
   s = mp_cnt_lsb(&Np1);
5454

5455
   /* CZ
5456
    * This should round towards zero because
5457
    * Thomas R. Nicely used GMP's mpz_tdiv_q_2exp()
5458
    * and mp_div_2d() is equivalent. Additionally:
5459
    * dividing an even number by two does not produce
5460
    * any leftovers.
5461
    */
5462
   if ((e = mp_div_2d(&Np1, s, &Dz, NULL)) != MP_OKAY) {
5463
      goto LBL_LS_ERR;
5464
   }
5465
   /* We must now compute U_d and V_d. Since d is odd, the accumulated
5466
      values U and V are initialized to U_1 and V_1 (if the target
5467
      index were even, U and V would be initialized instead to U_0=0
5468
      and V_0=2). The values of U_2m and V_2m are also initialized to
5469
      U_1 and V_1; the FOR loop calculates in succession U_2 and V_2,
5470
      U_4 and V_4, U_8 and V_8, etc. If the corresponding bits
5471
      (1, 2, 3, ...) of t are on (the zero bit having been accounted
5472
      for in the initialization of U and V), these values are then
5473
      combined with the previous totals for U and V, using the
5474
      composition formulas for addition of indices. */
5475

5476
   mp_set(&Uz, 1uL);    /* U=U_1 */
5477
   mp_set(&Vz, (mp_digit)P);    /* V=V_1 */
5478
   mp_set(&U2mz, 1uL);  /* U_1 */
5479
   mp_set(&V2mz, (mp_digit)P);  /* V_1 */
5480

5481
   if (Q < 0) {
5482
      Q = -Q;
5483
      if ((e = mp_set_long(&Qmz, (unsigned long)Q)) != MP_OKAY) {
5484
         goto LBL_LS_ERR;
5485
      }
5486
      if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {
5487
         goto LBL_LS_ERR;
5488
      }
5489
      /* Initializes calculation of Q^d */
5490
      if ((e = mp_set_long(&Qkdz, (unsigned long)Q)) != MP_OKAY) {
5491
         goto LBL_LS_ERR;
5492
      }
5493
      Qmz.sign = MP_NEG;
5494
      Q2mz.sign = MP_NEG;
5495
      Qkdz.sign = MP_NEG;
5496
      Q = -Q;
5497
   } else {
5498
      if ((e = mp_set_long(&Qmz, (unsigned long)Q)) != MP_OKAY) {
5499
         goto LBL_LS_ERR;
5500
      }
5501
      if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {
5502
         goto LBL_LS_ERR;
5503
      }
5504
      /* Initializes calculation of Q^d */
5505
      if ((e = mp_set_long(&Qkdz, (unsigned long)Q)) != MP_OKAY) {
5506
         goto LBL_LS_ERR;
5507
      }
5508
   }
5509

5510
   Nbits = mp_count_bits(&Dz);
5511

5512
   for (u = 1; u < Nbits; u++) { /* zero bit off, already accounted for */
5513
      /* Formulas for doubling of indices (carried out mod N). Note that
5514
       * the indices denoted as "2m" are actually powers of 2, specifically
5515
       * 2^(ul-1) beginning each loop and 2^ul ending each loop.
5516
       *
5517
       * U_2m = U_m*V_m
5518
       * V_2m = V_m*V_m - 2*Q^m
5519
       */
5520

5521
      if ((e = mp_mul(&U2mz, &V2mz, &U2mz)) != MP_OKAY) {
5522
         goto LBL_LS_ERR;
5523
      }
5524
      if ((e = mp_mod(&U2mz, a, &U2mz)) != MP_OKAY) {
5525
         goto LBL_LS_ERR;
5526
      }
5527
      if ((e = mp_sqr(&V2mz, &V2mz)) != MP_OKAY) {
5528
         goto LBL_LS_ERR;
5529
      }
5530
      if ((e = mp_sub(&V2mz, &Q2mz, &V2mz)) != MP_OKAY) {
5531
         goto LBL_LS_ERR;
5532
      }
5533
      if ((e = mp_mod(&V2mz, a, &V2mz)) != MP_OKAY) {
5534
         goto LBL_LS_ERR;
5535
      }
5536
      /* Must calculate powers of Q for use in V_2m, also for Q^d later */
5537
      if ((e = mp_sqr(&Qmz, &Qmz)) != MP_OKAY) {
5538
         goto LBL_LS_ERR;
5539
      }
5540
      /* prevents overflow */ /* CZ  still necessary without a fixed prealloc'd mem.? */
5541
      if ((e = mp_mod(&Qmz, a, &Qmz)) != MP_OKAY) {
5542
         goto LBL_LS_ERR;
5543
      }
5544
      if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {
5545
         goto LBL_LS_ERR;
5546
      }
5547
      if ((isset = mp_get_bit(&Dz, u)) == MP_VAL) {
5548
         e = isset;
5549
         goto LBL_LS_ERR;
5550
      }
5551
      if (isset == MP_YES) {
5552
         /* Formulas for addition of indices (carried out mod N);
5553
          *
5554
          * U_(m+n) = (U_m*V_n + U_n*V_m)/2
5555
          * V_(m+n) = (V_m*V_n + D*U_m*U_n)/2
5556
          *
5557
          * Be careful with division by 2 (mod N)!
5558
          */
5559
         if ((e = mp_mul(&U2mz, &Vz, &T1z)) != MP_OKAY) {
5560
            goto LBL_LS_ERR;
5561
         }
5562
         if ((e = mp_mul(&Uz, &V2mz, &T2z)) != MP_OKAY) {
5563
            goto LBL_LS_ERR;
5564
         }
5565
         if ((e = mp_mul(&V2mz, &Vz, &T3z)) != MP_OKAY) {
5566
            goto LBL_LS_ERR;
5567
         }
5568
         if ((e = mp_mul(&U2mz, &Uz, &T4z)) != MP_OKAY) {
5569
            goto LBL_LS_ERR;
5570
         }
5571
         if ((e = s_mp_mul_si(&T4z, (long)Ds, &T4z)) != MP_OKAY) {
5572
            goto LBL_LS_ERR;
5573
         }
5574
         if ((e = mp_add(&T1z, &T2z, &Uz)) != MP_OKAY) {
5575
            goto LBL_LS_ERR;
5576
         }
5577
         if (mp_isodd(&Uz) != MP_NO) {
5578
            if ((e = mp_add(&Uz, a, &Uz)) != MP_OKAY) {
5579
               goto LBL_LS_ERR;
5580
            }
5581
         }
5582
         /* CZ
5583
          * This should round towards negative infinity because
5584
          * Thomas R. Nicely used GMP's mpz_fdiv_q_2exp().
5585
          * But mp_div_2() does not do so, it is truncating instead.
5586
          */
5587
         oddness = mp_isodd(&Uz);
5588
         if ((e = mp_div_2(&Uz, &Uz)) != MP_OKAY) {
5589
            goto LBL_LS_ERR;
5590
         }
5591
         if ((Uz.sign == MP_NEG) && (oddness != MP_NO)) {
5592
            if ((e = mp_sub_d(&Uz, 1uL, &Uz)) != MP_OKAY) {
5593
               goto LBL_LS_ERR;
5594
            }
5595
         }
5596
         if ((e = mp_add(&T3z, &T4z, &Vz)) != MP_OKAY) {
5597
            goto LBL_LS_ERR;
5598
         }
5599
         if (mp_isodd(&Vz) != MP_NO) {
5600
            if ((e = mp_add(&Vz, a, &Vz)) != MP_OKAY) {
5601
               goto LBL_LS_ERR;
5602
            }
5603
         }
5604
         oddness = mp_isodd(&Vz);
5605
         if ((e = mp_div_2(&Vz, &Vz)) != MP_OKAY) {
5606
            goto LBL_LS_ERR;
5607
         }
5608
         if ((Vz.sign == MP_NEG) && (oddness != MP_NO)) {
5609
            if ((e = mp_sub_d(&Vz, 1uL, &Vz)) != MP_OKAY) {
5610
               goto LBL_LS_ERR;
5611
            }
5612
         }
5613
         if ((e = mp_mod(&Uz, a, &Uz)) != MP_OKAY) {
5614
            goto LBL_LS_ERR;
5615
         }
5616
         if ((e = mp_mod(&Vz, a, &Vz)) != MP_OKAY) {
5617
            goto LBL_LS_ERR;
5618
         }
5619
         /* Calculating Q^d for later use */
5620
         if ((e = mp_mul(&Qkdz, &Qmz, &Qkdz)) != MP_OKAY) {
5621
            goto LBL_LS_ERR;
5622
         }
5623
         if ((e = mp_mod(&Qkdz, a, &Qkdz)) != MP_OKAY) {
5624
            goto LBL_LS_ERR;
5625
         }
5626
      }
5627
   }
5628

5629
   /* If U_d or V_d is congruent to 0 mod N, then N is a prime or a
5630
      strong Lucas pseudoprime. */
5631
   if ((mp_iszero(&Uz) != MP_NO) || (mp_iszero(&Vz) != MP_NO)) {
5632
      *result = MP_YES;
5633
      goto LBL_LS_ERR;
5634
   }
5635

5636
   /* NOTE: Ribenboim ("The new book of prime number records," 3rd ed.,
5637
      1995/6) omits the condition V0 on p.142, but includes it on
5638
      p. 130. The condition is NECESSARY; otherwise the test will
5639
      return false negatives---e.g., the primes 29 and 2000029 will be
5640
      returned as composite. */
5641

5642
   /* Otherwise, we must compute V_2d, V_4d, V_8d, ..., V_{2^(s-1)*d}
5643
      by repeated use of the formula V_2m = V_m*V_m - 2*Q^m. If any of
5644
      these are congruent to 0 mod N, then N is a prime or a strong
5645
      Lucas pseudoprime. */
5646

5647
   /* Initialize 2*Q^(d*2^r) for V_2m */
5648
   if ((e = mp_mul_2(&Qkdz, &Q2kdz)) != MP_OKAY) {
5649
      goto LBL_LS_ERR;
5650
   }
5651

5652
   for (r = 1; r < s; r++) {
5653
      if ((e = mp_sqr(&Vz, &Vz)) != MP_OKAY) {
5654
         goto LBL_LS_ERR;
5655
      }
5656
      if ((e = mp_sub(&Vz, &Q2kdz, &Vz)) != MP_OKAY) {
5657
         goto LBL_LS_ERR;
5658
      }
5659
      if ((e = mp_mod(&Vz, a, &Vz)) != MP_OKAY) {
5660
         goto LBL_LS_ERR;
5661
      }
5662
      if (mp_iszero(&Vz) != MP_NO) {
5663
         *result = MP_YES;
5664
         goto LBL_LS_ERR;
5665
      }
5666
      /* Calculate Q^{d*2^r} for next r (final iteration irrelevant). */
5667
      if (r < (s - 1)) {
5668
         if ((e = mp_sqr(&Qkdz, &Qkdz)) != MP_OKAY) {
5669
            goto LBL_LS_ERR;
5670
         }
5671
         if ((e = mp_mod(&Qkdz, a, &Qkdz)) != MP_OKAY) {
5672
            goto LBL_LS_ERR;
5673
         }
5674
         if ((e = mp_mul_2(&Qkdz, &Q2kdz)) != MP_OKAY) {
5675
            goto LBL_LS_ERR;
5676
         }
5677
      }
5678
   }
5679
LBL_LS_ERR:
5680
   mp_clear_multi(&Q2kdz, &T4z, &T3z, &T2z, &T1z, &Qkdz, &Q2mz, &Qmz, &V2mz, &U2mz, &Vz, &Uz, &Np1, &gcd, &Dz, NULL);
5681
   return e;
5682
}
5683
#endif
5684
#endif
5685

5686
/* End: bn_mp_prime_strong_lucas_selfridge.c */
5687

5688
/* Start: bn_mp_radix_size.c */
5689

5690
/* returns size of ASCII reprensentation */
5691
int mp_radix_size(const mp_int *a, int radix, int *size)
5692
{
5693
   int     res, digs;
5694
   mp_int  t;
5695
   mp_digit d;
5696

5697
   *size = 0;
5698

5699
   /* make sure the radix is in range */
5700
   if ((radix < 2) || (radix > 64)) {
5701
      return MP_VAL;
5702
   }
5703

5704
   if (mp_iszero(a) == MP_YES) {
5705
      *size = 2;
5706
      return MP_OKAY;
5707
   }
5708

5709
   /* special case for binary */
5710
   if (radix == 2) {
5711
      *size = mp_count_bits(a) + ((a->sign == MP_NEG) ? 1 : 0) + 1;
5712
      return MP_OKAY;
5713
   }
5714

5715
   /* digs is the digit count */
5716
   digs = 0;
5717

5718
   /* if it's negative add one for the sign */
5719
   if (a->sign == MP_NEG) {
5720
      ++digs;
5721
   }
5722

5723
   /* init a copy of the input */
5724
   if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
5725
      return res;
5726
   }
5727

5728
   /* force temp to positive */
5729
   t.sign = MP_ZPOS;
5730

5731
   /* fetch out all of the digits */
5732
   while (mp_iszero(&t) == MP_NO) {
5733
      if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
5734
         mp_clear(&t);
5735
         return res;
5736
      }
5737
      ++digs;
5738
   }
5739
   mp_clear(&t);
5740

5741
   /* return digs + 1, the 1 is for the NULL byte that would be required. */
5742
   *size = digs + 1;
5743
   return MP_OKAY;
5744
}
5745

5746
/* End: bn_mp_radix_size.c */
5747

5748
/* Start: bn_mp_radix_smap.c */
5749

5750
/* digit characters used in radix conversions: value v is written as mp_s_rmap[v] */
const char *const mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";

/* inverse of mp_s_rmap, indexed by (character - '(');
 * 0xFF marks characters that are not a digit in any radix
 */
const uint8_t mp_s_rmap_reverse[] = {
   /* '(' .. '/' */ 0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xFF, 0xFF, 0x3F,
   /* '0' .. '7' */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
   /* '8' .. '?' */ 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   /* '@' .. 'G' */ 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
   /* 'H' .. 'O' */ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
   /* 'P' .. 'W' */ 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
   /* 'X' .. '_' */ 0x21, 0x22, 0x23, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   /* '`' .. 'g' */ 0xFF, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A,
   /* 'h' .. 'o' */ 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32,
   /* 'p' .. 'w' */ 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A,
   /* 'x' .. DEL */ 0x3B, 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
/* number of entries in mp_s_rmap_reverse (its elements are one byte each) */
const size_t mp_s_rmap_reverse_sz = sizeof(mp_s_rmap_reverse);
5766

5767
/* End: bn_mp_radix_smap.c */
5768

5769
/* Start: bn_mp_rand.c */
5770

5771
/* First the OS-specific special cases
5772
 * - *BSD
5773
 * - Windows
5774
 */
5775
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
5776
#define MP_ARC4RANDOM
5777
#define MP_GEN_RANDOM_MAX     0xffffffffu
5778
#define MP_GEN_RANDOM_SHIFT   32
5779

5780
/* Fills *p with random bits from arc4random(), gathering
 * MP_GEN_RANDOM_SHIFT bits per call until all bits covered by MP_MASK
 * have been supplied.  Always returns MP_OKAY.
 */
static int s_read_arc4random(mp_digit *p)
{
   mp_digit d = 0, msk = 0;
   do {
      /*
       * Make room for the next chunk only once something has been
       * collected.  On the first pass both values are still zero, so
       * skipping the shift changes nothing, and it avoids shifting by
       * the full width of an mp_digit that is no wider than
       * MP_GEN_RANDOM_SHIFT bits (undefined behaviour).  Further passes
       * only happen when MP_MASK has bits above MP_GEN_RANDOM_MAX, i.e.
       * when the digit type is wider than one chunk.
       */
      if (msk != 0u) {
         d <<= MP_GEN_RANDOM_SHIFT;
         msk <<= MP_GEN_RANDOM_SHIFT;
      }
      d |= ((mp_digit) arc4random());
      msk |= (MP_MASK & MP_GEN_RANDOM_MAX);
   } while ((MP_MASK & msk) != MP_MASK);
   *p = d;
   return MP_OKAY;
}
5792
#endif
5793

5794
#if defined(_WIN32) || defined(_WIN32_WCE)
5795
#define MP_WIN_CSP
5796

5797
#ifndef _WIN32_WINNT
5798
#define _WIN32_WINNT 0x0400
5799
#endif
5800
#ifdef _WIN32_WCE
5801
#define UNDER_CE
5802
#define ARM
5803
#endif
5804

5805
#define WIN32_LEAN_AND_MEAN
5806
#include <windows.h>
5807
#include <ntsecapi.h>
5808

5809
/* Fills *p with sizeof(*p) bytes from RtlGenRandom().
 * Returns MP_OKAY when the call reports TRUE, -1 otherwise.
 */
static int s_read_win_csp(mp_digit *p)
{
   return (RtlGenRandom(p, sizeof(*p)) == TRUE) ? MP_OKAY : -1;
}
5817
#endif /* WIN32 */
5818

5819
#if !defined(MP_WIN_CSP) && defined(__linux__) && defined(__GLIBC_PREREQ)
5820
#if __GLIBC_PREREQ(2, 25)
5821
#define MP_GETRANDOM
5822
#include <sys/random.h>
5823
#include <errno.h>
5824

5825
/* Fills *p via getrandom(), retrying while the call is interrupted
 * (EINTR).  Returns MP_OKAY only if a full digit was delivered, -1
 * otherwise.
 */
static int s_read_getrandom(mp_digit *p)
{
   int got;

   for (;;) {
      got = getrandom(p, sizeof(*p), 0);
      if ((got != -1) || (errno != EINTR)) {
         break;
      }
   }
   return (got == sizeof(*p)) ? MP_OKAY : -1;
}
5834
#endif
5835
#endif
5836

5837
/* We assume all platforms besides windows provide "/dev/urandom".
5838
 * In case yours doesn't, define MP_NO_DEV_URANDOM at compile-time.
5839
 */
5840
#if !defined(MP_WIN_CSP) && !defined(MP_NO_DEV_URANDOM)
5841
#ifndef MP_DEV_URANDOM
5842
#define MP_DEV_URANDOM "/dev/urandom"
5843
#endif
5844
#include <fcntl.h>
5845
#include <errno.h>
5846
#include <unistd.h>
5847

5848
/* Fills *p with bytes read from the MP_DEV_URANDOM device.
 * open() and read() are both retried while they fail with EINTR.
 * Returns MP_OKAY if exactly sizeof(*p) bytes were read, -1 otherwise.
 */
static int s_read_dev_urandom(mp_digit *p)
{
   ssize_t nread;
   int fd;

   for (;;) {
      fd = open(MP_DEV_URANDOM, O_RDONLY);
      if ((fd != -1) || (errno != EINTR)) {
         break;
      }
   }
   if (fd == -1) {
      return -1;
   }

   for (;;) {
      nread = read(fd, p, sizeof(*p));
      if ((nread != -1) || (errno != EINTR)) {
         break;
      }
   }
   close(fd);

   /* a short read counts as failure */
   return (nread != sizeof(*p)) ? -1 : MP_OKAY;
}
5863
#endif
5864

5865
#if defined(MP_PRNG_ENABLE_LTM_RNG)
5866
unsigned long (*ltm_rng)(unsigned char *out, unsigned long outlen, void (*callback)(void));
5867
void (*ltm_rng_callback)(void);
5868

5869
static int s_read_ltm_rng(mp_digit *p)
5870
{
5871
   unsigned long ret;
5872
   if (ltm_rng == NULL) return -1;
5873
   ret = ltm_rng((void *)p, sizeof(*p), ltm_rng_callback);
5874
   if (ret != sizeof(*p)) return -1;
5875
   return MP_OKAY;
5876
}
5877
#endif
5878

5879
/* Tries every random source that was compiled in, in a fixed order, and
 * stops at the first one that succeeds.  Returns MP_OKAY on success,
 * otherwise the result of the last source tried (-1 if none is available).
 */
static int s_rand_digit(mp_digit *p)
{
   int status = -1;

#ifdef MP_ARC4RANDOM
   if ((status = s_read_arc4random(p)) == MP_OKAY) {
      return status;
   }
#endif

#ifdef MP_WIN_CSP
   if ((status = s_read_win_csp(p)) == MP_OKAY) {
      return status;
   }
#else

   /* the POSIX-style sources are only considered off Windows */
#ifdef MP_GETRANDOM
   if ((status = s_read_getrandom(p)) == MP_OKAY) {
      return status;
   }
#endif
#ifdef MP_DEV_URANDOM
   if ((status = s_read_dev_urandom(p)) == MP_OKAY) {
      return status;
   }
#endif

#endif /* MP_WIN_CSP */

#ifdef MP_PRNG_ENABLE_LTM_RNG
   if ((status = s_read_ltm_rng(p)) == MP_OKAY) {
      return status;
   }
#endif

   return status;
}
5911

5912
/* makes a pseudo-random int of a given size */
5913
int mp_rand_digit(mp_digit *r)
5914
{
5915
   int ret = s_rand_digit(r);
5916
   *r &= MP_MASK;
5917
   return ret;
5918
}
5919

5920
/* Makes a pseudo-random integer that is `digits` digits long.
 * The leading digit is forced to be non-zero.  For digits <= 0 the result
 * is zero.  Returns MP_VAL if the random source fails, otherwise the
 * status of the arithmetic helpers.
 */
int mp_rand(mp_int *a, int digits)
{
   mp_digit rnd;
   int      err, remaining;

   mp_zero(a);
   if (digits <= 0) {
      return MP_OKAY;
   }

   /* keep drawing until the leading digit is non-zero */
   for (;;) {
      if (mp_rand_digit(&rnd) != MP_OKAY) {
         return MP_VAL;
      }
      if (rnd != 0u) {
         break;
      }
   }

   if ((err = mp_add_d(a, rnd, a)) != MP_OKAY) {
      return err;
   }

   /* append the remaining digits: shift up by one digit, then add a fresh one */
   for (remaining = digits - 1; remaining > 0; --remaining) {
      if ((err = mp_lshd(a, 1)) != MP_OKAY) {
         return err;
      }

      if (mp_rand_digit(&rnd) != MP_OKAY) {
         return MP_VAL;
      }
      if ((err = mp_add_d(a, rnd, a)) != MP_OKAY) {
         return err;
      }
   }

   return MP_OKAY;
}
5956

5957
/* Start: bn_mp_read_radix.c */
5958

5959
/* read a string [ASCII] in a given radix */
5960
int mp_read_radix(mp_int *a, const char *str, int radix)
5961
{
5962
   int     y, res, neg;
5963
   unsigned pos;
5964
   char    ch;
5965

5966
   /* zero the digit bignum */
5967
   mp_zero(a);
5968

5969
   /* make sure the radix is ok */
5970
   if ((radix < 2) || (radix > 64)) {
5971
      return MP_VAL;
5972
   }
5973

5974
   /* if the leading digit is a
5975
    * minus set the sign to negative.
5976
    */
5977
   if (*str == '-') {
5978
      ++str;
5979
      neg = MP_NEG;
5980
   } else {
5981
      neg = MP_ZPOS;
5982
   }
5983

5984
   /* set the integer to the default of zero */
5985
   mp_zero(a);
5986

5987
   /* process each digit of the string */
5988
   while (*str != '\0') {
5989
      /* if the radix <= 36 the conversion is case insensitive
5990
       * this allows numbers like 1AB and 1ab to represent the same  value
5991
       * [e.g. in hex]
5992
       */
5993
      ch = (radix <= 36) ? (char)toupper((int)*str) : *str;
5994
      pos = (unsigned)(ch - '(');
5995
      if (mp_s_rmap_reverse_sz < pos) {
5996
         break;
5997
      }
5998
      y = (int)mp_s_rmap_reverse[pos];
5999

6000
      /* if the char was found in the map
6001
       * and is less than the given radix add it
6002
       * to the number, otherwise exit the loop.
6003
       */
6004
      if ((y == 0xff) || (y >= radix)) {
6005
         break;
6006
      }
6007
      if ((res = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) {
6008
         return res;
6009
      }
6010
      if ((res = mp_add_d(a, (mp_digit)y, a)) != MP_OKAY) {
6011
         return res;
6012
      }
6013
      ++str;
6014
   }
6015

6016
   /* if an illegal character was found, fail. */
6017
   if (!((*str == '\0') || (*str == '\r') || (*str == '\n'))) {
6018
      mp_zero(a);
6019
      return MP_VAL;
6020
   }
6021

6022
   /* set the sign only if a != 0 */
6023
   if (mp_iszero(a) != MP_YES) {
6024
      a->sign = neg;
6025
   }
6026
   return MP_OKAY;
6027
}
6028

6029
/* End: bn_mp_read_radix.c */
6030

6031
/* Start: bn_mp_read_signed_bin.c */
6032

6033
/* read signed bin, big endian, first byte is 0==positive or 1==negative */
6034
int mp_read_signed_bin(mp_int *a, const unsigned char *b, int c)
6035
{
6036
   int     res;
6037

6038
   /* read magnitude */
6039
   if ((res = mp_read_unsigned_bin(a, b + 1, c - 1)) != MP_OKAY) {
6040
      return res;
6041
   }
6042

6043
   /* first byte is 0 for positive, non-zero for negative */
6044
   if (b[0] == (unsigned char)0) {
6045
      a->sign = MP_ZPOS;
6046
   } else {
6047
      a->sign = MP_NEG;
6048
   }
6049

6050
   return MP_OKAY;
6051
}
6052

6053
/* End: bn_mp_read_signed_bin.c */
6054

6055
/* Start: bn_mp_read_unsigned_bin.c */
6056

6057
/* reads a unsigned char array, assumes the msb is stored first [big endian] */
6058
int mp_read_unsigned_bin(mp_int *a, const unsigned char *b, int c)
6059
{
6060
   int     res;
6061

6062
   /* make sure there are at least two digits */
6063
   if (a->alloc < 2) {
6064
      if ((res = mp_grow(a, 2)) != MP_OKAY) {
6065
         return res;
6066
      }
6067
   }
6068

6069
   /* zero the int */
6070
   mp_zero(a);
6071

6072
   /* read the bytes in */
6073
   while (c-- > 0) {
6074
      if ((res = mp_mul_2d(a, 8, a)) != MP_OKAY) {
6075
         return res;
6076
      }
6077

6078
#ifndef MP_8BIT
6079
      a->dp[0] |= *b++;
6080
      a->used += 1;
6081
#else
6082
      a->dp[0] = (*b & MP_MASK);
6083
      a->dp[1] |= ((*b++ >> 7) & 1u);
6084
      a->used += 2;
6085
#endif
6086
   }
6087
   mp_clamp(a);
6088
   return MP_OKAY;
6089
}
6090

6091
/* End: bn_mp_read_unsigned_bin.c */
6092

6093
/* Start: bn_mp_reduce.c */
6094

6095
/* reduces x mod m, assumes 0 < x < m**2, mu is
6096
 * precomputed via mp_reduce_setup.
6097
 * From HAC pp.604 Algorithm 14.42
6098
 */
6099
int mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
6100
{
6101
   mp_int  q;
6102
   int     res, um = m->used;
6103

6104
   /* q = x */
6105
   if ((res = mp_init_copy(&q, x)) != MP_OKAY) {
6106
      return res;
6107
   }
6108

6109
   /* q1 = x / b**(k-1)  */
6110
   mp_rshd(&q, um - 1);
6111

6112
   /* according to HAC this optimization is ok */
6113
   if ((mp_digit)um > ((mp_digit)1 << (DIGIT_BIT - 1))) {
6114
      if ((res = mp_mul(&q, mu, &q)) != MP_OKAY) {
6115
         goto CLEANUP;
6116
      }
6117
   } else {
6118
      if ((res = s_mp_mul_high_digs(&q, mu, &q, um)) != MP_OKAY) {
6119
         goto CLEANUP;
6120
      }
6121
   }
6122

6123
   /* q3 = q2 / b**(k+1) */
6124
   mp_rshd(&q, um + 1);
6125

6126
   /* x = x mod b**(k+1), quick (no division) */
6127
   if ((res = mp_mod_2d(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
6128
      goto CLEANUP;
6129
   }
6130

6131
   /* q = q * m mod b**(k+1), quick (no division) */
6132
   if ((res = s_mp_mul_digs(&q, m, &q, um + 1)) != MP_OKAY) {
6133
      goto CLEANUP;
6134
   }
6135

6136
   /* x = x - q */
6137
   if ((res = mp_sub(x, &q, x)) != MP_OKAY) {
6138
      goto CLEANUP;
6139
   }
6140

6141
   /* If x < 0, add b**(k+1) to it */
6142
   if (mp_cmp_d(x, 0uL) == MP_LT) {
6143
      mp_set(&q, 1uL);
6144
      if ((res = mp_lshd(&q, um + 1)) != MP_OKAY)
6145
         goto CLEANUP;
6146
      if ((res = mp_add(x, &q, x)) != MP_OKAY)
6147
         goto CLEANUP;
6148
   }
6149

6150
   /* Back off if it's too big */
6151
   while (mp_cmp(x, m) != MP_LT) {
6152
      if ((res = s_mp_sub(x, m, x)) != MP_OKAY) {
6153
         goto CLEANUP;
6154
      }
6155
   }
6156

6157
CLEANUP:
6158
   mp_clear(&q);
6159

6160
   return res;
6161
}
6162

6163
/* End: bn_mp_reduce.c */
6164

6165
/* Start: bn_mp_reduce_2k.c */
6166

6167
/* reduces a modulo n where n is of the form 2**p - d */
6168
int mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
6169
{
6170
   mp_int q;
6171
   int    p, res;
6172

6173
   if ((res = mp_init(&q)) != MP_OKAY) {
6174
      return res;
6175
   }
6176

6177
   p = mp_count_bits(n);
6178
top:
6179
   /* q = a/2**p, a = a mod 2**p */
6180
   if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
6181
      goto LBL_ERR;
6182
   }
6183

6184
   if (d != 1u) {
6185
      /* q = q * d */
6186
      if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
6187
         goto LBL_ERR;
6188
      }
6189
   }
6190

6191
   /* a = a + q */
6192
   if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
6193
      goto LBL_ERR;
6194
   }
6195

6196
   if (mp_cmp_mag(a, n) != MP_LT) {
6197
      if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
6198
         goto LBL_ERR;
6199
      }
6200
      goto top;
6201
   }
6202

6203
LBL_ERR:
6204
   mp_clear(&q);
6205
   return res;
6206
}
6207

6208
/* End: bn_mp_reduce_2k.c */
6209

6210
/* Start: bn_mp_reduce_2k_l.c */
6211

6212
/* reduces a modulo n where n is of the form 2**p - d
6213
   This differs from reduce_2k since "d" can be larger
6214
   than a single digit.
6215
*/
6216
int mp_reduce_2k_l(mp_int *a, const mp_int *n, const mp_int *d)
6217
{
6218
   mp_int q;
6219
   int    p, res;
6220

6221
   if ((res = mp_init(&q)) != MP_OKAY) {
6222
      return res;
6223
   }
6224

6225
   p = mp_count_bits(n);
6226
top:
6227
   /* q = a/2**p, a = a mod 2**p */
6228
   if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
6229
      goto LBL_ERR;
6230
   }
6231

6232
   /* q = q * d */
6233
   if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
6234
      goto LBL_ERR;
6235
   }
6236

6237
   /* a = a + q */
6238
   if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
6239
      goto LBL_ERR;
6240
   }
6241

6242
   if (mp_cmp_mag(a, n) != MP_LT) {
6243
      if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
6244
         goto LBL_ERR;
6245
      }
6246
      goto top;
6247
   }
6248

6249
LBL_ERR:
6250
   mp_clear(&q);
6251
   return res;
6252
}
6253

6254
/* End: bn_mp_reduce_2k_l.c */
6255

6256
/* Start: bn_mp_reduce_2k_setup.c */
6257

6258
/* determines the setup value */
6259
int mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
6260
{
6261
   int res, p;
6262
   mp_int tmp;
6263

6264
   if ((res = mp_init(&tmp)) != MP_OKAY) {
6265
      return res;
6266
   }
6267

6268
   p = mp_count_bits(a);
6269
   if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
6270
      mp_clear(&tmp);
6271
      return res;
6272
   }
6273

6274
   if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
6275
      mp_clear(&tmp);
6276
      return res;
6277
   }
6278

6279
   *d = tmp.dp[0];
6280
   mp_clear(&tmp);
6281
   return MP_OKAY;
6282
}
6283

6284
/* End: bn_mp_reduce_2k_setup.c */
6285

6286
/* Start: bn_mp_reduce_2k_setup_l.c */
6287

6288
/* determines the setup value */
6289
int mp_reduce_2k_setup_l(const mp_int *a, mp_int *d)
6290
{
6291
   int    res;
6292
   mp_int tmp;
6293

6294
   if ((res = mp_init(&tmp)) != MP_OKAY) {
6295
      return res;
6296
   }
6297

6298
   if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
6299
      goto LBL_ERR;
6300
   }
6301

6302
   if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
6303
      goto LBL_ERR;
6304
   }
6305

6306
LBL_ERR:
6307
   mp_clear(&tmp);
6308
   return res;
6309
}
6310

6311
/* End: bn_mp_reduce_2k_setup_l.c */
6312

6313
/* Start: bn_mp_reduce_is_2k.c */
6314

6315
/* determines if mp_reduce_2k can be used */
6316
int mp_reduce_is_2k(const mp_int *a)
6317
{
6318
   int ix, iy, iw;
6319
   mp_digit iz;
6320

6321
   if (a->used == 0) {
6322
      return MP_NO;
6323
   } else if (a->used == 1) {
6324
      return MP_YES;
6325
   } else if (a->used > 1) {
6326
      iy = mp_count_bits(a);
6327
      iz = 1;
6328
      iw = 1;
6329

6330
      /* Test every bit from the second digit up, must be 1 */
6331
      for (ix = DIGIT_BIT; ix < iy; ix++) {
6332
         if ((a->dp[iw] & iz) == 0u) {
6333
            return MP_NO;
6334
         }
6335
         iz <<= 1;
6336
         if (iz > (mp_digit)MP_MASK) {
6337
            ++iw;
6338
            iz = 1;
6339
         }
6340
      }
6341
   }
6342
   return MP_YES;
6343
}
6344

6345
/* End: bn_mp_reduce_is_2k.c */
6346

6347
/* Start: bn_mp_reduce_is_2k_l.c */
6348

6349
/* determines if reduce_2k_l can be used */
6350
int mp_reduce_is_2k_l(const mp_int *a)
6351
{
6352
   int ix, iy;
6353

6354
   if (a->used == 0) {
6355
      return MP_NO;
6356
   } else if (a->used == 1) {
6357
      return MP_YES;
6358
   } else if (a->used > 1) {
6359
      /* if more than half of the digits are -1 we're sold */
6360
      for (iy = ix = 0; ix < a->used; ix++) {
6361
         if (a->dp[ix] == MP_MASK) {
6362
            ++iy;
6363
         }
6364
      }
6365
      return (iy >= (a->used/2)) ? MP_YES : MP_NO;
6366

6367
   }
6368
   return MP_NO;
6369
}
6370

6371
/* End: bn_mp_reduce_is_2k_l.c */
6372

6373
/* Start: bn_mp_reduce_setup.c */
6374

6375
/* pre-calculate the value required for Barrett reduction
6376
 * For a given modulus "b" it calulates the value required in "a"
6377
 */
6378
int mp_reduce_setup(mp_int *a, const mp_int *b)
6379
{
6380
   int     res;
6381

6382
   if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
6383
      return res;
6384
   }
6385
   return mp_div(a, b, a, NULL);
6386
}
6387

6388
/* End: bn_mp_reduce_setup.c */
6389

6390
/* Start: bn_mp_rshd.c */
6391

6392
/* shift right a certain amount of digits */
6393
void mp_rshd(mp_int *a, int b)
6394
{
6395
   int     x;
6396

6397
   /* if b <= 0 then ignore it */
6398
   if (b <= 0) {
6399
      return;
6400
   }
6401

6402
   /* if b > used then simply zero it and return */
6403
   if (a->used <= b) {
6404
      mp_zero(a);
6405
      return;
6406
   }
6407

6408
   {
6409
      mp_digit *bottom, *top;
6410

6411
      /* shift the digits down */
6412

6413
      /* bottom */
6414
      bottom = a->dp;
6415

6416
      /* top [offset into digits] */
6417
      top = a->dp + b;
6418

6419
      /* this is implemented as a sliding window where
6420
       * the window is b-digits long and digits from
6421
       * the top of the window are copied to the bottom
6422
       *
6423
       * e.g.
6424

6425
       b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
6426
                   /\                   |      ---->
6427
                    \-------------------/      ---->
6428
       */
6429
      for (x = 0; x < (a->used - b); x++) {
6430
         *bottom++ = *top++;
6431
      }
6432

6433
      /* zero the top digits */
6434
      for (; x < a->used; x++) {
6435
         *bottom++ = 0;
6436
      }
6437
   }
6438

6439
   /* remove excess digits */
6440
   a->used -= b;
6441
}
6442

6443
/* End: bn_mp_rshd.c */
6444

6445
/* Start: bn_mp_set.c */
6446

6447
/* set to a digit */
6448
void mp_set(mp_int *a, mp_digit b)
6449
{
6450
   mp_zero(a);
6451
   a->dp[0] = b & MP_MASK;
6452
   a->used  = (a->dp[0] != 0u) ? 1 : 0;
6453
}
6454

6455
/* End: bn_mp_set.c */
6456

6457
/* Start: bn_mp_set_int.c */
6458

6459
/* set a 32-bit const */
6460
int mp_set_int(mp_int *a, unsigned long b)
6461
{
6462
   int     x, res;
6463

6464
   mp_zero(a);
6465

6466
   /* set four bits at a time */
6467
   for (x = 0; x < 8; x++) {
6468
      /* shift the number up four bits */
6469
      if ((res = mp_mul_2d(a, 4, a)) != MP_OKAY) {
6470
         return res;
6471
      }
6472

6473
      /* OR in the top four bits of the source */
6474
      a->dp[0] |= (mp_digit)(b >> 28) & 15uL;
6475

6476
      /* shift the source up to the next four bits */
6477
      b <<= 4;
6478

6479
      /* ensure that digits are not clamped off */
6480
      a->used += 1;
6481
   }
6482
   mp_clamp(a);
6483
   return MP_OKAY;
6484
}
6485

6486
/* End: bn_mp_set_int.c */
6487

6488
/* Start: bn_mp_set_long.c */
6489

6490
/* set a platform dependent unsigned long int */
6491
/* NOTE(review): MP_SET_XLONG is not defined in this part of the file;
 * presumably it expands to the complete definition of mp_set_long() for an
 * unsigned long argument -- confirm against the macro definition.
 */
MP_SET_XLONG(mp_set_long, unsigned long)
6492

6493
/* End: bn_mp_set_long.c */
6494

6495
/* Start: bn_mp_set_long_long.c */
6496

6497
/* set a platform dependent unsigned long long int */
6498
/* NOTE(review): MP_SET_XLONG is not defined in this part of the file;
 * presumably it expands to the complete definition of mp_set_long_long()
 * for an unsigned long long argument -- confirm against the macro definition.
 */
MP_SET_XLONG(mp_set_long_long, unsigned long long)
6499

6500
/* End: bn_mp_set_long_long.c */
6501

6502
/* Start: bn_mp_shrink.c */
6503

6504
/* shrink a bignum */
6505
int mp_shrink(mp_int *a)
6506
{
6507
   mp_digit *tmp;
6508
   int used = 1;
6509

6510
   if (a->used > 0) {
6511
      used = a->used;
6512
   }
6513

6514
   if (a->alloc != used) {
6515
      if ((tmp = OPT_CAST(mp_digit) XREALLOC(a->dp, sizeof(mp_digit) * (size_t)used)) == NULL) {
6516
         return MP_MEM;
6517
      }
6518
      a->dp    = tmp;
6519
      a->alloc = used;
6520
   }
6521
   return MP_OKAY;
6522
}
6523

6524
/* End: bn_mp_shrink.c */
6525

6526
/* Start: bn_mp_signed_bin_size.c */
6527

6528
/* get the size for an signed equivalent */
6529
int mp_signed_bin_size(const mp_int *a)
6530
{
6531
   return 1 + mp_unsigned_bin_size(a);
6532
}
6533

6534
/* End: bn_mp_signed_bin_size.c */
6535

6536
/* Start: bn_mp_sqr.c */
6537

6538
/* computes b = a*a */
6539
int mp_sqr(const mp_int *a, mp_int *b)
6540
{
6541
   int     res;
6542

6543
   /* use Toom-Cook? */
6544
   if (a->used >= TOOM_SQR_CUTOFF) {
6545
      res = mp_toom_sqr(a, b);
6546
      /* Karatsuba? */
6547
   } else
6548
      if (a->used >= KARATSUBA_SQR_CUTOFF) {
6549
         res = mp_karatsuba_sqr(a, b);
6550
      } else
6551
      {
6552
         /* can we use the fast comba multiplier? */
6553
         if ((((a->used * 2) + 1) < (int)MP_WARRAY) &&
6554
             (a->used <
6555
              (int)(1u << (((sizeof(mp_word) * (size_t)CHAR_BIT) - (2u * (size_t)DIGIT_BIT)) - 1u)))) {
6556
            res = fast_s_mp_sqr(a, b);
6557
         } else
6558
         {
6559
            res = s_mp_sqr(a, b);
6560
         }
6561
      }
6562
   b->sign = MP_ZPOS;
6563
   return res;
6564
}
6565

6566
/* End: bn_mp_sqr.c */
6567

6568
/* Start: bn_mp_sqrmod.c */
6569

6570
/* c = a * a (mod b) */
6571
int mp_sqrmod(const mp_int *a, const mp_int *b, mp_int *c)
6572
{
6573
   int     res;
6574
   mp_int  t;
6575

6576
   if ((res = mp_init(&t)) != MP_OKAY) {
6577
      return res;
6578
   }
6579

6580
   if ((res = mp_sqr(a, &t)) != MP_OKAY) {
6581
      mp_clear(&t);
6582
      return res;
6583
   }
6584
   res = mp_mod(&t, b, c);
6585
   mp_clear(&t);
6586
   return res;
6587
}
6588

6589
/* End: bn_mp_sqrmod.c */
6590

6591
/* Start: bn_mp_sqrt.c */
6592

6593
/* this function is less generic than mp_n_root, simpler and faster */
6594
int mp_sqrt(const mp_int *arg, mp_int *ret)
6595
{
6596
   int res;
6597
   mp_int t1, t2;
6598

6599
   /* must be positive */
6600
   if (arg->sign == MP_NEG) {
6601
      return MP_VAL;
6602
   }
6603

6604
   /* easy out */
6605
   if (mp_iszero(arg) == MP_YES) {
6606
      mp_zero(ret);
6607
      return MP_OKAY;
6608
   }
6609

6610
   if ((res = mp_init_copy(&t1, arg)) != MP_OKAY) {
6611
      return res;
6612
   }
6613

6614
   if ((res = mp_init(&t2)) != MP_OKAY) {
6615
      goto E2;
6616
   }
6617

6618
   /* First approx. (not very bad for large arg) */
6619
   mp_rshd(&t1, t1.used/2);
6620

6621
   /* t1 > 0  */
6622
   if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {
6623
      goto E1;
6624
   }
6625
   if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {
6626
      goto E1;
6627
   }
6628
   if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {
6629
      goto E1;
6630
   }
6631
   /* And now t1 > sqrt(arg) */
6632
   do {
6633
      if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {
6634
         goto E1;
6635
      }
6636
      if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {
6637
         goto E1;
6638
      }
6639
      if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {
6640
         goto E1;
6641
      }
6642
      /* t1 >= sqrt(arg) >= t2 at this point */
6643
   } while (mp_cmp_mag(&t1, &t2) == MP_GT);
6644

6645
   mp_exch(&t1, ret);
6646

6647
E1:
6648
   mp_clear(&t2);
6649
E2:
6650
   mp_clear(&t1);
6651
   return res;
6652
}
6653

6654
/* End: bn_mp_sqrt.c */
6655

6656
/* Start: bn_mp_sqrtmod_prime.c */
6657

6658
/* Tonelli-Shanks algorithm
6659
 * https://en.wikipedia.org/wiki/Tonelli%E2%80%93Shanks_algorithm
6660
 * https://gmplib.org/list-archives/gmp-discuss/2013-April/005300.html
6661
 *
6662
 */
6663

6664
int mp_sqrtmod_prime(const mp_int *n, const mp_int *prime, mp_int *ret)
6665
{
6666
   int res, legendre;
6667
   mp_int t1, C, Q, S, Z, M, T, R, two;
6668
   mp_digit i;
6669

6670
   /* first handle the simple cases */
6671
   if (mp_cmp_d(n, 0uL) == MP_EQ) {
6672
      mp_zero(ret);
6673
      return MP_OKAY;
6674
   }
6675
   if (mp_cmp_d(prime, 2uL) == MP_EQ)                            return MP_VAL; /* prime must be odd */
6676
   if ((res = mp_jacobi(n, prime, &legendre)) != MP_OKAY)        return res;
6677
   if (legendre == -1)                                           return MP_VAL; /* quadratic non-residue mod prime */
6678

6679
   if ((res = mp_init_multi(&t1, &C, &Q, &S, &Z, &M, &T, &R, &two, NULL)) != MP_OKAY) {
6680
      return res;
6681
   }
6682

6683
   /* SPECIAL CASE: if prime mod 4 == 3
6684
    * compute directly: res = n^(prime+1)/4 mod prime
6685
    * Handbook of Applied Cryptography algorithm 3.36
6686
    */
6687
   if ((res = mp_mod_d(prime, 4uL, &i)) != MP_OKAY)               goto cleanup;
6688
   if (i == 3u) {
6689
      if ((res = mp_add_d(prime, 1uL, &t1)) != MP_OKAY)           goto cleanup;
6690
      if ((res = mp_div_2(&t1, &t1)) != MP_OKAY)                  goto cleanup;
6691
      if ((res = mp_div_2(&t1, &t1)) != MP_OKAY)                  goto cleanup;
6692
      if ((res = mp_exptmod(n, &t1, prime, ret)) != MP_OKAY)      goto cleanup;
6693
      res = MP_OKAY;
6694
      goto cleanup;
6695
   }
6696

6697
   /* NOW: Tonelli-Shanks algorithm */
6698

6699
   /* factor out powers of 2 from prime-1, defining Q and S as: prime-1 = Q*2^S */
6700
   if ((res = mp_copy(prime, &Q)) != MP_OKAY)                    goto cleanup;
6701
   if ((res = mp_sub_d(&Q, 1uL, &Q)) != MP_OKAY)                 goto cleanup;
6702
   /* Q = prime - 1 */
6703
   mp_zero(&S);
6704
   /* S = 0 */
6705
   while (mp_iseven(&Q) != MP_NO) {
6706
      if ((res = mp_div_2(&Q, &Q)) != MP_OKAY)                    goto cleanup;
6707
      /* Q = Q / 2 */
6708
      if ((res = mp_add_d(&S, 1uL, &S)) != MP_OKAY)               goto cleanup;
6709
      /* S = S + 1 */
6710
   }
6711

6712
   /* find a Z such that the Legendre symbol (Z|prime) == -1 */
6713
   if ((res = mp_set_int(&Z, 2uL)) != MP_OKAY)                    goto cleanup;
6714
   /* Z = 2 */
6715
   while (1) {
6716
      if ((res = mp_jacobi(&Z, prime, &legendre)) != MP_OKAY)     goto cleanup;
6717
      if (legendre == -1) break;
6718
      if ((res = mp_add_d(&Z, 1uL, &Z)) != MP_OKAY)               goto cleanup;
6719
      /* Z = Z + 1 */
6720
   }
6721

6722
   if ((res = mp_exptmod(&Z, &Q, prime, &C)) != MP_OKAY)         goto cleanup;
6723
   /* C = Z ^ Q mod prime */
6724
   if ((res = mp_add_d(&Q, 1uL, &t1)) != MP_OKAY)                goto cleanup;
6725
   if ((res = mp_div_2(&t1, &t1)) != MP_OKAY)                    goto cleanup;
6726
   /* t1 = (Q + 1) / 2 */
6727
   if ((res = mp_exptmod(n, &t1, prime, &R)) != MP_OKAY)         goto cleanup;
6728
   /* R = n ^ ((Q + 1) / 2) mod prime */
6729
   if ((res = mp_exptmod(n, &Q, prime, &T)) != MP_OKAY)          goto cleanup;
6730
   /* T = n ^ Q mod prime */
6731
   if ((res = mp_copy(&S, &M)) != MP_OKAY)                       goto cleanup;
6732
   /* M = S */
6733
   if ((res = mp_set_int(&two, 2uL)) != MP_OKAY)                 goto cleanup;
6734

6735
   res = MP_VAL;
6736
   while (1) {
6737
      if ((res = mp_copy(&T, &t1)) != MP_OKAY)                    goto cleanup;
6738
      i = 0;
6739
      while (1) {
6740
         if (mp_cmp_d(&t1, 1uL) == MP_EQ) break;
6741
         if ((res = mp_exptmod(&t1, &two, prime, &t1)) != MP_OKAY) goto cleanup;
6742
         i++;
6743
      }
6744
      if (i == 0u) {
6745
         if ((res = mp_copy(&R, ret)) != MP_OKAY)                  goto cleanup;
6746
         res = MP_OKAY;
6747
         goto cleanup;
6748
      }
6749
      if ((res = mp_sub_d(&M, i, &t1)) != MP_OKAY)                goto cleanup;
6750
      if ((res = mp_sub_d(&t1, 1uL, &t1)) != MP_OKAY)             goto cleanup;
6751
      if ((res = mp_exptmod(&two, &t1, prime, &t1)) != MP_OKAY)   goto cleanup;
6752
      /* t1 = 2 ^ (M - i - 1) */
6753
      if ((res = mp_exptmod(&C, &t1, prime, &t1)) != MP_OKAY)     goto cleanup;
6754
      /* t1 = C ^ (2 ^ (M - i - 1)) mod prime */
6755
      if ((res = mp_sqrmod(&t1, prime, &C)) != MP_OKAY)           goto cleanup;
6756
      /* C = (t1 * t1) mod prime */
6757
      if ((res = mp_mulmod(&R, &t1, prime, &R)) != MP_OKAY)       goto cleanup;
6758
      /* R = (R * t1) mod prime */
6759
      if ((res = mp_mulmod(&T, &C, prime, &T)) != MP_OKAY)        goto cleanup;
6760
      /* T = (T * C) mod prime */
6761
      mp_set(&M, i);
6762
      /* M = i */
6763
   }
6764

6765
cleanup:
6766
   mp_clear_multi(&t1, &C, &Q, &S, &Z, &M, &T, &R, &two, NULL);
6767
   return res;
6768
}
6769

6770
/* End: bn_mp_sqrtmod_prime.c */
6771

6772
/* Start: bn_mp_sub.c */
6773

6774
/* high level subtraction (handles signs) */
6775
int mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
6776
{
6777
   int     sa, sb, res;
6778

6779
   sa = a->sign;
6780
   sb = b->sign;
6781

6782
   if (sa != sb) {
6783
      /* subtract a negative from a positive, OR */
6784
      /* subtract a positive from a negative. */
6785
      /* In either case, ADD their magnitudes, */
6786
      /* and use the sign of the first number. */
6787
      c->sign = sa;
6788
      res = s_mp_add(a, b, c);
6789
   } else {
6790
      /* subtract a positive from a positive, OR */
6791
      /* subtract a negative from a negative. */
6792
      /* First, take the difference between their */
6793
      /* magnitudes, then... */
6794
      if (mp_cmp_mag(a, b) != MP_LT) {
6795
         /* Copy the sign from the first */
6796
         c->sign = sa;
6797
         /* The first has a larger or equal magnitude */
6798
         res = s_mp_sub(a, b, c);
6799
      } else {
6800
         /* The result has the *opposite* sign from */
6801
         /* the first number. */
6802
         c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
6803
         /* The second has a larger magnitude */
6804
         res = s_mp_sub(b, a, c);
6805
      }
6806
   }
6807
   return res;
6808
}
6809

6810
/* End: bn_mp_sub.c */
6811

6812
/* Start: bn_mp_sub_d.c */
6813

6814
/* single digit subtraction */
6815
int mp_sub_d(const mp_int *a, mp_digit b, mp_int *c)
6816
{
6817
   mp_digit *tmpa, *tmpc, mu;
6818
   int       res, ix, oldused;
6819

6820
   /* grow c as required */
6821
   if (c->alloc < (a->used + 1)) {
6822
      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
6823
         return res;
6824
      }
6825
   }
6826

6827
   /* if a is negative just do an unsigned
6828
    * addition [with fudged signs]
6829
    */
6830
   if (a->sign == MP_NEG) {
6831
      mp_int a_ = *a;
6832
      a_.sign = MP_ZPOS;
6833
      res     = mp_add_d(&a_, b, c);
6834
      c->sign = MP_NEG;
6835

6836
      /* clamp */
6837
      mp_clamp(c);
6838

6839
      return res;
6840
   }
6841

6842
   /* setup regs */
6843
   oldused = c->used;
6844
   tmpa    = a->dp;
6845
   tmpc    = c->dp;
6846

6847
   /* if a <= b simply fix the single digit */
6848
   if (((a->used == 1) && (a->dp[0] <= b)) || (a->used == 0)) {
6849
      if (a->used == 1) {
6850
         *tmpc++ = b - *tmpa;
6851
      } else {
6852
         *tmpc++ = b;
6853
      }
6854
      ix      = 1;
6855

6856
      /* negative/1digit */
6857
      c->sign = MP_NEG;
6858
      c->used = 1;
6859
   } else {
6860
      /* positive/size */
6861
      c->sign = MP_ZPOS;
6862
      c->used = a->used;
6863

6864
      /* subtract first digit */
6865
      *tmpc    = *tmpa++ - b;
6866
      mu       = *tmpc >> ((sizeof(mp_digit) * (size_t)CHAR_BIT) - 1u);
6867
      *tmpc++ &= MP_MASK;
6868

6869
      /* handle rest of the digits */
6870
      for (ix = 1; ix < a->used; ix++) {
6871
         *tmpc    = *tmpa++ - mu;
6872
         mu       = *tmpc >> ((sizeof(mp_digit) * (size_t)CHAR_BIT) - 1u);
6873
         *tmpc++ &= MP_MASK;
6874
      }
6875
   }
6876

6877
   /* zero excess digits */
6878
   while (ix++ < oldused) {
6879
      *tmpc++ = 0;
6880
   }
6881
   mp_clamp(c);
6882
   return MP_OKAY;
6883
}
6884

6885
/* End: bn_mp_sub_d.c */
6886

6887
/* Start: bn_mp_submod.c */
6888

6889
/* d = a - b (mod c) */
6890
int mp_submod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)
6891
{
6892
   int     res;
6893
   mp_int  t;
6894

6895

6896
   if ((res = mp_init(&t)) != MP_OKAY) {
6897
      return res;
6898
   }
6899

6900
   if ((res = mp_sub(a, b, &t)) != MP_OKAY) {
6901
      mp_clear(&t);
6902
      return res;
6903
   }
6904
   res = mp_mod(&t, c, d);
6905
   mp_clear(&t);
6906
   return res;
6907
}
6908

6909
/* End: bn_mp_submod.c */
6910

6911
/* Start: bn_mp_tc_and.c */
6912

6913
/* two complement and */
6914
int mp_tc_and(const mp_int *a, const mp_int *b, mp_int *c)
6915
{
6916
   int res = MP_OKAY, bits, abits, bbits;
6917
   int as = mp_isneg(a), bs = mp_isneg(b);
6918
   mp_int *mx = NULL, _mx, acpy, bcpy;
6919

6920
   if ((as != MP_NO) || (bs != MP_NO)) {
6921
      abits = mp_count_bits(a);
6922
      bbits = mp_count_bits(b);
6923
      bits = MAX(abits, bbits);
6924
      res = mp_init_set_int(&_mx, 1uL);
6925
      if (res != MP_OKAY) {
6926
         goto end;
6927
      }
6928

6929
      mx = &_mx;
6930
      res = mp_mul_2d(mx, bits + 1, mx);
6931
      if (res != MP_OKAY) {
6932
         goto end;
6933
      }
6934

6935
      if (as != MP_NO) {
6936
         res = mp_init(&acpy);
6937
         if (res != MP_OKAY) {
6938
            goto end;
6939
         }
6940

6941
         res = mp_add(mx, a, &acpy);
6942
         if (res != MP_OKAY) {
6943
            mp_clear(&acpy);
6944
            goto end;
6945
         }
6946
         a = &acpy;
6947
      }
6948
      if (bs != MP_NO) {
6949
         res = mp_init(&bcpy);
6950
         if (res != MP_OKAY) {
6951
            goto end;
6952
         }
6953

6954
         res = mp_add(mx, b, &bcpy);
6955
         if (res != MP_OKAY) {
6956
            mp_clear(&bcpy);
6957
            goto end;
6958
         }
6959
         b = &bcpy;
6960
      }
6961
   }
6962

6963
   res = mp_and(a, b, c);
6964

6965
   if ((as != MP_NO) && (bs != MP_NO) && (res == MP_OKAY)) {
6966
      res = mp_sub(c, mx, c);
6967
   }
6968

6969
end:
6970
   if (a == &acpy) {
6971
      mp_clear(&acpy);
6972
   }
6973

6974
   if (b == &bcpy) {
6975
      mp_clear(&bcpy);
6976
   }
6977

6978
   if (mx == &_mx) {
6979
      mp_clear(mx);
6980
   }
6981

6982
   return res;
6983
}
6984

6985
/* End: bn_mp_tc_and.c */
6986

6987
/* Start: bn_mp_tc_div_2d.c */
6988

6989
/* two complement right shift */
6990
int mp_tc_div_2d(const mp_int *a, int b, mp_int *c)
6991
{
6992
   int res;
6993
   if (mp_isneg(a) == MP_NO) {
6994
      return mp_div_2d(a, b, c, NULL);
6995
   }
6996

6997
   res = mp_add_d(a, 1uL, c);
6998
   if (res != MP_OKAY) {
6999
      return res;
7000
   }
7001

7002
   res = mp_div_2d(c, b, c, NULL);
7003
   return (res == MP_OKAY) ? mp_sub_d(c, 1uL, c) : res;
7004
}
7005

7006
/* End: bn_mp_tc_div_2d.c */
7007

7008
/* Start: bn_mp_tc_or.c */
7009

7010
/* two complement or */
7011
int mp_tc_or(const mp_int *a, const mp_int *b, mp_int *c)
7012
{
7013
   int res = MP_OKAY, bits, abits, bbits;
7014
   int as = mp_isneg(a), bs = mp_isneg(b);
7015
   mp_int *mx = NULL, _mx, acpy, bcpy;
7016

7017
   if ((as != MP_NO) || (bs != MP_NO)) {
7018
      abits = mp_count_bits(a);
7019
      bbits = mp_count_bits(b);
7020
      bits = MAX(abits, bbits);
7021
      res = mp_init_set_int(&_mx, 1uL);
7022
      if (res != MP_OKAY) {
7023
         goto end;
7024
      }
7025

7026
      mx = &_mx;
7027
      res = mp_mul_2d(mx, bits + 1, mx);
7028
      if (res != MP_OKAY) {
7029
         goto end;
7030
      }
7031

7032
      if (as != MP_NO) {
7033
         res = mp_init(&acpy);
7034
         if (res != MP_OKAY) {
7035
            goto end;
7036
         }
7037

7038
         res = mp_add(mx, a, &acpy);
7039
         if (res != MP_OKAY) {
7040
            mp_clear(&acpy);
7041
            goto end;
7042
         }
7043
         a = &acpy;
7044
      }
7045
      if (bs != MP_NO) {
7046
         res = mp_init(&bcpy);
7047
         if (res != MP_OKAY) {
7048
            goto end;
7049
         }
7050

7051
         res = mp_add(mx, b, &bcpy);
7052
         if (res != MP_OKAY) {
7053
            mp_clear(&bcpy);
7054
            goto end;
7055
         }
7056
         b = &bcpy;
7057
      }
7058
   }
7059

7060
   res = mp_or(a, b, c);
7061

7062
   if (((as != MP_NO) || (bs != MP_NO)) && (res == MP_OKAY)) {
7063
      res = mp_sub(c, mx, c);
7064
   }
7065

7066
end:
7067
   if (a == &acpy) {
7068
      mp_clear(&acpy);
7069
   }
7070

7071
   if (b == &bcpy) {
7072
      mp_clear(&bcpy);
7073
   }
7074

7075
   if (mx == &_mx) {
7076
      mp_clear(mx);
7077
   }
7078

7079
   return res;
7080
}
7081

7082
/* End: bn_mp_tc_or.c */
7083

7084
/* Start: bn_mp_tc_xor.c */
7085

7086
/* two complement xor */
7087
int mp_tc_xor(const mp_int *a, const mp_int *b, mp_int *c)
7088
{
7089
   int res = MP_OKAY, bits, abits, bbits;
7090
   int as = mp_isneg(a), bs = mp_isneg(b);
7091
   mp_int *mx = NULL, _mx, acpy, bcpy;
7092

7093
   if ((as != MP_NO) || (bs != MP_NO)) {
7094
      abits = mp_count_bits(a);
7095
      bbits = mp_count_bits(b);
7096
      bits = MAX(abits, bbits);
7097
      res = mp_init_set_int(&_mx, 1uL);
7098
      if (res != MP_OKAY) {
7099
         goto end;
7100
      }
7101

7102
      mx = &_mx;
7103
      res = mp_mul_2d(mx, bits + 1, mx);
7104
      if (res != MP_OKAY) {
7105
         goto end;
7106
      }
7107

7108
      if (as != MP_NO) {
7109
         res = mp_init(&acpy);
7110
         if (res != MP_OKAY) {
7111
            goto end;
7112
         }
7113

7114
         res = mp_add(mx, a, &acpy);
7115
         if (res != MP_OKAY) {
7116
            mp_clear(&acpy);
7117
            goto end;
7118
         }
7119
         a = &acpy;
7120
      }
7121
      if (bs != MP_NO) {
7122
         res = mp_init(&bcpy);
7123
         if (res != MP_OKAY) {
7124
            goto end;
7125
         }
7126

7127
         res = mp_add(mx, b, &bcpy);
7128
         if (res != MP_OKAY) {
7129
            mp_clear(&bcpy);
7130
            goto end;
7131
         }
7132
         b = &bcpy;
7133
      }
7134
   }
7135

7136
   res = mp_xor(a, b, c);
7137

7138
   if ((as != bs) && (res == MP_OKAY)) {
7139
      res = mp_sub(c, mx, c);
7140
   }
7141

7142
end:
7143
   if (a == &acpy) {
7144
      mp_clear(&acpy);
7145
   }
7146

7147
   if (b == &bcpy) {
7148
      mp_clear(&bcpy);
7149
   }
7150

7151
   if (mx == &_mx) {
7152
      mp_clear(mx);
7153
   }
7154

7155
   return res;
7156
}
7157

7158
/* End: bn_mp_tc_xor.c */
7159

7160
/* Start: bn_mp_to_signed_bin.c */
7161

7162
/* store in signed [big endian] format */
7163
int mp_to_signed_bin(const mp_int *a, unsigned char *b)
7164
{
7165
   int     res;
7166

7167
   if ((res = mp_to_unsigned_bin(a, b + 1)) != MP_OKAY) {
7168
      return res;
7169
   }
7170
   b[0] = (a->sign == MP_ZPOS) ? (unsigned char)0 : (unsigned char)1;
7171
   return MP_OKAY;
7172
}
7173

7174
/* End: bn_mp_to_signed_bin.c */
7175

7176
/* Start: bn_mp_to_signed_bin_n.c */
7177

7178
/* store in signed [big endian] format */
7179
int mp_to_signed_bin_n(const mp_int *a, unsigned char *b, unsigned long *outlen)
7180
{
7181
   if (*outlen < (unsigned long)mp_signed_bin_size(a)) {
7182
      return MP_VAL;
7183
   }
7184
   *outlen = (unsigned long)mp_signed_bin_size(a);
7185
   return mp_to_signed_bin(a, b);
7186
}
7187

7188
/* End: bn_mp_to_signed_bin_n.c */
7189

7190
/* Start: bn_mp_to_unsigned_bin.c */
7191

7192
/* store in unsigned [big endian] format */
7193
int mp_to_unsigned_bin(const mp_int *a, unsigned char *b)
7194
{
7195
   int     x, res;
7196
   mp_int  t;
7197

7198
   if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
7199
      return res;
7200
   }
7201

7202
   x = 0;
7203
   while (mp_iszero(&t) == MP_NO) {
7204
#ifndef MP_8BIT
7205
      b[x++] = (unsigned char)(t.dp[0] & 255u);
7206
#else
7207
      b[x++] = (unsigned char)(t.dp[0] | ((t.dp[1] & 1u) << 7));
7208
#endif
7209
      if ((res = mp_div_2d(&t, 8, &t, NULL)) != MP_OKAY) {
7210
         mp_clear(&t);
7211
         return res;
7212
      }
7213
   }
7214
   bn_reverse(b, x);
7215
   mp_clear(&t);
7216
   return MP_OKAY;
7217
}
7218

7219
/* End: bn_mp_to_unsigned_bin.c */
7220

7221
/* Start: bn_mp_to_unsigned_bin_n.c */
7222

7223
/* store in unsigned [big endian] format */
7224
int mp_to_unsigned_bin_n(const mp_int *a, unsigned char *b, unsigned long *outlen)
7225
{
7226
   if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) {
7227
      return MP_VAL;
7228
   }
7229
   *outlen = (unsigned long)mp_unsigned_bin_size(a);
7230
   return mp_to_unsigned_bin(a, b);
7231
}
7232

7233
/* End: bn_mp_to_unsigned_bin_n.c */
7234

7235
/* Start: bn_mp_toom_mul.c */
7236

7237
/* multiplication using the Toom-Cook 3-way algorithm
7238
 *
7239
 * Much more complicated than Karatsuba but has a lower
7240
 * asymptotic running time of O(N**1.464).  This algorithm is
7241
 * only particularly useful on VERY large inputs
7242
 * (we're talking 1000s of digits here...).
7243
*/
7244
int mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c)
7245
{
7246
   mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
7247
   int res, B;
7248

7249
   /* init temps */
7250
   if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
7251
                            &a0, &a1, &a2, &b0, &b1,
7252
                            &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
7253
      return res;
7254
   }
7255

7256
   /* B */
7257
   B = MIN(a->used, b->used) / 3;
7258

7259
   /* a = a2 * B**2 + a1 * B + a0 */
7260
   if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
7261
      goto LBL_ERR;
7262
   }
7263

7264
   if ((res = mp_copy(a, &a1)) != MP_OKAY) {
7265
      goto LBL_ERR;
7266
   }
7267
   mp_rshd(&a1, B);
7268
   if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) {
7269
      goto LBL_ERR;
7270
   }
7271

7272
   if ((res = mp_copy(a, &a2)) != MP_OKAY) {
7273
      goto LBL_ERR;
7274
   }
7275
   mp_rshd(&a2, B*2);
7276

7277
   /* b = b2 * B**2 + b1 * B + b0 */
7278
   if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
7279
      goto LBL_ERR;
7280
   }
7281

7282
   if ((res = mp_copy(b, &b1)) != MP_OKAY) {
7283
      goto LBL_ERR;
7284
   }
7285
   mp_rshd(&b1, B);
7286
   (void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
7287

7288
   if ((res = mp_copy(b, &b2)) != MP_OKAY) {
7289
      goto LBL_ERR;
7290
   }
7291
   mp_rshd(&b2, B*2);
7292

7293
   /* w0 = a0*b0 */
7294
   if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
7295
      goto LBL_ERR;
7296
   }
7297

7298
   /* w4 = a2 * b2 */
7299
   if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
7300
      goto LBL_ERR;
7301
   }
7302

7303
   /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
7304
   if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
7305
      goto LBL_ERR;
7306
   }
7307
   if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7308
      goto LBL_ERR;
7309
   }
7310
   if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7311
      goto LBL_ERR;
7312
   }
7313
   if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
7314
      goto LBL_ERR;
7315
   }
7316

7317
   if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
7318
      goto LBL_ERR;
7319
   }
7320
   if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
7321
      goto LBL_ERR;
7322
   }
7323
   if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
7324
      goto LBL_ERR;
7325
   }
7326
   if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
7327
      goto LBL_ERR;
7328
   }
7329

7330
   if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
7331
      goto LBL_ERR;
7332
   }
7333

7334
   /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
7335
   if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
7336
      goto LBL_ERR;
7337
   }
7338
   if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7339
      goto LBL_ERR;
7340
   }
7341
   if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7342
      goto LBL_ERR;
7343
   }
7344
   if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7345
      goto LBL_ERR;
7346
   }
7347

7348
   if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
7349
      goto LBL_ERR;
7350
   }
7351
   if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
7352
      goto LBL_ERR;
7353
   }
7354
   if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
7355
      goto LBL_ERR;
7356
   }
7357
   if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
7358
      goto LBL_ERR;
7359
   }
7360

7361
   if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
7362
      goto LBL_ERR;
7363
   }
7364

7365

7366
   /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
7367
   if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
7368
      goto LBL_ERR;
7369
   }
7370
   if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7371
      goto LBL_ERR;
7372
   }
7373
   if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
7374
      goto LBL_ERR;
7375
   }
7376
   if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
7377
      goto LBL_ERR;
7378
   }
7379
   if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
7380
      goto LBL_ERR;
7381
   }
7382

7383
   /* now solve the matrix
7384

7385
      0  0  0  0  1
7386
      1  2  4  8  16
7387
      1  1  1  1  1
7388
      16 8  4  2  1
7389
      1  0  0  0  0
7390

7391
      using 12 subtractions, 4 shifts,
7392
             2 small divisions and 1 small multiplication
7393
    */
7394

7395
   /* r1 - r4 */
7396
   if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
7397
      goto LBL_ERR;
7398
   }
7399
   /* r3 - r0 */
7400
   if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
7401
      goto LBL_ERR;
7402
   }
7403
   /* r1/2 */
7404
   if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
7405
      goto LBL_ERR;
7406
   }
7407
   /* r3/2 */
7408
   if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
7409
      goto LBL_ERR;
7410
   }
7411
   /* r2 - r0 - r4 */
7412
   if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
7413
      goto LBL_ERR;
7414
   }
7415
   if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
7416
      goto LBL_ERR;
7417
   }
7418
   /* r1 - r2 */
7419
   if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
7420
      goto LBL_ERR;
7421
   }
7422
   /* r3 - r2 */
7423
   if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
7424
      goto LBL_ERR;
7425
   }
7426
   /* r1 - 8r0 */
7427
   if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
7428
      goto LBL_ERR;
7429
   }
7430
   if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
7431
      goto LBL_ERR;
7432
   }
7433
   /* r3 - 8r4 */
7434
   if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
7435
      goto LBL_ERR;
7436
   }
7437
   if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
7438
      goto LBL_ERR;
7439
   }
7440
   /* 3r2 - r1 - r3 */
7441
   if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) {
7442
      goto LBL_ERR;
7443
   }
7444
   if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
7445
      goto LBL_ERR;
7446
   }
7447
   if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
7448
      goto LBL_ERR;
7449
   }
7450
   /* r1 - r2 */
7451
   if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
7452
      goto LBL_ERR;
7453
   }
7454
   /* r3 - r2 */
7455
   if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
7456
      goto LBL_ERR;
7457
   }
7458
   /* r1/3 */
7459
   if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
7460
      goto LBL_ERR;
7461
   }
7462
   /* r3/3 */
7463
   if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
7464
      goto LBL_ERR;
7465
   }
7466

7467
   /* at this point shift W[n] by B*n */
7468
   if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
7469
      goto LBL_ERR;
7470
   }
7471
   if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
7472
      goto LBL_ERR;
7473
   }
7474
   if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
7475
      goto LBL_ERR;
7476
   }
7477
   if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
7478
      goto LBL_ERR;
7479
   }
7480

7481
   if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
7482
      goto LBL_ERR;
7483
   }
7484
   if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
7485
      goto LBL_ERR;
7486
   }
7487
   if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
7488
      goto LBL_ERR;
7489
   }
7490
   if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
7491
      goto LBL_ERR;
7492
   }
7493

7494
LBL_ERR:
7495
   mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
7496
                  &a0, &a1, &a2, &b0, &b1,
7497
                  &b2, &tmp1, &tmp2, NULL);
7498
   return res;
7499
}
7500

7501
/* End: bn_mp_toom_mul.c */
7502

7503
/* Start: bn_mp_toom_sqr.c */
7504

7505
/* squaring using Toom-Cook 3-way algorithm */
7506
int mp_toom_sqr(const mp_int *a, mp_int *b)
7507
{
7508
   mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
7509
   int res, B;
7510

7511
   /* init temps */
7512
   if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
7513
      return res;
7514
   }
7515

7516
   /* B */
7517
   B = a->used / 3;
7518

7519
   /* a = a2 * B**2 + a1 * B + a0 */
7520
   if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
7521
      goto LBL_ERR;
7522
   }
7523

7524
   if ((res = mp_copy(a, &a1)) != MP_OKAY) {
7525
      goto LBL_ERR;
7526
   }
7527
   mp_rshd(&a1, B);
7528
   if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) {
7529
      goto LBL_ERR;
7530
   }
7531

7532
   if ((res = mp_copy(a, &a2)) != MP_OKAY) {
7533
      goto LBL_ERR;
7534
   }
7535
   mp_rshd(&a2, B*2);
7536

7537
   /* w0 = a0*a0 */
7538
   if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
7539
      goto LBL_ERR;
7540
   }
7541

7542
   /* w4 = a2 * a2 */
7543
   if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
7544
      goto LBL_ERR;
7545
   }
7546

7547
   /* w1 = (a2 + 2(a1 + 2a0))**2 */
7548
   if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
7549
      goto LBL_ERR;
7550
   }
7551
   if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7552
      goto LBL_ERR;
7553
   }
7554
   if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7555
      goto LBL_ERR;
7556
   }
7557
   if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
7558
      goto LBL_ERR;
7559
   }
7560

7561
   if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
7562
      goto LBL_ERR;
7563
   }
7564

7565
   /* w3 = (a0 + 2(a1 + 2a2))**2 */
7566
   if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
7567
      goto LBL_ERR;
7568
   }
7569
   if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
7570
      goto LBL_ERR;
7571
   }
7572
   if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
7573
      goto LBL_ERR;
7574
   }
7575
   if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7576
      goto LBL_ERR;
7577
   }
7578

7579
   if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
7580
      goto LBL_ERR;
7581
   }
7582

7583

7584
   /* w2 = (a2 + a1 + a0)**2 */
7585
   if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
7586
      goto LBL_ERR;
7587
   }
7588
   if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
7589
      goto LBL_ERR;
7590
   }
7591
   if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
7592
      goto LBL_ERR;
7593
   }
7594

7595
   /* now solve the matrix
7596

7597
      0  0  0  0  1
7598
      1  2  4  8  16
7599
      1  1  1  1  1
7600
      16 8  4  2  1
7601
      1  0  0  0  0
7602

7603
      using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
7604
    */
7605

7606
   /* r1 - r4 */
7607
   if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
7608
      goto LBL_ERR;
7609
   }
7610
   /* r3 - r0 */
7611
   if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
7612
      goto LBL_ERR;
7613
   }
7614
   /* r1/2 */
7615
   if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
7616
      goto LBL_ERR;
7617
   }
7618
   /* r3/2 */
7619
   if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
7620
      goto LBL_ERR;
7621
   }
7622
   /* r2 - r0 - r4 */
7623
   if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
7624
      goto LBL_ERR;
7625
   }
7626
   if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
7627
      goto LBL_ERR;
7628
   }
7629
   /* r1 - r2 */
7630
   if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
7631
      goto LBL_ERR;
7632
   }
7633
   /* r3 - r2 */
7634
   if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
7635
      goto LBL_ERR;
7636
   }
7637
   /* r1 - 8r0 */
7638
   if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
7639
      goto LBL_ERR;
7640
   }
7641
   if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
7642
      goto LBL_ERR;
7643
   }
7644
   /* r3 - 8r4 */
7645
   if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
7646
      goto LBL_ERR;
7647
   }
7648
   if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
7649
      goto LBL_ERR;
7650
   }
7651
   /* 3r2 - r1 - r3 */
7652
   if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) {
7653
      goto LBL_ERR;
7654
   }
7655
   if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
7656
      goto LBL_ERR;
7657
   }
7658
   if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
7659
      goto LBL_ERR;
7660
   }
7661
   /* r1 - r2 */
7662
   if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
7663
      goto LBL_ERR;
7664
   }
7665
   /* r3 - r2 */
7666
   if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
7667
      goto LBL_ERR;
7668
   }
7669
   /* r1/3 */
7670
   if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
7671
      goto LBL_ERR;
7672
   }
7673
   /* r3/3 */
7674
   if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
7675
      goto LBL_ERR;
7676
   }
7677

7678
   /* at this point shift W[n] by B*n */
7679
   if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
7680
      goto LBL_ERR;
7681
   }
7682
   if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
7683
      goto LBL_ERR;
7684
   }
7685
   if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
7686
      goto LBL_ERR;
7687
   }
7688
   if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
7689
      goto LBL_ERR;
7690
   }
7691

7692
   if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
7693
      goto LBL_ERR;
7694
   }
7695
   if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
7696
      goto LBL_ERR;
7697
   }
7698
   if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
7699
      goto LBL_ERR;
7700
   }
7701
   if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
7702
      goto LBL_ERR;
7703
   }
7704

7705
LBL_ERR:
7706
   mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
7707
   return res;
7708
}
7709

7710
/* End: bn_mp_toom_sqr.c */
7711

7712
/* Start: bn_mp_toradix.c */
7713

7714
/* stores a bignum as a ASCII string in a given radix (2..64) */
7715
int mp_toradix(const mp_int *a, char *str, int radix)
7716
{
7717
   int     res, digs;
7718
   mp_int  t;
7719
   mp_digit d;
7720
   char   *_s = str;
7721

7722
   /* check range of the radix */
7723
   if ((radix < 2) || (radix > 64)) {
7724
      return MP_VAL;
7725
   }
7726

7727
   /* quick out if its zero */
7728
   if (mp_iszero(a) == MP_YES) {
7729
      *str++ = '0';
7730
      *str = '\0';
7731
      return MP_OKAY;
7732
   }
7733

7734
   if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
7735
      return res;
7736
   }
7737

7738
   /* if it is negative output a - */
7739
   if (t.sign == MP_NEG) {
7740
      ++_s;
7741
      *str++ = '-';
7742
      t.sign = MP_ZPOS;
7743
   }
7744

7745
   digs = 0;
7746
   while (mp_iszero(&t) == MP_NO) {
7747
      if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
7748
         mp_clear(&t);
7749
         return res;
7750
      }
7751
      *str++ = mp_s_rmap[d];
7752
      ++digs;
7753
   }
7754

7755
   /* reverse the digits of the string.  In this case _s points
7756
    * to the first digit [exluding the sign] of the number]
7757
    */
7758
   bn_reverse((unsigned char *)_s, digs);
7759

7760
   /* append a NULL so the string is properly terminated */
7761
   *str = '\0';
7762

7763
   mp_clear(&t);
7764
   return MP_OKAY;
7765
}
7766

7767
/* End: bn_mp_toradix.c */
7768

7769
/* Start: bn_mp_toradix_n.c */
7770

7771
/* stores a bignum as a ASCII string in a given radix (2..64)
7772
 *
7773
 * Stores upto maxlen-1 chars and always a NULL byte
7774
 */
7775
int mp_toradix_n(const mp_int *a, char *str, int radix, int maxlen)
7776
{
7777
   int     res, digs;
7778
   mp_int  t;
7779
   mp_digit d;
7780
   char   *_s = str;
7781

7782
   /* check range of the maxlen, radix */
7783
   if ((maxlen < 2) || (radix < 2) || (radix > 64)) {
7784
      return MP_VAL;
7785
   }
7786

7787
   /* quick out if its zero */
7788
   if (mp_iszero(a) == MP_YES) {
7789
      *str++ = '0';
7790
      *str = '\0';
7791
      return MP_OKAY;
7792
   }
7793

7794
   if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
7795
      return res;
7796
   }
7797

7798
   /* if it is negative output a - */
7799
   if (t.sign == MP_NEG) {
7800
      /* we have to reverse our digits later... but not the - sign!! */
7801
      ++_s;
7802

7803
      /* store the flag and mark the number as positive */
7804
      *str++ = '-';
7805
      t.sign = MP_ZPOS;
7806

7807
      /* subtract a char */
7808
      --maxlen;
7809
   }
7810

7811
   digs = 0;
7812
   while (mp_iszero(&t) == MP_NO) {
7813
      if (--maxlen < 1) {
7814
         /* no more room */
7815
         break;
7816
      }
7817
      if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
7818
         mp_clear(&t);
7819
         return res;
7820
      }
7821
      *str++ = mp_s_rmap[d];
7822
      ++digs;
7823
   }
7824

7825
   /* reverse the digits of the string.  In this case _s points
7826
    * to the first digit [exluding the sign] of the number
7827
    */
7828
   bn_reverse((unsigned char *)_s, digs);
7829

7830
   /* append a NULL so the string is properly terminated */
7831
   *str = '\0';
7832

7833
   mp_clear(&t);
7834
   return MP_OKAY;
7835
}
7836

7837
/* End: bn_mp_toradix_n.c */
7838

7839
/* Start: bn_mp_unsigned_bin_size.c */
7840

7841
/* get the size for an unsigned equivalent */
7842
int mp_unsigned_bin_size(const mp_int *a)
7843
{
7844
   int     size = mp_count_bits(a);
7845
   return (size / 8) + ((((unsigned)size & 7u) != 0u) ? 1 : 0);
7846
}
7847

7848
/* End: bn_mp_unsigned_bin_size.c */
7849

7850
/* Start: bn_mp_xor.c */
7851

7852
/* XOR two ints together */
7853
int mp_xor(const mp_int *a, const mp_int *b, mp_int *c)
7854
{
7855
   int     res, ix, px;
7856
   mp_int  t;
7857
   const mp_int *x;
7858

7859
   if (a->used > b->used) {
7860
      if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
7861
         return res;
7862
      }
7863
      px = b->used;
7864
      x = b;
7865
   } else {
7866
      if ((res = mp_init_copy(&t, b)) != MP_OKAY) {
7867
         return res;
7868
      }
7869
      px = a->used;
7870
      x = a;
7871
   }
7872

7873
   for (ix = 0; ix < px; ix++) {
7874
      t.dp[ix] ^= x->dp[ix];
7875
   }
7876
   mp_clamp(&t);
7877
   mp_exch(c, &t);
7878
   mp_clear(&t);
7879
   return MP_OKAY;
7880
}
7881

7882
/* End: bn_mp_xor.c */
7883

7884
/* Start: bn_mp_zero.c */
7885

7886
/* set to zero */
7887
void mp_zero(mp_int *a)
7888
{
7889
   int       n;
7890
   mp_digit *tmp;
7891

7892
   a->sign = MP_ZPOS;
7893
   a->used = 0;
7894

7895
   tmp = a->dp;
7896
   for (n = 0; n < a->alloc; n++) {
7897
      *tmp++ = 0;
7898
   }
7899
}
7900

7901
/* End: bn_mp_zero.c */
7902

7903
/* Start: bn_prime_tab.c */
7904

7905
/* Table of small primes in ascending order.
 *
 * The first 31 entries (2 .. 0x7F) are always present.  The rest, which
 * brings the table to 256 entries ending at 0x0653, is compiled in only
 * when MP_8BIT is not defined.
 */
const mp_digit ltm_prime_tab[] = {
   /* entries 0..30: always available */
   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
#ifndef MP_8BIT
   /* entries 31..63 */
   0x0083,
   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,

   /* entries 64..127 */
   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,

   /* entries 128..191 */
   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,

   /* entries 192..255 */
   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
#endif
};
7945

7946
/* End: bn_prime_tab.c */
7947

7948
/* Start: bn_reverse.c */
7949

7950
/* Reverses the first `len` bytes of `s` in place.  Lengths of one or less
 * (including zero and negative values) leave the buffer untouched.
 */
void bn_reverse(unsigned char *s, int len)
{
   int left, right;

   /* swap the outermost pair and walk inwards until the ends meet */
   for (left = 0, right = len - 1; left < right; ++left, --right) {
      unsigned char held = s[left];

      s[left]  = s[right];
      s[right] = held;
   }
}
7966

7967
/* End: bn_reverse.c */
7968

7969
/* Start: bn_s_mp_add.c */
7970

7971
/* low level addition, based on HAC pp.594, Algorithm 14.7 */
7972
int s_mp_add(const mp_int *a, const mp_int *b, mp_int *c)
7973
{
7974
   const mp_int *x;
7975
   int     olduse, res, min, max;
7976

7977
   /* find sizes, we let |a| <= |b| which means we have to sort
7978
    * them.  "x" will point to the input with the most digits
7979
    */
7980
   if (a->used > b->used) {
7981
      min = b->used;
7982
      max = a->used;
7983
      x = a;
7984
   } else {
7985
      min = a->used;
7986
      max = b->used;
7987
      x = b;
7988
   }
7989

7990
   /* init result */
7991
   if (c->alloc < (max + 1)) {
7992
      if ((res = mp_grow(c, max + 1)) != MP_OKAY) {
7993
         return res;
7994
      }
7995
   }
7996

7997
   /* get old used digit count and set new one */
7998
   olduse = c->used;
7999
   c->used = max + 1;
8000

8001
   {
8002
      mp_digit u, *tmpa, *tmpb, *tmpc;
8003
      int i;
8004

8005
      /* alias for digit pointers */
8006

8007
      /* first input */
8008
      tmpa = a->dp;
8009

8010
      /* second input */
8011
      tmpb = b->dp;
8012

8013
      /* destination */
8014
      tmpc = c->dp;
8015

8016
      /* zero the carry */
8017
      u = 0;
8018
      for (i = 0; i < min; i++) {
8019
         /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
8020
         *tmpc = *tmpa++ + *tmpb++ + u;
8021

8022
         /* U = carry bit of T[i] */
8023
         u = *tmpc >> (mp_digit)DIGIT_BIT;
8024

8025
         /* take away carry bit from T[i] */
8026
         *tmpc++ &= MP_MASK;
8027
      }
8028

8029
      /* now copy higher words if any, that is in A+B
8030
       * if A or B has more digits add those in
8031
       */
8032
      if (min != max) {
8033
         for (; i < max; i++) {
8034
            /* T[i] = X[i] + U */
8035
            *tmpc = x->dp[i] + u;
8036

8037
            /* U = carry bit of T[i] */
8038
            u = *tmpc >> (mp_digit)DIGIT_BIT;
8039

8040
            /* take away carry bit from T[i] */
8041
            *tmpc++ &= MP_MASK;
8042
         }
8043
      }
8044

8045
      /* add carry */
8046
      *tmpc++ = u;
8047

8048
      /* clear digits above oldused */
8049
      for (i = c->used; i < olduse; i++) {
8050
         *tmpc++ = 0;
8051
      }
8052
   }
8053

8054
   mp_clamp(c);
8055
   return MP_OKAY;
8056
}
8057

8058
/* End: bn_s_mp_add.c */
8059

8060
/* Start: bn_s_mp_exptmod.c */
8061

8062
#ifdef MP_LOW_MEM
8063
#   define TAB_SIZE 32
8064
#else
8065
#   define TAB_SIZE 256
8066
#endif
8067

8068
int s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)
8069
{
8070
   mp_int  M[TAB_SIZE], res, mu;
8071
   mp_digit buf;
8072
   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
8073
   int (*redux)(mp_int *x, const mp_int *m, const mp_int *mu);
8074

8075
   /* find window size */
8076
   x = mp_count_bits(X);
8077
   if (x <= 7) {
8078
      winsize = 2;
8079
   } else if (x <= 36) {
8080
      winsize = 3;
8081
   } else if (x <= 140) {
8082
      winsize = 4;
8083
   } else if (x <= 450) {
8084
      winsize = 5;
8085
   } else if (x <= 1303) {
8086
      winsize = 6;
8087
   } else if (x <= 3529) {
8088
      winsize = 7;
8089
   } else {
8090
      winsize = 8;
8091
   }
8092

8093
#ifdef MP_LOW_MEM
8094
   if (winsize > 5) {
8095
      winsize = 5;
8096
   }
8097
#endif
8098

8099
   /* init M array */
8100
   /* init first cell */
8101
   if ((err = mp_init(&M[1])) != MP_OKAY) {
8102
      return err;
8103
   }
8104

8105
   /* now init the second half of the array */
8106
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
8107
      if ((err = mp_init(&M[x])) != MP_OKAY) {
8108
         for (y = 1<<(winsize-1); y < x; y++) {
8109
            mp_clear(&M[y]);
8110
         }
8111
         mp_clear(&M[1]);
8112
         return err;
8113
      }
8114
   }
8115

8116
   /* create mu, used for Barrett reduction */
8117
   if ((err = mp_init(&mu)) != MP_OKAY) {
8118
      goto LBL_M;
8119
   }
8120

8121
   if (redmode == 0) {
8122
      if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) {
8123
         goto LBL_MU;
8124
      }
8125
      redux = mp_reduce;
8126
   } else {
8127
      if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) {
8128
         goto LBL_MU;
8129
      }
8130
      redux = mp_reduce_2k_l;
8131
   }
8132

8133
   /* create M table
8134
    *
8135
    * The M table contains powers of the base,
8136
    * e.g. M[x] = G**x mod P
8137
    *
8138
    * The first half of the table is not
8139
    * computed though accept for M[0] and M[1]
8140
    */
8141
   if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
8142
      goto LBL_MU;
8143
   }
8144

8145
   /* compute the value at M[1<<(winsize-1)] by squaring
8146
    * M[1] (winsize-1) times
8147
    */
8148
   if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
8149
      goto LBL_MU;
8150
   }
8151

8152
   for (x = 0; x < (winsize - 1); x++) {
8153
      /* square it */
8154
      if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)],
8155
                        &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {
8156
         goto LBL_MU;
8157
      }
8158

8159
      /* reduce modulo P */
8160
      if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
8161
         goto LBL_MU;
8162
      }
8163
   }
8164

8165
   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
8166
    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
8167
    */
8168
   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
8169
      if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
8170
         goto LBL_MU;
8171
      }
8172
      if ((err = redux(&M[x], P, &mu)) != MP_OKAY) {
8173
         goto LBL_MU;
8174
      }
8175
   }
8176

8177
   /* setup result */
8178
   if ((err = mp_init(&res)) != MP_OKAY) {
8179
      goto LBL_MU;
8180
   }
8181
   mp_set(&res, 1uL);
8182

8183
   /* set initial mode and bit cnt */
8184
   mode   = 0;
8185
   bitcnt = 1;
8186
   buf    = 0;
8187
   digidx = X->used - 1;
8188
   bitcpy = 0;
8189
   bitbuf = 0;
8190

8191
   for (;;) {
8192
      /* grab next digit as required */
8193
      if (--bitcnt == 0) {
8194
         /* if digidx == -1 we are out of digits */
8195
         if (digidx == -1) {
8196
            break;
8197
         }
8198
         /* read next digit and reset the bitcnt */
8199
         buf    = X->dp[digidx--];
8200
         bitcnt = (int)DIGIT_BIT;
8201
      }
8202

8203
      /* grab the next msb from the exponent */
8204
      y     = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
8205
      buf <<= (mp_digit)1;
8206

8207
      /* if the bit is zero and mode == 0 then we ignore it
8208
       * These represent the leading zero bits before the first 1 bit
8209
       * in the exponent.  Technically this opt is not required but it
8210
       * does lower the # of trivial squaring/reductions used
8211
       */
8212
      if ((mode == 0) && (y == 0)) {
8213
         continue;
8214
      }
8215

8216
      /* if the bit is zero and mode == 1 then we square */
8217
      if ((mode == 1) && (y == 0)) {
8218
         if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
8219
            goto LBL_RES;
8220
         }
8221
         if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8222
            goto LBL_RES;
8223
         }
8224
         continue;
8225
      }
8226

8227
      /* else we add it to the window */
8228
      bitbuf |= (y << (winsize - ++bitcpy));
8229
      mode    = 2;
8230

8231
      if (bitcpy == winsize) {
8232
         /* ok window is filled so square as required and multiply  */
8233
         /* square first */
8234
         for (x = 0; x < winsize; x++) {
8235
            if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
8236
               goto LBL_RES;
8237
            }
8238
            if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8239
               goto LBL_RES;
8240
            }
8241
         }
8242

8243
         /* then multiply */
8244
         if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {
8245
            goto LBL_RES;
8246
         }
8247
         if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8248
            goto LBL_RES;
8249
         }
8250

8251
         /* empty window and reset */
8252
         bitcpy = 0;
8253
         bitbuf = 0;
8254
         mode   = 1;
8255
      }
8256
   }
8257

8258
   /* if bits remain then square/multiply */
8259
   if ((mode == 2) && (bitcpy > 0)) {
8260
      /* square then multiply if the bit is set */
8261
      for (x = 0; x < bitcpy; x++) {
8262
         if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
8263
            goto LBL_RES;
8264
         }
8265
         if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8266
            goto LBL_RES;
8267
         }
8268

8269
         bitbuf <<= 1;
8270
         if ((bitbuf & (1 << winsize)) != 0) {
8271
            /* then multiply */
8272
            if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
8273
               goto LBL_RES;
8274
            }
8275
            if ((err = redux(&res, P, &mu)) != MP_OKAY) {
8276
               goto LBL_RES;
8277
            }
8278
         }
8279
      }
8280
   }
8281

8282
   mp_exch(&res, Y);
8283
   err = MP_OKAY;
8284
LBL_RES:
8285
   mp_clear(&res);
8286
LBL_MU:
8287
   mp_clear(&mu);
8288
LBL_M:
8289
   mp_clear(&M[1]);
8290
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
8291
      mp_clear(&M[x]);
8292
   }
8293
   return err;
8294
}
8295

8296
/* End: bn_s_mp_exptmod.c */
8297

8298
/* Start: bn_s_mp_mul_digs.c */
8299

8300
/* multiplies |a| * |b| and only computes upto digs digits of result
8301
 * HAC pp. 595, Algorithm 14.12  Modified so you can control how
8302
 * many digits of output are created.
8303
 */
8304
int s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
8305
{
8306
   mp_int  t;
8307
   int     res, pa, pb, ix, iy;
8308
   mp_digit u;
8309
   mp_word r;
8310
   mp_digit tmpx, *tmpt, *tmpy;
8311

8312
   /* can we use the fast multiplier? */
8313
   if ((digs < (int)MP_WARRAY) &&
8314
       (MIN(a->used, b->used) <
8315
        (int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {
8316
      return fast_s_mp_mul_digs(a, b, c, digs);
8317
   }
8318

8319
   if ((res = mp_init_size(&t, digs)) != MP_OKAY) {
8320
      return res;
8321
   }
8322
   t.used = digs;
8323

8324
   /* compute the digits of the product directly */
8325
   pa = a->used;
8326
   for (ix = 0; ix < pa; ix++) {
8327
      /* set the carry to zero */
8328
      u = 0;
8329

8330
      /* limit ourselves to making digs digits of output */
8331
      pb = MIN(b->used, digs - ix);
8332

8333
      /* setup some aliases */
8334
      /* copy of the digit from a used within the nested loop */
8335
      tmpx = a->dp[ix];
8336

8337
      /* an alias for the destination shifted ix places */
8338
      tmpt = t.dp + ix;
8339

8340
      /* an alias for the digits of b */
8341
      tmpy = b->dp;
8342

8343
      /* compute the columns of the output and propagate the carry */
8344
      for (iy = 0; iy < pb; iy++) {
8345
         /* compute the column as a mp_word */
8346
         r       = (mp_word)*tmpt +
8347
                   ((mp_word)tmpx * (mp_word)*tmpy++) +
8348
                   (mp_word)u;
8349

8350
         /* the new column is the lower part of the result */
8351
         *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);
8352

8353
         /* get the carry word from the result */
8354
         u       = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8355
      }
8356
      /* set carry if it is placed below digs */
8357
      if ((ix + iy) < digs) {
8358
         *tmpt = u;
8359
      }
8360
   }
8361

8362
   mp_clamp(&t);
8363
   mp_exch(&t, c);
8364

8365
   mp_clear(&t);
8366
   return MP_OKAY;
8367
}
8368

8369
/* End: bn_s_mp_mul_digs.c */
8370

8371
/* Start: bn_s_mp_mul_high_digs.c */
8372

8373
/* multiplies |a| * |b| and does not compute the lower digs digits
8374
 * [meant to get the higher part of the product]
8375
 */
8376
int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
8377
{
8378
   mp_int  t;
8379
   int     res, pa, pb, ix, iy;
8380
   mp_digit u;
8381
   mp_word r;
8382
   mp_digit tmpx, *tmpt, *tmpy;
8383

8384
   /* can we use the fast multiplier? */
8385
   if (((a->used + b->used + 1) < (int)MP_WARRAY)
8386
       && (MIN(a->used, b->used) < (int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {
8387
      return fast_s_mp_mul_high_digs(a, b, c, digs);
8388
   }
8389

8390
   if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {
8391
      return res;
8392
   }
8393
   t.used = a->used + b->used + 1;
8394

8395
   pa = a->used;
8396
   pb = b->used;
8397
   for (ix = 0; ix < pa; ix++) {
8398
      /* clear the carry */
8399
      u = 0;
8400

8401
      /* left hand side of A[ix] * B[iy] */
8402
      tmpx = a->dp[ix];
8403

8404
      /* alias to the address of where the digits will be stored */
8405
      tmpt = &(t.dp[digs]);
8406

8407
      /* alias for where to read the right hand side from */
8408
      tmpy = b->dp + (digs - ix);
8409

8410
      for (iy = digs - ix; iy < pb; iy++) {
8411
         /* calculate the double precision result */
8412
         r       = (mp_word)*tmpt +
8413
                   ((mp_word)tmpx * (mp_word)*tmpy++) +
8414
                   (mp_word)u;
8415

8416
         /* get the lower part */
8417
         *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);
8418

8419
         /* carry the carry */
8420
         u       = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8421
      }
8422
      *tmpt = u;
8423
   }
8424
   mp_clamp(&t);
8425
   mp_exch(&t, c);
8426
   mp_clear(&t);
8427
   return MP_OKAY;
8428
}
8429

8430
/* End: bn_s_mp_mul_high_digs.c */
8431

8432
/* Start: bn_s_mp_sqr.c */
8433

8434
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
8435
int s_mp_sqr(const mp_int *a, mp_int *b)
8436
{
8437
   mp_int  t;
8438
   int     res, ix, iy, pa;
8439
   mp_word r;
8440
   mp_digit u, tmpx, *tmpt;
8441

8442
   pa = a->used;
8443
   if ((res = mp_init_size(&t, (2 * pa) + 1)) != MP_OKAY) {
8444
      return res;
8445
   }
8446

8447
   /* default used is maximum possible size */
8448
   t.used = (2 * pa) + 1;
8449

8450
   for (ix = 0; ix < pa; ix++) {
8451
      /* first calculate the digit at 2*ix */
8452
      /* calculate double precision result */
8453
      r = (mp_word)t.dp[2*ix] +
8454
          ((mp_word)a->dp[ix] * (mp_word)a->dp[ix]);
8455

8456
      /* store lower part in result */
8457
      t.dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK);
8458

8459
      /* get the carry */
8460
      u           = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8461

8462
      /* left hand side of A[ix] * A[iy] */
8463
      tmpx        = a->dp[ix];
8464

8465
      /* alias for where to store the results */
8466
      tmpt        = t.dp + ((2 * ix) + 1);
8467

8468
      for (iy = ix + 1; iy < pa; iy++) {
8469
         /* first calculate the product */
8470
         r       = (mp_word)tmpx * (mp_word)a->dp[iy];
8471

8472
         /* now calculate the double precision result, note we use
8473
          * addition instead of *2 since it's easier to optimize
8474
          */
8475
         r       = (mp_word)*tmpt + r + r + (mp_word)u;
8476

8477
         /* store lower part */
8478
         *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);
8479

8480
         /* get carry */
8481
         u       = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8482
      }
8483
      /* propagate upwards */
8484
      while (u != 0uL) {
8485
         r       = (mp_word)*tmpt + (mp_word)u;
8486
         *tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);
8487
         u       = (mp_digit)(r >> (mp_word)DIGIT_BIT);
8488
      }
8489
   }
8490

8491
   mp_clamp(&t);
8492
   mp_exch(&t, b);
8493
   mp_clear(&t);
8494
   return MP_OKAY;
8495
}
8496

8497
/* End: bn_s_mp_sqr.c */
8498

8499
/* Start: bn_s_mp_sub.c */
8500

8501
/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
8502
int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
8503
{
8504
   int     olduse, res, min, max;
8505

8506
   /* find sizes */
8507
   min = b->used;
8508
   max = a->used;
8509

8510
   /* init result */
8511
   if (c->alloc < max) {
8512
      if ((res = mp_grow(c, max)) != MP_OKAY) {
8513
         return res;
8514
      }
8515
   }
8516
   olduse = c->used;
8517
   c->used = max;
8518

8519
   {
8520
      mp_digit u, *tmpa, *tmpb, *tmpc;
8521
      int i;
8522

8523
      /* alias for digit pointers */
8524
      tmpa = a->dp;
8525
      tmpb = b->dp;
8526
      tmpc = c->dp;
8527

8528
      /* set carry to zero */
8529
      u = 0;
8530
      for (i = 0; i < min; i++) {
8531
         /* T[i] = A[i] - B[i] - U */
8532
         *tmpc = (*tmpa++ - *tmpb++) - u;
8533

8534
         /* U = carry bit of T[i]
8535
          * Note this saves performing an AND operation since
8536
          * if a carry does occur it will propagate all the way to the
8537
          * MSB.  As a result a single shift is enough to get the carry
8538
          */
8539
         u = *tmpc >> (((size_t)CHAR_BIT * sizeof(mp_digit)) - 1u);
8540

8541
         /* Clear carry from T[i] */
8542
         *tmpc++ &= MP_MASK;
8543
      }
8544

8545
      /* now copy higher words if any, e.g. if A has more digits than B  */
8546
      for (; i < max; i++) {
8547
         /* T[i] = A[i] - U */
8548
         *tmpc = *tmpa++ - u;
8549

8550
         /* U = carry bit of T[i] */
8551
         u = *tmpc >> (((size_t)CHAR_BIT * sizeof(mp_digit)) - 1u);
8552

8553
         /* Clear carry from T[i] */
8554
         *tmpc++ &= MP_MASK;
8555
      }
8556

8557
      /* clear digits above used (since we may not have grown result above) */
8558
      for (i = c->used; i < olduse; i++) {
8559
         *tmpc++ = 0;
8560
      }
8561
   }
8562

8563
   mp_clamp(c);
8564
   return MP_OKAY;
8565
}
8566

8567
/* End: bn_s_mp_sub.c */
8568

8569
/* Start: bncore.c */
8570

8571
/* Known optimal configurations
8572

8573
 CPU                    /Compiler     /MUL CUTOFF/SQR CUTOFF
8574
-------------------------------------------------------------
8575
 Intel P4 Northwood     /GCC v3.4.1   /        88/       128/LTM 0.32 ;-)
8576
 AMD Athlon64           /GCC v3.4.4   /        80/       120/LTM 0.35
8577

8578
*/
8579

8580
/* Tunable thresholds, in digits, at which each algorithm takes over. */

/* Min. number of digits before Karatsuba multiplication is used. */
int KARATSUBA_MUL_CUTOFF = 80;

/* Min. number of digits before Karatsuba squaring is used. */
int KARATSUBA_SQR_CUTOFF = 120;

/* No optimal values are known yet for the Toom-Cook cutoffs, so they are
 * deliberately set high.
 */
int TOOM_MUL_CUTOFF = 350;
int TOOM_SQR_CUTOFF = 400;
8585

8586
/* End: bncore.c */
8587

8588
Product

Resources

Company