/* LibTomMath, multiple-precision integer library -- Tom St Denis1*2* LibTomMath is a library that provides multiple-precision3* integer arithmetic as well as number theoretic functionality.4*5* The library was designed directly after the MPI library by6* Michael Fromberger but has been written from scratch with7* additional optimizations in place.8*9* SPDX-License-Identifier: Unlicense10*/1112#include <stdarg.h>13#include "tommath_private.h"1415/* Start: bn_fast_mp_invmod.c */1617/* computes the modular inverse via binary extended euclidean algorithm,18* that is c = 1/a mod b19*20* Based on slow invmod except this is optimized for the case where b is21* odd as per HAC Note 14.64 on pp. 61022*/23int fast_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c)24{25mp_int x, y, u, v, B, D;26int res, neg;2728/* 2. [modified] b must be odd */29if (mp_iseven(b) == MP_YES) {30return MP_VAL;31}3233/* init all our temps */34if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {35return res;36}3738/* x == modulus, y == value to invert */39if ((res = mp_copy(b, &x)) != MP_OKAY) {40goto LBL_ERR;41}4243/* we need y = |a| */44if ((res = mp_mod(a, b, &y)) != MP_OKAY) {45goto LBL_ERR;46}4748/* if one of x,y is zero return an error! */49if ((mp_iszero(&x) == MP_YES) || (mp_iszero(&y) == MP_YES)) {50res = MP_VAL;51goto LBL_ERR;52}5354/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */55if ((res = mp_copy(&x, &u)) != MP_OKAY) {56goto LBL_ERR;57}58if ((res = mp_copy(&y, &v)) != MP_OKAY) {59goto LBL_ERR;60}61mp_set(&D, 1uL);6263top:64/* 4. while u is even do */65while (mp_iseven(&u) == MP_YES) {66/* 4.1 u = u/2 */67if ((res = mp_div_2(&u, &u)) != MP_OKAY) {68goto LBL_ERR;69}70/* 4.2 if B is odd then */71if (mp_isodd(&B) == MP_YES) {72if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {73goto LBL_ERR;74}75}76/* B = B/2 */77if ((res = mp_div_2(&B, &B)) != MP_OKAY) {78goto LBL_ERR;79}80}8182/* 5. while v is even do */83while (mp_iseven(&v) == MP_YES) {84/* 5.1 v = v/2 */85if ((res = mp_div_2(&v, &v)) != MP_OKAY) {86goto LBL_ERR;87}88/* 5.2 if D is odd then */89if (mp_isodd(&D) == MP_YES) {90/* D = (D-x)/2 */91if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {92goto LBL_ERR;93}94}95/* D = D/2 */96if ((res = mp_div_2(&D, &D)) != MP_OKAY) {97goto LBL_ERR;98}99}100101/* 6. if u >= v then */102if (mp_cmp(&u, &v) != MP_LT) {103/* u = u - v, B = B - D */104if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {105goto LBL_ERR;106}107108if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {109goto LBL_ERR;110}111} else {112/* v - v - u, D = D - B */113if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {114goto LBL_ERR;115}116117if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {118goto LBL_ERR;119}120}121122/* if not zero goto step 4 */123if (mp_iszero(&u) == MP_NO) {124goto top;125}126127/* now a = C, b = D, gcd == g*v */128129/* if v != 1 then there is no inverse */130if (mp_cmp_d(&v, 1uL) != MP_EQ) {131res = MP_VAL;132goto LBL_ERR;133}134135/* b is now the inverse */136neg = a->sign;137while (D.sign == MP_NEG) {138if ((res = mp_add(&D, b, &D)) != MP_OKAY) {139goto LBL_ERR;140}141}142143/* too big */144while (mp_cmp_mag(&D, b) != MP_LT) {145if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {146goto LBL_ERR;147}148}149150mp_exch(&D, c);151c->sign = neg;152res = MP_OKAY;153154LBL_ERR:155mp_clear_multi(&x, &y, &u, &v, &B, &D, NULL);156return res;157}158159/* End: bn_fast_mp_invmod.c */160161/* Start: bn_fast_mp_montgomery_reduce.c */162163/* computes xR**-1 == x (mod N) via Montgomery Reduction164*165* This is an optimized implementation of montgomery_reduce166* which uses the comba method to quickly calculate the columns of the167* reduction.168*169* Based on Algorithm 14.32 on pp.601 of HAC.170*/171int fast_mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)172{173int ix, res, olduse;174mp_word W[MP_WARRAY];175176if (x->used > (int)MP_WARRAY) {177return MP_VAL;178}179180/* get old used count */181olduse = x->used;182183/* grow a as required */184if (x->alloc < (n->used + 1)) {185if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) {186return res;187}188}189190/* first we have to get the digits of the input into191* an array of double precision words W[...]192*/193{194mp_word *_W;195mp_digit *tmpx;196197/* alias for the W[] array */198_W = W;199200/* alias for the digits of x*/201tmpx = x->dp;202203/* copy the digits of a into W[0..a->used-1] */204for (ix = 0; ix < x->used; ix++) {205*_W++ = *tmpx++;206}207208/* zero the high words of W[a->used..m->used*2] */209for (; ix < ((n->used * 2) + 1); ix++) {210*_W++ = 0;211}212}213214/* now we proceed to zero successive digits215* from the least significant upwards216*/217for (ix = 0; ix < n->used; ix++) {218/* mu = ai * m' mod b219*220* We avoid a double precision multiplication (which isn't required)221* by casting the value down to a mp_digit. Note this requires222* that W[ix-1] have the carry cleared (see after the inner loop)223*/224mp_digit mu;225mu = ((W[ix] & MP_MASK) * rho) & MP_MASK;226227/* a = a + mu * m * b**i228*229* This is computed in place and on the fly. The multiplication230* by b**i is handled by offseting which columns the results231* are added to.232*233* Note the comba method normally doesn't handle carries in the234* inner loop In this case we fix the carry from the previous235* column since the Montgomery reduction requires digits of the236* result (so far) [see above] to work. This is237* handled by fixing up one carry after the inner loop. The238* carry fixups are done in order so after these loops the239* first m->used words of W[] have the carries fixed240*/241{242int iy;243mp_digit *tmpn;244mp_word *_W;245246/* alias for the digits of the modulus */247tmpn = n->dp;248249/* Alias for the columns set by an offset of ix */250_W = W + ix;251252/* inner loop */253for (iy = 0; iy < n->used; iy++) {254*_W++ += (mp_word)mu * (mp_word)*tmpn++;255}256}257258/* now fix carry for next digit, W[ix+1] */259W[ix + 1] += W[ix] >> (mp_word)DIGIT_BIT;260}261262/* now we have to propagate the carries and263* shift the words downward [all those least264* significant digits we zeroed].265*/266{267mp_digit *tmpx;268mp_word *_W, *_W1;269270/* nox fix rest of carries */271272/* alias for current word */273_W1 = W + ix;274275/* alias for next word, where the carry goes */276_W = W + ++ix;277278for (; ix <= ((n->used * 2) + 1); ix++) {279*_W++ += *_W1++ >> (mp_word)DIGIT_BIT;280}281282/* copy out, A = A/b**n283*284* The result is A/b**n but instead of converting from an285* array of mp_word to mp_digit than calling mp_rshd286* we just copy them in the right order287*/288289/* alias for destination word */290tmpx = x->dp;291292/* alias for shifted double precision result */293_W = W + n->used;294295for (ix = 0; ix < (n->used + 1); ix++) {296*tmpx++ = *_W++ & (mp_word)MP_MASK;297}298299/* zero oldused digits, if the input a was larger than300* m->used+1 we'll have to clear the digits301*/302for (; ix < olduse; ix++) {303*tmpx++ = 0;304}305}306307/* set the max used and clamp */308x->used = n->used + 1;309mp_clamp(x);310311/* if A >= m then A = A - m */312if (mp_cmp_mag(x, n) != MP_LT) {313return s_mp_sub(x, n, x);314}315return MP_OKAY;316}317318/* End: bn_fast_mp_montgomery_reduce.c */319320/* Start: bn_fast_s_mp_mul_digs.c */321322/* Fast (comba) multiplier323*324* This is the fast column-array [comba] multiplier. It is325* designed to compute the columns of the product first326* then handle the carries afterwards. This has the effect327* of making the nested loops that compute the columns very328* simple and schedulable on super-scalar processors.329*330* This has been modified to produce a variable number of331* digits of output so if say only a half-product is required332* you don't have to compute the upper half (a feature333* required for fast Barrett reduction).334*335* Based on Algorithm 14.12 on pp.595 of HAC.336*337*/338int fast_s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)339{340int olduse, res, pa, ix, iz;341mp_digit W[MP_WARRAY];342mp_word _W;343344/* grow the destination as required */345if (c->alloc < digs) {346if ((res = mp_grow(c, digs)) != MP_OKAY) {347return res;348}349}350351/* number of output digits to produce */352pa = MIN(digs, a->used + b->used);353354/* clear the carry */355_W = 0;356for (ix = 0; ix < pa; ix++) {357int tx, ty;358int iy;359mp_digit *tmpx, *tmpy;360361/* get offsets into the two bignums */362ty = MIN(b->used-1, ix);363tx = ix - ty;364365/* setup temp aliases */366tmpx = a->dp + tx;367tmpy = b->dp + ty;368369/* this is the number of times the loop will iterrate, essentially370while (tx++ < a->used && ty-- >= 0) { ... }371*/372iy = MIN(a->used-tx, ty+1);373374/* execute loop */375for (iz = 0; iz < iy; ++iz) {376_W += (mp_word)*tmpx++ * (mp_word)*tmpy--;377378}379380/* store term */381W[ix] = (mp_digit)_W & MP_MASK;382383/* make next carry */384_W = _W >> (mp_word)DIGIT_BIT;385}386387/* setup dest */388olduse = c->used;389c->used = pa;390391{392mp_digit *tmpc;393tmpc = c->dp;394for (ix = 0; ix < pa; ix++) {395/* now extract the previous digit [below the carry] */396*tmpc++ = W[ix];397}398399/* clear unused digits [that existed in the old copy of c] */400for (; ix < olduse; ix++) {401*tmpc++ = 0;402}403}404mp_clamp(c);405return MP_OKAY;406}407408/* End: bn_fast_s_mp_mul_digs.c */409410/* Start: bn_fast_s_mp_mul_high_digs.c */411412/* this is a modified version of fast_s_mul_digs that only produces413* output digits *above* digs. See the comments for fast_s_mul_digs414* to see how it works.415*416* This is used in the Barrett reduction since for one of the multiplications417* only the higher digits were needed. This essentially halves the work.418*419* Based on Algorithm 14.12 on pp.595 of HAC.420*/421int fast_s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)422{423int olduse, res, pa, ix, iz;424mp_digit W[MP_WARRAY];425mp_word _W;426427/* grow the destination as required */428pa = a->used + b->used;429if (c->alloc < pa) {430if ((res = mp_grow(c, pa)) != MP_OKAY) {431return res;432}433}434435/* number of output digits to produce */436pa = a->used + b->used;437_W = 0;438for (ix = digs; ix < pa; ix++) {439int tx, ty, iy;440mp_digit *tmpx, *tmpy;441442/* get offsets into the two bignums */443ty = MIN(b->used-1, ix);444tx = ix - ty;445446/* setup temp aliases */447tmpx = a->dp + tx;448tmpy = b->dp + ty;449450/* this is the number of times the loop will iterrate, essentially its451while (tx++ < a->used && ty-- >= 0) { ... }452*/453iy = MIN(a->used-tx, ty+1);454455/* execute loop */456for (iz = 0; iz < iy; iz++) {457_W += (mp_word)*tmpx++ * (mp_word)*tmpy--;458}459460/* store term */461W[ix] = (mp_digit)_W & MP_MASK;462463/* make next carry */464_W = _W >> (mp_word)DIGIT_BIT;465}466467/* setup dest */468olduse = c->used;469c->used = pa;470471{472mp_digit *tmpc;473474tmpc = c->dp + digs;475for (ix = digs; ix < pa; ix++) {476/* now extract the previous digit [below the carry] */477*tmpc++ = W[ix];478}479480/* clear unused digits [that existed in the old copy of c] */481for (; ix < olduse; ix++) {482*tmpc++ = 0;483}484}485mp_clamp(c);486return MP_OKAY;487}488489/* End: bn_fast_s_mp_mul_high_digs.c */490491/* Start: bn_fast_s_mp_sqr.c */492493/* the jist of squaring...494* you do like mult except the offset of the tmpx [one that495* starts closer to zero] can't equal the offset of tmpy.496* So basically you set up iy like before then you min it with497* (ty-tx) so that it never happens. You double all those498* you add in the inner loop499500After that loop you do the squares and add them in.501*/502503int fast_s_mp_sqr(const mp_int *a, mp_int *b)504{505int olduse, res, pa, ix, iz;506mp_digit W[MP_WARRAY], *tmpx;507mp_word W1;508509/* grow the destination as required */510pa = a->used + a->used;511if (b->alloc < pa) {512if ((res = mp_grow(b, pa)) != MP_OKAY) {513return res;514}515}516517/* number of output digits to produce */518W1 = 0;519for (ix = 0; ix < pa; ix++) {520int tx, ty, iy;521mp_word _W;522mp_digit *tmpy;523524/* clear counter */525_W = 0;526527/* get offsets into the two bignums */528ty = MIN(a->used-1, ix);529tx = ix - ty;530531/* setup temp aliases */532tmpx = a->dp + tx;533tmpy = a->dp + ty;534535/* this is the number of times the loop will iterrate, essentially536while (tx++ < a->used && ty-- >= 0) { ... }537*/538iy = MIN(a->used-tx, ty+1);539540/* now for squaring tx can never equal ty541* we halve the distance since they approach at a rate of 2x542* and we have to round because odd cases need to be executed543*/544iy = MIN(iy, ((ty-tx)+1)>>1);545546/* execute loop */547for (iz = 0; iz < iy; iz++) {548_W += (mp_word)*tmpx++ * (mp_word)*tmpy--;549}550551/* double the inner product and add carry */552_W = _W + _W + W1;553554/* even columns have the square term in them */555if (((unsigned)ix & 1u) == 0u) {556_W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1];557}558559/* store it */560W[ix] = _W & MP_MASK;561562/* make next carry */563W1 = _W >> (mp_word)DIGIT_BIT;564}565566/* setup dest */567olduse = b->used;568b->used = a->used+a->used;569570{571mp_digit *tmpb;572tmpb = b->dp;573for (ix = 0; ix < pa; ix++) {574*tmpb++ = W[ix] & MP_MASK;575}576577/* clear unused digits [that existed in the old copy of c] */578for (; ix < olduse; ix++) {579*tmpb++ = 0;580}581}582mp_clamp(b);583return MP_OKAY;584}585586/* End: bn_fast_s_mp_sqr.c */587588/* Start: bn_mp_2expt.c */589590/* computes a = 2**b591*592* Simple algorithm which zeroes the int, grows it then just sets one bit593* as required.594*/595int mp_2expt(mp_int *a, int b)596{597int res;598599/* zero a as per default */600mp_zero(a);601602/* grow a to accomodate the single bit */603if ((res = mp_grow(a, (b / DIGIT_BIT) + 1)) != MP_OKAY) {604return res;605}606607/* set the used count of where the bit will go */608a->used = (b / DIGIT_BIT) + 1;609610/* put the single bit in its place */611a->dp[b / DIGIT_BIT] = (mp_digit)1 << (mp_digit)(b % DIGIT_BIT);612613return MP_OKAY;614}615616/* End: bn_mp_2expt.c */617618/* Start: bn_mp_abs.c */619620/* b = |a|621*622* Simple function copies the input and fixes the sign to positive623*/624int mp_abs(const mp_int *a, mp_int *b)625{626int res;627628/* copy a to b */629if (a != b) {630if ((res = mp_copy(a, b)) != MP_OKAY) {631return res;632}633}634635/* force the sign of b to positive */636b->sign = MP_ZPOS;637638return MP_OKAY;639}640641/* End: bn_mp_abs.c */642643/* Start: bn_mp_add.c */644645/* high level addition (handles signs) */646int mp_add(const mp_int *a, const mp_int *b, mp_int *c)647{648int sa, sb, res;649650/* get sign of both inputs */651sa = a->sign;652sb = b->sign;653654/* handle two cases, not four */655if (sa == sb) {656/* both positive or both negative */657/* add their magnitudes, copy the sign */658c->sign = sa;659res = s_mp_add(a, b, c);660} else {661/* one positive, the other negative */662/* subtract the one with the greater magnitude from */663/* the one of the lesser magnitude. The result gets */664/* the sign of the one with the greater magnitude. */665if (mp_cmp_mag(a, b) == MP_LT) {666c->sign = sb;667res = s_mp_sub(b, a, c);668} else {669c->sign = sa;670res = s_mp_sub(a, b, c);671}672}673return res;674}675676/* End: bn_mp_add.c */677678/* Start: bn_mp_add_d.c */679680/* single digit addition */681int mp_add_d(const mp_int *a, mp_digit b, mp_int *c)682{683int res, ix, oldused;684mp_digit *tmpa, *tmpc, mu;685686/* grow c as required */687if (c->alloc < (a->used + 1)) {688if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {689return res;690}691}692693/* if a is negative and |a| >= b, call c = |a| - b */694if ((a->sign == MP_NEG) && ((a->used > 1) || (a->dp[0] >= b))) {695mp_int a_ = *a;696/* temporarily fix sign of a */697a_.sign = MP_ZPOS;698699/* c = |a| - b */700res = mp_sub_d(&a_, b, c);701702/* fix sign */703c->sign = MP_NEG;704705/* clamp */706mp_clamp(c);707708return res;709}710711/* old number of used digits in c */712oldused = c->used;713714/* source alias */715tmpa = a->dp;716717/* destination alias */718tmpc = c->dp;719720/* if a is positive */721if (a->sign == MP_ZPOS) {722/* add digit, after this we're propagating723* the carry.724*/725*tmpc = *tmpa++ + b;726mu = *tmpc >> DIGIT_BIT;727*tmpc++ &= MP_MASK;728729/* now handle rest of the digits */730for (ix = 1; ix < a->used; ix++) {731*tmpc = *tmpa++ + mu;732mu = *tmpc >> DIGIT_BIT;733*tmpc++ &= MP_MASK;734}735/* set final carry */736ix++;737*tmpc++ = mu;738739/* setup size */740c->used = a->used + 1;741} else {742/* a was negative and |a| < b */743c->used = 1;744745/* the result is a single digit */746if (a->used == 1) {747*tmpc++ = b - a->dp[0];748} else {749*tmpc++ = b;750}751752/* setup count so the clearing of oldused753* can fall through correctly754*/755ix = 1;756}757758/* sign always positive */759c->sign = MP_ZPOS;760761/* now zero to oldused */762while (ix++ < oldused) {763*tmpc++ = 0;764}765mp_clamp(c);766767return MP_OKAY;768}769770/* End: bn_mp_add_d.c */771772/* Start: bn_mp_addmod.c */773774/* d = a + b (mod c) */775int mp_addmod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)776{777int res;778mp_int t;779780if ((res = mp_init(&t)) != MP_OKAY) {781return res;782}783784if ((res = mp_add(a, b, &t)) != MP_OKAY) {785mp_clear(&t);786return res;787}788res = mp_mod(&t, c, d);789mp_clear(&t);790return res;791}792793/* End: bn_mp_addmod.c */794795/* Start: bn_mp_and.c */796797/* AND two ints together */798int mp_and(const mp_int *a, const mp_int *b, mp_int *c)799{800int res, ix, px;801mp_int t;802const mp_int *x;803804if (a->used > b->used) {805if ((res = mp_init_copy(&t, a)) != MP_OKAY) {806return res;807}808px = b->used;809x = b;810} else {811if ((res = mp_init_copy(&t, b)) != MP_OKAY) {812return res;813}814px = a->used;815x = a;816}817818for (ix = 0; ix < px; ix++) {819t.dp[ix] &= x->dp[ix];820}821822/* zero digits above the last from the smallest mp_int */823for (; ix < t.used; ix++) {824t.dp[ix] = 0;825}826827mp_clamp(&t);828mp_exch(c, &t);829mp_clear(&t);830return MP_OKAY;831}832833/* End: bn_mp_and.c */834835/* Start: bn_mp_clamp.c */836837/* trim unused digits838*839* This is used to ensure that leading zero digits are840* trimed and the leading "used" digit will be non-zero841* Typically very fast. Also fixes the sign if there842* are no more leading digits843*/844void mp_clamp(mp_int *a)845{846/* decrease used while the most significant digit is847* zero.848*/849while ((a->used > 0) && (a->dp[a->used - 1] == 0u)) {850--(a->used);851}852853/* reset the sign flag if used == 0 */854if (a->used == 0) {855a->sign = MP_ZPOS;856}857}858859/* End: bn_mp_clamp.c */860861/* Start: bn_mp_clear.c */862863/* clear one (frees) */864void mp_clear(mp_int *a)865{866int i;867868/* only do anything if a hasn't been freed previously */869if (a->dp != NULL) {870/* first zero the digits */871for (i = 0; i < a->used; i++) {872a->dp[i] = 0;873}874875/* free ram */876XFREE(a->dp);877878/* reset members to make debugging easier */879a->dp = NULL;880a->alloc = a->used = 0;881a->sign = MP_ZPOS;882}883}884885/* End: bn_mp_clear.c */886887/* Start: bn_mp_clear_multi.c */888889void mp_clear_multi(mp_int *mp, ...)890{891mp_int *next_mp = mp;892va_list args;893va_start(args, mp);894while (next_mp != NULL) {895mp_clear(next_mp);896next_mp = va_arg(args, mp_int *);897}898va_end(args);899}900901/* End: bn_mp_clear_multi.c */902903/* Start: bn_mp_cmp.c */904905/* compare two ints (signed)*/906int mp_cmp(const mp_int *a, const mp_int *b)907{908/* compare based on sign */909if (a->sign != b->sign) {910if (a->sign == MP_NEG) {911return MP_LT;912} else {913return MP_GT;914}915}916917/* compare digits */918if (a->sign == MP_NEG) {919/* if negative compare opposite direction */920return mp_cmp_mag(b, a);921} else {922return mp_cmp_mag(a, b);923}924}925926/* End: bn_mp_cmp.c */927928/* Start: bn_mp_cmp_d.c */929930/* compare a digit */931int mp_cmp_d(const mp_int *a, mp_digit b)932{933/* compare based on sign */934if (a->sign == MP_NEG) {935return MP_LT;936}937938/* compare based on magnitude */939if (a->used > 1) {940return MP_GT;941}942943/* compare the only digit of a to b */944if (a->dp[0] > b) {945return MP_GT;946} else if (a->dp[0] < b) {947return MP_LT;948} else {949return MP_EQ;950}951}952953/* End: bn_mp_cmp_d.c */954955/* Start: bn_mp_cmp_mag.c */956957/* compare maginitude of two ints (unsigned) */958int mp_cmp_mag(const mp_int *a, const mp_int *b)959{960int n;961mp_digit *tmpa, *tmpb;962963/* compare based on # of non-zero digits */964if (a->used > b->used) {965return MP_GT;966}967968if (a->used < b->used) {969return MP_LT;970}971972/* alias for a */973tmpa = a->dp + (a->used - 1);974975/* alias for b */976tmpb = b->dp + (a->used - 1);977978/* compare based on digits */979for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {980if (*tmpa > *tmpb) {981return MP_GT;982}983984if (*tmpa < *tmpb) {985return MP_LT;986}987}988return MP_EQ;989}990991/* End: bn_mp_cmp_mag.c */992993/* Start: bn_mp_cnt_lsb.c */994995static const int lnz[16] = {9964, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0997};998999/* Counts the number of lsbs which are zero before the first zero bit */1000int mp_cnt_lsb(const mp_int *a)1001{1002int x;1003mp_digit q, qq;10041005/* easy out */1006if (mp_iszero(a) == MP_YES) {1007return 0;1008}10091010/* scan lower digits until non-zero */1011for (x = 0; (x < a->used) && (a->dp[x] == 0u); x++) {}1012q = a->dp[x];1013x *= DIGIT_BIT;10141015/* now scan this digit until a 1 is found */1016if ((q & 1u) == 0u) {1017do {1018qq = q & 15u;1019x += lnz[qq];1020q >>= 4;1021} while (qq == 0u);1022}1023return x;1024}10251026/* End: bn_mp_cnt_lsb.c */10271028/* Start: bn_mp_complement.c */10291030/* b = ~a */1031int mp_complement(const mp_int *a, mp_int *b)1032{1033int res = mp_neg(a, b);1034return (res == MP_OKAY) ? mp_sub_d(b, 1uL, b) : res;1035}10361037/* End: bn_mp_complement.c */10381039/* Start: bn_mp_copy.c */10401041/* copy, b = a */1042int mp_copy(const mp_int *a, mp_int *b)1043{1044int res, n;10451046/* if dst == src do nothing */1047if (a == b) {1048return MP_OKAY;1049}10501051/* grow dest */1052if (b->alloc < a->used) {1053if ((res = mp_grow(b, a->used)) != MP_OKAY) {1054return res;1055}1056}10571058/* zero b and copy the parameters over */1059{1060mp_digit *tmpa, *tmpb;10611062/* pointer aliases */10631064/* source */1065tmpa = a->dp;10661067/* destination */1068tmpb = b->dp;10691070/* copy all the digits */1071for (n = 0; n < a->used; n++) {1072*tmpb++ = *tmpa++;1073}10741075/* clear high digits */1076for (; n < b->used; n++) {1077*tmpb++ = 0;1078}1079}10801081/* copy used count and sign */1082b->used = a->used;1083b->sign = a->sign;1084return MP_OKAY;1085}10861087/* End: bn_mp_copy.c */10881089/* Start: bn_mp_count_bits.c */10901091/* returns the number of bits in an int */1092int mp_count_bits(const mp_int *a)1093{1094int r;1095mp_digit q;10961097/* shortcut */1098if (a->used == 0) {1099return 0;1100}11011102/* get number of digits and add that */1103r = (a->used - 1) * DIGIT_BIT;11041105/* take the last digit and count the bits in it */1106q = a->dp[a->used - 1];1107while (q > (mp_digit)0) {1108++r;1109q >>= (mp_digit)1;1110}1111return r;1112}11131114/* End: bn_mp_count_bits.c */11151116/* Start: bn_mp_div.c */11171118/* integer signed division.1119* c*b + d == a [e.g. a/b, c=quotient, d=remainder]1120* HAC pp.598 Algorithm 14.201121*1122* Note that the description in HAC is horribly1123* incomplete. For example, it doesn't consider1124* the case where digits are removed from 'x' in1125* the inner loop. It also doesn't consider the1126* case that y has fewer than three digits, etc..1127*1128* The overall algorithm is as described as1129* 14.20 from HAC but fixed to treat these cases.1130*/1131int mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d)1132{1133mp_int q, x, y, t1, t2;1134int res, n, t, i, norm, neg;11351136/* is divisor zero ? */1137if (mp_iszero(b) == MP_YES) {1138return MP_VAL;1139}11401141/* if a < b then q=0, r = a */1142if (mp_cmp_mag(a, b) == MP_LT) {1143if (d != NULL) {1144res = mp_copy(a, d);1145} else {1146res = MP_OKAY;1147}1148if (c != NULL) {1149mp_zero(c);1150}1151return res;1152}11531154if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) {1155return res;1156}1157q.used = a->used + 2;11581159if ((res = mp_init(&t1)) != MP_OKAY) {1160goto LBL_Q;1161}11621163if ((res = mp_init(&t2)) != MP_OKAY) {1164goto LBL_T1;1165}11661167if ((res = mp_init_copy(&x, a)) != MP_OKAY) {1168goto LBL_T2;1169}11701171if ((res = mp_init_copy(&y, b)) != MP_OKAY) {1172goto LBL_X;1173}11741175/* fix the sign */1176neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;1177x.sign = y.sign = MP_ZPOS;11781179/* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */1180norm = mp_count_bits(&y) % DIGIT_BIT;1181if (norm < (DIGIT_BIT - 1)) {1182norm = (DIGIT_BIT - 1) - norm;1183if ((res = mp_mul_2d(&x, norm, &x)) != MP_OKAY) {1184goto LBL_Y;1185}1186if ((res = mp_mul_2d(&y, norm, &y)) != MP_OKAY) {1187goto LBL_Y;1188}1189} else {1190norm = 0;1191}11921193/* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */1194n = x.used - 1;1195t = y.used - 1;11961197/* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */1198if ((res = mp_lshd(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */1199goto LBL_Y;1200}12011202while (mp_cmp(&x, &y) != MP_LT) {1203++(q.dp[n - t]);1204if ((res = mp_sub(&x, &y, &x)) != MP_OKAY) {1205goto LBL_Y;1206}1207}12081209/* reset y by shifting it back down */1210mp_rshd(&y, n - t);12111212/* step 3. for i from n down to (t + 1) */1213for (i = n; i >= (t + 1); i--) {1214if (i > x.used) {1215continue;1216}12171218/* step 3.1 if xi == yt then set q{i-t-1} to b-1,1219* otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */1220if (x.dp[i] == y.dp[t]) {1221q.dp[(i - t) - 1] = ((mp_digit)1 << (mp_digit)DIGIT_BIT) - (mp_digit)1;1222} else {1223mp_word tmp;1224tmp = (mp_word)x.dp[i] << (mp_word)DIGIT_BIT;1225tmp |= (mp_word)x.dp[i - 1];1226tmp /= (mp_word)y.dp[t];1227if (tmp > (mp_word)MP_MASK) {1228tmp = MP_MASK;1229}1230q.dp[(i - t) - 1] = (mp_digit)(tmp & (mp_word)MP_MASK);1231}12321233/* while (q{i-t-1} * (yt * b + y{t-1})) >1234xi * b**2 + xi-1 * b + xi-212351236do q{i-t-1} -= 1;1237*/1238q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] + 1uL) & (mp_digit)MP_MASK;1239do {1240q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & (mp_digit)MP_MASK;12411242/* find left hand */1243mp_zero(&t1);1244t1.dp[0] = ((t - 1) < 0) ? 0u : y.dp[t - 1];1245t1.dp[1] = y.dp[t];1246t1.used = 2;1247if ((res = mp_mul_d(&t1, q.dp[(i - t) - 1], &t1)) != MP_OKAY) {1248goto LBL_Y;1249}12501251/* find right hand */1252t2.dp[0] = ((i - 2) < 0) ? 0u : x.dp[i - 2];1253t2.dp[1] = ((i - 1) < 0) ? 0u : x.dp[i - 1];1254t2.dp[2] = x.dp[i];1255t2.used = 3;1256} while (mp_cmp_mag(&t1, &t2) == MP_GT);12571258/* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */1259if ((res = mp_mul_d(&y, q.dp[(i - t) - 1], &t1)) != MP_OKAY) {1260goto LBL_Y;1261}12621263if ((res = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) {1264goto LBL_Y;1265}12661267if ((res = mp_sub(&x, &t1, &x)) != MP_OKAY) {1268goto LBL_Y;1269}12701271/* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */1272if (x.sign == MP_NEG) {1273if ((res = mp_copy(&y, &t1)) != MP_OKAY) {1274goto LBL_Y;1275}1276if ((res = mp_lshd(&t1, (i - t) - 1)) != MP_OKAY) {1277goto LBL_Y;1278}1279if ((res = mp_add(&x, &t1, &x)) != MP_OKAY) {1280goto LBL_Y;1281}12821283q.dp[(i - t) - 1] = (q.dp[(i - t) - 1] - 1uL) & MP_MASK;1284}1285}12861287/* now q is the quotient and x is the remainder1288* [which we have to normalize]1289*/12901291/* get sign before writing to c */1292x.sign = (x.used == 0) ? MP_ZPOS : a->sign;12931294if (c != NULL) {1295mp_clamp(&q);1296mp_exch(&q, c);1297c->sign = neg;1298}12991300if (d != NULL) {1301if ((res = mp_div_2d(&x, norm, &x, NULL)) != MP_OKAY) {1302goto LBL_Y;1303}1304mp_exch(&x, d);1305}13061307res = MP_OKAY;13081309LBL_Y:1310mp_clear(&y);1311LBL_X:1312mp_clear(&x);1313LBL_T2:1314mp_clear(&t2);1315LBL_T1:1316mp_clear(&t1);1317LBL_Q:1318mp_clear(&q);1319return res;1320}13211322/* End: bn_mp_div.c */13231324/* Start: bn_mp_div_2.c */13251326/* b = a/2 */1327int mp_div_2(const mp_int *a, mp_int *b)1328{1329int x, res, oldused;13301331/* copy */1332if (b->alloc < a->used) {1333if ((res = mp_grow(b, a->used)) != MP_OKAY) {1334return res;1335}1336}13371338oldused = b->used;1339b->used = a->used;1340{1341mp_digit r, rr, *tmpa, *tmpb;13421343/* source alias */1344tmpa = a->dp + b->used - 1;13451346/* dest alias */1347tmpb = b->dp + b->used - 1;13481349/* carry */1350r = 0;1351for (x = b->used - 1; x >= 0; x--) {1352/* get the carry for the next iteration */1353rr = *tmpa & 1u;13541355/* shift the current digit, add in carry and store */1356*tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));13571358/* forward carry to next iteration */1359r = rr;1360}13611362/* zero excess digits */1363tmpb = b->dp + b->used;1364for (x = b->used; x < oldused; x++) {1365*tmpb++ = 0;1366}1367}1368b->sign = a->sign;1369mp_clamp(b);1370return MP_OKAY;1371}13721373/* End: bn_mp_div_2.c */13741375/* Start: bn_mp_div_2d.c */13761377/* shift right by a certain bit count (store quotient in c, optional remainder in d) */1378int mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d)1379{1380mp_digit D, r, rr;1381int x, res;13821383/* if the shift count is <= 0 then we do no work */1384if (b <= 0) {1385res = mp_copy(a, c);1386if (d != NULL) {1387mp_zero(d);1388}1389return res;1390}13911392/* copy */1393if ((res = mp_copy(a, c)) != MP_OKAY) {1394return res;1395}1396/* 'a' should not be used after here - it might be the same as d */13971398/* get the remainder */1399if (d != NULL) {1400if ((res = mp_mod_2d(a, b, d)) != MP_OKAY) {1401return res;1402}1403}14041405/* shift by as many digits in the bit count */1406if (b >= DIGIT_BIT) {1407mp_rshd(c, b / DIGIT_BIT);1408}14091410/* shift any bit count < DIGIT_BIT */1411D = (mp_digit)(b % DIGIT_BIT);1412if (D != 0u) {1413mp_digit *tmpc, mask, shift;14141415/* mask */1416mask = ((mp_digit)1 << D) - 1uL;14171418/* shift for lsb */1419shift = (mp_digit)DIGIT_BIT - D;14201421/* alias */1422tmpc = c->dp + (c->used - 1);14231424/* carry */1425r = 0;1426for (x = c->used - 1; x >= 0; x--) {1427/* get the lower bits of this word in a temp */1428rr = *tmpc & mask;14291430/* shift the current word and mix in the carry bits from the previous word */1431*tmpc = (*tmpc >> D) | (r << shift);1432--tmpc;14331434/* set the carry to the carry bits of the current word found above */1435r = rr;1436}1437}1438mp_clamp(c);1439return MP_OKAY;1440}14411442/* End: bn_mp_div_2d.c */14431444/* Start: bn_mp_div_3.c */14451446/* divide by three (based on routine from MPI and the GMP manual) */1447int mp_div_3(const mp_int *a, mp_int *c, mp_digit *d)1448{1449mp_int q;1450mp_word w, t;1451mp_digit b;1452int res, ix;14531454/* b = 2**DIGIT_BIT / 3 */1455b = ((mp_word)1 << (mp_word)DIGIT_BIT) / (mp_word)3;14561457if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {1458return res;1459}14601461q.used = a->used;1462q.sign = a->sign;1463w = 0;1464for (ix = a->used - 1; ix >= 0; ix--) {1465w = (w << (mp_word)DIGIT_BIT) | (mp_word)a->dp[ix];14661467if (w >= 3u) {1468/* multiply w by [1/3] */1469t = (w * (mp_word)b) >> (mp_word)DIGIT_BIT;14701471/* now subtract 3 * [w/3] from w, to get the remainder */1472w -= t+t+t;14731474/* fixup the remainder as required since1475* the optimization is not exact.1476*/1477while (w >= 3u) {1478t += 1u;1479w -= 3u;1480}1481} else {1482t = 0;1483}1484q.dp[ix] = (mp_digit)t;1485}14861487/* [optional] store the remainder */1488if (d != NULL) {1489*d = (mp_digit)w;1490}14911492/* [optional] store the quotient */1493if (c != NULL) {1494mp_clamp(&q);1495mp_exch(&q, c);1496}1497mp_clear(&q);14981499return res;1500}15011502/* End: bn_mp_div_3.c */15031504/* Start: bn_mp_div_d.c */15051506static int s_is_power_of_two(mp_digit b, int *p)1507{1508int x;15091510/* fast return if no power of two */1511if ((b == 0u) || ((b & (b-1u)) != 0u)) {1512return 0;1513}15141515for (x = 0; x < DIGIT_BIT; x++) {1516if (b == ((mp_digit)1<<(mp_digit)x)) {1517*p = x;1518return 1;1519}1520}1521return 0;1522}15231524/* single digit division (based on routine from MPI) */1525int mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d)1526{1527mp_int q;1528mp_word w;1529mp_digit t;1530int res, ix;15311532/* cannot divide by zero */1533if (b == 0u) {1534return MP_VAL;1535}15361537/* quick outs */1538if ((b == 1u) || (mp_iszero(a) == MP_YES)) {1539if (d != NULL) {1540*d = 0;1541}1542if (c != NULL) {1543return mp_copy(a, c);1544}1545return MP_OKAY;1546}15471548/* power of two ? */1549if (s_is_power_of_two(b, &ix) == 1) {1550if (d != NULL) {1551*d = a->dp[0] & (((mp_digit)1<<(mp_digit)ix) - 1uL);1552}1553if (c != NULL) {1554return mp_div_2d(a, ix, c, NULL);1555}1556return MP_OKAY;1557}15581559/* three? */1560if (b == 3u) {1561return mp_div_3(a, c, d);1562}15631564/* no easy answer [c'est la vie]. Just division */1565if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {1566return res;1567}15681569q.used = a->used;1570q.sign = a->sign;1571w = 0;1572for (ix = a->used - 1; ix >= 0; ix--) {1573w = (w << (mp_word)DIGIT_BIT) | (mp_word)a->dp[ix];15741575if (w >= b) {1576t = (mp_digit)(w / b);1577w -= (mp_word)t * (mp_word)b;1578} else {1579t = 0;1580}1581q.dp[ix] = t;1582}15831584if (d != NULL) {1585*d = (mp_digit)w;1586}15871588if (c != NULL) {1589mp_clamp(&q);1590mp_exch(&q, c);1591}1592mp_clear(&q);15931594return res;1595}15961597/* End: bn_mp_div_d.c */15981599/* Start: bn_mp_dr_is_modulus.c */16001601/* determines if a number is a valid DR modulus */1602int mp_dr_is_modulus(const mp_int *a)1603{1604int ix;16051606/* must be at least two digits */1607if (a->used < 2) {1608return 0;1609}16101611/* must be of the form b**k - a [a <= b] so all1612* but the first digit must be equal to -1 (mod b).1613*/1614for (ix = 1; ix < a->used; ix++) {1615if (a->dp[ix] != MP_MASK) {1616return 0;1617}1618}1619return 1;1620}16211622/* End: bn_mp_dr_is_modulus.c */16231624/* Start: bn_mp_dr_reduce.c */16251626/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.1627*1628* Based on algorithm from the paper1629*1630* "Generating Efficient Primes for Discrete Log Cryptosystems"1631* Chae Hoon Lim, Pil Joong Lee,1632* POSTECH Information Research Laboratories1633*1634* The modulus must be of a special format [see manual]1635*1636* Has been modified to use algorithm 7.10 from the LTM book instead1637*1638* Input x must be in the range 0 <= x <= (n-1)**21639*/1640int mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k)1641{1642int err, i, m;1643mp_word r;1644mp_digit mu, *tmpx1, *tmpx2;16451646/* m = digits in modulus */1647m = n->used;16481649/* ensure that "x" has at least 2m digits */1650if (x->alloc < (m + m)) {1651if ((err = mp_grow(x, m + m)) != MP_OKAY) {1652return err;1653}1654}16551656/* top of loop, this is where the code resumes if1657* another reduction pass is required.1658*/1659top:1660/* aliases for digits */1661/* alias for lower half of x */1662tmpx1 = x->dp;16631664/* alias for upper half of x, or x/B**m */1665tmpx2 = x->dp + m;16661667/* set carry to zero */1668mu = 0;16691670/* compute (x mod B**m) + k * [x/B**m] inline and inplace */1671for (i = 0; i < m; i++) {1672r = ((mp_word)*tmpx2++ * (mp_word)k) + *tmpx1 + mu;1673*tmpx1++ = (mp_digit)(r & MP_MASK);1674mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));1675}16761677/* set final carry */1678*tmpx1++ = mu;16791680/* zero words above m */1681for (i = m + 1; i < x->used; i++) {1682*tmpx1++ = 0;1683}16841685/* clamp, sub and return */1686mp_clamp(x);16871688/* if x >= n then subtract and reduce again1689* Each successive "recursion" makes the input smaller and smaller.1690*/1691if (mp_cmp_mag(x, n) != MP_LT) {1692if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {1693return err;1694}1695goto top;1696}1697return MP_OKAY;1698}16991700/* End: bn_mp_dr_reduce.c */17011702/* Start: bn_mp_dr_setup.c */1703#include "tommath_private.h"17041705/* determines the setup value */1706void mp_dr_setup(const mp_int *a, mp_digit *d)1707{1708/* the casts are required if DIGIT_BIT is one less than1709* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]1710*/1711*d = (mp_digit)(((mp_word)1 << (mp_word)DIGIT_BIT) - (mp_word)a->dp[0]);1712}17131714/* End: bn_mp_dr_setup.c */17151716/* Start: bn_mp_exch.c */17171718/* swap the elements of two integers, for cases where you can't simply swap the1719* mp_int pointers around1720*/1721void mp_exch(mp_int *a, mp_int *b)1722{1723mp_int t;17241725t = *a;1726*a = *b;1727*b = t;1728}17291730/* End: bn_mp_exch.c */17311732/* Start: bn_mp_export.c */17331734/* based on gmp's mpz_export.1735* see http://gmplib.org/manual/Integer-Import-and-Export.html1736*/1737int mp_export(void *rop, size_t *countp, int order, size_t size,1738int endian, size_t nails, const mp_int *op)1739{1740int result;1741size_t odd_nails, nail_bytes, i, j, bits, count;1742unsigned char odd_nail_mask;17431744mp_int t;17451746if ((result = mp_init_copy(&t, op)) != MP_OKAY) {1747return result;1748}17491750if (endian == 0) {1751union {1752unsigned int i;1753char c[4];1754} lint;1755lint.i = 0x01020304;17561757endian = (lint.c[0] == '\x04') ? -1 : 1;1758}17591760odd_nails = (nails % 8u);1761odd_nail_mask = 0xff;1762for (i = 0; i < odd_nails; ++i) {1763odd_nail_mask ^= (unsigned char)(1u << (7u - i));1764}1765nail_bytes = nails / 8u;17661767bits = (size_t)mp_count_bits(&t);1768count = (bits / ((size * 8u) - nails)) + (((bits % ((size * 8u) - nails)) != 0u) ? 1u : 0u);17691770for (i = 0; i < count; ++i) {1771for (j = 0; j < size; ++j) {1772unsigned char *byte = (unsigned char *)rop +1773(((order == -1) ? i : ((count - 1u) - i)) * size) +1774((endian == -1) ? j : ((size - 1u) - j));17751776if (j >= (size - nail_bytes)) {1777*byte = 0;1778continue;1779}17801781*byte = (unsigned char)((j == ((size - nail_bytes) - 1u)) ? (t.dp[0] & odd_nail_mask) : (t.dp[0] & 0xFFuL));17821783if ((result = mp_div_2d(&t, (j == ((size - nail_bytes) - 1u)) ? (int)(8u - odd_nails) : 8, &t, NULL)) != MP_OKAY) {1784mp_clear(&t);1785return result;1786}1787}1788}17891790mp_clear(&t);17911792if (countp != NULL) {1793*countp = count;1794}17951796return MP_OKAY;1797}17981799/* End: bn_mp_export.c */18001801/* Start: bn_mp_expt_d.c */18021803/* wrapper function for mp_expt_d_ex() */1804int mp_expt_d(const mp_int *a, mp_digit b, mp_int *c)1805{1806return mp_expt_d_ex(a, b, c, 0);1807}18081809/* End: bn_mp_expt_d.c */18101811/* Start: bn_mp_expt_d_ex.c */18121813/* calculate c = a**b using a square-multiply algorithm */1814int mp_expt_d_ex(const mp_int *a, mp_digit b, mp_int *c, int fast)1815{1816int res;1817unsigned int x;18181819mp_int g;18201821if ((res = mp_init_copy(&g, a)) != MP_OKAY) {1822return res;1823}18241825/* set initial result */1826mp_set(c, 1uL);18271828if (fast != 0) {1829while (b > 0u) {1830/* if the bit is set multiply */1831if ((b & 1u) != 0u) {1832if ((res = mp_mul(c, &g, c)) != MP_OKAY) {1833mp_clear(&g);1834return res;1835}1836}18371838/* square */1839if (b > 1u) {1840if ((res = mp_sqr(&g, &g)) != MP_OKAY) {1841mp_clear(&g);1842return res;1843}1844}18451846/* shift to next bit */1847b >>= 1;1848}1849} else {1850for (x = 0; x < (unsigned)DIGIT_BIT; x++) {1851/* square */1852if ((res = mp_sqr(c, c)) != MP_OKAY) {1853mp_clear(&g);1854return res;1855}18561857/* if the bit is set multiply */1858if ((b & ((mp_digit)1 << (DIGIT_BIT - 1))) != 0u) {1859if ((res = mp_mul(c, &g, c)) != MP_OKAY) {1860mp_clear(&g);1861return res;1862}1863}18641865/* shift to next bit */1866b <<= 1;1867}1868} /* if ... else */18691870mp_clear(&g);1871return MP_OKAY;1872}18731874/* End: bn_mp_expt_d_ex.c */18751876/* Start: bn_mp_exptmod.c */18771878/* this is a shell function that calls either the normal or Montgomery1879* exptmod functions. Originally the call to the montgomery code was1880* embedded in the normal function but that wasted alot of stack space1881* for nothing (since 99% of the time the Montgomery code would be called)1882*/1883int mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y)1884{1885int dr;18861887/* modulus P must be positive */1888if (P->sign == MP_NEG) {1889return MP_VAL;1890}18911892/* if exponent X is negative we have to recurse */1893if (X->sign == MP_NEG) {1894mp_int tmpG, tmpX;1895int err;18961897/* first compute 1/G mod P */1898if ((err = mp_init(&tmpG)) != MP_OKAY) {1899return err;1900}1901if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {1902mp_clear(&tmpG);1903return err;1904}19051906/* now get |X| */1907if ((err = mp_init(&tmpX)) != MP_OKAY) {1908mp_clear(&tmpG);1909return err;1910}1911if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {1912mp_clear_multi(&tmpG, &tmpX, NULL);1913return err;1914}19151916/* and now compute (1/G)**|X| instead of G**X [X < 0] */1917err = mp_exptmod(&tmpG, &tmpX, P, Y);1918mp_clear_multi(&tmpG, &tmpX, NULL);1919return err;1920}19211922/* modified diminished radix reduction */1923if (mp_reduce_is_2k_l(P) == MP_YES) {1924return s_mp_exptmod(G, X, P, Y, 1);1925}19261927/* is it a DR modulus? */1928dr = mp_dr_is_modulus(P);19291930/* if not, is it a unrestricted DR modulus? */1931if (dr == 0) {1932dr = mp_reduce_is_2k(P) << 1;1933}19341935/* if the modulus is odd or dr != 0 use the montgomery method */1936if ((mp_isodd(P) == MP_YES) || (dr != 0)) {1937return mp_exptmod_fast(G, X, P, Y, dr);1938} else {1939/* otherwise use the generic Barrett reduction technique */1940return s_mp_exptmod(G, X, P, Y, 0);1941}1942}19431944/* End: bn_mp_exptmod.c */19451946/* Start: bn_mp_exptmod_fast.c */19471948/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.851949*1950* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.1951* The value of k changes based on the size of the exponent.1952*1953* Uses Montgomery or Diminished Radix reduction [whichever appropriate]1954*/19551956#ifdef MP_LOW_MEM1957# define TAB_SIZE 321958#else1959# define TAB_SIZE 2561960#endif19611962int mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)1963{1964mp_int M[TAB_SIZE], res;1965mp_digit buf, mp;1966int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;19671968/* use a pointer to the reduction algorithm. This allows us to use1969* one of many reduction algorithms without modding the guts of1970* the code with if statements everywhere.1971*/1972int (*redux)(mp_int *x, const mp_int *n, mp_digit rho);19731974/* find window size */1975x = mp_count_bits(X);1976if (x <= 7) {1977winsize = 2;1978} else if (x <= 36) {1979winsize = 3;1980} else if (x <= 140) {1981winsize = 4;1982} else if (x <= 450) {1983winsize = 5;1984} else if (x <= 1303) {1985winsize = 6;1986} else if (x <= 3529) {1987winsize = 7;1988} else {1989winsize = 8;1990}19911992#ifdef MP_LOW_MEM1993if (winsize > 5) {1994winsize = 5;1995}1996#endif19971998/* init M array */1999/* init first cell */2000if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {2001return err;2002}20032004/* now init the second half of the array */2005for (x = 1<<(winsize-1); x < (1 << winsize); x++) {2006if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {2007for (y = 1<<(winsize-1); y < x; y++) {2008mp_clear(&M[y]);2009}2010mp_clear(&M[1]);2011return err;2012}2013}20142015/* determine and setup reduction code */2016if (redmode == 0) {2017/* now setup montgomery */2018if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {2019goto LBL_M;2020}20212022/* automatically pick the comba one if available (saves quite a few calls/ifs) */2023if ((((P->used * 2) + 1) < (int)MP_WARRAY) &&2024(P->used < (1 << ((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))))) {2025redux = fast_mp_montgomery_reduce;2026} else2027{2028/* use slower baseline Montgomery method */2029redux = mp_montgomery_reduce;2030}2031} else if (redmode == 1) {2032/* setup DR reduction for moduli of the form B**k - b */2033mp_dr_setup(P, &mp);2034redux = mp_dr_reduce;2035} else {2036/* setup DR reduction for moduli of the form 2**k - b */2037if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {2038goto LBL_M;2039}2040redux = mp_reduce_2k;2041}20422043/* setup result */2044if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY) {2045goto LBL_M;2046}20472048/* create M table2049*20502051*2052* The first half of the table is not computed though accept for M[0] and M[1]2053*/20542055if (redmode == 0) {2056/* now we need R mod m */2057if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) {2058goto LBL_RES;2059}20602061/* now set M[1] to G * R mod m */2062if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY) {2063goto LBL_RES;2064}2065} else {2066mp_set(&res, 1uL);2067if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {2068goto LBL_RES;2069}2070}20712072/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */2073if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {2074goto LBL_RES;2075}20762077for (x = 0; x < (winsize - 1); x++) {2078if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {2079goto LBL_RES;2080}2081if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) {2082goto LBL_RES;2083}2084}20852086/* create upper table */2087for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {2088if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {2089goto LBL_RES;2090}2091if ((err = redux(&M[x], P, mp)) != MP_OKAY) {2092goto LBL_RES;2093}2094}20952096/* set initial mode and bit cnt */2097mode = 0;2098bitcnt = 1;2099buf = 0;2100digidx = X->used - 1;2101bitcpy = 0;2102bitbuf = 0;21032104for (;;) {2105/* grab next digit as required */2106if (--bitcnt == 0) {2107/* if digidx == -1 we are out of digits so break */2108if (digidx == -1) {2109break;2110}2111/* read next digit and reset bitcnt */2112buf = X->dp[digidx--];2113bitcnt = (int)DIGIT_BIT;2114}21152116/* grab the next msb from the exponent */2117y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;2118buf <<= (mp_digit)1;21192120/* if the bit is zero and mode == 0 then we ignore it2121* These represent the leading zero bits before the first 1 bit2122* in the exponent. Technically this opt is not required but it2123* does lower the # of trivial squaring/reductions used2124*/2125if ((mode == 0) && (y == 0)) {2126continue;2127}21282129/* if the bit is zero and mode == 1 then we square */2130if ((mode == 1) && (y == 0)) {2131if ((err = mp_sqr(&res, &res)) != MP_OKAY) {2132goto LBL_RES;2133}2134if ((err = redux(&res, P, mp)) != MP_OKAY) {2135goto LBL_RES;2136}2137continue;2138}21392140/* else we add it to the window */2141bitbuf |= (y << (winsize - ++bitcpy));2142mode = 2;21432144if (bitcpy == winsize) {2145/* ok window is filled so square as required and multiply */2146/* square first */2147for (x = 0; x < winsize; x++) {2148if ((err = mp_sqr(&res, &res)) != MP_OKAY) {2149goto LBL_RES;2150}2151if ((err = redux(&res, P, mp)) != MP_OKAY) {2152goto LBL_RES;2153}2154}21552156/* then multiply */2157if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {2158goto LBL_RES;2159}2160if ((err = redux(&res, P, mp)) != MP_OKAY) {2161goto LBL_RES;2162}21632164/* empty window and reset */2165bitcpy = 0;2166bitbuf = 0;2167mode = 1;2168}2169}21702171/* if bits remain then square/multiply */2172if ((mode == 2) && (bitcpy > 0)) {2173/* square then multiply if the bit is set */2174for (x = 0; x < bitcpy; x++) {2175if ((err = mp_sqr(&res, &res)) != MP_OKAY) {2176goto LBL_RES;2177}2178if ((err = redux(&res, P, mp)) != MP_OKAY) {2179goto LBL_RES;2180}21812182/* get next bit of the window */2183bitbuf <<= 1;2184if ((bitbuf & (1 << winsize)) != 0) {2185/* then multiply */2186if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {2187goto LBL_RES;2188}2189if ((err = redux(&res, P, mp)) != MP_OKAY) {2190goto LBL_RES;2191}2192}2193}2194}21952196if (redmode == 0) {2197/* fixup result if Montgomery reduction is used2198* recall that any value in a Montgomery system is2199* actually multiplied by R mod n. So we have2200* to reduce one more time to cancel out the factor2201* of R.2202*/2203if ((err = redux(&res, P, mp)) != MP_OKAY) {2204goto LBL_RES;2205}2206}22072208/* swap res with Y */2209mp_exch(&res, Y);2210err = MP_OKAY;2211LBL_RES:2212mp_clear(&res);2213LBL_M:2214mp_clear(&M[1]);2215for (x = 1<<(winsize-1); x < (1 << winsize); x++) {2216mp_clear(&M[x]);2217}2218return err;2219}22202221/* End: bn_mp_exptmod_fast.c */22222223/* Start: bn_mp_exteuclid.c */22242225/* Extended euclidean algorithm of (a, b) produces2226a*u1 + b*u2 = u32227*/2228int mp_exteuclid(const mp_int *a, const mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3)2229{2230mp_int u1, u2, u3, v1, v2, v3, t1, t2, t3, q, tmp;2231int err;22322233if ((err = mp_init_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL)) != MP_OKAY) {2234return err;2235}22362237/* initialize, (u1,u2,u3) = (1,0,a) */2238mp_set(&u1, 1uL);2239if ((err = mp_copy(a, &u3)) != MP_OKAY) {2240goto LBL_ERR;2241}22422243/* initialize, (v1,v2,v3) = (0,1,b) */2244mp_set(&v2, 1uL);2245if ((err = mp_copy(b, &v3)) != MP_OKAY) {2246goto LBL_ERR;2247}22482249/* loop while v3 != 0 */2250while (mp_iszero(&v3) == MP_NO) {2251/* q = u3/v3 */2252if ((err = mp_div(&u3, &v3, &q, NULL)) != MP_OKAY) {2253goto LBL_ERR;2254}22552256/* (t1,t2,t3) = (u1,u2,u3) - (v1,v2,v3)q */2257if ((err = mp_mul(&v1, &q, &tmp)) != MP_OKAY) {2258goto LBL_ERR;2259}2260if ((err = mp_sub(&u1, &tmp, &t1)) != MP_OKAY) {2261goto LBL_ERR;2262}2263if ((err = mp_mul(&v2, &q, &tmp)) != MP_OKAY) {2264goto LBL_ERR;2265}2266if ((err = mp_sub(&u2, &tmp, &t2)) != MP_OKAY) {2267goto LBL_ERR;2268}2269if ((err = mp_mul(&v3, &q, &tmp)) != MP_OKAY) {2270goto LBL_ERR;2271}2272if ((err = mp_sub(&u3, &tmp, &t3)) != MP_OKAY) {2273goto LBL_ERR;2274}22752276/* (u1,u2,u3) = (v1,v2,v3) */2277if ((err = mp_copy(&v1, &u1)) != MP_OKAY) {2278goto LBL_ERR;2279}2280if ((err = mp_copy(&v2, &u2)) != MP_OKAY) {2281goto LBL_ERR;2282}2283if ((err = mp_copy(&v3, &u3)) != MP_OKAY) {2284goto LBL_ERR;2285}22862287/* (v1,v2,v3) = (t1,t2,t3) */2288if ((err = mp_copy(&t1, &v1)) != MP_OKAY) {2289goto LBL_ERR;2290}2291if ((err = mp_copy(&t2, &v2)) != MP_OKAY) {2292goto LBL_ERR;2293}2294if ((err = mp_copy(&t3, &v3)) != MP_OKAY) {2295goto LBL_ERR;2296}2297}22982299/* make sure U3 >= 0 */2300if (u3.sign == MP_NEG) {2301if ((err = mp_neg(&u1, &u1)) != MP_OKAY) {2302goto LBL_ERR;2303}2304if ((err = mp_neg(&u2, &u2)) != MP_OKAY) {2305goto LBL_ERR;2306}2307if ((err = mp_neg(&u3, &u3)) != MP_OKAY) {2308goto LBL_ERR;2309}2310}23112312/* copy result out */2313if (U1 != NULL) {2314mp_exch(U1, &u1);2315}2316if (U2 != NULL) {2317mp_exch(U2, &u2);2318}2319if (U3 != NULL) {2320mp_exch(U3, &u3);2321}23222323err = MP_OKAY;2324LBL_ERR:2325mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL);2326return err;2327}23282329/* End: bn_mp_exteuclid.c */23302331/* Start: bn_mp_gcd.c */23322333/* Greatest Common Divisor using the binary method */2334int mp_gcd(const mp_int *a, const mp_int *b, mp_int *c)2335{2336mp_int u, v;2337int k, u_lsb, v_lsb, res;23382339/* either zero than gcd is the largest */2340if (mp_iszero(a) == MP_YES) {2341return mp_abs(b, c);2342}2343if (mp_iszero(b) == MP_YES) {2344return mp_abs(a, c);2345}23462347/* get copies of a and b we can modify */2348if ((res = mp_init_copy(&u, a)) != MP_OKAY) {2349return res;2350}23512352if ((res = mp_init_copy(&v, b)) != MP_OKAY) {2353goto LBL_U;2354}23552356/* must be positive for the remainder of the algorithm */2357u.sign = v.sign = MP_ZPOS;23582359/* B1. Find the common power of two for u and v */2360u_lsb = mp_cnt_lsb(&u);2361v_lsb = mp_cnt_lsb(&v);2362k = MIN(u_lsb, v_lsb);23632364if (k > 0) {2365/* divide the power of two out */2366if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {2367goto LBL_V;2368}23692370if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {2371goto LBL_V;2372}2373}23742375/* divide any remaining factors of two out */2376if (u_lsb != k) {2377if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {2378goto LBL_V;2379}2380}23812382if (v_lsb != k) {2383if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {2384goto LBL_V;2385}2386}23872388while (mp_iszero(&v) == MP_NO) {2389/* make sure v is the largest */2390if (mp_cmp_mag(&u, &v) == MP_GT) {2391/* swap u and v to make sure v is >= u */2392mp_exch(&u, &v);2393}23942395/* subtract smallest from largest */2396if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {2397goto LBL_V;2398}23992400/* Divide out all factors of two */2401if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {2402goto LBL_V;2403}2404}24052406/* multiply by 2**k which we divided out at the beginning */2407if ((res = mp_mul_2d(&u, k, c)) != MP_OKAY) {2408goto LBL_V;2409}2410c->sign = MP_ZPOS;2411res = MP_OKAY;2412LBL_V:2413mp_clear(&u);2414LBL_U:2415mp_clear(&v);2416return res;2417}24182419/* End: bn_mp_gcd.c */24202421/* Start: bn_mp_get_bit.c */24222423/* Checks the bit at position b and returns MP_YES2424if the bit is 1, MP_NO if it is 0 and MP_VAL2425in case of error */2426int mp_get_bit(const mp_int *a, int b)2427{2428int limb;2429mp_digit bit, isset;24302431if (b < 0) {2432return MP_VAL;2433}24342435limb = b / DIGIT_BIT;24362437/*2438* Zero is a special value with the member "used" set to zero.2439* Needs to be tested before the check for the upper boundary2440* otherwise (limb >= a->used) would be true for a = 02441*/24422443if (mp_iszero(a) != MP_NO) {2444return MP_NO;2445}24462447if (limb >= a->used) {2448return MP_VAL;2449}24502451bit = (mp_digit)(1) << (b % DIGIT_BIT);24522453isset = a->dp[limb] & bit;2454return (isset != 0u) ? MP_YES : MP_NO;2455}24562457/* End: bn_mp_get_bit.c */24582459/* Start: bn_mp_get_double.c */24602461double mp_get_double(const mp_int *a)2462{2463int i;2464double d = 0.0, fac = 1.0;2465for (i = 0; i < DIGIT_BIT; ++i) {2466fac *= 2.0;2467}2468for (i = USED(a); i --> 0;) {2469d = (d * fac) + (double)DIGIT(a, i);2470}2471return (mp_isneg(a) != MP_NO) ? -d : d;2472}24732474/* End: bn_mp_get_double.c */24752476/* Start: bn_mp_get_int.c */24772478/* get the lower 32-bits of an mp_int */2479unsigned long mp_get_int(const mp_int *a)2480{2481int i;2482mp_min_u32 res;24832484if (a->used == 0) {2485return 0;2486}24872488/* get number of digits of the lsb we have to read */2489i = MIN(a->used, ((((int)sizeof(unsigned long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;24902491/* get most significant digit of result */2492res = DIGIT(a, i);24932494while (--i >= 0) {2495res = (res << DIGIT_BIT) | DIGIT(a, i);2496}24972498/* force result to 32-bits always so it is consistent on non 32-bit platforms */2499return res & 0xFFFFFFFFUL;2500}25012502/* End: bn_mp_get_int.c */25032504/* Start: bn_mp_get_long.c */25052506/* get the lower unsigned long of an mp_int, platform dependent */2507unsigned long mp_get_long(const mp_int *a)2508{2509int i;2510unsigned long res;25112512if (a->used == 0) {2513return 0;2514}25152516/* get number of digits of the lsb we have to read */2517i = MIN(a->used, ((((int)sizeof(unsigned long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;25182519/* get most significant digit of result */2520res = DIGIT(a, i);25212522#if (ULONG_MAX != 0xffffffffuL) || (DIGIT_BIT < 32)2523while (--i >= 0) {2524res = (res << DIGIT_BIT) | DIGIT(a, i);2525}2526#endif2527return res;2528}25292530/* End: bn_mp_get_long.c */25312532/* Start: bn_mp_get_long_long.c */25332534/* get the lower unsigned long long of an mp_int, platform dependent */2535unsigned long long mp_get_long_long(const mp_int *a)2536{2537int i;2538unsigned long long res;25392540if (a->used == 0) {2541return 0;2542}25432544/* get number of digits of the lsb we have to read */2545i = MIN(a->used, ((((int)sizeof(unsigned long long) * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT)) - 1;25462547/* get most significant digit of result */2548res = DIGIT(a, i);25492550#if DIGIT_BIT < 642551while (--i >= 0) {2552res = (res << DIGIT_BIT) | DIGIT(a, i);2553}2554#endif2555return res;2556}25572558/* End: bn_mp_get_long_long.c */25592560/* Start: bn_mp_grow.c */25612562/* grow as required */2563int mp_grow(mp_int *a, int size)2564{2565int i;2566mp_digit *tmp;25672568/* if the alloc size is smaller alloc more ram */2569if (a->alloc < size) {2570/* ensure there are always at least MP_PREC digits extra on top */2571size += (MP_PREC * 2) - (size % MP_PREC);25722573/* reallocate the array a->dp2574*2575* We store the return in a temporary variable2576* in case the operation failed we don't want2577* to overwrite the dp member of a.2578*/2579tmp = OPT_CAST(mp_digit) XREALLOC(a->dp, sizeof(mp_digit) * (size_t)size);2580if (tmp == NULL) {2581/* reallocation failed but "a" is still valid [can be freed] */2582return MP_MEM;2583}25842585/* reallocation succeeded so set a->dp */2586a->dp = tmp;25872588/* zero excess digits */2589i = a->alloc;2590a->alloc = size;2591for (; i < a->alloc; i++) {2592a->dp[i] = 0;2593}2594}2595return MP_OKAY;2596}25972598/* End: bn_mp_grow.c */25992600/* Start: bn_mp_import.c */26012602/* based on gmp's mpz_import.2603* see http://gmplib.org/manual/Integer-Import-and-Export.html2604*/2605int mp_import(mp_int *rop, size_t count, int order, size_t size,2606int endian, size_t nails, const void *op)2607{2608int result;2609size_t odd_nails, nail_bytes, i, j;2610unsigned char odd_nail_mask;26112612mp_zero(rop);26132614if (endian == 0) {2615union {2616unsigned int i;2617char c[4];2618} lint;2619lint.i = 0x01020304;26202621endian = (lint.c[0] == '\x04') ? -1 : 1;2622}26232624odd_nails = (nails % 8u);2625odd_nail_mask = 0xff;2626for (i = 0; i < odd_nails; ++i) {2627odd_nail_mask ^= (unsigned char)(1u << (7u - i));2628}2629nail_bytes = nails / 8u;26302631for (i = 0; i < count; ++i) {2632for (j = 0; j < (size - nail_bytes); ++j) {2633unsigned char byte = *((unsigned char *)op +2634(((order == 1) ? i : ((count - 1u) - i)) * size) +2635((endian == 1) ? (j + nail_bytes) : (((size - 1u) - j) - nail_bytes)));26362637if ((result = mp_mul_2d(rop, (j == 0u) ? (int)(8u - odd_nails) : 8, rop)) != MP_OKAY) {2638return result;2639}26402641rop->dp[0] |= (j == 0u) ? (mp_digit)(byte & odd_nail_mask) : (mp_digit)byte;2642rop->used += 1;2643}2644}26452646mp_clamp(rop);26472648return MP_OKAY;2649}26502651/* End: bn_mp_import.c */26522653/* Start: bn_mp_init.c */26542655/* init a new mp_int */2656int mp_init(mp_int *a)2657{2658int i;26592660/* allocate memory required and clear it */2661a->dp = OPT_CAST(mp_digit) XMALLOC(sizeof(mp_digit) * (size_t)MP_PREC);2662if (a->dp == NULL) {2663return MP_MEM;2664}26652666/* set the digits to zero */2667for (i = 0; i < MP_PREC; i++) {2668a->dp[i] = 0;2669}26702671/* set the used to zero, allocated digits to the default precision2672* and sign to positive */2673a->used = 0;2674a->alloc = MP_PREC;2675a->sign = MP_ZPOS;26762677return MP_OKAY;2678}26792680/* End: bn_mp_init.c */26812682/* Start: bn_mp_init_copy.c */26832684/* creates "a" then copies b into it */2685int mp_init_copy(mp_int *a, const mp_int *b)2686{2687int res;26882689if ((res = mp_init_size(a, b->used)) != MP_OKAY) {2690return res;2691}26922693if ((res = mp_copy(b, a)) != MP_OKAY) {2694mp_clear(a);2695}26962697return res;2698}26992700/* End: bn_mp_init_copy.c */27012702/* Start: bn_mp_init_multi.c */27032704int mp_init_multi(mp_int *mp, ...)2705{2706mp_err res = MP_OKAY; /* Assume ok until proven otherwise */2707int n = 0; /* Number of ok inits */2708mp_int *cur_arg = mp;2709va_list args;27102711va_start(args, mp); /* init args to next argument from caller */2712while (cur_arg != NULL) {2713if (mp_init(cur_arg) != MP_OKAY) {2714/* Oops - error! Back-track and mp_clear what we already2715succeeded in init-ing, then return error.2716*/2717va_list clean_args;27182719/* now start cleaning up */2720cur_arg = mp;2721va_start(clean_args, mp);2722while (n-- != 0) {2723mp_clear(cur_arg);2724cur_arg = va_arg(clean_args, mp_int *);2725}2726va_end(clean_args);2727res = MP_MEM;2728break;2729}2730n++;2731cur_arg = va_arg(args, mp_int *);2732}2733va_end(args);2734return res; /* Assumed ok, if error flagged above. */2735}27362737/* End: bn_mp_init_multi.c */27382739/* Start: bn_mp_init_set.c */27402741/* initialize and set a digit */2742int mp_init_set(mp_int *a, mp_digit b)2743{2744int err;2745if ((err = mp_init(a)) != MP_OKAY) {2746return err;2747}2748mp_set(a, b);2749return err;2750}27512752/* End: bn_mp_init_set.c */27532754/* Start: bn_mp_init_set_int.c */27552756/* initialize and set a digit */2757int mp_init_set_int(mp_int *a, unsigned long b)2758{2759int err;2760if ((err = mp_init(a)) != MP_OKAY) {2761return err;2762}2763return mp_set_int(a, b);2764}27652766/* End: bn_mp_init_set_int.c */27672768/* Start: bn_mp_init_size.c */27692770/* init an mp_init for a given size */2771int mp_init_size(mp_int *a, int size)2772{2773int x;27742775/* pad size so there are always extra digits */2776size += (MP_PREC * 2) - (size % MP_PREC);27772778/* alloc mem */2779a->dp = OPT_CAST(mp_digit) XMALLOC(sizeof(mp_digit) * (size_t)size);2780if (a->dp == NULL) {2781return MP_MEM;2782}27832784/* set the members */2785a->used = 0;2786a->alloc = size;2787a->sign = MP_ZPOS;27882789/* zero the digits */2790for (x = 0; x < size; x++) {2791a->dp[x] = 0;2792}27932794return MP_OKAY;2795}27962797/* End: bn_mp_init_size.c */27982799/* Start: bn_mp_invmod.c */28002801/* hac 14.61, pp608 */2802int mp_invmod(const mp_int *a, const mp_int *b, mp_int *c)2803{2804/* b cannot be negative and has to be >1 */2805if ((b->sign == MP_NEG) || (mp_cmp_d(b, 1uL) != MP_GT)) {2806return MP_VAL;2807}28082809/* if the modulus is odd we can use a faster routine instead */2810if ((mp_isodd(b) == MP_YES)) {2811return fast_mp_invmod(a, b, c);2812}28132814return mp_invmod_slow(a, b, c);2815}28162817/* End: bn_mp_invmod.c */28182819/* Start: bn_mp_invmod_slow.c */28202821/* hac 14.61, pp608 */2822int mp_invmod_slow(const mp_int *a, const mp_int *b, mp_int *c)2823{2824mp_int x, y, u, v, A, B, C, D;2825int res;28262827/* b cannot be negative */2828if ((b->sign == MP_NEG) || (mp_iszero(b) == MP_YES)) {2829return MP_VAL;2830}28312832/* init temps */2833if ((res = mp_init_multi(&x, &y, &u, &v,2834&A, &B, &C, &D, NULL)) != MP_OKAY) {2835return res;2836}28372838/* x = a, y = b */2839if ((res = mp_mod(a, b, &x)) != MP_OKAY) {2840goto LBL_ERR;2841}2842if ((res = mp_copy(b, &y)) != MP_OKAY) {2843goto LBL_ERR;2844}28452846/* 2. [modified] if x,y are both even then return an error! */2847if ((mp_iseven(&x) == MP_YES) && (mp_iseven(&y) == MP_YES)) {2848res = MP_VAL;2849goto LBL_ERR;2850}28512852/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */2853if ((res = mp_copy(&x, &u)) != MP_OKAY) {2854goto LBL_ERR;2855}2856if ((res = mp_copy(&y, &v)) != MP_OKAY) {2857goto LBL_ERR;2858}2859mp_set(&A, 1uL);2860mp_set(&D, 1uL);28612862top:2863/* 4. while u is even do */2864while (mp_iseven(&u) == MP_YES) {2865/* 4.1 u = u/2 */2866if ((res = mp_div_2(&u, &u)) != MP_OKAY) {2867goto LBL_ERR;2868}2869/* 4.2 if A or B is odd then */2870if ((mp_isodd(&A) == MP_YES) || (mp_isodd(&B) == MP_YES)) {2871/* A = (A+y)/2, B = (B-x)/2 */2872if ((res = mp_add(&A, &y, &A)) != MP_OKAY) {2873goto LBL_ERR;2874}2875if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {2876goto LBL_ERR;2877}2878}2879/* A = A/2, B = B/2 */2880if ((res = mp_div_2(&A, &A)) != MP_OKAY) {2881goto LBL_ERR;2882}2883if ((res = mp_div_2(&B, &B)) != MP_OKAY) {2884goto LBL_ERR;2885}2886}28872888/* 5. while v is even do */2889while (mp_iseven(&v) == MP_YES) {2890/* 5.1 v = v/2 */2891if ((res = mp_div_2(&v, &v)) != MP_OKAY) {2892goto LBL_ERR;2893}2894/* 5.2 if C or D is odd then */2895if ((mp_isodd(&C) == MP_YES) || (mp_isodd(&D) == MP_YES)) {2896/* C = (C+y)/2, D = (D-x)/2 */2897if ((res = mp_add(&C, &y, &C)) != MP_OKAY) {2898goto LBL_ERR;2899}2900if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {2901goto LBL_ERR;2902}2903}2904/* C = C/2, D = D/2 */2905if ((res = mp_div_2(&C, &C)) != MP_OKAY) {2906goto LBL_ERR;2907}2908if ((res = mp_div_2(&D, &D)) != MP_OKAY) {2909goto LBL_ERR;2910}2911}29122913/* 6. if u >= v then */2914if (mp_cmp(&u, &v) != MP_LT) {2915/* u = u - v, A = A - C, B = B - D */2916if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {2917goto LBL_ERR;2918}29192920if ((res = mp_sub(&A, &C, &A)) != MP_OKAY) {2921goto LBL_ERR;2922}29232924if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {2925goto LBL_ERR;2926}2927} else {2928/* v - v - u, C = C - A, D = D - B */2929if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {2930goto LBL_ERR;2931}29322933if ((res = mp_sub(&C, &A, &C)) != MP_OKAY) {2934goto LBL_ERR;2935}29362937if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {2938goto LBL_ERR;2939}2940}29412942/* if not zero goto step 4 */2943if (mp_iszero(&u) == MP_NO)2944goto top;29452946/* now a = C, b = D, gcd == g*v */29472948/* if v != 1 then there is no inverse */2949if (mp_cmp_d(&v, 1uL) != MP_EQ) {2950res = MP_VAL;2951goto LBL_ERR;2952}29532954/* if its too low */2955while (mp_cmp_d(&C, 0uL) == MP_LT) {2956if ((res = mp_add(&C, b, &C)) != MP_OKAY) {2957goto LBL_ERR;2958}2959}29602961/* too big */2962while (mp_cmp_mag(&C, b) != MP_LT) {2963if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {2964goto LBL_ERR;2965}2966}29672968/* C is now the inverse */2969mp_exch(&C, c);2970res = MP_OKAY;2971LBL_ERR:2972mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL);2973return res;2974}29752976/* End: bn_mp_invmod_slow.c */29772978/* Start: bn_mp_is_square.c */29792980/* Check if remainders are possible squares - fast exclude non-squares */2981static const char rem_128[128] = {29820, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,29830, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,29841, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,29851, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,29860, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,29871, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,29881, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,29891, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 12990};29912992static const char rem_105[105] = {29930, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,29940, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,29950, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,29961, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,29970, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,29981, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,29991, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 13000};30013002/* Store non-zero to ret if arg is square, and zero if not */3003int mp_is_square(const mp_int *arg, int *ret)3004{3005int res;3006mp_digit c;3007mp_int t;3008unsigned long r;30093010/* Default to Non-square :) */3011*ret = MP_NO;30123013if (arg->sign == MP_NEG) {3014return MP_VAL;3015}30163017/* digits used? (TSD) */3018if (arg->used == 0) {3019return MP_OKAY;3020}30213022/* First check mod 128 (suppose that DIGIT_BIT is at least 7) */3023if (rem_128[127u & DIGIT(arg, 0)] == (char)1) {3024return MP_OKAY;3025}30263027/* Next check mod 105 (3*5*7) */3028if ((res = mp_mod_d(arg, 105uL, &c)) != MP_OKAY) {3029return res;3030}3031if (rem_105[c] == (char)1) {3032return MP_OKAY;3033}303430353036if ((res = mp_init_set_int(&t, 11L*13L*17L*19L*23L*29L*31L)) != MP_OKAY) {3037return res;3038}3039if ((res = mp_mod(arg, &t, &t)) != MP_OKAY) {3040goto LBL_ERR;3041}3042r = mp_get_int(&t);3043/* Check for other prime modules, note it's not an ERROR but we must3044* free "t" so the easiest way is to goto LBL_ERR. We know that res3045* is already equal to MP_OKAY from the mp_mod call3046*/3047if (((1uL<<(r%11uL)) & 0x5C4uL) != 0uL) goto LBL_ERR;3048if (((1uL<<(r%13uL)) & 0x9E4uL) != 0uL) goto LBL_ERR;3049if (((1uL<<(r%17uL)) & 0x5CE8uL) != 0uL) goto LBL_ERR;3050if (((1uL<<(r%19uL)) & 0x4F50CuL) != 0uL) goto LBL_ERR;3051if (((1uL<<(r%23uL)) & 0x7ACCA0uL) != 0uL) goto LBL_ERR;3052if (((1uL<<(r%29uL)) & 0xC2EDD0CuL) != 0uL) goto LBL_ERR;3053if (((1uL<<(r%31uL)) & 0x6DE2B848uL) != 0uL) goto LBL_ERR;30543055/* Final check - is sqr(sqrt(arg)) == arg ? */3056if ((res = mp_sqrt(arg, &t)) != MP_OKAY) {3057goto LBL_ERR;3058}3059if ((res = mp_sqr(&t, &t)) != MP_OKAY) {3060goto LBL_ERR;3061}30623063*ret = (mp_cmp_mag(&t, arg) == MP_EQ) ? MP_YES : MP_NO;3064LBL_ERR:3065mp_clear(&t);3066return res;3067}30683069/* End: bn_mp_is_square.c */30703071/* Start: bn_mp_jacobi.c */30723073/* computes the jacobi c = (a | n) (or Legendre if n is prime)3074* Kept for legacy reasons, please use mp_kronecker() instead3075*/3076int mp_jacobi(const mp_int *a, const mp_int *n, int *c)3077{3078/* if a < 0 return MP_VAL */3079if (mp_isneg(a) == MP_YES) {3080return MP_VAL;3081}30823083/* if n <= 0 return MP_VAL */3084if (mp_cmp_d(n, 0uL) != MP_GT) {3085return MP_VAL;3086}30873088return mp_kronecker(a, n, c);3089}30903091/* End: bn_mp_jacobi.c */30923093/* Start: bn_mp_karatsuba_mul.c */30943095/* c = |a| * |b| using Karatsuba Multiplication using3096* three half size multiplications3097*3098* Let B represent the radix [e.g. 2**DIGIT_BIT] and3099* let n represent half of the number of digits in3100* the min(a,b)3101*3102* a = a1 * B**n + a03103* b = b1 * B**n + b03104*3105* Then, a * b =>3106a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b03107*3108* Note that a1b1 and a0b0 are used twice and only need to be3109* computed once. So in total three half size (half # of3110* digit) multiplications are performed, a0b0, a1b1 and3111* (a1+b1)(a0+b0)3112*3113* Note that a multiplication of half the digits requires3114* 1/4th the number of single precision multiplications so in3115* total after one call 25% of the single precision multiplications3116* are saved. Note also that the call to mp_mul can end up back3117* in this function if the a0, a1, b0, or b1 are above the threshold.3118* This is known as divide-and-conquer and leads to the famous3119* O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than3120* the standard O(N**2) that the baseline/comba methods use.3121* Generally though the overhead of this method doesn't pay off3122* until a certain size (N ~ 80) is reached.3123*/3124int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c)3125{3126mp_int x0, x1, y0, y1, t1, x0y0, x1y1;3127int B, err;31283129/* default the return code to an error */3130err = MP_MEM;31313132/* min # of digits */3133B = MIN(a->used, b->used);31343135/* now divide in two */3136B = B >> 1;31373138/* init copy all the temps */3139if (mp_init_size(&x0, B) != MP_OKAY)3140goto LBL_ERR;3141if (mp_init_size(&x1, a->used - B) != MP_OKAY)3142goto X0;3143if (mp_init_size(&y0, B) != MP_OKAY)3144goto X1;3145if (mp_init_size(&y1, b->used - B) != MP_OKAY)3146goto Y0;31473148/* init temps */3149if (mp_init_size(&t1, B * 2) != MP_OKAY)3150goto Y1;3151if (mp_init_size(&x0y0, B * 2) != MP_OKAY)3152goto T1;3153if (mp_init_size(&x1y1, B * 2) != MP_OKAY)3154goto X0Y0;31553156/* now shift the digits */3157x0.used = y0.used = B;3158x1.used = a->used - B;3159y1.used = b->used - B;31603161{3162int x;3163mp_digit *tmpa, *tmpb, *tmpx, *tmpy;31643165/* we copy the digits directly instead of using higher level functions3166* since we also need to shift the digits3167*/3168tmpa = a->dp;3169tmpb = b->dp;31703171tmpx = x0.dp;3172tmpy = y0.dp;3173for (x = 0; x < B; x++) {3174*tmpx++ = *tmpa++;3175*tmpy++ = *tmpb++;3176}31773178tmpx = x1.dp;3179for (x = B; x < a->used; x++) {3180*tmpx++ = *tmpa++;3181}31823183tmpy = y1.dp;3184for (x = B; x < b->used; x++) {3185*tmpy++ = *tmpb++;3186}3187}31883189/* only need to clamp the lower words since by definition the3190* upper words x1/y1 must have a known number of digits3191*/3192mp_clamp(&x0);3193mp_clamp(&y0);31943195/* now calc the products x0y0 and x1y1 */3196/* after this x0 is no longer required, free temp [x0==t2]! */3197if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY)3198goto X1Y1; /* x0y0 = x0*y0 */3199if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY)3200goto X1Y1; /* x1y1 = x1*y1 */32013202/* now calc x1+x0 and y1+y0 */3203if (s_mp_add(&x1, &x0, &t1) != MP_OKAY)3204goto X1Y1; /* t1 = x1 - x0 */3205if (s_mp_add(&y1, &y0, &x0) != MP_OKAY)3206goto X1Y1; /* t2 = y1 - y0 */3207if (mp_mul(&t1, &x0, &t1) != MP_OKAY)3208goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */32093210/* add x0y0 */3211if (mp_add(&x0y0, &x1y1, &x0) != MP_OKAY)3212goto X1Y1; /* t2 = x0y0 + x1y1 */3213if (s_mp_sub(&t1, &x0, &t1) != MP_OKAY)3214goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */32153216/* shift by B */3217if (mp_lshd(&t1, B) != MP_OKAY)3218goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */3219if (mp_lshd(&x1y1, B * 2) != MP_OKAY)3220goto X1Y1; /* x1y1 = x1y1 << 2*B */32213222if (mp_add(&x0y0, &t1, &t1) != MP_OKAY)3223goto X1Y1; /* t1 = x0y0 + t1 */3224if (mp_add(&t1, &x1y1, c) != MP_OKAY)3225goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */32263227/* Algorithm succeeded set the return code to MP_OKAY */3228err = MP_OKAY;32293230X1Y1:3231mp_clear(&x1y1);3232X0Y0:3233mp_clear(&x0y0);3234T1:3235mp_clear(&t1);3236Y1:3237mp_clear(&y1);3238Y0:3239mp_clear(&y0);3240X1:3241mp_clear(&x1);3242X0:3243mp_clear(&x0);3244LBL_ERR:3245return err;3246}32473248/* End: bn_mp_karatsuba_mul.c */32493250/* Start: bn_mp_karatsuba_sqr.c */32513252/* Karatsuba squaring, computes b = a*a using three3253* half size squarings3254*3255* See comments of karatsuba_mul for details. It3256* is essentially the same algorithm but merely3257* tuned to perform recursive squarings.3258*/3259int mp_karatsuba_sqr(const mp_int *a, mp_int *b)3260{3261mp_int x0, x1, t1, t2, x0x0, x1x1;3262int B, err;32633264err = MP_MEM;32653266/* min # of digits */3267B = a->used;32683269/* now divide in two */3270B = B >> 1;32713272/* init copy all the temps */3273if (mp_init_size(&x0, B) != MP_OKAY)3274goto LBL_ERR;3275if (mp_init_size(&x1, a->used - B) != MP_OKAY)3276goto X0;32773278/* init temps */3279if (mp_init_size(&t1, a->used * 2) != MP_OKAY)3280goto X1;3281if (mp_init_size(&t2, a->used * 2) != MP_OKAY)3282goto T1;3283if (mp_init_size(&x0x0, B * 2) != MP_OKAY)3284goto T2;3285if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY)3286goto X0X0;32873288{3289int x;3290mp_digit *dst, *src;32913292src = a->dp;32933294/* now shift the digits */3295dst = x0.dp;3296for (x = 0; x < B; x++) {3297*dst++ = *src++;3298}32993300dst = x1.dp;3301for (x = B; x < a->used; x++) {3302*dst++ = *src++;3303}3304}33053306x0.used = B;3307x1.used = a->used - B;33083309mp_clamp(&x0);33103311/* now calc the products x0*x0 and x1*x1 */3312if (mp_sqr(&x0, &x0x0) != MP_OKAY)3313goto X1X1; /* x0x0 = x0*x0 */3314if (mp_sqr(&x1, &x1x1) != MP_OKAY)3315goto X1X1; /* x1x1 = x1*x1 */33163317/* now calc (x1+x0)**2 */3318if (s_mp_add(&x1, &x0, &t1) != MP_OKAY)3319goto X1X1; /* t1 = x1 - x0 */3320if (mp_sqr(&t1, &t1) != MP_OKAY)3321goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */33223323/* add x0y0 */3324if (s_mp_add(&x0x0, &x1x1, &t2) != MP_OKAY)3325goto X1X1; /* t2 = x0x0 + x1x1 */3326if (s_mp_sub(&t1, &t2, &t1) != MP_OKAY)3327goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */33283329/* shift by B */3330if (mp_lshd(&t1, B) != MP_OKAY)3331goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */3332if (mp_lshd(&x1x1, B * 2) != MP_OKAY)3333goto X1X1; /* x1x1 = x1x1 << 2*B */33343335if (mp_add(&x0x0, &t1, &t1) != MP_OKAY)3336goto X1X1; /* t1 = x0x0 + t1 */3337if (mp_add(&t1, &x1x1, b) != MP_OKAY)3338goto X1X1; /* t1 = x0x0 + t1 + x1x1 */33393340err = MP_OKAY;33413342X1X1:3343mp_clear(&x1x1);3344X0X0:3345mp_clear(&x0x0);3346T2:3347mp_clear(&t2);3348T1:3349mp_clear(&t1);3350X1:3351mp_clear(&x1);3352X0:3353mp_clear(&x0);3354LBL_ERR:3355return err;3356}33573358/* End: bn_mp_karatsuba_sqr.c */33593360/* Start: bn_mp_kronecker.c */33613362/*3363Kronecker symbol (a|p)3364Straightforward implementation of algorithm 1.4.10 in3365Henri Cohen: "A Course in Computational Algebraic Number Theory"33663367@book{cohen2013course,3368title={A course in computational algebraic number theory},3369author={Cohen, Henri},3370volume={138},3371year={2013},3372publisher={Springer Science \& Business Media}3373}3374*/3375int mp_kronecker(const mp_int *a, const mp_int *p, int *c)3376{3377mp_int a1, p1, r;33783379int e = MP_OKAY;3380int v, k;33813382static const int table[8] = {0, 1, 0, -1, 0, -1, 0, 1};33833384if (mp_iszero(p) != MP_NO) {3385if ((a->used == 1) && (a->dp[0] == 1u)) {3386*c = 1;3387return e;3388} else {3389*c = 0;3390return e;3391}3392}33933394if ((mp_iseven(a) != MP_NO) && (mp_iseven(p) != MP_NO)) {3395*c = 0;3396return e;3397}33983399if ((e = mp_init_copy(&a1, a)) != MP_OKAY) {3400return e;3401}3402if ((e = mp_init_copy(&p1, p)) != MP_OKAY) {3403goto LBL_KRON_0;3404}34053406v = mp_cnt_lsb(&p1);3407if ((e = mp_div_2d(&p1, v, &p1, NULL)) != MP_OKAY) {3408goto LBL_KRON_1;3409}34103411if ((v & 0x1) == 0) {3412k = 1;3413} else {3414k = table[a->dp[0] & 7u];3415}34163417if (p1.sign == MP_NEG) {3418p1.sign = MP_ZPOS;3419if (a1.sign == MP_NEG) {3420k = -k;3421}3422}34233424if ((e = mp_init(&r)) != MP_OKAY) {3425goto LBL_KRON_1;3426}34273428for (;;) {3429if (mp_iszero(&a1) != MP_NO) {3430if (mp_cmp_d(&p1, 1uL) == MP_EQ) {3431*c = k;3432goto LBL_KRON;3433} else {3434*c = 0;3435goto LBL_KRON;3436}3437}34383439v = mp_cnt_lsb(&a1);3440if ((e = mp_div_2d(&a1, v, &a1, NULL)) != MP_OKAY) {3441goto LBL_KRON;3442}34433444if ((v & 0x1) == 1) {3445k = k * table[p1.dp[0] & 7u];3446}34473448if (a1.sign == MP_NEG) {3449/*3450* Compute k = (-1)^((a1)*(p1-1)/4) * k3451* a1.dp[0] + 1 cannot overflow because the MSB3452* of the type mp_digit is not set by definition3453*/3454if (((a1.dp[0] + 1u) & p1.dp[0] & 2u) != 0u) {3455k = -k;3456}3457} else {3458/* compute k = (-1)^((a1-1)*(p1-1)/4) * k */3459if ((a1.dp[0] & p1.dp[0] & 2u) != 0u) {3460k = -k;3461}3462}34633464if ((e = mp_copy(&a1, &r)) != MP_OKAY) {3465goto LBL_KRON;3466}3467r.sign = MP_ZPOS;3468if ((e = mp_mod(&p1, &r, &a1)) != MP_OKAY) {3469goto LBL_KRON;3470}3471if ((e = mp_copy(&r, &p1)) != MP_OKAY) {3472goto LBL_KRON;3473}3474}34753476LBL_KRON:3477mp_clear(&r);3478LBL_KRON_1:3479mp_clear(&p1);3480LBL_KRON_0:3481mp_clear(&a1);34823483return e;3484}34853486/* End: bn_mp_kronecker.c */34873488/* Start: bn_mp_lcm.c */34893490/* computes least common multiple as |a*b|/(a, b) */3491int mp_lcm(const mp_int *a, const mp_int *b, mp_int *c)3492{3493int res;3494mp_int t1, t2;349534963497if ((res = mp_init_multi(&t1, &t2, NULL)) != MP_OKAY) {3498return res;3499}35003501/* t1 = get the GCD of the two inputs */3502if ((res = mp_gcd(a, b, &t1)) != MP_OKAY) {3503goto LBL_T;3504}35053506/* divide the smallest by the GCD */3507if (mp_cmp_mag(a, b) == MP_LT) {3508/* store quotient in t2 such that t2 * b is the LCM */3509if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {3510goto LBL_T;3511}3512res = mp_mul(b, &t2, c);3513} else {3514/* store quotient in t2 such that t2 * a is the LCM */3515if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {3516goto LBL_T;3517}3518res = mp_mul(a, &t2, c);3519}35203521/* fix the sign to positive */3522c->sign = MP_ZPOS;35233524LBL_T:3525mp_clear_multi(&t1, &t2, NULL);3526return res;3527}35283529/* End: bn_mp_lcm.c */35303531/* Start: bn_mp_lshd.c */35323533/* shift left a certain amount of digits */3534int mp_lshd(mp_int *a, int b)3535{3536int x, res;35373538/* if its less than zero return */3539if (b <= 0) {3540return MP_OKAY;3541}3542/* no need to shift 0 around */3543if (mp_iszero(a) == MP_YES) {3544return MP_OKAY;3545}35463547/* grow to fit the new digits */3548if (a->alloc < (a->used + b)) {3549if ((res = mp_grow(a, a->used + b)) != MP_OKAY) {3550return res;3551}3552}35533554{3555mp_digit *top, *bottom;35563557/* increment the used by the shift amount then copy upwards */3558a->used += b;35593560/* top */3561top = a->dp + a->used - 1;35623563/* base */3564bottom = (a->dp + a->used - 1) - b;35653566/* much like mp_rshd this is implemented using a sliding window3567* except the window goes the otherway around. Copying from3568* the bottom to the top. see bn_mp_rshd.c for more info.3569*/3570for (x = a->used - 1; x >= b; x--) {3571*top-- = *bottom--;3572}35733574/* zero the lower digits */3575top = a->dp;3576for (x = 0; x < b; x++) {3577*top++ = 0;3578}3579}3580return MP_OKAY;3581}35823583/* End: bn_mp_lshd.c */35843585/* Start: bn_mp_mod.c */35863587/* c = a mod b, 0 <= c < b if b > 0, b < c <= 0 if b < 0 */3588int mp_mod(const mp_int *a, const mp_int *b, mp_int *c)3589{3590mp_int t;3591int res;35923593if ((res = mp_init_size(&t, b->used)) != MP_OKAY) {3594return res;3595}35963597if ((res = mp_div(a, b, NULL, &t)) != MP_OKAY) {3598mp_clear(&t);3599return res;3600}36013602if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) {3603res = MP_OKAY;3604mp_exch(&t, c);3605} else {3606res = mp_add(b, &t, c);3607}36083609mp_clear(&t);3610return res;3611}36123613/* End: bn_mp_mod.c */36143615/* Start: bn_mp_mod_2d.c */36163617/* calc a value mod 2**b */3618int mp_mod_2d(const mp_int *a, int b, mp_int *c)3619{3620int x, res;36213622/* if b is <= 0 then zero the int */3623if (b <= 0) {3624mp_zero(c);3625return MP_OKAY;3626}36273628/* if the modulus is larger than the value than return */3629if (b >= (a->used * DIGIT_BIT)) {3630res = mp_copy(a, c);3631return res;3632}36333634/* copy */3635if ((res = mp_copy(a, c)) != MP_OKAY) {3636return res;3637}36383639/* zero digits above the last digit of the modulus */3640for (x = (b / DIGIT_BIT) + (((b % DIGIT_BIT) == 0) ? 0 : 1); x < c->used; x++) {3641c->dp[x] = 0;3642}3643/* clear the digit that is not completely outside/inside the modulus */3644c->dp[b / DIGIT_BIT] &=3645((mp_digit)1 << (mp_digit)(b % DIGIT_BIT)) - (mp_digit)1;3646mp_clamp(c);3647return MP_OKAY;3648}36493650/* End: bn_mp_mod_2d.c */36513652/* Start: bn_mp_mod_d.c */36533654int mp_mod_d(const mp_int *a, mp_digit b, mp_digit *c)3655{3656return mp_div_d(a, b, NULL, c);3657}36583659/* End: bn_mp_mod_d.c */36603661/* Start: bn_mp_montgomery_calc_normalization.c */36623663/*3664* shifts with subtractions when the result is greater than b.3665*3666* The method is slightly modified to shift B unconditionally upto just under3667* the leading bit of b. This saves alot of multiple precision shifting.3668*/3669int mp_montgomery_calc_normalization(mp_int *a, const mp_int *b)3670{3671int x, bits, res;36723673/* how many bits of last digit does b use */3674bits = mp_count_bits(b) % DIGIT_BIT;36753676if (b->used > 1) {3677if ((res = mp_2expt(a, ((b->used - 1) * DIGIT_BIT) + bits - 1)) != MP_OKAY) {3678return res;3679}3680} else {3681mp_set(a, 1uL);3682bits = 1;3683}368436853686/* now compute C = A * B mod b */3687for (x = bits - 1; x < (int)DIGIT_BIT; x++) {3688if ((res = mp_mul_2(a, a)) != MP_OKAY) {3689return res;3690}3691if (mp_cmp_mag(a, b) != MP_LT) {3692if ((res = s_mp_sub(a, b, a)) != MP_OKAY) {3693return res;3694}3695}3696}36973698return MP_OKAY;3699}37003701/* End: bn_mp_montgomery_calc_normalization.c */37023703/* Start: bn_mp_montgomery_reduce.c */37043705/* computes xR**-1 == x (mod N) via Montgomery Reduction */3706int mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)3707{3708int ix, res, digs;3709mp_digit mu;37103711/* can the fast reduction [comba] method be used?3712*3713* Note that unlike in mul you're safely allowed *less*3714* than the available columns [255 per default] since carries3715* are fixed up in the inner loop.3716*/3717digs = (n->used * 2) + 1;3718if ((digs < (int)MP_WARRAY) &&3719(x->used <= (int)MP_WARRAY) &&3720(n->used <3721(int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {3722return fast_mp_montgomery_reduce(x, n, rho);3723}37243725/* grow the input as required */3726if (x->alloc < digs) {3727if ((res = mp_grow(x, digs)) != MP_OKAY) {3728return res;3729}3730}3731x->used = digs;37323733for (ix = 0; ix < n->used; ix++) {3734/* mu = ai * rho mod b3735*3736* The value of rho must be precalculated via3737* montgomery_setup() such that3738* it equals -1/n0 mod b this allows the3739* following inner loop to reduce the3740* input one digit at a time3741*/3742mu = (mp_digit)(((mp_word)x->dp[ix] * (mp_word)rho) & MP_MASK);37433744/* a = a + mu * m * b**i */3745{3746int iy;3747mp_digit *tmpn, *tmpx, u;3748mp_word r;37493750/* alias for digits of the modulus */3751tmpn = n->dp;37523753/* alias for the digits of x [the input] */3754tmpx = x->dp + ix;37553756/* set the carry to zero */3757u = 0;37583759/* Multiply and add in place */3760for (iy = 0; iy < n->used; iy++) {3761/* compute product and sum */3762r = ((mp_word)mu * (mp_word)*tmpn++) +3763(mp_word)u + (mp_word)*tmpx;37643765/* get carry */3766u = (mp_digit)(r >> (mp_word)DIGIT_BIT);37673768/* fix digit */3769*tmpx++ = (mp_digit)(r & (mp_word)MP_MASK);3770}3771/* At this point the ix'th digit of x should be zero */377237733774/* propagate carries upwards as required*/3775while (u != 0u) {3776*tmpx += u;3777u = *tmpx >> DIGIT_BIT;3778*tmpx++ &= MP_MASK;3779}3780}3781}37823783/* at this point the n.used'th least3784* significant digits of x are all zero3785* which means we can shift x to the3786* right by n.used digits and the3787* residue is unchanged.3788*/37893790/* x = x/b**n.used */3791mp_clamp(x);3792mp_rshd(x, n->used);37933794/* if x >= n then x = x - n */3795if (mp_cmp_mag(x, n) != MP_LT) {3796return s_mp_sub(x, n, x);3797}37983799return MP_OKAY;3800}38013802/* End: bn_mp_montgomery_reduce.c */38033804/* Start: bn_mp_montgomery_setup.c */38053806/* setups the montgomery reduction stuff */3807int mp_montgomery_setup(const mp_int *n, mp_digit *rho)3808{3809mp_digit x, b;38103811/* fast inversion mod 2**k3812*3813* Based on the fact that3814*3815* XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)3816* => 2*X*A - X*X*A*A = 13817* => 2*(1) - (1) = 13818*/3819b = n->dp[0];38203821if ((b & 1u) == 0u) {3822return MP_VAL;3823}38243825x = (((b + 2u) & 4u) << 1) + b; /* here x*a==1 mod 2**4 */3826x *= 2u - (b * x); /* here x*a==1 mod 2**8 */3827#if !defined(MP_8BIT)3828x *= 2u - (b * x); /* here x*a==1 mod 2**16 */3829#endif3830#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))3831x *= 2u - (b * x); /* here x*a==1 mod 2**32 */3832#endif3833#ifdef MP_64BIT3834x *= 2u - (b * x); /* here x*a==1 mod 2**64 */3835#endif38363837/* rho = -1/m mod b */3838*rho = (mp_digit)(((mp_word)1 << (mp_word)DIGIT_BIT) - x) & MP_MASK;38393840return MP_OKAY;3841}38423843/* End: bn_mp_montgomery_setup.c */38443845/* Start: bn_mp_mul.c */38463847/* high level multiplication (handles sign) */3848int mp_mul(const mp_int *a, const mp_int *b, mp_int *c)3849{3850int res, neg;3851neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;38523853/* use Toom-Cook? */3854if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) {3855res = mp_toom_mul(a, b, c);3856} else3857/* use Karatsuba? */3858if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {3859res = mp_karatsuba_mul(a, b, c);3860} else3861{3862/* can we use the fast multiplier?3863*3864* The fast multiplier can be used if the output will3865* have less than MP_WARRAY digits and the number of3866* digits won't affect carry propagation3867*/3868int digs = a->used + b->used + 1;38693870if ((digs < (int)MP_WARRAY) &&3871(MIN(a->used, b->used) <=3872(int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {3873res = fast_s_mp_mul_digs(a, b, c, digs);3874} else3875{3876res = s_mp_mul(a, b, c); /* uses s_mp_mul_digs */3877}3878}3879c->sign = (c->used > 0) ? neg : MP_ZPOS;3880return res;3881}38823883/* End: bn_mp_mul.c */38843885/* Start: bn_mp_mul_2.c */38863887/* b = a*2 */3888int mp_mul_2(const mp_int *a, mp_int *b)3889{3890int x, res, oldused;38913892/* grow to accomodate result */3893if (b->alloc < (a->used + 1)) {3894if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) {3895return res;3896}3897}38983899oldused = b->used;3900b->used = a->used;39013902{3903mp_digit r, rr, *tmpa, *tmpb;39043905/* alias for source */3906tmpa = a->dp;39073908/* alias for dest */3909tmpb = b->dp;39103911/* carry */3912r = 0;3913for (x = 0; x < a->used; x++) {39143915/* get what will be the *next* carry bit from the3916* MSB of the current digit3917*/3918rr = *tmpa >> (mp_digit)(DIGIT_BIT - 1);39193920/* now shift up this digit, add in the carry [from the previous] */3921*tmpb++ = ((*tmpa++ << 1uL) | r) & MP_MASK;39223923/* copy the carry that would be from the source3924* digit into the next iteration3925*/3926r = rr;3927}39283929/* new leading digit? */3930if (r != 0u) {3931/* add a MSB which is always 1 at this point */3932*tmpb = 1;3933++(b->used);3934}39353936/* now zero any excess digits on the destination3937* that we didn't write to3938*/3939tmpb = b->dp + b->used;3940for (x = b->used; x < oldused; x++) {3941*tmpb++ = 0;3942}3943}3944b->sign = a->sign;3945return MP_OKAY;3946}39473948/* End: bn_mp_mul_2.c */39493950/* Start: bn_mp_mul_2d.c */39513952/* shift left by a certain bit count */3953int mp_mul_2d(const mp_int *a, int b, mp_int *c)3954{3955mp_digit d;3956int res;39573958/* copy */3959if (a != c) {3960if ((res = mp_copy(a, c)) != MP_OKAY) {3961return res;3962}3963}39643965if (c->alloc < (c->used + (b / DIGIT_BIT) + 1)) {3966if ((res = mp_grow(c, c->used + (b / DIGIT_BIT) + 1)) != MP_OKAY) {3967return res;3968}3969}39703971/* shift by as many digits in the bit count */3972if (b >= DIGIT_BIT) {3973if ((res = mp_lshd(c, b / DIGIT_BIT)) != MP_OKAY) {3974return res;3975}3976}39773978/* shift any bit count < DIGIT_BIT */3979d = (mp_digit)(b % DIGIT_BIT);3980if (d != 0u) {3981mp_digit *tmpc, shift, mask, r, rr;3982int x;39833984/* bitmask for carries */3985mask = ((mp_digit)1 << d) - (mp_digit)1;39863987/* shift for msbs */3988shift = (mp_digit)DIGIT_BIT - d;39893990/* alias */3991tmpc = c->dp;39923993/* carry */3994r = 0;3995for (x = 0; x < c->used; x++) {3996/* get the higher bits of the current word */3997rr = (*tmpc >> shift) & mask;39983999/* shift the current word and OR in the carry */4000*tmpc = ((*tmpc << d) | r) & MP_MASK;4001++tmpc;40024003/* set the carry to the carry bits of the current word */4004r = rr;4005}40064007/* set final carry */4008if (r != 0u) {4009c->dp[(c->used)++] = r;4010}4011}4012mp_clamp(c);4013return MP_OKAY;4014}40154016/* End: bn_mp_mul_2d.c */40174018/* Start: bn_mp_mul_d.c */40194020/* multiply by a digit */4021int mp_mul_d(const mp_int *a, mp_digit b, mp_int *c)4022{4023mp_digit u, *tmpa, *tmpc;4024mp_word r;4025int ix, res, olduse;40264027/* make sure c is big enough to hold a*b */4028if (c->alloc < (a->used + 1)) {4029if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {4030return res;4031}4032}40334034/* get the original destinations used count */4035olduse = c->used;40364037/* set the sign */4038c->sign = a->sign;40394040/* alias for a->dp [source] */4041tmpa = a->dp;40424043/* alias for c->dp [dest] */4044tmpc = c->dp;40454046/* zero carry */4047u = 0;40484049/* compute columns */4050for (ix = 0; ix < a->used; ix++) {4051/* compute product and carry sum for this term */4052r = (mp_word)u + ((mp_word)*tmpa++ * (mp_word)b);40534054/* mask off higher bits to get a single digit */4055*tmpc++ = (mp_digit)(r & (mp_word)MP_MASK);40564057/* send carry into next iteration */4058u = (mp_digit)(r >> (mp_word)DIGIT_BIT);4059}40604061/* store final carry [if any] and increment ix offset */4062*tmpc++ = u;4063++ix;40644065/* now zero digits above the top */4066while (ix++ < olduse) {4067*tmpc++ = 0;4068}40694070/* set used count */4071c->used = a->used + 1;4072mp_clamp(c);40734074return MP_OKAY;4075}40764077/* End: bn_mp_mul_d.c */40784079/* Start: bn_mp_mulmod.c */40804081/* d = a * b (mod c) */4082int mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)4083{4084int res;4085mp_int t;40864087if ((res = mp_init_size(&t, c->used)) != MP_OKAY) {4088return res;4089}40904091if ((res = mp_mul(a, b, &t)) != MP_OKAY) {4092mp_clear(&t);4093return res;4094}4095res = mp_mod(&t, c, d);4096mp_clear(&t);4097return res;4098}40994100/* End: bn_mp_mulmod.c */41014102/* Start: bn_mp_n_root.c */41034104/* wrapper function for mp_n_root_ex()4105* computes c = (a)**(1/b) such that (c)**b <= a and (c+1)**b > a4106*/4107int mp_n_root(const mp_int *a, mp_digit b, mp_int *c)4108{4109return mp_n_root_ex(a, b, c, 0);4110}41114112/* End: bn_mp_n_root.c */41134114/* Start: bn_mp_n_root_ex.c */41154116/* find the n'th root of an integer4117*4118* Result found such that (c)**b <= a and (c+1)**b > a4119*4120* This algorithm uses Newton's approximation4121* x[i+1] = x[i] - f(x[i])/f'(x[i])4122* which will find the root in log(N) time where4123* each step involves a fair bit. This is not meant to4124* find huge roots [square and cube, etc].4125*/4126int mp_n_root_ex(const mp_int *a, mp_digit b, mp_int *c, int fast)4127{4128mp_int t1, t2, t3, a_;4129int res;41304131/* input must be positive if b is even */4132if (((b & 1u) == 0u) && (a->sign == MP_NEG)) {4133return MP_VAL;4134}41354136if ((res = mp_init(&t1)) != MP_OKAY) {4137return res;4138}41394140if ((res = mp_init(&t2)) != MP_OKAY) {4141goto LBL_T1;4142}41434144if ((res = mp_init(&t3)) != MP_OKAY) {4145goto LBL_T2;4146}41474148/* if a is negative fudge the sign but keep track */4149a_ = *a;4150a_.sign = MP_ZPOS;41514152/* t2 = 2 */4153mp_set(&t2, 2uL);41544155do {4156/* t1 = t2 */4157if ((res = mp_copy(&t2, &t1)) != MP_OKAY) {4158goto LBL_T3;4159}41604161/* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */41624163/* t3 = t1**(b-1) */4164if ((res = mp_expt_d_ex(&t1, b - 1u, &t3, fast)) != MP_OKAY) {4165goto LBL_T3;4166}41674168/* numerator */4169/* t2 = t1**b */4170if ((res = mp_mul(&t3, &t1, &t2)) != MP_OKAY) {4171goto LBL_T3;4172}41734174/* t2 = t1**b - a */4175if ((res = mp_sub(&t2, &a_, &t2)) != MP_OKAY) {4176goto LBL_T3;4177}41784179/* denominator */4180/* t3 = t1**(b-1) * b */4181if ((res = mp_mul_d(&t3, b, &t3)) != MP_OKAY) {4182goto LBL_T3;4183}41844185/* t3 = (t1**b - a)/(b * t1**(b-1)) */4186if ((res = mp_div(&t2, &t3, &t3, NULL)) != MP_OKAY) {4187goto LBL_T3;4188}41894190if ((res = mp_sub(&t1, &t3, &t2)) != MP_OKAY) {4191goto LBL_T3;4192}4193} while (mp_cmp(&t1, &t2) != MP_EQ);41944195/* result can be off by a few so check */4196for (;;) {4197if ((res = mp_expt_d_ex(&t1, b, &t2, fast)) != MP_OKAY) {4198goto LBL_T3;4199}42004201if (mp_cmp(&t2, &a_) == MP_GT) {4202if ((res = mp_sub_d(&t1, 1uL, &t1)) != MP_OKAY) {4203goto LBL_T3;4204}4205} else {4206break;4207}4208}42094210/* set the result */4211mp_exch(&t1, c);42124213/* set the sign of the result */4214c->sign = a->sign;42154216res = MP_OKAY;42174218LBL_T3:4219mp_clear(&t3);4220LBL_T2:4221mp_clear(&t2);4222LBL_T1:4223mp_clear(&t1);4224return res;4225}42264227/* End: bn_mp_n_root_ex.c */42284229/* Start: bn_mp_neg.c */42304231/* b = -a */4232int mp_neg(const mp_int *a, mp_int *b)4233{4234int res;4235if (a != b) {4236if ((res = mp_copy(a, b)) != MP_OKAY) {4237return res;4238}4239}42404241if (mp_iszero(b) != MP_YES) {4242b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;4243} else {4244b->sign = MP_ZPOS;4245}42464247return MP_OKAY;4248}42494250/* End: bn_mp_neg.c */42514252/* Start: bn_mp_or.c */42534254/* OR two ints together */4255int mp_or(const mp_int *a, const mp_int *b, mp_int *c)4256{4257int res, ix, px;4258mp_int t;4259const mp_int *x;42604261if (a->used > b->used) {4262if ((res = mp_init_copy(&t, a)) != MP_OKAY) {4263return res;4264}4265px = b->used;4266x = b;4267} else {4268if ((res = mp_init_copy(&t, b)) != MP_OKAY) {4269return res;4270}4271px = a->used;4272x = a;4273}42744275for (ix = 0; ix < px; ix++) {4276t.dp[ix] |= x->dp[ix];4277}4278mp_clamp(&t);4279mp_exch(c, &t);4280mp_clear(&t);4281return MP_OKAY;4282}42834284/* End: bn_mp_or.c */42854286/* Start: bn_mp_prime_fermat.c */42874288/* performs one Fermat test.4289*4290* If "a" were prime then b**a == b (mod a) since the order of4291* the multiplicative sub-group would be phi(a) = a-1. That means4292* it would be the same as b**(a mod (a-1)) == b**1 == b (mod a).4293*4294* Sets result to 1 if the congruence holds, or zero otherwise.4295*/4296int mp_prime_fermat(const mp_int *a, const mp_int *b, int *result)4297{4298mp_int t;4299int err;43004301/* default to composite */4302*result = MP_NO;43034304/* ensure b > 1 */4305if (mp_cmp_d(b, 1uL) != MP_GT) {4306return MP_VAL;4307}43084309/* init t */4310if ((err = mp_init(&t)) != MP_OKAY) {4311return err;4312}43134314/* compute t = b**a mod a */4315if ((err = mp_exptmod(b, a, a, &t)) != MP_OKAY) {4316goto LBL_T;4317}43184319/* is it equal to b? */4320if (mp_cmp(&t, b) == MP_EQ) {4321*result = MP_YES;4322}43234324err = MP_OKAY;4325LBL_T:4326mp_clear(&t);4327return err;4328}43294330/* End: bn_mp_prime_fermat.c */43314332/* Start: bn_mp_prime_frobenius_underwood.c */43334334/*4335* See file bn_mp_prime_is_prime.c or the documentation in doc/bn.tex for the details4336*/4337#ifndef LTM_USE_FIPS_ONLY43384339#ifdef MP_8BIT4340/*4341* floor of positive solution of4342* (2^16)-1 = (a+4)*(2*a+5)4343* TODO: Both values are smaller than N^(1/4), would have to use a bigint4344* for a instead but any a biger than about 120 are already so rare that4345* it is possible to ignore them and still get enough pseudoprimes.4346* But it is still a restriction of the set of available pseudoprimes4347* which makes this implementation less secure if used stand-alone.4348*/4349#define LTM_FROBENIUS_UNDERWOOD_A 1774350#else4351#define LTM_FROBENIUS_UNDERWOOD_A 327644352#endif4353int mp_prime_frobenius_underwood(const mp_int *N, int *result)4354{4355mp_int T1z, T2z, Np1z, sz, tz;43564357int a, ap2, length, i, j, isset;4358int e;43594360*result = MP_NO;43614362if ((e = mp_init_multi(&T1z, &T2z, &Np1z, &sz, &tz, NULL)) != MP_OKAY) {4363return e;4364}43654366for (a = 0; a < LTM_FROBENIUS_UNDERWOOD_A; a++) {4367/* TODO: That's ugly! No, really, it is! */4368if ((a==2) || (a==4) || (a==7) || (a==8) || (a==10) ||4369(a==14) || (a==18) || (a==23) || (a==26) || (a==28)) {4370continue;4371}4372/* (32764^2 - 4) < 2^31, no bigint for >MP_8BIT needed) */4373if ((e = mp_set_long(&T1z, (unsigned long)a)) != MP_OKAY) {4374goto LBL_FU_ERR;4375}43764377if ((e = mp_sqr(&T1z, &T1z)) != MP_OKAY) {4378goto LBL_FU_ERR;4379}43804381if ((e = mp_sub_d(&T1z, 4uL, &T1z)) != MP_OKAY) {4382goto LBL_FU_ERR;4383}43844385if ((e = mp_kronecker(&T1z, N, &j)) != MP_OKAY) {4386goto LBL_FU_ERR;4387}43884389if (j == -1) {4390break;4391}43924393if (j == 0) {4394/* composite */4395goto LBL_FU_ERR;4396}4397}4398/* Tell it a composite and set return value accordingly */4399if (a >= LTM_FROBENIUS_UNDERWOOD_A) {4400e = MP_ITER;4401goto LBL_FU_ERR;4402}4403/* Composite if N and (a+4)*(2*a+5) are not coprime */4404if ((e = mp_set_long(&T1z, (unsigned long)((a+4)*((2*a)+5)))) != MP_OKAY) {4405goto LBL_FU_ERR;4406}44074408if ((e = mp_gcd(N, &T1z, &T1z)) != MP_OKAY) {4409goto LBL_FU_ERR;4410}44114412if (!((T1z.used == 1) && (T1z.dp[0] == 1u))) {4413goto LBL_FU_ERR;4414}44154416ap2 = a + 2;4417if ((e = mp_add_d(N, 1uL, &Np1z)) != MP_OKAY) {4418goto LBL_FU_ERR;4419}44204421mp_set(&sz, 1uL);4422mp_set(&tz, 2uL);4423length = mp_count_bits(&Np1z);44244425for (i = length - 2; i >= 0; i--) {4426/*4427* temp = (sz*(a*sz+2*tz))%N;4428* tz = ((tz-sz)*(tz+sz))%N;4429* sz = temp;4430*/4431if ((e = mp_mul_2(&tz, &T2z)) != MP_OKAY) {4432goto LBL_FU_ERR;4433}44344435/* a = 0 at about 50% of the cases (non-square and odd input) */4436if (a != 0) {4437if ((e = mp_mul_d(&sz, (mp_digit)a, &T1z)) != MP_OKAY) {4438goto LBL_FU_ERR;4439}4440if ((e = mp_add(&T1z, &T2z, &T2z)) != MP_OKAY) {4441goto LBL_FU_ERR;4442}4443}44444445if ((e = mp_mul(&T2z, &sz, &T1z)) != MP_OKAY) {4446goto LBL_FU_ERR;4447}4448if ((e = mp_sub(&tz, &sz, &T2z)) != MP_OKAY) {4449goto LBL_FU_ERR;4450}4451if ((e = mp_add(&sz, &tz, &sz)) != MP_OKAY) {4452goto LBL_FU_ERR;4453}4454if ((e = mp_mul(&sz, &T2z, &tz)) != MP_OKAY) {4455goto LBL_FU_ERR;4456}4457if ((e = mp_mod(&tz, N, &tz)) != MP_OKAY) {4458goto LBL_FU_ERR;4459}4460if ((e = mp_mod(&T1z, N, &sz)) != MP_OKAY) {4461goto LBL_FU_ERR;4462}4463if ((isset = mp_get_bit(&Np1z, i)) == MP_VAL) {4464e = isset;4465goto LBL_FU_ERR;4466}4467if (isset == MP_YES) {4468/*4469* temp = (a+2) * sz + tz4470* tz = 2 * tz - sz4471* sz = temp4472*/4473if (a == 0) {4474if ((e = mp_mul_2(&sz, &T1z)) != MP_OKAY) {4475goto LBL_FU_ERR;4476}4477} else {4478if ((e = mp_mul_d(&sz, (mp_digit)ap2, &T1z)) != MP_OKAY) {4479goto LBL_FU_ERR;4480}4481}4482if ((e = mp_add(&T1z, &tz, &T1z)) != MP_OKAY) {4483goto LBL_FU_ERR;4484}4485if ((e = mp_mul_2(&tz, &T2z)) != MP_OKAY) {4486goto LBL_FU_ERR;4487}4488if ((e = mp_sub(&T2z, &sz, &tz)) != MP_OKAY) {4489goto LBL_FU_ERR;4490}4491mp_exch(&sz, &T1z);4492}4493}44944495if ((e = mp_set_long(&T1z, (unsigned long)((2 * a) + 5))) != MP_OKAY) {4496goto LBL_FU_ERR;4497}4498if ((e = mp_mod(&T1z, N, &T1z)) != MP_OKAY) {4499goto LBL_FU_ERR;4500}4501if ((mp_iszero(&sz) != MP_NO) && (mp_cmp(&tz, &T1z) == MP_EQ)) {4502*result = MP_YES;4503goto LBL_FU_ERR;4504}45054506LBL_FU_ERR:4507mp_clear_multi(&tz, &sz, &Np1z, &T2z, &T1z, NULL);4508return e;4509}45104511#endif45124513/* End: bn_mp_prime_frobenius_underwood.c */45144515/* Start: bn_mp_prime_is_divisible.c */45164517/* determines if an integers is divisible by one4518* of the first PRIME_SIZE primes or not4519*4520* sets result to 0 if not, 1 if yes4521*/4522int mp_prime_is_divisible(const mp_int *a, int *result)4523{4524int err, ix;4525mp_digit res;45264527/* default to not */4528*result = MP_NO;45294530for (ix = 0; ix < PRIME_SIZE; ix++) {4531/* what is a mod LBL_prime_tab[ix] */4532if ((err = mp_mod_d(a, ltm_prime_tab[ix], &res)) != MP_OKAY) {4533return err;4534}45354536/* is the residue zero? */4537if (res == 0u) {4538*result = MP_YES;4539return MP_OKAY;4540}4541}45424543return MP_OKAY;4544}45454546/* End: bn_mp_prime_is_divisible.c */45474548/* Start: bn_mp_prime_is_prime.c */45494550/* portable integer log of two with small footprint */4551static unsigned int s_floor_ilog2(int value)4552{4553unsigned int r = 0;4554while ((value >>= 1) != 0) {4555r++;4556}4557return r;4558}455945604561int mp_prime_is_prime(const mp_int *a, int t, int *result)4562{4563mp_int b;4564int ix, err, res, p_max = 0, size_a, len;4565unsigned int fips_rand, mask;45664567/* default to no */4568*result = MP_NO;45694570/* valid value of t? */4571if (t > PRIME_SIZE) {4572return MP_VAL;4573}45744575/* Some shortcuts */4576/* N > 3 */4577if (a->used == 1) {4578if ((a->dp[0] == 0u) || (a->dp[0] == 1u)) {4579*result = 0;4580return MP_OKAY;4581}4582if (a->dp[0] == 2u) {4583*result = 1;4584return MP_OKAY;4585}4586}45874588/* N must be odd */4589if (mp_iseven(a) == MP_YES) {4590return MP_OKAY;4591}4592/* N is not a perfect square: floor(sqrt(N))^2 != N */4593if ((err = mp_is_square(a, &res)) != MP_OKAY) {4594return err;4595}4596if (res != 0) {4597return MP_OKAY;4598}45994600/* is the input equal to one of the primes in the table? */4601for (ix = 0; ix < PRIME_SIZE; ix++) {4602if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {4603*result = MP_YES;4604return MP_OKAY;4605}4606}4607#ifdef MP_8BIT4608/* The search in the loop above was exhaustive in this case */4609if ((a->used == 1) && (PRIME_SIZE >= 31)) {4610return MP_OKAY;4611}4612#endif46134614/* first perform trial division */4615if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) {4616return err;4617}46184619/* return if it was trivially divisible */4620if (res == MP_YES) {4621return MP_OKAY;4622}46234624/*4625Run the Miller-Rabin test with base 2 for the BPSW test.4626*/4627if ((err = mp_init_set(&b, 2uL)) != MP_OKAY) {4628return err;4629}46304631if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {4632goto LBL_B;4633}4634if (res == MP_NO) {4635goto LBL_B;4636}4637/*4638Rumours have it that Mathematica does a second M-R test with base 3.4639Other rumours have it that their strong L-S test is slightly different.4640It does not hurt, though, beside a bit of extra runtime.4641*/4642b.dp[0]++;4643if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {4644goto LBL_B;4645}4646if (res == MP_NO) {4647goto LBL_B;4648}46494650/*4651* Both, the Frobenius-Underwood test and the the Lucas-Selfridge test are quite4652* slow so if speed is an issue, define LTM_USE_FIPS_ONLY to use M-R tests with4653* bases 2, 3 and t random bases.4654*/4655#ifndef LTM_USE_FIPS_ONLY4656if (t >= 0) {4657/*4658* Use a Frobenius-Underwood test instead of the Lucas-Selfridge test for4659* MP_8BIT (It is unknown if the Lucas-Selfridge test works with 16-bit4660* integers but the necesssary analysis is on the todo-list).4661*/4662#if defined (MP_8BIT) || defined (LTM_USE_FROBENIUS_TEST)4663err = mp_prime_frobenius_underwood(a, &res);4664if ((err != MP_OKAY) && (err != MP_ITER)) {4665goto LBL_B;4666}4667if (res == MP_NO) {4668goto LBL_B;4669}4670#else4671if ((err = mp_prime_strong_lucas_selfridge(a, &res)) != MP_OKAY) {4672goto LBL_B;4673}4674if (res == MP_NO) {4675goto LBL_B;4676}4677#endif4678}4679#endif46804681/* run at least one Miller-Rabin test with a random base */4682if (t == 0) {4683t = 1;4684}46854686/*4687abs(t) extra rounds of M-R to extend the range of primes it can find if t < 0.4688Only recommended if the input range is known to be < 331704406467988738596198146894690It uses the bases for a deterministic M-R test if input < 33170440646798873859619814691The caller has to check the size.46924693Not for cryptographic use because with known bases strong M-R pseudoprimes can4694be constructed. Use at least one M-R test with a random base (t >= 1).46954696The 1119 bit large number4697469880383745745363949125707961434194210813883768828755814583748891752229742737653\469933652186502336163960045457915042023603208766569966760987284043965408232928738\470079185086916685732826776177102938969773947016708230428687109997439976544144845\470134115587245063340927902227529622941498423068816854043264575340183297861112989\47026064484521619165287259753490147034704has been constructed by F. Arnault (F. Arnault, "Rabin-Miller primality test:4705composite numbers which pass it.", Mathematics of Computation, 1995, 64. Jg.,4706Nr. 209, S. 355-361), is a semiprime with the two factors4707470840095821663949960541830645208454685300518816604113250877450620473800321707011\470996242716223191597219733582163165085358166969145233813917169287527980445796800\47104525920318366014711471220047910831974980270915322604227342650259408302056625438725310236900160853505\471398121358111595798609866791081582542679083484572616906958584643763990222898400\471422629601591830147154716and it is a strong pseudoprime to all forty-six prime M-R bases up to 20047174718It does not fail the strong Bailley-PSP test as implemented here, it is just4719given as an example, if not the reason to use the BPSW-test instead of M-R-tests4720with a sequence of primes 2...n.47214722*/4723if (t < 0) {4724t = -t;4725/*4726Sorenson, Jonathan; Webster, Jonathan (2015).4727"Strong Pseudoprimes to Twelve Prime Bases".4728*/4729/* 0x437ae92817f9fc85b7e5 = 318665857834031151167461 */4730if ((err = mp_read_radix(&b, "437ae92817f9fc85b7e5", 16)) != MP_OKAY) {4731goto LBL_B;4732}47334734if (mp_cmp(a, &b) == MP_LT) {4735p_max = 12;4736} else {4737/* 0x2be6951adc5b22410a5fd = 3317044064679887385961981 */4738if ((err = mp_read_radix(&b, "2be6951adc5b22410a5fd", 16)) != MP_OKAY) {4739goto LBL_B;4740}47414742if (mp_cmp(a, &b) == MP_LT) {4743p_max = 13;4744} else {4745err = MP_VAL;4746goto LBL_B;4747}4748}47494750/* for compatibility with the current API (well, compatible within a sign's width) */4751if (p_max < t) {4752p_max = t;4753}47544755if (p_max > PRIME_SIZE) {4756err = MP_VAL;4757goto LBL_B;4758}4759/* we did bases 2 and 3 already, skip them */4760for (ix = 2; ix < p_max; ix++) {4761mp_set(&b, ltm_prime_tab[ix]);4762if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {4763goto LBL_B;4764}4765if (res == MP_NO) {4766goto LBL_B;4767}4768}4769}4770/*4771Do "t" M-R tests with random bases between 3 and "a".4772See Fips 186.4 p. 126ff4773*/4774else if (t > 0) {4775/*4776* The mp_digit's have a defined bit-size but the size of the4777* array a.dp is a simple 'int' and this library can not assume full4778* compliance to the current C-standard (ISO/IEC 9899:2011) because4779* it gets used for small embeded processors, too. Some of those MCUs4780* have compilers that one cannot call standard compliant by any means.4781* Hence the ugly type-fiddling in the following code.4782*/4783size_a = mp_count_bits(a);4784mask = (1u << s_floor_ilog2(size_a)) - 1u;4785/*4786Assuming the General Rieman hypothesis (never thought to write that in a4787comment) the upper bound can be lowered to 2*(log a)^2.4788E. Bach, "Explicit bounds for primality testing and related problems,"4789Math. Comp. 55 (1990), 355-380.47904791size_a = (size_a/10) * 7;4792len = 2 * (size_a * size_a);47934794E.g.: a number of size 2^2048 would be reduced to the upper limit47954796floor(2048/10)*7 = 142847972 * 1428^2 = 407836847984799(would have been ~4030331.9962 with floats and natural log instead)4800That number is smaller than 2^28, the default bit-size of mp_digit.4801*/48024803/*4804How many tests, you might ask? Dana Jacobsen of Math::Prime::Util fame4805does exactly 1. In words: one. Look at the end of _GMP_is_prime() in4806Math-Prime-Util-GMP-0.50/primality.c if you do not believe it.48074808The function mp_rand() goes to some length to use a cryptographically4809good PRNG. That also means that the chance to always get the same base4810in the loop is non-zero, although very low.4811If the BPSW test and/or the addtional Frobenious test have been4812performed instead of just the Miller-Rabin test with the bases 2 and 3,4813a single extra test should suffice, so such a very unlikely event4814will not do much harm.48154816To preemptivly answer the dangling question: no, a witness does not4817need to be prime.4818*/4819for (ix = 0; ix < t; ix++) {4820/* mp_rand() guarantees the first digit to be non-zero */4821if ((err = mp_rand(&b, 1)) != MP_OKAY) {4822goto LBL_B;4823}4824/*4825* Reduce digit before casting because mp_digit might be bigger than4826* an unsigned int and "mask" on the other side is most probably not.4827*/4828fips_rand = (unsigned int)(b.dp[0] & (mp_digit) mask);4829#ifdef MP_8BIT4830/*4831* One 8-bit digit is too small, so concatenate two if the size of4832* unsigned int allows for it.4833*/4834if (((sizeof(unsigned int) * CHAR_BIT)/2) >= (sizeof(mp_digit) * CHAR_BIT)) {4835if ((err = mp_rand(&b, 1)) != MP_OKAY) {4836goto LBL_B;4837}4838fips_rand <<= sizeof(mp_digit) * CHAR_BIT;4839fips_rand |= (unsigned int) b.dp[0];4840fips_rand &= mask;4841}4842#endif4843if (fips_rand > (unsigned int)(INT_MAX - DIGIT_BIT)) {4844len = INT_MAX / DIGIT_BIT;4845} else {4846len = (((int)fips_rand + DIGIT_BIT) / DIGIT_BIT);4847}4848/* Unlikely. */4849if (len < 0) {4850ix--;4851continue;4852}4853/*4854* As mentioned above, one 8-bit digit is too small and4855* although it can only happen in the unlikely case that4856* an "unsigned int" is smaller than 16 bit a simple test4857* is cheap and the correction even cheaper.4858*/4859#ifdef MP_8BIT4860/* All "a" < 2^8 have been caught before */4861if (len == 1) {4862len++;4863}4864#endif4865if ((err = mp_rand(&b, len)) != MP_OKAY) {4866goto LBL_B;4867}4868/*4869* That number might got too big and the witness has to be4870* smaller than or equal to "a"4871*/4872len = mp_count_bits(&b);4873if (len > size_a) {4874len = len - size_a;4875if ((err = mp_div_2d(&b, len, &b, NULL)) != MP_OKAY) {4876goto LBL_B;4877}4878}48794880/* Although the chance for b <= 3 is miniscule, try again. */4881if (mp_cmp_d(&b, 3uL) != MP_GT) {4882ix--;4883continue;4884}4885if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {4886goto LBL_B;4887}4888if (res == MP_NO) {4889goto LBL_B;4890}4891}4892}48934894/* passed the test */4895*result = MP_YES;4896LBL_B:4897mp_clear(&b);4898return err;4899}49004901/* End: bn_mp_prime_is_prime.c */49024903/* Start: bn_mp_prime_miller_rabin.c */49044905/* Miller-Rabin test of "a" to the base of "b" as described in4906* HAC pp. 139 Algorithm 4.244907*4908* Sets result to 0 if definitely composite or 1 if probably prime.4909* Randomly the chance of error is no more than 1/4 and often4910* very much lower.4911*/4912int mp_prime_miller_rabin(const mp_int *a, const mp_int *b, int *result)4913{4914mp_int n1, y, r;4915int s, j, err;49164917/* default */4918*result = MP_NO;49194920/* ensure b > 1 */4921if (mp_cmp_d(b, 1uL) != MP_GT) {4922return MP_VAL;4923}49244925/* get n1 = a - 1 */4926if ((err = mp_init_copy(&n1, a)) != MP_OKAY) {4927return err;4928}4929if ((err = mp_sub_d(&n1, 1uL, &n1)) != MP_OKAY) {4930goto LBL_N1;4931}49324933/* set 2**s * r = n1 */4934if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) {4935goto LBL_N1;4936}49374938/* count the number of least significant bits4939* which are zero4940*/4941s = mp_cnt_lsb(&r);49424943/* now divide n - 1 by 2**s */4944if ((err = mp_div_2d(&r, s, &r, NULL)) != MP_OKAY) {4945goto LBL_R;4946}49474948/* compute y = b**r mod a */4949if ((err = mp_init(&y)) != MP_OKAY) {4950goto LBL_R;4951}4952if ((err = mp_exptmod(b, &r, a, &y)) != MP_OKAY) {4953goto LBL_Y;4954}49554956/* if y != 1 and y != n1 do */4957if ((mp_cmp_d(&y, 1uL) != MP_EQ) && (mp_cmp(&y, &n1) != MP_EQ)) {4958j = 1;4959/* while j <= s-1 and y != n1 */4960while ((j <= (s - 1)) && (mp_cmp(&y, &n1) != MP_EQ)) {4961if ((err = mp_sqrmod(&y, a, &y)) != MP_OKAY) {4962goto LBL_Y;4963}49644965/* if y == 1 then composite */4966if (mp_cmp_d(&y, 1uL) == MP_EQ) {4967goto LBL_Y;4968}49694970++j;4971}49724973/* if y != n1 then composite */4974if (mp_cmp(&y, &n1) != MP_EQ) {4975goto LBL_Y;4976}4977}49784979/* probably prime now */4980*result = MP_YES;4981LBL_Y:4982mp_clear(&y);4983LBL_R:4984mp_clear(&r);4985LBL_N1:4986mp_clear(&n1);4987return err;4988}49894990/* End: bn_mp_prime_miller_rabin.c */49914992/* Start: bn_mp_prime_next_prime.c */49934994/* finds the next prime after the number "a" using "t" trials4995* of Miller-Rabin.4996*4997* bbs_style = 1 means the prime must be congruent to 3 mod 44998*/4999int mp_prime_next_prime(mp_int *a, int t, int bbs_style)5000{5001int err, res = MP_NO, x, y;5002mp_digit res_tab[PRIME_SIZE], step, kstep;5003mp_int b;50045005/* force positive */5006a->sign = MP_ZPOS;50075008/* simple algo if a is less than the largest prime in the table */5009if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {5010/* find which prime it is bigger than */5011for (x = PRIME_SIZE - 2; x >= 0; x--) {5012if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {5013if (bbs_style == 1) {5014/* ok we found a prime smaller or5015* equal [so the next is larger]5016*5017* however, the prime must be5018* congruent to 3 mod 45019*/5020if ((ltm_prime_tab[x + 1] & 3u) != 3u) {5021/* scan upwards for a prime congruent to 3 mod 4 */5022for (y = x + 1; y < PRIME_SIZE; y++) {5023if ((ltm_prime_tab[y] & 3u) == 3u) {5024mp_set(a, ltm_prime_tab[y]);5025return MP_OKAY;5026}5027}5028}5029} else {5030mp_set(a, ltm_prime_tab[x + 1]);5031return MP_OKAY;5032}5033}5034}5035/* at this point a maybe 1 */5036if (mp_cmp_d(a, 1uL) == MP_EQ) {5037mp_set(a, 2uL);5038return MP_OKAY;5039}5040/* fall through to the sieve */5041}50425043/* generate a prime congruent to 3 mod 4 or 1/3 mod 4? */5044if (bbs_style == 1) {5045kstep = 4;5046} else {5047kstep = 2;5048}50495050/* at this point we will use a combination of a sieve and Miller-Rabin */50515052if (bbs_style == 1) {5053/* if a mod 4 != 3 subtract the correct value to make it so */5054if ((a->dp[0] & 3u) != 3u) {5055if ((err = mp_sub_d(a, (a->dp[0] & 3u) + 1u, a)) != MP_OKAY) {5056return err;5057};5058}5059} else {5060if (mp_iseven(a) == MP_YES) {5061/* force odd */5062if ((err = mp_sub_d(a, 1uL, a)) != MP_OKAY) {5063return err;5064}5065}5066}50675068/* generate the restable */5069for (x = 1; x < PRIME_SIZE; x++) {5070if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {5071return err;5072}5073}50745075/* init temp used for Miller-Rabin Testing */5076if ((err = mp_init(&b)) != MP_OKAY) {5077return err;5078}50795080for (;;) {5081/* skip to the next non-trivially divisible candidate */5082step = 0;5083do {5084/* y == 1 if any residue was zero [e.g. cannot be prime] */5085y = 0;50865087/* increase step to next candidate */5088step += kstep;50895090/* compute the new residue without using division */5091for (x = 1; x < PRIME_SIZE; x++) {5092/* add the step to each residue */5093res_tab[x] += kstep;50945095/* subtract the modulus [instead of using division] */5096if (res_tab[x] >= ltm_prime_tab[x]) {5097res_tab[x] -= ltm_prime_tab[x];5098}50995100/* set flag if zero */5101if (res_tab[x] == 0u) {5102y = 1;5103}5104}5105} while ((y == 1) && (step < (((mp_digit)1 << DIGIT_BIT) - kstep)));51065107/* add the step */5108if ((err = mp_add_d(a, step, a)) != MP_OKAY) {5109goto LBL_ERR;5110}51115112/* if didn't pass sieve and step == MAX then skip test */5113if ((y == 1) && (step >= (((mp_digit)1 << DIGIT_BIT) - kstep))) {5114continue;5115}51165117if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {5118goto LBL_ERR;5119}5120if (res == MP_YES) {5121break;5122}5123}51245125err = MP_OKAY;5126LBL_ERR:5127mp_clear(&b);5128return err;5129}51305131/* End: bn_mp_prime_next_prime.c */51325133/* Start: bn_mp_prime_rabin_miller_trials.c */51345135static const struct {5136int k, t;5137} sizes[] = {5138{ 80, -1 }, /* Use deterministic algorithm for size <= 80 bits */5139{ 81, 39 },5140{ 96, 37 },5141{ 128, 32 },5142{ 160, 27 },5143{ 192, 21 },5144{ 256, 16 },5145{ 384, 10 },5146{ 512, 7 },5147{ 640, 6 },5148{ 768, 5 },5149{ 896, 4 },5150{ 1024, 4 },5151{ 2048, 2 },5152{ 4096, 1 },5153};51545155/* returns # of RM trials required for a given bit size and max. error of 2^(-96)*/5156int mp_prime_rabin_miller_trials(int size)5157{5158int x;51595160for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {5161if (sizes[x].k == size) {5162return sizes[x].t;5163} else if (sizes[x].k > size) {5164return (x == 0) ? sizes[0].t : sizes[x - 1].t;5165}5166}5167return sizes[x-1].t + 1;5168}51695170/* End: bn_mp_prime_rabin_miller_trials.c */51715172/* Start: bn_mp_prime_random_ex.c */51735174/* makes a truly random prime of a given size (bits),5175*5176* Flags are as follows:5177*5178* LTM_PRIME_BBS - make prime congruent to 3 mod 45179* LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)5180* LTM_PRIME_2MSB_ON - make the 2nd highest bit one5181*5182* You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can5183* have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself5184* so it can be NULL5185*5186*/51875188/* This is possibly the mother of all prime generation functions, muahahahahaha! */5189int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)5190{5191unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;5192int res, err, bsize, maskOR_msb_offset;51935194/* sanity check the input */5195if ((size <= 1) || (t <= 0)) {5196return MP_VAL;5197}51985199/* LTM_PRIME_SAFE implies LTM_PRIME_BBS */5200if ((flags & LTM_PRIME_SAFE) != 0) {5201flags |= LTM_PRIME_BBS;5202}52035204/* calc the byte size */5205bsize = (size>>3) + ((size&7)?1:0);52065207/* we need a buffer of bsize bytes */5208tmp = OPT_CAST(unsigned char) XMALLOC((size_t)bsize);5209if (tmp == NULL) {5210return MP_MEM;5211}52125213/* calc the maskAND value for the MSbyte*/5214maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));52155216/* calc the maskOR_msb */5217maskOR_msb = 0;5218maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;5219if ((flags & LTM_PRIME_2MSB_ON) != 0) {5220maskOR_msb |= 0x80 >> ((9 - size) & 7);5221}52225223/* get the maskOR_lsb */5224maskOR_lsb = 1;5225if ((flags & LTM_PRIME_BBS) != 0) {5226maskOR_lsb |= 3;5227}52285229do {5230/* read the bytes */5231if (cb(tmp, bsize, dat) != bsize) {5232err = MP_VAL;5233goto error;5234}52355236/* work over the MSbyte */5237tmp[0] &= maskAND;5238tmp[0] |= 1 << ((size - 1) & 7);52395240/* mix in the maskORs */5241tmp[maskOR_msb_offset] |= maskOR_msb;5242tmp[bsize-1] |= maskOR_lsb;52435244/* read it in */5245if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) {5246goto error;5247}52485249/* is it prime? */5250if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {5251goto error;5252}5253if (res == MP_NO) {5254continue;5255}52565257if ((flags & LTM_PRIME_SAFE) != 0) {5258/* see if (a-1)/2 is prime */5259if ((err = mp_sub_d(a, 1uL, a)) != MP_OKAY) {5260goto error;5261}5262if ((err = mp_div_2(a, a)) != MP_OKAY) {5263goto error;5264}52655266/* is it prime? */5267if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {5268goto error;5269}5270}5271} while (res == MP_NO);52725273if ((flags & LTM_PRIME_SAFE) != 0) {5274/* restore a to the original value */5275if ((err = mp_mul_2(a, a)) != MP_OKAY) {5276goto error;5277}5278if ((err = mp_add_d(a, 1uL, a)) != MP_OKAY) {5279goto error;5280}5281}52825283err = MP_OKAY;5284error:5285XFREE(tmp);5286return err;5287}52885289/* End: bn_mp_prime_random_ex.c */52905291/* Start: bn_mp_prime_strong_lucas_selfridge.c */52925293/*5294* See file bn_mp_prime_is_prime.c or the documentation in doc/bn.tex for the details5295*/5296#ifndef LTM_USE_FIPS_ONLY52975298/*5299* 8-bit is just too small. You can try the Frobenius test5300* but that frobenius test can fail, too, for the same reason.5301*/5302#ifndef MP_8BIT53035304/*5305* multiply bigint a with int d and put the result in c5306* Like mp_mul_d() but with a signed long as the small input5307*/5308static int s_mp_mul_si(const mp_int *a, long d, mp_int *c)5309{5310mp_int t;5311int err, neg = 0;53125313if ((err = mp_init(&t)) != MP_OKAY) {5314return err;5315}5316if (d < 0) {5317neg = 1;5318d = -d;5319}53205321/*5322* mp_digit might be smaller than a long, which excludes5323* the use of mp_mul_d() here.5324*/5325if ((err = mp_set_long(&t, (unsigned long) d)) != MP_OKAY) {5326goto LBL_MPMULSI_ERR;5327}5328if ((err = mp_mul(a, &t, c)) != MP_OKAY) {5329goto LBL_MPMULSI_ERR;5330}5331if (neg == 1) {5332c->sign = (a->sign == MP_NEG) ? MP_ZPOS: MP_NEG;5333}5334LBL_MPMULSI_ERR:5335mp_clear(&t);5336return err;5337}5338/*5339Strong Lucas-Selfridge test.5340returns MP_YES if it is a strong L-S prime, MP_NO if it is composite53415342Code ported from Thomas Ray Nicely's implementation of the BPSW test5343at http://www.trnicely.net/misc/bpsw.html53445345Freeware copyright (C) 2016 Thomas R. Nicely <http://www.trnicely.net>.5346Released into the public domain by the author, who disclaims any legal5347liability arising from its use53485349The multi-line comments are made by Thomas R. Nicely and are copied verbatim.5350Additional comments marked "CZ" (without the quotes) are by the code-portist.53515352(If that name sounds familiar, he is the guy who found the fdiv bug in the5353Pentium (P5x, I think) Intel processor)5354*/5355int mp_prime_strong_lucas_selfridge(const mp_int *a, int *result)5356{5357/* CZ TODO: choose better variable names! */5358mp_int Dz, gcd, Np1, Uz, Vz, U2mz, V2mz, Qmz, Q2mz, Qkdz, T1z, T2z, T3z, T4z, Q2kdz;5359/* CZ TODO: Some of them need the full 32 bit, hence the (temporary) exclusion of MP_8BIT */5360int32_t D, Ds, J, sign, P, Q, r, s, u, Nbits;5361int e;5362int isset, oddness;53635364*result = MP_NO;5365/*5366Find the first element D in the sequence {5, -7, 9, -11, 13, ...}5367such that Jacobi(D,N) = -1 (Selfridge's algorithm). Theory5368indicates that, if N is not a perfect square, D will "nearly5369always" be "small." Just in case, an overflow trap for D is5370included.5371*/53725373if ((e = mp_init_multi(&Dz, &gcd, &Np1, &Uz, &Vz, &U2mz, &V2mz, &Qmz, &Q2mz, &Qkdz, &T1z, &T2z, &T3z, &T4z, &Q2kdz,5374NULL)) != MP_OKAY) {5375return e;5376}53775378D = 5;5379sign = 1;53805381for (;;) {5382Ds = sign * D;5383sign = -sign;5384if ((e = mp_set_long(&Dz, (unsigned long)D)) != MP_OKAY) {5385goto LBL_LS_ERR;5386}5387if ((e = mp_gcd(a, &Dz, &gcd)) != MP_OKAY) {5388goto LBL_LS_ERR;5389}5390/* if 1 < GCD < N then N is composite with factor "D", and5391Jacobi(D,N) is technically undefined (but often returned5392as zero). */5393if ((mp_cmp_d(&gcd, 1uL) == MP_GT) && (mp_cmp(&gcd, a) == MP_LT)) {5394goto LBL_LS_ERR;5395}5396if (Ds < 0) {5397Dz.sign = MP_NEG;5398}5399if ((e = mp_kronecker(&Dz, a, &J)) != MP_OKAY) {5400goto LBL_LS_ERR;5401}54025403if (J == -1) {5404break;5405}5406D += 2;54075408if (D > (INT_MAX - 2)) {5409e = MP_VAL;5410goto LBL_LS_ERR;5411}5412}5413541454155416P = 1; /* Selfridge's choice */5417Q = (1 - Ds) / 4; /* Required so D = P*P - 4*Q */54185419/* NOTE: The conditions (a) N does not divide Q, and5420(b) D is square-free or not a perfect square, are included by5421some authors; e.g., "Prime numbers and computer methods for5422factorization," Hans Riesel (2nd ed., 1994, Birkhauser, Boston),5423p. 130. For this particular application of Lucas sequences,5424these conditions were found to be immaterial. */54255426/* Now calculate N - Jacobi(D,N) = N + 1 (even), and calculate the5427odd positive integer d and positive integer s for which5428N + 1 = 2^s*d (similar to the step for N - 1 in Miller's test).5429The strong Lucas-Selfridge test then returns N as a strong5430Lucas probable prime (slprp) if any of the following5431conditions is met: U_d=0, V_d=0, V_2d=0, V_4d=0, V_8d=0,5432V_16d=0, ..., etc., ending with V_{2^(s-1)*d}=V_{(N+1)/2}=05433(all equalities mod N). Thus d is the highest index of U that5434must be computed (since V_2m is independent of U), compared5435to U_{N+1} for the standard Lucas-Selfridge test; and no5436index of V beyond (N+1)/2 is required, just as in the5437standard Lucas-Selfridge test. However, the quantity Q^d must5438be computed for use (if necessary) in the latter stages of5439the test. The result is that the strong Lucas-Selfridge test5440has a running time only slightly greater (order of 10 %) than5441that of the standard Lucas-Selfridge test, while producing5442only (roughly) 30 % as many pseudoprimes (and every strong5443Lucas pseudoprime is also a standard Lucas pseudoprime). Thus5444the evidence indicates that the strong Lucas-Selfridge test is5445more effective than the standard Lucas-Selfridge test, and a5446Baillie-PSW test based on the strong Lucas-Selfridge test5447should be more reliable. */54485449if ((e = mp_add_d(a, 1uL, &Np1)) != MP_OKAY) {5450goto LBL_LS_ERR;5451}5452s = mp_cnt_lsb(&Np1);54535454/* CZ5455* This should round towards zero because5456* Thomas R. Nicely used GMP's mpz_tdiv_q_2exp()5457* and mp_div_2d() is equivalent. Additionally:5458* dividing an even number by two does not produce5459* any leftovers.5460*/5461if ((e = mp_div_2d(&Np1, s, &Dz, NULL)) != MP_OKAY) {5462goto LBL_LS_ERR;5463}5464/* We must now compute U_d and V_d. Since d is odd, the accumulated5465values U and V are initialized to U_1 and V_1 (if the target5466index were even, U and V would be initialized instead to U_0=05467and V_0=2). The values of U_2m and V_2m are also initialized to5468U_1 and V_1; the FOR loop calculates in succession U_2 and V_2,5469U_4 and V_4, U_8 and V_8, etc. If the corresponding bits5470(1, 2, 3, ...) of t are on (the zero bit having been accounted5471for in the initialization of U and V), these values are then5472combined with the previous totals for U and V, using the5473composition formulas for addition of indices. */54745475mp_set(&Uz, 1uL); /* U=U_1 */5476mp_set(&Vz, (mp_digit)P); /* V=V_1 */5477mp_set(&U2mz, 1uL); /* U_1 */5478mp_set(&V2mz, (mp_digit)P); /* V_1 */54795480if (Q < 0) {5481Q = -Q;5482if ((e = mp_set_long(&Qmz, (unsigned long)Q)) != MP_OKAY) {5483goto LBL_LS_ERR;5484}5485if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {5486goto LBL_LS_ERR;5487}5488/* Initializes calculation of Q^d */5489if ((e = mp_set_long(&Qkdz, (unsigned long)Q)) != MP_OKAY) {5490goto LBL_LS_ERR;5491}5492Qmz.sign = MP_NEG;5493Q2mz.sign = MP_NEG;5494Qkdz.sign = MP_NEG;5495Q = -Q;5496} else {5497if ((e = mp_set_long(&Qmz, (unsigned long)Q)) != MP_OKAY) {5498goto LBL_LS_ERR;5499}5500if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {5501goto LBL_LS_ERR;5502}5503/* Initializes calculation of Q^d */5504if ((e = mp_set_long(&Qkdz, (unsigned long)Q)) != MP_OKAY) {5505goto LBL_LS_ERR;5506}5507}55085509Nbits = mp_count_bits(&Dz);55105511for (u = 1; u < Nbits; u++) { /* zero bit off, already accounted for */5512/* Formulas for doubling of indices (carried out mod N). Note that5513* the indices denoted as "2m" are actually powers of 2, specifically5514* 2^(ul-1) beginning each loop and 2^ul ending each loop.5515*5516* U_2m = U_m*V_m5517* V_2m = V_m*V_m - 2*Q^m5518*/55195520if ((e = mp_mul(&U2mz, &V2mz, &U2mz)) != MP_OKAY) {5521goto LBL_LS_ERR;5522}5523if ((e = mp_mod(&U2mz, a, &U2mz)) != MP_OKAY) {5524goto LBL_LS_ERR;5525}5526if ((e = mp_sqr(&V2mz, &V2mz)) != MP_OKAY) {5527goto LBL_LS_ERR;5528}5529if ((e = mp_sub(&V2mz, &Q2mz, &V2mz)) != MP_OKAY) {5530goto LBL_LS_ERR;5531}5532if ((e = mp_mod(&V2mz, a, &V2mz)) != MP_OKAY) {5533goto LBL_LS_ERR;5534}5535/* Must calculate powers of Q for use in V_2m, also for Q^d later */5536if ((e = mp_sqr(&Qmz, &Qmz)) != MP_OKAY) {5537goto LBL_LS_ERR;5538}5539/* prevents overflow */ /* CZ still necessary without a fixed prealloc'd mem.? */5540if ((e = mp_mod(&Qmz, a, &Qmz)) != MP_OKAY) {5541goto LBL_LS_ERR;5542}5543if ((e = mp_mul_2(&Qmz, &Q2mz)) != MP_OKAY) {5544goto LBL_LS_ERR;5545}5546if ((isset = mp_get_bit(&Dz, u)) == MP_VAL) {5547e = isset;5548goto LBL_LS_ERR;5549}5550if (isset == MP_YES) {5551/* Formulas for addition of indices (carried out mod N);5552*5553* U_(m+n) = (U_m*V_n + U_n*V_m)/25554* V_(m+n) = (V_m*V_n + D*U_m*U_n)/25555*5556* Be careful with division by 2 (mod N)!5557*/5558if ((e = mp_mul(&U2mz, &Vz, &T1z)) != MP_OKAY) {5559goto LBL_LS_ERR;5560}5561if ((e = mp_mul(&Uz, &V2mz, &T2z)) != MP_OKAY) {5562goto LBL_LS_ERR;5563}5564if ((e = mp_mul(&V2mz, &Vz, &T3z)) != MP_OKAY) {5565goto LBL_LS_ERR;5566}5567if ((e = mp_mul(&U2mz, &Uz, &T4z)) != MP_OKAY) {5568goto LBL_LS_ERR;5569}5570if ((e = s_mp_mul_si(&T4z, (long)Ds, &T4z)) != MP_OKAY) {5571goto LBL_LS_ERR;5572}5573if ((e = mp_add(&T1z, &T2z, &Uz)) != MP_OKAY) {5574goto LBL_LS_ERR;5575}5576if (mp_isodd(&Uz) != MP_NO) {5577if ((e = mp_add(&Uz, a, &Uz)) != MP_OKAY) {5578goto LBL_LS_ERR;5579}5580}5581/* CZ5582* This should round towards negative infinity because5583* Thomas R. Nicely used GMP's mpz_fdiv_q_2exp().5584* But mp_div_2() does not do so, it is truncating instead.5585*/5586oddness = mp_isodd(&Uz);5587if ((e = mp_div_2(&Uz, &Uz)) != MP_OKAY) {5588goto LBL_LS_ERR;5589}5590if ((Uz.sign == MP_NEG) && (oddness != MP_NO)) {5591if ((e = mp_sub_d(&Uz, 1uL, &Uz)) != MP_OKAY) {5592goto LBL_LS_ERR;5593}5594}5595if ((e = mp_add(&T3z, &T4z, &Vz)) != MP_OKAY) {5596goto LBL_LS_ERR;5597}5598if (mp_isodd(&Vz) != MP_NO) {5599if ((e = mp_add(&Vz, a, &Vz)) != MP_OKAY) {5600goto LBL_LS_ERR;5601}5602}5603oddness = mp_isodd(&Vz);5604if ((e = mp_div_2(&Vz, &Vz)) != MP_OKAY) {5605goto LBL_LS_ERR;5606}5607if ((Vz.sign == MP_NEG) && (oddness != MP_NO)) {5608if ((e = mp_sub_d(&Vz, 1uL, &Vz)) != MP_OKAY) {5609goto LBL_LS_ERR;5610}5611}5612if ((e = mp_mod(&Uz, a, &Uz)) != MP_OKAY) {5613goto LBL_LS_ERR;5614}5615if ((e = mp_mod(&Vz, a, &Vz)) != MP_OKAY) {5616goto LBL_LS_ERR;5617}5618/* Calculating Q^d for later use */5619if ((e = mp_mul(&Qkdz, &Qmz, &Qkdz)) != MP_OKAY) {5620goto LBL_LS_ERR;5621}5622if ((e = mp_mod(&Qkdz, a, &Qkdz)) != MP_OKAY) {5623goto LBL_LS_ERR;5624}5625}5626}56275628/* If U_d or V_d is congruent to 0 mod N, then N is a prime or a5629strong Lucas pseudoprime. */5630if ((mp_iszero(&Uz) != MP_NO) || (mp_iszero(&Vz) != MP_NO)) {5631*result = MP_YES;5632goto LBL_LS_ERR;5633}56345635/* NOTE: Ribenboim ("The new book of prime number records," 3rd ed.,56361995/6) omits the condition V0 on p.142, but includes it on5637p. 130. The condition is NECESSARY; otherwise the test will5638return false negatives---e.g., the primes 29 and 2000029 will be5639returned as composite. */56405641/* Otherwise, we must compute V_2d, V_4d, V_8d, ..., V_{2^(s-1)*d}5642by repeated use of the formula V_2m = V_m*V_m - 2*Q^m. If any of5643these are congruent to 0 mod N, then N is a prime or a strong5644Lucas pseudoprime. */56455646/* Initialize 2*Q^(d*2^r) for V_2m */5647if ((e = mp_mul_2(&Qkdz, &Q2kdz)) != MP_OKAY) {5648goto LBL_LS_ERR;5649}56505651for (r = 1; r < s; r++) {5652if ((e = mp_sqr(&Vz, &Vz)) != MP_OKAY) {5653goto LBL_LS_ERR;5654}5655if ((e = mp_sub(&Vz, &Q2kdz, &Vz)) != MP_OKAY) {5656goto LBL_LS_ERR;5657}5658if ((e = mp_mod(&Vz, a, &Vz)) != MP_OKAY) {5659goto LBL_LS_ERR;5660}5661if (mp_iszero(&Vz) != MP_NO) {5662*result = MP_YES;5663goto LBL_LS_ERR;5664}5665/* Calculate Q^{d*2^r} for next r (final iteration irrelevant). */5666if (r < (s - 1)) {5667if ((e = mp_sqr(&Qkdz, &Qkdz)) != MP_OKAY) {5668goto LBL_LS_ERR;5669}5670if ((e = mp_mod(&Qkdz, a, &Qkdz)) != MP_OKAY) {5671goto LBL_LS_ERR;5672}5673if ((e = mp_mul_2(&Qkdz, &Q2kdz)) != MP_OKAY) {5674goto LBL_LS_ERR;5675}5676}5677}5678LBL_LS_ERR:5679mp_clear_multi(&Q2kdz, &T4z, &T3z, &T2z, &T1z, &Qkdz, &Q2mz, &Qmz, &V2mz, &U2mz, &Vz, &Uz, &Np1, &gcd, &Dz, NULL);5680return e;5681}5682#endif5683#endif56845685/* End: bn_mp_prime_strong_lucas_selfridge.c */56865687/* Start: bn_mp_radix_size.c */56885689/* returns size of ASCII reprensentation */5690int mp_radix_size(const mp_int *a, int radix, int *size)5691{5692int res, digs;5693mp_int t;5694mp_digit d;56955696*size = 0;56975698/* make sure the radix is in range */5699if ((radix < 2) || (radix > 64)) {5700return MP_VAL;5701}57025703if (mp_iszero(a) == MP_YES) {5704*size = 2;5705return MP_OKAY;5706}57075708/* special case for binary */5709if (radix == 2) {5710*size = mp_count_bits(a) + ((a->sign == MP_NEG) ? 1 : 0) + 1;5711return MP_OKAY;5712}57135714/* digs is the digit count */5715digs = 0;57165717/* if it's negative add one for the sign */5718if (a->sign == MP_NEG) {5719++digs;5720}57215722/* init a copy of the input */5723if ((res = mp_init_copy(&t, a)) != MP_OKAY) {5724return res;5725}57265727/* force temp to positive */5728t.sign = MP_ZPOS;57295730/* fetch out all of the digits */5731while (mp_iszero(&t) == MP_NO) {5732if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {5733mp_clear(&t);5734return res;5735}5736++digs;5737}5738mp_clear(&t);57395740/* return digs + 1, the 1 is for the NULL byte that would be required. */5741*size = digs + 1;5742return MP_OKAY;5743}57445745/* End: bn_mp_radix_size.c */57465747/* Start: bn_mp_radix_smap.c */57485749/* chars used in radix conversions */5750const char *const mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";5751const uint8_t mp_s_rmap_reverse[] = {57520xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, /* ()*+,-./ */57530x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 01234567 */57540x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 89:;<=>? */57550xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, /* @ABCDEFG */57560x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, /* HIJKLMNO */57570x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, /* PQRSTUVW */57580x21, 0x22, 0x23, 0xff, 0xff, 0xff, 0xff, 0xff, /* XYZ[\]^_ */57590xff, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, /* `abcdefg */57600x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, /* hijklmno */57610x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, /* pqrstuvw */57620x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, /* xyz{|}~. */5763};5764const size_t mp_s_rmap_reverse_sz = sizeof(mp_s_rmap_reverse);57655766/* End: bn_mp_radix_smap.c */57675768/* Start: bn_mp_rand.c */57695770/* First the OS-specific special cases5771* - *BSD5772* - Windows5773*/5774#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__)5775#define MP_ARC4RANDOM5776#define MP_GEN_RANDOM_MAX 0xffffffffu5777#define MP_GEN_RANDOM_SHIFT 3257785779static int s_read_arc4random(mp_digit *p)5780{5781mp_digit d = 0, msk = 0;5782do {5783d <<= MP_GEN_RANDOM_SHIFT;5784d |= ((mp_digit) arc4random());5785msk <<= MP_GEN_RANDOM_SHIFT;5786msk |= (MP_MASK & MP_GEN_RANDOM_MAX);5787} while ((MP_MASK & msk) != MP_MASK);5788*p = d;5789return MP_OKAY;5790}5791#endif57925793#if defined(_WIN32) || defined(_WIN32_WCE)5794#define MP_WIN_CSP57955796#ifndef _WIN32_WINNT5797#define _WIN32_WINNT 0x04005798#endif5799#ifdef _WIN32_WCE5800#define UNDER_CE5801#define ARM5802#endif58035804#define WIN32_LEAN_AND_MEAN5805#include <windows.h>5806#include <ntsecapi.h>58075808static int s_read_win_csp(mp_digit *p)5809{5810int ret = -1;5811if (RtlGenRandom(p, sizeof(*p)) == TRUE) {5812ret = MP_OKAY;5813}5814return ret;5815}5816#endif /* WIN32 */58175818#if !defined(MP_WIN_CSP) && defined(__linux__) && defined(__GLIBC_PREREQ)5819#if __GLIBC_PREREQ(2, 25)5820#define MP_GETRANDOM5821#include <sys/random.h>5822#include <errno.h>58235824static int s_read_getrandom(mp_digit *p)5825{5826int ret;5827do {5828ret = getrandom(p, sizeof(*p), 0);5829} while ((ret == -1) && (errno == EINTR));5830if (ret == sizeof(*p)) return MP_OKAY;5831return -1;5832}5833#endif5834#endif58355836/* We assume all platforms besides windows provide "/dev/urandom".5837* In case yours doesn't, define MP_NO_DEV_URANDOM at compile-time.5838*/5839#if !defined(MP_WIN_CSP) && !defined(MP_NO_DEV_URANDOM)5840#ifndef MP_DEV_URANDOM5841#define MP_DEV_URANDOM "/dev/urandom"5842#endif5843#include <fcntl.h>5844#include <errno.h>5845#include <unistd.h>58465847static int s_read_dev_urandom(mp_digit *p)5848{5849ssize_t r;5850int fd;5851do {5852fd = open(MP_DEV_URANDOM, O_RDONLY);5853} while ((fd == -1) && (errno == EINTR));5854if (fd == -1) return -1;5855do {5856r = read(fd, p, sizeof(*p));5857} while ((r == -1) && (errno == EINTR));5858close(fd);5859if (r != sizeof(*p)) return -1;5860return MP_OKAY;5861}5862#endif58635864#if defined(MP_PRNG_ENABLE_LTM_RNG)5865unsigned long (*ltm_rng)(unsigned char *out, unsigned long outlen, void (*callback)(void));5866void (*ltm_rng_callback)(void);58675868static int s_read_ltm_rng(mp_digit *p)5869{5870unsigned long ret;5871if (ltm_rng == NULL) return -1;5872ret = ltm_rng((void *)p, sizeof(*p), ltm_rng_callback);5873if (ret != sizeof(*p)) return -1;5874return MP_OKAY;5875}5876#endif58775878static int s_rand_digit(mp_digit *p)5879{5880int ret = -1;58815882#if defined(MP_ARC4RANDOM)5883ret = s_read_arc4random(p);5884if (ret == MP_OKAY) return ret;5885#endif58865887#if defined(MP_WIN_CSP)5888ret = s_read_win_csp(p);5889if (ret == MP_OKAY) return ret;5890#else58915892#if defined(MP_GETRANDOM)5893ret = s_read_getrandom(p);5894if (ret == MP_OKAY) return ret;5895#endif5896#if defined(MP_DEV_URANDOM)5897ret = s_read_dev_urandom(p);5898if (ret == MP_OKAY) return ret;5899#endif59005901#endif /* MP_WIN_CSP */59025903#if defined(MP_PRNG_ENABLE_LTM_RNG)5904ret = s_read_ltm_rng(p);5905if (ret == MP_OKAY) return ret;5906#endif59075908return ret;5909}59105911/* makes a pseudo-random int of a given size */5912int mp_rand_digit(mp_digit *r)5913{5914int ret = s_rand_digit(r);5915*r &= MP_MASK;5916return ret;5917}59185919int mp_rand(mp_int *a, int digits)5920{5921int res;5922mp_digit d;59235924mp_zero(a);5925if (digits <= 0) {5926return MP_OKAY;5927}59285929/* first place a random non-zero digit */5930do {5931if (mp_rand_digit(&d) != MP_OKAY) {5932return MP_VAL;5933}5934} while (d == 0u);59355936if ((res = mp_add_d(a, d, a)) != MP_OKAY) {5937return res;5938}59395940while (--digits > 0) {5941if ((res = mp_lshd(a, 1)) != MP_OKAY) {5942return res;5943}59445945if (mp_rand_digit(&d) != MP_OKAY) {5946return MP_VAL;5947}5948if ((res = mp_add_d(a, d, a)) != MP_OKAY) {5949return res;5950}5951}59525953return MP_OKAY;5954}59555956/* Start: bn_mp_read_radix.c */59575958/* read a string [ASCII] in a given radix */5959int mp_read_radix(mp_int *a, const char *str, int radix)5960{5961int y, res, neg;5962unsigned pos;5963char ch;59645965/* zero the digit bignum */5966mp_zero(a);59675968/* make sure the radix is ok */5969if ((radix < 2) || (radix > 64)) {5970return MP_VAL;5971}59725973/* if the leading digit is a5974* minus set the sign to negative.5975*/5976if (*str == '-') {5977++str;5978neg = MP_NEG;5979} else {5980neg = MP_ZPOS;5981}59825983/* set the integer to the default of zero */5984mp_zero(a);59855986/* process each digit of the string */5987while (*str != '\0') {5988/* if the radix <= 36 the conversion is case insensitive5989* this allows numbers like 1AB and 1ab to represent the same value5990* [e.g. in hex]5991*/5992ch = (radix <= 36) ? (char)toupper((int)*str) : *str;5993pos = (unsigned)(ch - '(');5994if (mp_s_rmap_reverse_sz < pos) {5995break;5996}5997y = (int)mp_s_rmap_reverse[pos];59985999/* if the char was found in the map6000* and is less than the given radix add it6001* to the number, otherwise exit the loop.6002*/6003if ((y == 0xff) || (y >= radix)) {6004break;6005}6006if ((res = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) {6007return res;6008}6009if ((res = mp_add_d(a, (mp_digit)y, a)) != MP_OKAY) {6010return res;6011}6012++str;6013}60146015/* if an illegal character was found, fail. */6016if (!((*str == '\0') || (*str == '\r') || (*str == '\n'))) {6017mp_zero(a);6018return MP_VAL;6019}60206021/* set the sign only if a != 0 */6022if (mp_iszero(a) != MP_YES) {6023a->sign = neg;6024}6025return MP_OKAY;6026}60276028/* End: bn_mp_read_radix.c */60296030/* Start: bn_mp_read_signed_bin.c */60316032/* read signed bin, big endian, first byte is 0==positive or 1==negative */6033int mp_read_signed_bin(mp_int *a, const unsigned char *b, int c)6034{6035int res;60366037/* read magnitude */6038if ((res = mp_read_unsigned_bin(a, b + 1, c - 1)) != MP_OKAY) {6039return res;6040}60416042/* first byte is 0 for positive, non-zero for negative */6043if (b[0] == (unsigned char)0) {6044a->sign = MP_ZPOS;6045} else {6046a->sign = MP_NEG;6047}60486049return MP_OKAY;6050}60516052/* End: bn_mp_read_signed_bin.c */60536054/* Start: bn_mp_read_unsigned_bin.c */60556056/* reads a unsigned char array, assumes the msb is stored first [big endian] */6057int mp_read_unsigned_bin(mp_int *a, const unsigned char *b, int c)6058{6059int res;60606061/* make sure there are at least two digits */6062if (a->alloc < 2) {6063if ((res = mp_grow(a, 2)) != MP_OKAY) {6064return res;6065}6066}60676068/* zero the int */6069mp_zero(a);60706071/* read the bytes in */6072while (c-- > 0) {6073if ((res = mp_mul_2d(a, 8, a)) != MP_OKAY) {6074return res;6075}60766077#ifndef MP_8BIT6078a->dp[0] |= *b++;6079a->used += 1;6080#else6081a->dp[0] = (*b & MP_MASK);6082a->dp[1] |= ((*b++ >> 7) & 1u);6083a->used += 2;6084#endif6085}6086mp_clamp(a);6087return MP_OKAY;6088}60896090/* End: bn_mp_read_unsigned_bin.c */60916092/* Start: bn_mp_reduce.c */60936094/* reduces x mod m, assumes 0 < x < m**2, mu is6095* precomputed via mp_reduce_setup.6096* From HAC pp.604 Algorithm 14.426097*/6098int mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)6099{6100mp_int q;6101int res, um = m->used;61026103/* q = x */6104if ((res = mp_init_copy(&q, x)) != MP_OKAY) {6105return res;6106}61076108/* q1 = x / b**(k-1) */6109mp_rshd(&q, um - 1);61106111/* according to HAC this optimization is ok */6112if ((mp_digit)um > ((mp_digit)1 << (DIGIT_BIT - 1))) {6113if ((res = mp_mul(&q, mu, &q)) != MP_OKAY) {6114goto CLEANUP;6115}6116} else {6117if ((res = s_mp_mul_high_digs(&q, mu, &q, um)) != MP_OKAY) {6118goto CLEANUP;6119}6120}61216122/* q3 = q2 / b**(k+1) */6123mp_rshd(&q, um + 1);61246125/* x = x mod b**(k+1), quick (no division) */6126if ((res = mp_mod_2d(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {6127goto CLEANUP;6128}61296130/* q = q * m mod b**(k+1), quick (no division) */6131if ((res = s_mp_mul_digs(&q, m, &q, um + 1)) != MP_OKAY) {6132goto CLEANUP;6133}61346135/* x = x - q */6136if ((res = mp_sub(x, &q, x)) != MP_OKAY) {6137goto CLEANUP;6138}61396140/* If x < 0, add b**(k+1) to it */6141if (mp_cmp_d(x, 0uL) == MP_LT) {6142mp_set(&q, 1uL);6143if ((res = mp_lshd(&q, um + 1)) != MP_OKAY)6144goto CLEANUP;6145if ((res = mp_add(x, &q, x)) != MP_OKAY)6146goto CLEANUP;6147}61486149/* Back off if it's too big */6150while (mp_cmp(x, m) != MP_LT) {6151if ((res = s_mp_sub(x, m, x)) != MP_OKAY) {6152goto CLEANUP;6153}6154}61556156CLEANUP:6157mp_clear(&q);61586159return res;6160}61616162/* End: bn_mp_reduce.c */61636164/* Start: bn_mp_reduce_2k.c */61656166/* reduces a modulo n where n is of the form 2**p - d */6167int mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)6168{6169mp_int q;6170int p, res;61716172if ((res = mp_init(&q)) != MP_OKAY) {6173return res;6174}61756176p = mp_count_bits(n);6177top:6178/* q = a/2**p, a = a mod 2**p */6179if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {6180goto LBL_ERR;6181}61826183if (d != 1u) {6184/* q = q * d */6185if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {6186goto LBL_ERR;6187}6188}61896190/* a = a + q */6191if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {6192goto LBL_ERR;6193}61946195if (mp_cmp_mag(a, n) != MP_LT) {6196if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {6197goto LBL_ERR;6198}6199goto top;6200}62016202LBL_ERR:6203mp_clear(&q);6204return res;6205}62066207/* End: bn_mp_reduce_2k.c */62086209/* Start: bn_mp_reduce_2k_l.c */62106211/* reduces a modulo n where n is of the form 2**p - d6212This differs from reduce_2k since "d" can be larger6213than a single digit.6214*/6215int mp_reduce_2k_l(mp_int *a, const mp_int *n, const mp_int *d)6216{6217mp_int q;6218int p, res;62196220if ((res = mp_init(&q)) != MP_OKAY) {6221return res;6222}62236224p = mp_count_bits(n);6225top:6226/* q = a/2**p, a = a mod 2**p */6227if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {6228goto LBL_ERR;6229}62306231/* q = q * d */6232if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {6233goto LBL_ERR;6234}62356236/* a = a + q */6237if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {6238goto LBL_ERR;6239}62406241if (mp_cmp_mag(a, n) != MP_LT) {6242if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {6243goto LBL_ERR;6244}6245goto top;6246}62476248LBL_ERR:6249mp_clear(&q);6250return res;6251}62526253/* End: bn_mp_reduce_2k_l.c */62546255/* Start: bn_mp_reduce_2k_setup.c */62566257/* determines the setup value */6258int mp_reduce_2k_setup(const mp_int *a, mp_digit *d)6259{6260int res, p;6261mp_int tmp;62626263if ((res = mp_init(&tmp)) != MP_OKAY) {6264return res;6265}62666267p = mp_count_bits(a);6268if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {6269mp_clear(&tmp);6270return res;6271}62726273if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {6274mp_clear(&tmp);6275return res;6276}62776278*d = tmp.dp[0];6279mp_clear(&tmp);6280return MP_OKAY;6281}62826283/* End: bn_mp_reduce_2k_setup.c */62846285/* Start: bn_mp_reduce_2k_setup_l.c */62866287/* determines the setup value */6288int mp_reduce_2k_setup_l(const mp_int *a, mp_int *d)6289{6290int res;6291mp_int tmp;62926293if ((res = mp_init(&tmp)) != MP_OKAY) {6294return res;6295}62966297if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {6298goto LBL_ERR;6299}63006301if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {6302goto LBL_ERR;6303}63046305LBL_ERR:6306mp_clear(&tmp);6307return res;6308}63096310/* End: bn_mp_reduce_2k_setup_l.c */63116312/* Start: bn_mp_reduce_is_2k.c */63136314/* determines if mp_reduce_2k can be used */6315int mp_reduce_is_2k(const mp_int *a)6316{6317int ix, iy, iw;6318mp_digit iz;63196320if (a->used == 0) {6321return MP_NO;6322} else if (a->used == 1) {6323return MP_YES;6324} else if (a->used > 1) {6325iy = mp_count_bits(a);6326iz = 1;6327iw = 1;63286329/* Test every bit from the second digit up, must be 1 */6330for (ix = DIGIT_BIT; ix < iy; ix++) {6331if ((a->dp[iw] & iz) == 0u) {6332return MP_NO;6333}6334iz <<= 1;6335if (iz > (mp_digit)MP_MASK) {6336++iw;6337iz = 1;6338}6339}6340}6341return MP_YES;6342}63436344/* End: bn_mp_reduce_is_2k.c */63456346/* Start: bn_mp_reduce_is_2k_l.c */63476348/* determines if reduce_2k_l can be used */6349int mp_reduce_is_2k_l(const mp_int *a)6350{6351int ix, iy;63526353if (a->used == 0) {6354return MP_NO;6355} else if (a->used == 1) {6356return MP_YES;6357} else if (a->used > 1) {6358/* if more than half of the digits are -1 we're sold */6359for (iy = ix = 0; ix < a->used; ix++) {6360if (a->dp[ix] == MP_MASK) {6361++iy;6362}6363}6364return (iy >= (a->used/2)) ? MP_YES : MP_NO;63656366}6367return MP_NO;6368}63696370/* End: bn_mp_reduce_is_2k_l.c */63716372/* Start: bn_mp_reduce_setup.c */63736374/* pre-calculate the value required for Barrett reduction6375* For a given modulus "b" it calulates the value required in "a"6376*/6377int mp_reduce_setup(mp_int *a, const mp_int *b)6378{6379int res;63806381if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {6382return res;6383}6384return mp_div(a, b, a, NULL);6385}63866387/* End: bn_mp_reduce_setup.c */63886389/* Start: bn_mp_rshd.c */63906391/* shift right a certain amount of digits */6392void mp_rshd(mp_int *a, int b)6393{6394int x;63956396/* if b <= 0 then ignore it */6397if (b <= 0) {6398return;6399}64006401/* if b > used then simply zero it and return */6402if (a->used <= b) {6403mp_zero(a);6404return;6405}64066407{6408mp_digit *bottom, *top;64096410/* shift the digits down */64116412/* bottom */6413bottom = a->dp;64146415/* top [offset into digits] */6416top = a->dp + b;64176418/* this is implemented as a sliding window where6419* the window is b-digits long and digits from6420* the top of the window are copied to the bottom6421*6422* e.g.64236424b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->6425/\ | ---->6426\-------------------/ ---->6427*/6428for (x = 0; x < (a->used - b); x++) {6429*bottom++ = *top++;6430}64316432/* zero the top digits */6433for (; x < a->used; x++) {6434*bottom++ = 0;6435}6436}64376438/* remove excess digits */6439a->used -= b;6440}64416442/* End: bn_mp_rshd.c */64436444/* Start: bn_mp_set.c */64456446/* set to a digit */6447void mp_set(mp_int *a, mp_digit b)6448{6449mp_zero(a);6450a->dp[0] = b & MP_MASK;6451a->used = (a->dp[0] != 0u) ? 1 : 0;6452}64536454/* End: bn_mp_set.c */64556456/* Start: bn_mp_set_int.c */64576458/* set a 32-bit const */6459int mp_set_int(mp_int *a, unsigned long b)6460{6461int x, res;64626463mp_zero(a);64646465/* set four bits at a time */6466for (x = 0; x < 8; x++) {6467/* shift the number up four bits */6468if ((res = mp_mul_2d(a, 4, a)) != MP_OKAY) {6469return res;6470}64716472/* OR in the top four bits of the source */6473a->dp[0] |= (mp_digit)(b >> 28) & 15uL;64746475/* shift the source up to the next four bits */6476b <<= 4;64776478/* ensure that digits are not clamped off */6479a->used += 1;6480}6481mp_clamp(a);6482return MP_OKAY;6483}64846485/* End: bn_mp_set_int.c */64866487/* Start: bn_mp_set_long.c */64886489/* set a platform dependent unsigned long int */6490MP_SET_XLONG(mp_set_long, unsigned long)64916492/* End: bn_mp_set_long.c */64936494/* Start: bn_mp_set_long_long.c */64956496/* set a platform dependent unsigned long long int */6497MP_SET_XLONG(mp_set_long_long, unsigned long long)64986499/* End: bn_mp_set_long_long.c */65006501/* Start: bn_mp_shrink.c */65026503/* shrink a bignum */6504int mp_shrink(mp_int *a)6505{6506mp_digit *tmp;6507int used = 1;65086509if (a->used > 0) {6510used = a->used;6511}65126513if (a->alloc != used) {6514if ((tmp = OPT_CAST(mp_digit) XREALLOC(a->dp, sizeof(mp_digit) * (size_t)used)) == NULL) {6515return MP_MEM;6516}6517a->dp = tmp;6518a->alloc = used;6519}6520return MP_OKAY;6521}65226523/* End: bn_mp_shrink.c */65246525/* Start: bn_mp_signed_bin_size.c */65266527/* get the size for an signed equivalent */6528int mp_signed_bin_size(const mp_int *a)6529{6530return 1 + mp_unsigned_bin_size(a);6531}65326533/* End: bn_mp_signed_bin_size.c */65346535/* Start: bn_mp_sqr.c */65366537/* computes b = a*a */6538int mp_sqr(const mp_int *a, mp_int *b)6539{6540int res;65416542/* use Toom-Cook? */6543if (a->used >= TOOM_SQR_CUTOFF) {6544res = mp_toom_sqr(a, b);6545/* Karatsuba? */6546} else6547if (a->used >= KARATSUBA_SQR_CUTOFF) {6548res = mp_karatsuba_sqr(a, b);6549} else6550{6551/* can we use the fast comba multiplier? */6552if ((((a->used * 2) + 1) < (int)MP_WARRAY) &&6553(a->used <6554(int)(1u << (((sizeof(mp_word) * (size_t)CHAR_BIT) - (2u * (size_t)DIGIT_BIT)) - 1u)))) {6555res = fast_s_mp_sqr(a, b);6556} else6557{6558res = s_mp_sqr(a, b);6559}6560}6561b->sign = MP_ZPOS;6562return res;6563}65646565/* End: bn_mp_sqr.c */65666567/* Start: bn_mp_sqrmod.c */65686569/* c = a * a (mod b) */6570int mp_sqrmod(const mp_int *a, const mp_int *b, mp_int *c)6571{6572int res;6573mp_int t;65746575if ((res = mp_init(&t)) != MP_OKAY) {6576return res;6577}65786579if ((res = mp_sqr(a, &t)) != MP_OKAY) {6580mp_clear(&t);6581return res;6582}6583res = mp_mod(&t, b, c);6584mp_clear(&t);6585return res;6586}65876588/* End: bn_mp_sqrmod.c */65896590/* Start: bn_mp_sqrt.c */65916592/* this function is less generic than mp_n_root, simpler and faster */6593int mp_sqrt(const mp_int *arg, mp_int *ret)6594{6595int res;6596mp_int t1, t2;65976598/* must be positive */6599if (arg->sign == MP_NEG) {6600return MP_VAL;6601}66026603/* easy out */6604if (mp_iszero(arg) == MP_YES) {6605mp_zero(ret);6606return MP_OKAY;6607}66086609if ((res = mp_init_copy(&t1, arg)) != MP_OKAY) {6610return res;6611}66126613if ((res = mp_init(&t2)) != MP_OKAY) {6614goto E2;6615}66166617/* First approx. (not very bad for large arg) */6618mp_rshd(&t1, t1.used/2);66196620/* t1 > 0 */6621if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {6622goto E1;6623}6624if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {6625goto E1;6626}6627if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {6628goto E1;6629}6630/* And now t1 > sqrt(arg) */6631do {6632if ((res = mp_div(arg, &t1, &t2, NULL)) != MP_OKAY) {6633goto E1;6634}6635if ((res = mp_add(&t1, &t2, &t1)) != MP_OKAY) {6636goto E1;6637}6638if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) {6639goto E1;6640}6641/* t1 >= sqrt(arg) >= t2 at this point */6642} while (mp_cmp_mag(&t1, &t2) == MP_GT);66436644mp_exch(&t1, ret);66456646E1:6647mp_clear(&t2);6648E2:6649mp_clear(&t1);6650return res;6651}66526653/* End: bn_mp_sqrt.c */66546655/* Start: bn_mp_sqrtmod_prime.c */66566657/* Tonelli-Shanks algorithm6658* https://en.wikipedia.org/wiki/Tonelli%E2%80%93Shanks_algorithm6659* https://gmplib.org/list-archives/gmp-discuss/2013-April/005300.html6660*6661*/66626663int mp_sqrtmod_prime(const mp_int *n, const mp_int *prime, mp_int *ret)6664{6665int res, legendre;6666mp_int t1, C, Q, S, Z, M, T, R, two;6667mp_digit i;66686669/* first handle the simple cases */6670if (mp_cmp_d(n, 0uL) == MP_EQ) {6671mp_zero(ret);6672return MP_OKAY;6673}6674if (mp_cmp_d(prime, 2uL) == MP_EQ) return MP_VAL; /* prime must be odd */6675if ((res = mp_jacobi(n, prime, &legendre)) != MP_OKAY) return res;6676if (legendre == -1) return MP_VAL; /* quadratic non-residue mod prime */66776678if ((res = mp_init_multi(&t1, &C, &Q, &S, &Z, &M, &T, &R, &two, NULL)) != MP_OKAY) {6679return res;6680}66816682/* SPECIAL CASE: if prime mod 4 == 36683* compute directly: res = n^(prime+1)/4 mod prime6684* Handbook of Applied Cryptography algorithm 3.366685*/6686if ((res = mp_mod_d(prime, 4uL, &i)) != MP_OKAY) goto cleanup;6687if (i == 3u) {6688if ((res = mp_add_d(prime, 1uL, &t1)) != MP_OKAY) goto cleanup;6689if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) goto cleanup;6690if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) goto cleanup;6691if ((res = mp_exptmod(n, &t1, prime, ret)) != MP_OKAY) goto cleanup;6692res = MP_OKAY;6693goto cleanup;6694}66956696/* NOW: Tonelli-Shanks algorithm */66976698/* factor out powers of 2 from prime-1, defining Q and S as: prime-1 = Q*2^S */6699if ((res = mp_copy(prime, &Q)) != MP_OKAY) goto cleanup;6700if ((res = mp_sub_d(&Q, 1uL, &Q)) != MP_OKAY) goto cleanup;6701/* Q = prime - 1 */6702mp_zero(&S);6703/* S = 0 */6704while (mp_iseven(&Q) != MP_NO) {6705if ((res = mp_div_2(&Q, &Q)) != MP_OKAY) goto cleanup;6706/* Q = Q / 2 */6707if ((res = mp_add_d(&S, 1uL, &S)) != MP_OKAY) goto cleanup;6708/* S = S + 1 */6709}67106711/* find a Z such that the Legendre symbol (Z|prime) == -1 */6712if ((res = mp_set_int(&Z, 2uL)) != MP_OKAY) goto cleanup;6713/* Z = 2 */6714while (1) {6715if ((res = mp_jacobi(&Z, prime, &legendre)) != MP_OKAY) goto cleanup;6716if (legendre == -1) break;6717if ((res = mp_add_d(&Z, 1uL, &Z)) != MP_OKAY) goto cleanup;6718/* Z = Z + 1 */6719}67206721if ((res = mp_exptmod(&Z, &Q, prime, &C)) != MP_OKAY) goto cleanup;6722/* C = Z ^ Q mod prime */6723if ((res = mp_add_d(&Q, 1uL, &t1)) != MP_OKAY) goto cleanup;6724if ((res = mp_div_2(&t1, &t1)) != MP_OKAY) goto cleanup;6725/* t1 = (Q + 1) / 2 */6726if ((res = mp_exptmod(n, &t1, prime, &R)) != MP_OKAY) goto cleanup;6727/* R = n ^ ((Q + 1) / 2) mod prime */6728if ((res = mp_exptmod(n, &Q, prime, &T)) != MP_OKAY) goto cleanup;6729/* T = n ^ Q mod prime */6730if ((res = mp_copy(&S, &M)) != MP_OKAY) goto cleanup;6731/* M = S */6732if ((res = mp_set_int(&two, 2uL)) != MP_OKAY) goto cleanup;67336734res = MP_VAL;6735while (1) {6736if ((res = mp_copy(&T, &t1)) != MP_OKAY) goto cleanup;6737i = 0;6738while (1) {6739if (mp_cmp_d(&t1, 1uL) == MP_EQ) break;6740if ((res = mp_exptmod(&t1, &two, prime, &t1)) != MP_OKAY) goto cleanup;6741i++;6742}6743if (i == 0u) {6744if ((res = mp_copy(&R, ret)) != MP_OKAY) goto cleanup;6745res = MP_OKAY;6746goto cleanup;6747}6748if ((res = mp_sub_d(&M, i, &t1)) != MP_OKAY) goto cleanup;6749if ((res = mp_sub_d(&t1, 1uL, &t1)) != MP_OKAY) goto cleanup;6750if ((res = mp_exptmod(&two, &t1, prime, &t1)) != MP_OKAY) goto cleanup;6751/* t1 = 2 ^ (M - i - 1) */6752if ((res = mp_exptmod(&C, &t1, prime, &t1)) != MP_OKAY) goto cleanup;6753/* t1 = C ^ (2 ^ (M - i - 1)) mod prime */6754if ((res = mp_sqrmod(&t1, prime, &C)) != MP_OKAY) goto cleanup;6755/* C = (t1 * t1) mod prime */6756if ((res = mp_mulmod(&R, &t1, prime, &R)) != MP_OKAY) goto cleanup;6757/* R = (R * t1) mod prime */6758if ((res = mp_mulmod(&T, &C, prime, &T)) != MP_OKAY) goto cleanup;6759/* T = (T * C) mod prime */6760mp_set(&M, i);6761/* M = i */6762}67636764cleanup:6765mp_clear_multi(&t1, &C, &Q, &S, &Z, &M, &T, &R, &two, NULL);6766return res;6767}67686769/* End: bn_mp_sqrtmod_prime.c */67706771/* Start: bn_mp_sub.c */67726773/* high level subtraction (handles signs) */6774int mp_sub(const mp_int *a, const mp_int *b, mp_int *c)6775{6776int sa, sb, res;67776778sa = a->sign;6779sb = b->sign;67806781if (sa != sb) {6782/* subtract a negative from a positive, OR */6783/* subtract a positive from a negative. */6784/* In either case, ADD their magnitudes, */6785/* and use the sign of the first number. */6786c->sign = sa;6787res = s_mp_add(a, b, c);6788} else {6789/* subtract a positive from a positive, OR */6790/* subtract a negative from a negative. */6791/* First, take the difference between their */6792/* magnitudes, then... */6793if (mp_cmp_mag(a, b) != MP_LT) {6794/* Copy the sign from the first */6795c->sign = sa;6796/* The first has a larger or equal magnitude */6797res = s_mp_sub(a, b, c);6798} else {6799/* The result has the *opposite* sign from */6800/* the first number. */6801c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;6802/* The second has a larger magnitude */6803res = s_mp_sub(b, a, c);6804}6805}6806return res;6807}68086809/* End: bn_mp_sub.c */68106811/* Start: bn_mp_sub_d.c */68126813/* single digit subtraction */6814int mp_sub_d(const mp_int *a, mp_digit b, mp_int *c)6815{6816mp_digit *tmpa, *tmpc, mu;6817int res, ix, oldused;68186819/* grow c as required */6820if (c->alloc < (a->used + 1)) {6821if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {6822return res;6823}6824}68256826/* if a is negative just do an unsigned6827* addition [with fudged signs]6828*/6829if (a->sign == MP_NEG) {6830mp_int a_ = *a;6831a_.sign = MP_ZPOS;6832res = mp_add_d(&a_, b, c);6833c->sign = MP_NEG;68346835/* clamp */6836mp_clamp(c);68376838return res;6839}68406841/* setup regs */6842oldused = c->used;6843tmpa = a->dp;6844tmpc = c->dp;68456846/* if a <= b simply fix the single digit */6847if (((a->used == 1) && (a->dp[0] <= b)) || (a->used == 0)) {6848if (a->used == 1) {6849*tmpc++ = b - *tmpa;6850} else {6851*tmpc++ = b;6852}6853ix = 1;68546855/* negative/1digit */6856c->sign = MP_NEG;6857c->used = 1;6858} else {6859/* positive/size */6860c->sign = MP_ZPOS;6861c->used = a->used;68626863/* subtract first digit */6864*tmpc = *tmpa++ - b;6865mu = *tmpc >> ((sizeof(mp_digit) * (size_t)CHAR_BIT) - 1u);6866*tmpc++ &= MP_MASK;68676868/* handle rest of the digits */6869for (ix = 1; ix < a->used; ix++) {6870*tmpc = *tmpa++ - mu;6871mu = *tmpc >> ((sizeof(mp_digit) * (size_t)CHAR_BIT) - 1u);6872*tmpc++ &= MP_MASK;6873}6874}68756876/* zero excess digits */6877while (ix++ < oldused) {6878*tmpc++ = 0;6879}6880mp_clamp(c);6881return MP_OKAY;6882}68836884/* End: bn_mp_sub_d.c */68856886/* Start: bn_mp_submod.c */68876888/* d = a - b (mod c) */6889int mp_submod(const mp_int *a, const mp_int *b, const mp_int *c, mp_int *d)6890{6891int res;6892mp_int t;689368946895if ((res = mp_init(&t)) != MP_OKAY) {6896return res;6897}68986899if ((res = mp_sub(a, b, &t)) != MP_OKAY) {6900mp_clear(&t);6901return res;6902}6903res = mp_mod(&t, c, d);6904mp_clear(&t);6905return res;6906}69076908/* End: bn_mp_submod.c */69096910/* Start: bn_mp_tc_and.c */69116912/* two complement and */6913int mp_tc_and(const mp_int *a, const mp_int *b, mp_int *c)6914{6915int res = MP_OKAY, bits, abits, bbits;6916int as = mp_isneg(a), bs = mp_isneg(b);6917mp_int *mx = NULL, _mx, acpy, bcpy;69186919if ((as != MP_NO) || (bs != MP_NO)) {6920abits = mp_count_bits(a);6921bbits = mp_count_bits(b);6922bits = MAX(abits, bbits);6923res = mp_init_set_int(&_mx, 1uL);6924if (res != MP_OKAY) {6925goto end;6926}69276928mx = &_mx;6929res = mp_mul_2d(mx, bits + 1, mx);6930if (res != MP_OKAY) {6931goto end;6932}69336934if (as != MP_NO) {6935res = mp_init(&acpy);6936if (res != MP_OKAY) {6937goto end;6938}69396940res = mp_add(mx, a, &acpy);6941if (res != MP_OKAY) {6942mp_clear(&acpy);6943goto end;6944}6945a = &acpy;6946}6947if (bs != MP_NO) {6948res = mp_init(&bcpy);6949if (res != MP_OKAY) {6950goto end;6951}69526953res = mp_add(mx, b, &bcpy);6954if (res != MP_OKAY) {6955mp_clear(&bcpy);6956goto end;6957}6958b = &bcpy;6959}6960}69616962res = mp_and(a, b, c);69636964if ((as != MP_NO) && (bs != MP_NO) && (res == MP_OKAY)) {6965res = mp_sub(c, mx, c);6966}69676968end:6969if (a == &acpy) {6970mp_clear(&acpy);6971}69726973if (b == &bcpy) {6974mp_clear(&bcpy);6975}69766977if (mx == &_mx) {6978mp_clear(mx);6979}69806981return res;6982}69836984/* End: bn_mp_tc_and.c */69856986/* Start: bn_mp_tc_div_2d.c */69876988/* two complement right shift */6989int mp_tc_div_2d(const mp_int *a, int b, mp_int *c)6990{6991int res;6992if (mp_isneg(a) == MP_NO) {6993return mp_div_2d(a, b, c, NULL);6994}69956996res = mp_add_d(a, 1uL, c);6997if (res != MP_OKAY) {6998return res;6999}70007001res = mp_div_2d(c, b, c, NULL);7002return (res == MP_OKAY) ? mp_sub_d(c, 1uL, c) : res;7003}70047005/* End: bn_mp_tc_div_2d.c */70067007/* Start: bn_mp_tc_or.c */70087009/* two complement or */7010int mp_tc_or(const mp_int *a, const mp_int *b, mp_int *c)7011{7012int res = MP_OKAY, bits, abits, bbits;7013int as = mp_isneg(a), bs = mp_isneg(b);7014mp_int *mx = NULL, _mx, acpy, bcpy;70157016if ((as != MP_NO) || (bs != MP_NO)) {7017abits = mp_count_bits(a);7018bbits = mp_count_bits(b);7019bits = MAX(abits, bbits);7020res = mp_init_set_int(&_mx, 1uL);7021if (res != MP_OKAY) {7022goto end;7023}70247025mx = &_mx;7026res = mp_mul_2d(mx, bits + 1, mx);7027if (res != MP_OKAY) {7028goto end;7029}70307031if (as != MP_NO) {7032res = mp_init(&acpy);7033if (res != MP_OKAY) {7034goto end;7035}70367037res = mp_add(mx, a, &acpy);7038if (res != MP_OKAY) {7039mp_clear(&acpy);7040goto end;7041}7042a = &acpy;7043}7044if (bs != MP_NO) {7045res = mp_init(&bcpy);7046if (res != MP_OKAY) {7047goto end;7048}70497050res = mp_add(mx, b, &bcpy);7051if (res != MP_OKAY) {7052mp_clear(&bcpy);7053goto end;7054}7055b = &bcpy;7056}7057}70587059res = mp_or(a, b, c);70607061if (((as != MP_NO) || (bs != MP_NO)) && (res == MP_OKAY)) {7062res = mp_sub(c, mx, c);7063}70647065end:7066if (a == &acpy) {7067mp_clear(&acpy);7068}70697070if (b == &bcpy) {7071mp_clear(&bcpy);7072}70737074if (mx == &_mx) {7075mp_clear(mx);7076}70777078return res;7079}70807081/* End: bn_mp_tc_or.c */70827083/* Start: bn_mp_tc_xor.c */70847085/* two complement xor */7086int mp_tc_xor(const mp_int *a, const mp_int *b, mp_int *c)7087{7088int res = MP_OKAY, bits, abits, bbits;7089int as = mp_isneg(a), bs = mp_isneg(b);7090mp_int *mx = NULL, _mx, acpy, bcpy;70917092if ((as != MP_NO) || (bs != MP_NO)) {7093abits = mp_count_bits(a);7094bbits = mp_count_bits(b);7095bits = MAX(abits, bbits);7096res = mp_init_set_int(&_mx, 1uL);7097if (res != MP_OKAY) {7098goto end;7099}71007101mx = &_mx;7102res = mp_mul_2d(mx, bits + 1, mx);7103if (res != MP_OKAY) {7104goto end;7105}71067107if (as != MP_NO) {7108res = mp_init(&acpy);7109if (res != MP_OKAY) {7110goto end;7111}71127113res = mp_add(mx, a, &acpy);7114if (res != MP_OKAY) {7115mp_clear(&acpy);7116goto end;7117}7118a = &acpy;7119}7120if (bs != MP_NO) {7121res = mp_init(&bcpy);7122if (res != MP_OKAY) {7123goto end;7124}71257126res = mp_add(mx, b, &bcpy);7127if (res != MP_OKAY) {7128mp_clear(&bcpy);7129goto end;7130}7131b = &bcpy;7132}7133}71347135res = mp_xor(a, b, c);71367137if ((as != bs) && (res == MP_OKAY)) {7138res = mp_sub(c, mx, c);7139}71407141end:7142if (a == &acpy) {7143mp_clear(&acpy);7144}71457146if (b == &bcpy) {7147mp_clear(&bcpy);7148}71497150if (mx == &_mx) {7151mp_clear(mx);7152}71537154return res;7155}71567157/* End: bn_mp_tc_xor.c */71587159/* Start: bn_mp_to_signed_bin.c */71607161/* store in signed [big endian] format */7162int mp_to_signed_bin(const mp_int *a, unsigned char *b)7163{7164int res;71657166if ((res = mp_to_unsigned_bin(a, b + 1)) != MP_OKAY) {7167return res;7168}7169b[0] = (a->sign == MP_ZPOS) ? (unsigned char)0 : (unsigned char)1;7170return MP_OKAY;7171}71727173/* End: bn_mp_to_signed_bin.c */71747175/* Start: bn_mp_to_signed_bin_n.c */71767177/* store in signed [big endian] format */7178int mp_to_signed_bin_n(const mp_int *a, unsigned char *b, unsigned long *outlen)7179{7180if (*outlen < (unsigned long)mp_signed_bin_size(a)) {7181return MP_VAL;7182}7183*outlen = (unsigned long)mp_signed_bin_size(a);7184return mp_to_signed_bin(a, b);7185}71867187/* End: bn_mp_to_signed_bin_n.c */71887189/* Start: bn_mp_to_unsigned_bin.c */71907191/* store in unsigned [big endian] format */7192int mp_to_unsigned_bin(const mp_int *a, unsigned char *b)7193{7194int x, res;7195mp_int t;71967197if ((res = mp_init_copy(&t, a)) != MP_OKAY) {7198return res;7199}72007201x = 0;7202while (mp_iszero(&t) == MP_NO) {7203#ifndef MP_8BIT7204b[x++] = (unsigned char)(t.dp[0] & 255u);7205#else7206b[x++] = (unsigned char)(t.dp[0] | ((t.dp[1] & 1u) << 7));7207#endif7208if ((res = mp_div_2d(&t, 8, &t, NULL)) != MP_OKAY) {7209mp_clear(&t);7210return res;7211}7212}7213bn_reverse(b, x);7214mp_clear(&t);7215return MP_OKAY;7216}72177218/* End: bn_mp_to_unsigned_bin.c */72197220/* Start: bn_mp_to_unsigned_bin_n.c */72217222/* store in unsigned [big endian] format */7223int mp_to_unsigned_bin_n(const mp_int *a, unsigned char *b, unsigned long *outlen)7224{7225if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) {7226return MP_VAL;7227}7228*outlen = (unsigned long)mp_unsigned_bin_size(a);7229return mp_to_unsigned_bin(a, b);7230}72317232/* End: bn_mp_to_unsigned_bin_n.c */72337234/* Start: bn_mp_toom_mul.c */72357236/* multiplication using the Toom-Cook 3-way algorithm7237*7238* Much more complicated than Karatsuba but has a lower7239* asymptotic running time of O(N**1.464). This algorithm is7240* only particularly useful on VERY large inputs7241* (we're talking 1000s of digits here...).7242*/7243int mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c)7244{7245mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;7246int res, B;72477248/* init temps */7249if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,7250&a0, &a1, &a2, &b0, &b1,7251&b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {7252return res;7253}72547255/* B */7256B = MIN(a->used, b->used) / 3;72577258/* a = a2 * B**2 + a1 * B + a0 */7259if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {7260goto LBL_ERR;7261}72627263if ((res = mp_copy(a, &a1)) != MP_OKAY) {7264goto LBL_ERR;7265}7266mp_rshd(&a1, B);7267if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) {7268goto LBL_ERR;7269}72707271if ((res = mp_copy(a, &a2)) != MP_OKAY) {7272goto LBL_ERR;7273}7274mp_rshd(&a2, B*2);72757276/* b = b2 * B**2 + b1 * B + b0 */7277if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {7278goto LBL_ERR;7279}72807281if ((res = mp_copy(b, &b1)) != MP_OKAY) {7282goto LBL_ERR;7283}7284mp_rshd(&b1, B);7285(void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1);72867287if ((res = mp_copy(b, &b2)) != MP_OKAY) {7288goto LBL_ERR;7289}7290mp_rshd(&b2, B*2);72917292/* w0 = a0*b0 */7293if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {7294goto LBL_ERR;7295}72967297/* w4 = a2 * b2 */7298if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {7299goto LBL_ERR;7300}73017302/* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */7303if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {7304goto LBL_ERR;7305}7306if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {7307goto LBL_ERR;7308}7309if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {7310goto LBL_ERR;7311}7312if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {7313goto LBL_ERR;7314}73157316if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {7317goto LBL_ERR;7318}7319if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {7320goto LBL_ERR;7321}7322if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {7323goto LBL_ERR;7324}7325if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {7326goto LBL_ERR;7327}73287329if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {7330goto LBL_ERR;7331}73327333/* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */7334if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {7335goto LBL_ERR;7336}7337if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {7338goto LBL_ERR;7339}7340if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {7341goto LBL_ERR;7342}7343if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {7344goto LBL_ERR;7345}73467347if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {7348goto LBL_ERR;7349}7350if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {7351goto LBL_ERR;7352}7353if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {7354goto LBL_ERR;7355}7356if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {7357goto LBL_ERR;7358}73597360if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {7361goto LBL_ERR;7362}736373647365/* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */7366if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {7367goto LBL_ERR;7368}7369if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {7370goto LBL_ERR;7371}7372if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {7373goto LBL_ERR;7374}7375if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {7376goto LBL_ERR;7377}7378if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {7379goto LBL_ERR;7380}73817382/* now solve the matrix738373840 0 0 0 173851 2 4 8 1673861 1 1 1 1738716 8 4 2 173881 0 0 0 073897390using 12 subtractions, 4 shifts,73912 small divisions and 1 small multiplication7392*/73937394/* r1 - r4 */7395if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {7396goto LBL_ERR;7397}7398/* r3 - r0 */7399if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {7400goto LBL_ERR;7401}7402/* r1/2 */7403if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {7404goto LBL_ERR;7405}7406/* r3/2 */7407if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {7408goto LBL_ERR;7409}7410/* r2 - r0 - r4 */7411if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {7412goto LBL_ERR;7413}7414if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {7415goto LBL_ERR;7416}7417/* r1 - r2 */7418if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {7419goto LBL_ERR;7420}7421/* r3 - r2 */7422if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {7423goto LBL_ERR;7424}7425/* r1 - 8r0 */7426if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {7427goto LBL_ERR;7428}7429if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {7430goto LBL_ERR;7431}7432/* r3 - 8r4 */7433if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {7434goto LBL_ERR;7435}7436if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {7437goto LBL_ERR;7438}7439/* 3r2 - r1 - r3 */7440if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) {7441goto LBL_ERR;7442}7443if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {7444goto LBL_ERR;7445}7446if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {7447goto LBL_ERR;7448}7449/* r1 - r2 */7450if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {7451goto LBL_ERR;7452}7453/* r3 - r2 */7454if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {7455goto LBL_ERR;7456}7457/* r1/3 */7458if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {7459goto LBL_ERR;7460}7461/* r3/3 */7462if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {7463goto LBL_ERR;7464}74657466/* at this point shift W[n] by B*n */7467if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {7468goto LBL_ERR;7469}7470if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {7471goto LBL_ERR;7472}7473if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {7474goto LBL_ERR;7475}7476if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {7477goto LBL_ERR;7478}74797480if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {7481goto LBL_ERR;7482}7483if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {7484goto LBL_ERR;7485}7486if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {7487goto LBL_ERR;7488}7489if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {7490goto LBL_ERR;7491}74927493LBL_ERR:7494mp_clear_multi(&w0, &w1, &w2, &w3, &w4,7495&a0, &a1, &a2, &b0, &b1,7496&b2, &tmp1, &tmp2, NULL);7497return res;7498}74997500/* End: bn_mp_toom_mul.c */75017502/* Start: bn_mp_toom_sqr.c */75037504/* squaring using Toom-Cook 3-way algorithm */7505int mp_toom_sqr(const mp_int *a, mp_int *b)7506{7507mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;7508int res, B;75097510/* init temps */7511if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {7512return res;7513}75147515/* B */7516B = a->used / 3;75177518/* a = a2 * B**2 + a1 * B + a0 */7519if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {7520goto LBL_ERR;7521}75227523if ((res = mp_copy(a, &a1)) != MP_OKAY) {7524goto LBL_ERR;7525}7526mp_rshd(&a1, B);7527if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) {7528goto LBL_ERR;7529}75307531if ((res = mp_copy(a, &a2)) != MP_OKAY) {7532goto LBL_ERR;7533}7534mp_rshd(&a2, B*2);75357536/* w0 = a0*a0 */7537if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {7538goto LBL_ERR;7539}75407541/* w4 = a2 * a2 */7542if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {7543goto LBL_ERR;7544}75457546/* w1 = (a2 + 2(a1 + 2a0))**2 */7547if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {7548goto LBL_ERR;7549}7550if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {7551goto LBL_ERR;7552}7553if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {7554goto LBL_ERR;7555}7556if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {7557goto LBL_ERR;7558}75597560if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {7561goto LBL_ERR;7562}75637564/* w3 = (a0 + 2(a1 + 2a2))**2 */7565if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {7566goto LBL_ERR;7567}7568if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {7569goto LBL_ERR;7570}7571if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {7572goto LBL_ERR;7573}7574if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {7575goto LBL_ERR;7576}75777578if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {7579goto LBL_ERR;7580}758175827583/* w2 = (a2 + a1 + a0)**2 */7584if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {7585goto LBL_ERR;7586}7587if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {7588goto LBL_ERR;7589}7590if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {7591goto LBL_ERR;7592}75937594/* now solve the matrix759575960 0 0 0 175971 2 4 8 1675981 1 1 1 1759916 8 4 2 176001 0 0 0 076017602using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.7603*/76047605/* r1 - r4 */7606if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {7607goto LBL_ERR;7608}7609/* r3 - r0 */7610if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {7611goto LBL_ERR;7612}7613/* r1/2 */7614if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {7615goto LBL_ERR;7616}7617/* r3/2 */7618if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {7619goto LBL_ERR;7620}7621/* r2 - r0 - r4 */7622if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {7623goto LBL_ERR;7624}7625if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {7626goto LBL_ERR;7627}7628/* r1 - r2 */7629if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {7630goto LBL_ERR;7631}7632/* r3 - r2 */7633if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {7634goto LBL_ERR;7635}7636/* r1 - 8r0 */7637if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {7638goto LBL_ERR;7639}7640if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {7641goto LBL_ERR;7642}7643/* r3 - 8r4 */7644if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {7645goto LBL_ERR;7646}7647if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {7648goto LBL_ERR;7649}7650/* 3r2 - r1 - r3 */7651if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) {7652goto LBL_ERR;7653}7654if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {7655goto LBL_ERR;7656}7657if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {7658goto LBL_ERR;7659}7660/* r1 - r2 */7661if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {7662goto LBL_ERR;7663}7664/* r3 - r2 */7665if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {7666goto LBL_ERR;7667}7668/* r1/3 */7669if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {7670goto LBL_ERR;7671}7672/* r3/3 */7673if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {7674goto LBL_ERR;7675}76767677/* at this point shift W[n] by B*n */7678if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {7679goto LBL_ERR;7680}7681if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {7682goto LBL_ERR;7683}7684if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {7685goto LBL_ERR;7686}7687if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {7688goto LBL_ERR;7689}76907691if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {7692goto LBL_ERR;7693}7694if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {7695goto LBL_ERR;7696}7697if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {7698goto LBL_ERR;7699}7700if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {7701goto LBL_ERR;7702}77037704LBL_ERR:7705mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);7706return res;7707}77087709/* End: bn_mp_toom_sqr.c */77107711/* Start: bn_mp_toradix.c */77127713/* stores a bignum as a ASCII string in a given radix (2..64) */7714int mp_toradix(const mp_int *a, char *str, int radix)7715{7716int res, digs;7717mp_int t;7718mp_digit d;7719char *_s = str;77207721/* check range of the radix */7722if ((radix < 2) || (radix > 64)) {7723return MP_VAL;7724}77257726/* quick out if its zero */7727if (mp_iszero(a) == MP_YES) {7728*str++ = '0';7729*str = '\0';7730return MP_OKAY;7731}77327733if ((res = mp_init_copy(&t, a)) != MP_OKAY) {7734return res;7735}77367737/* if it is negative output a - */7738if (t.sign == MP_NEG) {7739++_s;7740*str++ = '-';7741t.sign = MP_ZPOS;7742}77437744digs = 0;7745while (mp_iszero(&t) == MP_NO) {7746if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {7747mp_clear(&t);7748return res;7749}7750*str++ = mp_s_rmap[d];7751++digs;7752}77537754/* reverse the digits of the string. In this case _s points7755* to the first digit [exluding the sign] of the number]7756*/7757bn_reverse((unsigned char *)_s, digs);77587759/* append a NULL so the string is properly terminated */7760*str = '\0';77617762mp_clear(&t);7763return MP_OKAY;7764}77657766/* End: bn_mp_toradix.c */77677768/* Start: bn_mp_toradix_n.c */77697770/* stores a bignum as a ASCII string in a given radix (2..64)7771*7772* Stores upto maxlen-1 chars and always a NULL byte7773*/7774int mp_toradix_n(const mp_int *a, char *str, int radix, int maxlen)7775{7776int res, digs;7777mp_int t;7778mp_digit d;7779char *_s = str;77807781/* check range of the maxlen, radix */7782if ((maxlen < 2) || (radix < 2) || (radix > 64)) {7783return MP_VAL;7784}77857786/* quick out if its zero */7787if (mp_iszero(a) == MP_YES) {7788*str++ = '0';7789*str = '\0';7790return MP_OKAY;7791}77927793if ((res = mp_init_copy(&t, a)) != MP_OKAY) {7794return res;7795}77967797/* if it is negative output a - */7798if (t.sign == MP_NEG) {7799/* we have to reverse our digits later... but not the - sign!! */7800++_s;78017802/* store the flag and mark the number as positive */7803*str++ = '-';7804t.sign = MP_ZPOS;78057806/* subtract a char */7807--maxlen;7808}78097810digs = 0;7811while (mp_iszero(&t) == MP_NO) {7812if (--maxlen < 1) {7813/* no more room */7814break;7815}7816if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {7817mp_clear(&t);7818return res;7819}7820*str++ = mp_s_rmap[d];7821++digs;7822}78237824/* reverse the digits of the string. In this case _s points7825* to the first digit [exluding the sign] of the number7826*/7827bn_reverse((unsigned char *)_s, digs);78287829/* append a NULL so the string is properly terminated */7830*str = '\0';78317832mp_clear(&t);7833return MP_OKAY;7834}78357836/* End: bn_mp_toradix_n.c */78377838/* Start: bn_mp_unsigned_bin_size.c */78397840/* get the size for an unsigned equivalent */7841int mp_unsigned_bin_size(const mp_int *a)7842{7843int size = mp_count_bits(a);7844return (size / 8) + ((((unsigned)size & 7u) != 0u) ? 1 : 0);7845}78467847/* End: bn_mp_unsigned_bin_size.c */78487849/* Start: bn_mp_xor.c */78507851/* XOR two ints together */7852int mp_xor(const mp_int *a, const mp_int *b, mp_int *c)7853{7854int res, ix, px;7855mp_int t;7856const mp_int *x;78577858if (a->used > b->used) {7859if ((res = mp_init_copy(&t, a)) != MP_OKAY) {7860return res;7861}7862px = b->used;7863x = b;7864} else {7865if ((res = mp_init_copy(&t, b)) != MP_OKAY) {7866return res;7867}7868px = a->used;7869x = a;7870}78717872for (ix = 0; ix < px; ix++) {7873t.dp[ix] ^= x->dp[ix];7874}7875mp_clamp(&t);7876mp_exch(c, &t);7877mp_clear(&t);7878return MP_OKAY;7879}78807881/* End: bn_mp_xor.c */78827883/* Start: bn_mp_zero.c */78847885/* set to zero */7886void mp_zero(mp_int *a)7887{7888int n;7889mp_digit *tmp;78907891a->sign = MP_ZPOS;7892a->used = 0;78937894tmp = a->dp;7895for (n = 0; n < a->alloc; n++) {7896*tmp++ = 0;7897}7898}78997900/* End: bn_mp_zero.c */79017902/* Start: bn_prime_tab.c */79037904const mp_digit ltm_prime_tab[] = {79050x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,79060x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,79070x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,79080x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,7909#ifndef MP_8BIT79100x0083,79110x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,79120x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,79130x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,79140x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,791579160x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,79170x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,79180x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,79190x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,79200x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,79210x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,79220x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,79230x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,792479250x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,79260x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,79270x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,79280x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,79290x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,79300x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,79310x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,79320x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,793379340x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,79350x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,79360x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,79370x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,79380x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,79390x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,79400x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,79410x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x06537942#endif7943};79447945/* End: bn_prime_tab.c */79467947/* Start: bn_reverse.c */79487949/* reverse an array, used for radix code */7950void bn_reverse(unsigned char *s, int len)7951{7952int ix, iy;7953unsigned char t;79547955ix = 0;7956iy = len - 1;7957while (ix < iy) {7958t = s[ix];7959s[ix] = s[iy];7960s[iy] = t;7961++ix;7962--iy;7963}7964}79657966/* End: bn_reverse.c */79677968/* Start: bn_s_mp_add.c */79697970/* low level addition, based on HAC pp.594, Algorithm 14.7 */7971int s_mp_add(const mp_int *a, const mp_int *b, mp_int *c)7972{7973const mp_int *x;7974int olduse, res, min, max;79757976/* find sizes, we let |a| <= |b| which means we have to sort7977* them. "x" will point to the input with the most digits7978*/7979if (a->used > b->used) {7980min = b->used;7981max = a->used;7982x = a;7983} else {7984min = a->used;7985max = b->used;7986x = b;7987}79887989/* init result */7990if (c->alloc < (max + 1)) {7991if ((res = mp_grow(c, max + 1)) != MP_OKAY) {7992return res;7993}7994}79957996/* get old used digit count and set new one */7997olduse = c->used;7998c->used = max + 1;79998000{8001mp_digit u, *tmpa, *tmpb, *tmpc;8002int i;80038004/* alias for digit pointers */80058006/* first input */8007tmpa = a->dp;80088009/* second input */8010tmpb = b->dp;80118012/* destination */8013tmpc = c->dp;80148015/* zero the carry */8016u = 0;8017for (i = 0; i < min; i++) {8018/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */8019*tmpc = *tmpa++ + *tmpb++ + u;80208021/* U = carry bit of T[i] */8022u = *tmpc >> (mp_digit)DIGIT_BIT;80238024/* take away carry bit from T[i] */8025*tmpc++ &= MP_MASK;8026}80278028/* now copy higher words if any, that is in A+B8029* if A or B has more digits add those in8030*/8031if (min != max) {8032for (; i < max; i++) {8033/* T[i] = X[i] + U */8034*tmpc = x->dp[i] + u;80358036/* U = carry bit of T[i] */8037u = *tmpc >> (mp_digit)DIGIT_BIT;80388039/* take away carry bit from T[i] */8040*tmpc++ &= MP_MASK;8041}8042}80438044/* add carry */8045*tmpc++ = u;80468047/* clear digits above oldused */8048for (i = c->used; i < olduse; i++) {8049*tmpc++ = 0;8050}8051}80528053mp_clamp(c);8054return MP_OKAY;8055}80568057/* End: bn_s_mp_add.c */80588059/* Start: bn_s_mp_exptmod.c */80608061#ifdef MP_LOW_MEM8062# define TAB_SIZE 328063#else8064# define TAB_SIZE 2568065#endif80668067int s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)8068{8069mp_int M[TAB_SIZE], res, mu;8070mp_digit buf;8071int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;8072int (*redux)(mp_int *x, const mp_int *m, const mp_int *mu);80738074/* find window size */8075x = mp_count_bits(X);8076if (x <= 7) {8077winsize = 2;8078} else if (x <= 36) {8079winsize = 3;8080} else if (x <= 140) {8081winsize = 4;8082} else if (x <= 450) {8083winsize = 5;8084} else if (x <= 1303) {8085winsize = 6;8086} else if (x <= 3529) {8087winsize = 7;8088} else {8089winsize = 8;8090}80918092#ifdef MP_LOW_MEM8093if (winsize > 5) {8094winsize = 5;8095}8096#endif80978098/* init M array */8099/* init first cell */8100if ((err = mp_init(&M[1])) != MP_OKAY) {8101return err;8102}81038104/* now init the second half of the array */8105for (x = 1<<(winsize-1); x < (1 << winsize); x++) {8106if ((err = mp_init(&M[x])) != MP_OKAY) {8107for (y = 1<<(winsize-1); y < x; y++) {8108mp_clear(&M[y]);8109}8110mp_clear(&M[1]);8111return err;8112}8113}81148115/* create mu, used for Barrett reduction */8116if ((err = mp_init(&mu)) != MP_OKAY) {8117goto LBL_M;8118}81198120if (redmode == 0) {8121if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) {8122goto LBL_MU;8123}8124redux = mp_reduce;8125} else {8126if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) {8127goto LBL_MU;8128}8129redux = mp_reduce_2k_l;8130}81318132/* create M table8133*8134* The M table contains powers of the base,8135* e.g. M[x] = G**x mod P8136*8137* The first half of the table is not8138* computed though accept for M[0] and M[1]8139*/8140if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {8141goto LBL_MU;8142}81438144/* compute the value at M[1<<(winsize-1)] by squaring8145* M[1] (winsize-1) times8146*/8147if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {8148goto LBL_MU;8149}81508151for (x = 0; x < (winsize - 1); x++) {8152/* square it */8153if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)],8154&M[(size_t)1 << (winsize - 1)])) != MP_OKAY) {8155goto LBL_MU;8156}81578158/* reduce modulo P */8159if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, &mu)) != MP_OKAY) {8160goto LBL_MU;8161}8162}81638164/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)8165* for x = (2**(winsize - 1) + 1) to (2**winsize - 1)8166*/8167for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {8168if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {8169goto LBL_MU;8170}8171if ((err = redux(&M[x], P, &mu)) != MP_OKAY) {8172goto LBL_MU;8173}8174}81758176/* setup result */8177if ((err = mp_init(&res)) != MP_OKAY) {8178goto LBL_MU;8179}8180mp_set(&res, 1uL);81818182/* set initial mode and bit cnt */8183mode = 0;8184bitcnt = 1;8185buf = 0;8186digidx = X->used - 1;8187bitcpy = 0;8188bitbuf = 0;81898190for (;;) {8191/* grab next digit as required */8192if (--bitcnt == 0) {8193/* if digidx == -1 we are out of digits */8194if (digidx == -1) {8195break;8196}8197/* read next digit and reset the bitcnt */8198buf = X->dp[digidx--];8199bitcnt = (int)DIGIT_BIT;8200}82018202/* grab the next msb from the exponent */8203y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;8204buf <<= (mp_digit)1;82058206/* if the bit is zero and mode == 0 then we ignore it8207* These represent the leading zero bits before the first 1 bit8208* in the exponent. Technically this opt is not required but it8209* does lower the # of trivial squaring/reductions used8210*/8211if ((mode == 0) && (y == 0)) {8212continue;8213}82148215/* if the bit is zero and mode == 1 then we square */8216if ((mode == 1) && (y == 0)) {8217if ((err = mp_sqr(&res, &res)) != MP_OKAY) {8218goto LBL_RES;8219}8220if ((err = redux(&res, P, &mu)) != MP_OKAY) {8221goto LBL_RES;8222}8223continue;8224}82258226/* else we add it to the window */8227bitbuf |= (y << (winsize - ++bitcpy));8228mode = 2;82298230if (bitcpy == winsize) {8231/* ok window is filled so square as required and multiply */8232/* square first */8233for (x = 0; x < winsize; x++) {8234if ((err = mp_sqr(&res, &res)) != MP_OKAY) {8235goto LBL_RES;8236}8237if ((err = redux(&res, P, &mu)) != MP_OKAY) {8238goto LBL_RES;8239}8240}82418242/* then multiply */8243if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {8244goto LBL_RES;8245}8246if ((err = redux(&res, P, &mu)) != MP_OKAY) {8247goto LBL_RES;8248}82498250/* empty window and reset */8251bitcpy = 0;8252bitbuf = 0;8253mode = 1;8254}8255}82568257/* if bits remain then square/multiply */8258if ((mode == 2) && (bitcpy > 0)) {8259/* square then multiply if the bit is set */8260for (x = 0; x < bitcpy; x++) {8261if ((err = mp_sqr(&res, &res)) != MP_OKAY) {8262goto LBL_RES;8263}8264if ((err = redux(&res, P, &mu)) != MP_OKAY) {8265goto LBL_RES;8266}82678268bitbuf <<= 1;8269if ((bitbuf & (1 << winsize)) != 0) {8270/* then multiply */8271if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {8272goto LBL_RES;8273}8274if ((err = redux(&res, P, &mu)) != MP_OKAY) {8275goto LBL_RES;8276}8277}8278}8279}82808281mp_exch(&res, Y);8282err = MP_OKAY;8283LBL_RES:8284mp_clear(&res);8285LBL_MU:8286mp_clear(&mu);8287LBL_M:8288mp_clear(&M[1]);8289for (x = 1<<(winsize-1); x < (1 << winsize); x++) {8290mp_clear(&M[x]);8291}8292return err;8293}82948295/* End: bn_s_mp_exptmod.c */82968297/* Start: bn_s_mp_mul_digs.c */82988299/* multiplies |a| * |b| and only computes upto digs digits of result8300* HAC pp. 595, Algorithm 14.12 Modified so you can control how8301* many digits of output are created.8302*/8303int s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)8304{8305mp_int t;8306int res, pa, pb, ix, iy;8307mp_digit u;8308mp_word r;8309mp_digit tmpx, *tmpt, *tmpy;83108311/* can we use the fast multiplier? */8312if ((digs < (int)MP_WARRAY) &&8313(MIN(a->used, b->used) <8314(int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {8315return fast_s_mp_mul_digs(a, b, c, digs);8316}83178318if ((res = mp_init_size(&t, digs)) != MP_OKAY) {8319return res;8320}8321t.used = digs;83228323/* compute the digits of the product directly */8324pa = a->used;8325for (ix = 0; ix < pa; ix++) {8326/* set the carry to zero */8327u = 0;83288329/* limit ourselves to making digs digits of output */8330pb = MIN(b->used, digs - ix);83318332/* setup some aliases */8333/* copy of the digit from a used within the nested loop */8334tmpx = a->dp[ix];83358336/* an alias for the destination shifted ix places */8337tmpt = t.dp + ix;83388339/* an alias for the digits of b */8340tmpy = b->dp;83418342/* compute the columns of the output and propagate the carry */8343for (iy = 0; iy < pb; iy++) {8344/* compute the column as a mp_word */8345r = (mp_word)*tmpt +8346((mp_word)tmpx * (mp_word)*tmpy++) +8347(mp_word)u;83488349/* the new column is the lower part of the result */8350*tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);83518352/* get the carry word from the result */8353u = (mp_digit)(r >> (mp_word)DIGIT_BIT);8354}8355/* set carry if it is placed below digs */8356if ((ix + iy) < digs) {8357*tmpt = u;8358}8359}83608361mp_clamp(&t);8362mp_exch(&t, c);83638364mp_clear(&t);8365return MP_OKAY;8366}83678368/* End: bn_s_mp_mul_digs.c */83698370/* Start: bn_s_mp_mul_high_digs.c */83718372/* multiplies |a| * |b| and does not compute the lower digs digits8373* [meant to get the higher part of the product]8374*/8375int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)8376{8377mp_int t;8378int res, pa, pb, ix, iy;8379mp_digit u;8380mp_word r;8381mp_digit tmpx, *tmpt, *tmpy;83828383/* can we use the fast multiplier? */8384if (((a->used + b->used + 1) < (int)MP_WARRAY)8385&& (MIN(a->used, b->used) < (int)(1u << (((size_t)CHAR_BIT * sizeof(mp_word)) - (2u * (size_t)DIGIT_BIT))))) {8386return fast_s_mp_mul_high_digs(a, b, c, digs);8387}83888389if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {8390return res;8391}8392t.used = a->used + b->used + 1;83938394pa = a->used;8395pb = b->used;8396for (ix = 0; ix < pa; ix++) {8397/* clear the carry */8398u = 0;83998400/* left hand side of A[ix] * B[iy] */8401tmpx = a->dp[ix];84028403/* alias to the address of where the digits will be stored */8404tmpt = &(t.dp[digs]);84058406/* alias for where to read the right hand side from */8407tmpy = b->dp + (digs - ix);84088409for (iy = digs - ix; iy < pb; iy++) {8410/* calculate the double precision result */8411r = (mp_word)*tmpt +8412((mp_word)tmpx * (mp_word)*tmpy++) +8413(mp_word)u;84148415/* get the lower part */8416*tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);84178418/* carry the carry */8419u = (mp_digit)(r >> (mp_word)DIGIT_BIT);8420}8421*tmpt = u;8422}8423mp_clamp(&t);8424mp_exch(&t, c);8425mp_clear(&t);8426return MP_OKAY;8427}84288429/* End: bn_s_mp_mul_high_digs.c */84308431/* Start: bn_s_mp_sqr.c */84328433/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */8434int s_mp_sqr(const mp_int *a, mp_int *b)8435{8436mp_int t;8437int res, ix, iy, pa;8438mp_word r;8439mp_digit u, tmpx, *tmpt;84408441pa = a->used;8442if ((res = mp_init_size(&t, (2 * pa) + 1)) != MP_OKAY) {8443return res;8444}84458446/* default used is maximum possible size */8447t.used = (2 * pa) + 1;84488449for (ix = 0; ix < pa; ix++) {8450/* first calculate the digit at 2*ix */8451/* calculate double precision result */8452r = (mp_word)t.dp[2*ix] +8453((mp_word)a->dp[ix] * (mp_word)a->dp[ix]);84548455/* store lower part in result */8456t.dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK);84578458/* get the carry */8459u = (mp_digit)(r >> (mp_word)DIGIT_BIT);84608461/* left hand side of A[ix] * A[iy] */8462tmpx = a->dp[ix];84638464/* alias for where to store the results */8465tmpt = t.dp + ((2 * ix) + 1);84668467for (iy = ix + 1; iy < pa; iy++) {8468/* first calculate the product */8469r = (mp_word)tmpx * (mp_word)a->dp[iy];84708471/* now calculate the double precision result, note we use8472* addition instead of *2 since it's easier to optimize8473*/8474r = (mp_word)*tmpt + r + r + (mp_word)u;84758476/* store lower part */8477*tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);84788479/* get carry */8480u = (mp_digit)(r >> (mp_word)DIGIT_BIT);8481}8482/* propagate upwards */8483while (u != 0uL) {8484r = (mp_word)*tmpt + (mp_word)u;8485*tmpt++ = (mp_digit)(r & (mp_word)MP_MASK);8486u = (mp_digit)(r >> (mp_word)DIGIT_BIT);8487}8488}84898490mp_clamp(&t);8491mp_exch(&t, b);8492mp_clear(&t);8493return MP_OKAY;8494}84958496/* End: bn_s_mp_sqr.c */84978498/* Start: bn_s_mp_sub.c */84998500/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */8501int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c)8502{8503int olduse, res, min, max;85048505/* find sizes */8506min = b->used;8507max = a->used;85088509/* init result */8510if (c->alloc < max) {8511if ((res = mp_grow(c, max)) != MP_OKAY) {8512return res;8513}8514}8515olduse = c->used;8516c->used = max;85178518{8519mp_digit u, *tmpa, *tmpb, *tmpc;8520int i;85218522/* alias for digit pointers */8523tmpa = a->dp;8524tmpb = b->dp;8525tmpc = c->dp;85268527/* set carry to zero */8528u = 0;8529for (i = 0; i < min; i++) {8530/* T[i] = A[i] - B[i] - U */8531*tmpc = (*tmpa++ - *tmpb++) - u;85328533/* U = carry bit of T[i]8534* Note this saves performing an AND operation since8535* if a carry does occur it will propagate all the way to the8536* MSB. As a result a single shift is enough to get the carry8537*/8538u = *tmpc >> (((size_t)CHAR_BIT * sizeof(mp_digit)) - 1u);85398540/* Clear carry from T[i] */8541*tmpc++ &= MP_MASK;8542}85438544/* now copy higher words if any, e.g. if A has more digits than B */8545for (; i < max; i++) {8546/* T[i] = A[i] - U */8547*tmpc = *tmpa++ - u;85488549/* U = carry bit of T[i] */8550u = *tmpc >> (((size_t)CHAR_BIT * sizeof(mp_digit)) - 1u);85518552/* Clear carry from T[i] */8553*tmpc++ &= MP_MASK;8554}85558556/* clear digits above used (since we may not have grown result above) */8557for (i = c->used; i < olduse; i++) {8558*tmpc++ = 0;8559}8560}85618562mp_clamp(c);8563return MP_OKAY;8564}85658566/* End: bn_s_mp_sub.c */85678568/* Start: bncore.c */85698570/* Known optimal configurations85718572CPU /Compiler /MUL CUTOFF/SQR CUTOFF8573-------------------------------------------------------------8574Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)8575AMD Athlon64 /GCC v3.4.4 / 80/ 120/LTM 0.3585768577*/85788579int KARATSUBA_MUL_CUTOFF = 80, /* Min. number of digits before Karatsuba multiplication is used. */8580KARATSUBA_SQR_CUTOFF = 120, /* Min. number of digits before Karatsuba squaring is used. */85818582TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */8583TOOM_SQR_CUTOFF = 400;85848585/* End: bncore.c */858685878588