/*1* Copyright (C) 2017 - This file is part of libecc project2*3* Authors:4* Ryad BENADJILA <[email protected]>5* Arnaud EBALARD <[email protected]>6* Jean-Pierre FLORI <[email protected]>7*8* Contributors:9* Nicolas VIVET <[email protected]>10* Karim KHALFALLAH <[email protected]>11*12* This software is licensed under a dual BSD and GPL v2 license.13* See LICENSE file at the root folder of the project.14*/15#include <libecc/nn/nn_mul_public.h>16#include <libecc/nn/nn_logical.h>17#include <libecc/nn/nn_add.h>18#include <libecc/nn/nn.h>19/* Use internal API header */20#include "nn_div.h"2122/*23* Some helper functions to perform operations on an arbitrary part24* of a multiprecision number.25* This is exactly the same code as for operations on the least significant26* part of a multiprecision number except for the starting point in the27* array representing it.28* Done in *constant time*.29*30* Operations producing an output are in place.31*/3233/*34* Compare all the bits of in2 with the same number of bits in in1 starting at35* 'shift' position in in1. in1 must be long enough for that purpose, i.e.36* in1->wlen >= (in2->wlen + shift). The comparison value is provided in37* 'cmp' parameter. The function returns 0 on success, -1 on error.38*39* The function is an internal helper; it expects initialized nn in1 and40* in2: it does not verify that.41*/42ATTRIBUTE_WARN_UNUSED_RET static int _nn_cmp_shift(nn_src_t in1, nn_src_t in2, u8 shift, int *cmp)43{44int ret, mask, tmp;45u8 i;4647MUST_HAVE((in1->wlen >= (in2->wlen + shift)), ret, err);48MUST_HAVE((cmp != NULL), ret, err);4950tmp = 0;51for (i = in2->wlen; i > 0; i--) {52mask = (!(tmp & 0x1));53tmp += ((in1->val[shift + i - 1] > in2->val[i - 1]) & mask);54tmp -= ((in1->val[shift + i - 1] < in2->val[i - 1]) & mask);55}56(*cmp) = tmp;57ret = 0;5859err:60return ret;61}6263/*64* Conditionally subtract a shifted version of in from out, i.e.:65* - if cnd == 1, out <- out - (in << shift)66* - if cnd == 0, out <- out67* The function returns 0 on success, -1 on error. On success, 'borrow'68* provides the possible borrow resulting from the subtraction. 'borrow'69* is not meaningful on error.70*71* The function is an internal helper; it expects initialized nn out and72* in: it does not verify that.73*/74ATTRIBUTE_WARN_UNUSED_RET static int _nn_cnd_sub_shift(int cnd, nn_t out, nn_src_t in,75u8 shift, word_t *borrow)76{77word_t tmp, borrow1, borrow2, _borrow = WORD(0);78word_t mask = WORD_MASK_IFNOTZERO(cnd);79int ret;80u8 i;8182MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err);83MUST_HAVE((borrow != NULL), ret, err);8485/*86* Perform subtraction one word at a time,87* propagating the borrow.88*/89for (i = 0; i < in->wlen; i++) {90tmp = (word_t)(out->val[shift + i] - (in->val[i] & mask));91borrow1 = (word_t)(tmp > out->val[shift + i]);92out->val[shift + i] = (word_t)(tmp - _borrow);93borrow2 = (word_t)(out->val[shift + i] > tmp);94/* There is at most one borrow going out. */95_borrow = (word_t)(borrow1 | borrow2);96}9798(*borrow) = _borrow;99ret = 0;100101err:102return ret;103}104105/*106* Subtract a shifted version of 'in' multiplied by 'w' from 'out' and return107* borrow. The function returns 0 on success, -1 on error. 'borrow' is108* meaningful only on success.109*110* The function is an internal helper; it expects initialized nn out and111* in: it does not verify that.112*/113ATTRIBUTE_WARN_UNUSED_RET static int _nn_submul_word_shift(nn_t out, nn_src_t in, word_t w, u8 shift,114word_t *borrow)115{116word_t _borrow = WORD(0), prod_high, prod_low, tmp;117int ret;118u8 i;119120MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err);121MUST_HAVE((borrow != NULL), ret, err);122123for (i = 0; i < in->wlen; i++) {124/*125* Compute the result of the multiplication of126* two words.127*/128WORD_MUL(prod_high, prod_low, in->val[i], w);129130/*131* And add previous borrow.132*/133prod_low = (word_t)(prod_low + _borrow);134prod_high = (word_t)(prod_high + (prod_low < _borrow));135136/*137* Subtract computed word at current position in result.138*/139tmp = (word_t)(out->val[shift + i] - prod_low);140_borrow = (word_t)(prod_high + (tmp > out->val[shift + i]));141out->val[shift + i] = tmp;142}143144(*borrow) = _borrow;145ret = 0;146147err:148return ret;149}150151/*152* Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b'153* (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized on154* return. * Computation are performed in *constant time*, only depending on155* the lengths of 'a' and 'b', but not on the values of 'a' and 'b'.156*157* This uses the above function to perform arithmetic on arbitrary parts158* of multiprecision numbers.159*160* The algorithm used is schoolbook division:161* + the quotient is computed word by word,162* + a small division of the MSW is performed to obtain an163* approximation of the MSW of the quotient,164* + the approximation is corrected to obtain the correct165* multiprecision MSW of the quotient,166* + the corresponding product is subtracted from the dividend,167* + the same procedure is used for the following word of the quotient.168*169* It is assumed that:170* + b is normalized: the MSB of its MSW is 1,171* + the most significant part of a is smaller than b,172* + a precomputed reciprocal173* v = floor(B^3/(d+1)) - B174* where d is the MSW of the (normalized) divisor175* is given to perform the small 3-by-2 division.176* + using this reciprocal, the approximated quotient is always177* too small and at most one multiprecision correction is needed.178*179* It returns 0 on sucess, -1 on error.180*181* CAUTION:182*183* - The function is expected to be used ONLY by the generic version184* nn_divrem_normalized() defined later in the file.185* - All parameters must have been initialized. Unlike exported/public186* functions, this internal helper does not verify that nn parameters187* have been initialized. Again, this is expected from the caller188* (nn_divrem_normalized()).189* - The function does not support aliasing of 'b' or 'q'. See190* _nn_divrem_normalized_aliased() for such a wrapper.191*192*/193ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized(nn_t q, nn_t r,194nn_src_t a, nn_src_t b, word_t v)195{196word_t borrow, qstar, qh, ql, rh, rl; /* for 3-by-2 div. */197int _small, cmp, ret;198u8 i;199200MUST_HAVE(!(b->wlen <= 0), ret, err);201MUST_HAVE(!(a->wlen <= b->wlen), ret, err);202MUST_HAVE(!(!((b->val[b->wlen - 1] >> (WORD_BITS - 1)) == WORD(1))), ret, err);203MUST_HAVE(!_nn_cmp_shift(a, b, (u8)(a->wlen - b->wlen), &cmp) && (cmp < 0), ret, err);204205/* Handle trivial aliasing for a and r */206if (r != a) {207ret = nn_set_wlen(r, a->wlen); EG(ret, err);208ret = nn_copy(r, a); EG(ret, err);209}210211ret = nn_set_wlen(q, (u8)(r->wlen - b->wlen)); EG(ret, err);212213/*214* Compute subsequent words of the quotient one by one.215* Perform approximate 3-by-2 division using the precomputed216* reciprocal and correct afterward.217*/218for (i = r->wlen; i > b->wlen; i--) {219u8 shift = (u8)(i - b->wlen - 1);220221/*222* Perform 3-by-2 approximate division:223* <qstar, qh, ql> = <rh, rl> * (v + B)224* We are only interested in qstar.225*/226rh = r->val[i - 1];227rl = r->val[i - 2];228/* Perform 2-by-1 multiplication. */229WORD_MUL(qh, ql, rl, v);230WORD_MUL(qstar, ql, rh, v);231/* And propagate carries. */232qh = (word_t)(qh + ql);233qstar = (word_t)(qstar + (qh < ql));234qh = (word_t)(qh + rl);235rh = (word_t)(rh + (qh < rl));236qstar = (word_t)(qstar + rh);237238/*239* Compute approximate quotient times divisor240* and subtract it from remainder:241* r = r - (b*qstar << B^shift)242*/243ret = _nn_submul_word_shift(r, b, qstar, shift, &borrow); EG(ret, err);244245/* Check the approximate quotient was indeed not too large. */246MUST_HAVE(!(r->val[i - 1] < borrow), ret, err);247r->val[i - 1] = (word_t)(r->val[i - 1] - borrow);248249/*250* Check whether the approximate quotient was too small or not.251* At most one multiprecision correction is needed.252*/253ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err);254_small = ((!!(r->val[i - 1])) | (cmp >= 0));255/* Perform conditional multiprecision correction. */256ret = _nn_cnd_sub_shift(_small, r, b, shift, &borrow); EG(ret, err);257MUST_HAVE(!(r->val[i - 1] != borrow), ret, err);258r->val[i - 1] = (word_t)(r->val[i - 1] - borrow);259/*260* Adjust the quotient if it was too small and set it in the261* multiprecision array.262*/263qstar = (word_t)(qstar + (word_t)_small);264q->val[shift] = qstar;265/*266* Check that the MSW of remainder was cancelled out and that267* we could not increase the quotient anymore.268*/269MUST_HAVE(!(r->val[r->wlen - 1] != WORD(0)), ret, err);270271ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err);272MUST_HAVE(!(cmp >= 0), ret, err);273274ret = nn_set_wlen(r, (u8)(r->wlen - 1)); EG(ret, err);275}276277err:278return ret;279}280281/*282* Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b'283* (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized.284* Compared to _nn_divrem_normalized(), this internal version285* explicitly handle the case where 'b' and 'r' point to the same nn (i.e. 'r'286* result is stored in 'b' on success), hence the removal of 'r' parameter from287* function prototype compared to _nn_divrem_normalized().288*289* The computation is performed in *constant time*, see documentation of290* _nn_divrem_normalized().291*292* Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the293* reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than294* 'b'.295*296* The function returns 0 on success, -1 on error.297*298* CAUTION:299*300* - The function is expected to be used ONLY by the generic version301* nn_divrem_normalized() defined later in the file.302* - All parameters must have been initialized. Unlike exported/public303* functions, this internal helper does not verify that nn parameters304* have been initialized. Again, this is expected from the caller305* (nn_divrem_normalized()).306* - The function does not support aliasing of 'a' or 'q'.307*308*/309ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized_aliased(nn_t q, nn_src_t a, nn_t b, word_t v)310{311int ret;312nn r;313r.magic = WORD(0);314315ret = nn_init(&r, 0); EG(ret, err);316ret = _nn_divrem_normalized(q, &r, a, b, v); EG(ret, err);317ret = nn_copy(b, &r); EG(ret, err);318319err:320nn_uninit(&r);321return ret;322}323324/*325* Compute quotient and remainder of Euclidean division, and do not normalize326* them. Done in *constant time*, see documentation of _nn_divrem_normalized().327*328* Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the329* reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than330* 'b'.331*332* Aliasing is supported for 'r' only (with 'b'), i.e. 'r' and 'b' can point333* to the same nn.334*335* The function returns 0 on success, -1 on error.336*/337int nn_divrem_normalized(nn_t q, nn_t r, nn_src_t a, nn_src_t b, word_t v)338{339int ret;340341ret = nn_check_initialized(a); EG(ret, err);342ret = nn_check_initialized(q); EG(ret, err);343ret = nn_check_initialized(r); EG(ret, err);344345/* Unsupported aliasings */346MUST_HAVE((q != r) && (q != a) && (q != b), ret, err);347348if (r == b) {349ret = _nn_divrem_normalized_aliased(q, a, r, v);350} else {351ret = nn_check_initialized(b); EG(ret, err);352ret = _nn_divrem_normalized(q, r, a, b, v);353}354355err:356return ret;357}358359/*360* Compute remainder only and do not normalize it.361* Constant time, see documentation of _nn_divrem_normalized.362*363* Support aliasing of inputs and outputs.364*365* The function returns 0 on success, -1 on error.366*/367int nn_mod_normalized(nn_t r, nn_src_t a, nn_src_t b, word_t v)368{369int ret;370nn q;371q.magic = WORD(0);372373ret = nn_init(&q, 0); EG(ret, err);374ret = nn_divrem_normalized(&q, r, a, b, v);375376err:377nn_uninit(&q);378return ret;379}380381/*382* Compute quotient and remainder of Euclidean division,383* and do not normalize them.384* Done in *constant time*,385* only depending on the lengths of 'a' and 'b' and the value of 'cnt',386* but not on the values of 'a' and 'b'.387*388* Assume that b has been normalized by a 'cnt' bit shift,389* that v is the reciprocal of the MSW of 'b',390* but a is not shifted yet.391* Useful when multiple multiplication by the same b are performed,392* e.g. at the fp level.393*394* All outputs MUST have been initialized. The function does not support395* aliasing of 'b' or 'q'. It returns 0 on success, -1 on error.396*397* CAUTION:398*399* - The function is expected to be used ONLY by the generic version400* nn_divrem_normalized() defined later in the file.401* - All parameters must have been initialized. Unlike exported/public402* functions, this internal helper does not verify that403* have been initialized. Again, this is expected from the caller404* (nn_divrem_unshifted()).405* - The function does not support aliasing. See406* _nn_divrem_unshifted_aliased() for such a wrapper.407*/408ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b_norm,409word_t v, bitcnt_t cnt)410{411nn a_shift;412u8 new_wlen, b_wlen;413int larger, ret, cmp;414word_t borrow;415a_shift.magic = WORD(0);416417/* Avoid overflow */418MUST_HAVE(((a->wlen + BIT_LEN_WORDS(cnt)) < NN_MAX_WORD_LEN), ret, err);419420/* We now know that new_wlen will fit in an u8 */421new_wlen = (u8)(a->wlen + (u8)BIT_LEN_WORDS(cnt));422423b_wlen = b_norm->wlen;424if (new_wlen < b_wlen) { /* trivial case */425ret = nn_copy(r, a); EG(ret, err);426ret = nn_zero(q);427goto err;428}429430/* Shift a. */431ret = nn_init(&a_shift, (u16)(new_wlen * WORD_BYTES)); EG(ret, err);432ret = nn_set_wlen(&a_shift, new_wlen); EG(ret, err);433ret = nn_lshift_fixedlen(&a_shift, a, cnt); EG(ret, err);434ret = nn_set_wlen(r, new_wlen); EG(ret, err);435436if (new_wlen == b_wlen) {437/* Ensure that a is smaller than b. */438ret = nn_cmp(&a_shift, b_norm, &cmp); EG(ret, err);439larger = (cmp >= 0);440ret = nn_cnd_sub(larger, r, &a_shift, b_norm); EG(ret, err);441MUST_HAVE(((!nn_cmp(r, b_norm, &cmp)) && (cmp < 0)), ret, err);442443/* Set MSW of quotient. */444ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err);445q->val[new_wlen - b_wlen] = (word_t) larger;446/* And we are done as the quotient is 0 or 1. */447} else if (new_wlen > b_wlen) {448/* Ensure that most significant part of a is smaller than b. */449ret = _nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp); EG(ret, err);450larger = (cmp >= 0);451ret = _nn_cnd_sub_shift(larger, &a_shift, b_norm, (u8)(new_wlen - b_wlen), &borrow); EG(ret, err);452MUST_HAVE(((!_nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp)) && (cmp < 0)), ret, err);453454/*455* Perform division with MSP of a smaller than b. This ensures456* that the quotient is of length a_len - b_len.457*/458ret = nn_divrem_normalized(q, r, &a_shift, b_norm, v); EG(ret, err);459460/* Set MSW of quotient. */461ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err);462q->val[new_wlen - b_wlen] = (word_t) larger;463} /* else a is smaller than b... treated above. */464465ret = nn_rshift_fixedlen(r, r, cnt); EG(ret, err);466ret = nn_set_wlen(r, b_wlen);467468err:469nn_uninit(&a_shift);470471return ret;472}473474/*475* Same as previous but handling aliasing of 'r' with 'b_norm', i.e. on success,476* result 'r' is passed through 'b_norm'.477*478* CAUTION:479*480* - The function is expected to be used ONLY by the generic version481* nn_divrem_normalized() defined later in the file.482* - All parameter must have been initialized. Unlike exported/public483* functions, this internal helper does not verify that nn parameters484* have been initialized. Again, this is expected from the caller485* (nn_divrem_unshifted()).486*/487ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted_aliased(nn_t q, nn_src_t a, nn_t b_norm,488word_t v, bitcnt_t cnt)489{490int ret;491nn r;492r.magic = WORD(0);493494ret = nn_init(&r, 0); EG(ret, err);495ret = _nn_divrem_unshifted(q, &r, a, b_norm, v, cnt); EG(ret, err);496ret = nn_copy(b_norm, &r); EG(ret, err);497498err:499nn_uninit(&r);500return ret;501}502503/*504* Compute quotient and remainder and do not normalize them.505* Constant time, see documentation of _nn_divrem_unshifted().506*507* Alias-supporting version of _nn_divrem_unshifted for 'r' only.508*509* The function returns 0 on success, -1 on error.510*/511int nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b,512word_t v, bitcnt_t cnt)513{514int ret;515516ret = nn_check_initialized(a); EG(ret, err);517ret = nn_check_initialized(q); EG(ret, err);518ret = nn_check_initialized(r); EG(ret, err);519520/* Unsupported aliasings */521MUST_HAVE((q != r) && (q != a) && (q != b), ret, err);522523if (r == b) {524ret = _nn_divrem_unshifted_aliased(q, a, r, v, cnt);525} else {526ret = nn_check_initialized(b); EG(ret, err);527ret = _nn_divrem_unshifted(q, r, a, b, v, cnt);528}529530err:531return ret;532}533534/*535* Compute remainder only and do not normalize it.536* Constant time, see documentation of _nn_divrem_unshifted.537*538* Aliasing of inputs and outputs is possible.539*540* The function returns 0 on success, -1 on error.541*/542int nn_mod_unshifted(nn_t r, nn_src_t a, nn_src_t b, word_t v, bitcnt_t cnt)543{544nn q;545int ret;546q.magic = WORD(0);547548ret = nn_init(&q, 0); EG(ret, err);549ret = nn_divrem_unshifted(&q, r, a, b, v, cnt);550551err:552nn_uninit(&q);553554return ret;555}556557/*558* Helper functions for arithmetic in 2-by-1 division559* used in the reciprocal computation.560*561* These are variations of the nn multiprecision functions562* acting on arrays of fixed length, in place,563* and returning carry/borrow.564*565* Done in constant time.566*/567568/*569* Comparison of two limbs numbers. Internal helper.570* Checks left to the caller571*/572ATTRIBUTE_WARN_UNUSED_RET static int _wcmp_22(word_t a[2], word_t b[2])573{574int mask, ret = 0;575ret += (a[1] > b[1]);576ret -= (a[1] < b[1]);577mask = !(ret & 0x1);578ret += ((a[0] > b[0]) & mask);579ret -= ((a[0] < b[0]) & mask);580return ret;581}582583/*584* Addition of two limbs numbers with carry returned. Internal helper.585* Checks left to the caller.586*/587ATTRIBUTE_WARN_UNUSED_RET static word_t _wadd_22(word_t a[2], word_t b[2])588{589word_t carry;590a[0] = (word_t)(a[0] + b[0]);591carry = (word_t)(a[0] < b[0]);592a[1] = (word_t)(a[1] + carry);593carry = (word_t)(a[1] < carry);594a[1] = (word_t)(a[1] + b[1]);595carry = (word_t)(carry | (a[1] < b[1]));596return carry;597}598599/*600* Subtraction of two limbs numbers with borrow returned. Internal helper.601* Checks left to the caller.602*/603ATTRIBUTE_WARN_UNUSED_RET static word_t _wsub_22(word_t a[2], word_t b[2])604{605word_t borrow, tmp;606tmp = (word_t)(a[0] - b[0]);607borrow = (word_t)(tmp > a[0]);608a[0] = tmp;609tmp = (word_t)(a[1] - borrow);610borrow = (word_t)(tmp > a[1]);611a[1] = (word_t)(tmp - b[1]);612borrow = (word_t)(borrow | (a[1] > tmp));613return borrow;614}615616/*617* Helper macros for conditional subtraction in 2-by-1 division618* used in the reciprocal computation.619*620* Done in constant time.621*/622623/* Conditional subtraction of a one limb number from a two limbs number. */624#define WORD_CND_SUB_21(cnd, ah, al, b) do { \625word_t tmp, mask; \626mask = WORD_MASK_IFNOTZERO((cnd)); \627tmp = (word_t)((al) - ((b) & mask)); \628(ah) = (word_t)((ah) - (tmp > (al))); \629(al) = tmp; \630} while (0)631/* Conditional subtraction of a two limbs number from a two limbs number. */632#define WORD_CND_SUB_22(cnd, ah, al, bh, bl) do { \633word_t tmp, mask; \634mask = WORD_MASK_IFNOTZERO((cnd)); \635tmp = (word_t)((al) - ((bl) & mask)); \636(ah) = (word_t)((ah) - (tmp > (al))); \637(al) = tmp; \638(ah) = (word_t)((ah) - ((bh) & mask)); \639} while (0)640641/*642* divide two words by a normalized word using schoolbook division on half643* words. This is only used below in the reciprocal computation. No checks644* are performed on inputs. This is expected to be done by the caller.645*646* Returns 0 on success, -1 on error.647*/648ATTRIBUTE_WARN_UNUSED_RET static int _word_divrem(word_t *q, word_t *r, word_t ah, word_t al, word_t b)649{650word_t bh, bl, qh, ql, rm, rhl[2], phl[2];651int larger, ret;652u8 j;653654MUST_HAVE((WRSHIFT((b), (WORD_BITS - 1)) == WORD(1)), ret, err);655bh = WRSHIFT((b), HWORD_BITS);656bl = WLSHIFT((b), HWORD_BITS);657rhl[1] = ah;658rhl[0] = al;659660/*661* Compute high part of the quotient. We know from662* MUST_HAVE() check above that bh (a word_t) is not 0663*/664665KNOWN_FACT(bh != 0, ret, err);666qh = (rhl[1] / bh);667qh = WORD_MIN(qh, HWORD_MASK);668WORD_MUL(phl[1], phl[0], qh, (b));669phl[1] = (WLSHIFT(phl[1], HWORD_BITS) |670WRSHIFT(phl[0], HWORD_BITS));671phl[0] = WLSHIFT(phl[0], HWORD_BITS);672673for (j = 0; j < 2; j++) {674larger = (_wcmp_22(phl, rhl) > 0);675qh = (word_t)(qh - (word_t)larger);676WORD_CND_SUB_22(larger, phl[1], phl[0], bh, bl);677}678679ret = (_wcmp_22(phl, rhl) > 0);680MUST_HAVE(!(ret), ret, err);681IGNORE_RET_VAL(_wsub_22(rhl, phl));682MUST_HAVE((WRSHIFT(rhl[1], HWORD_BITS) == 0), ret, err);683684/* Compute low part of the quotient. */685rm = (WLSHIFT(rhl[1], HWORD_BITS) |686WRSHIFT(rhl[0], HWORD_BITS));687ql = (rm / bh);688ql = WORD_MIN(ql, HWORD_MASK);689WORD_MUL(phl[1], phl[0], ql, (b));690691for (j = 0; j < 2; j++) {692larger = (_wcmp_22(phl, rhl) > 0);693ql = (word_t) (ql - (word_t)larger);694WORD_CND_SUB_21(larger, phl[1], phl[0], (b));695}696697ret = _wcmp_22(phl, rhl) > 0;698MUST_HAVE(!(ret), ret, err);699IGNORE_RET_VAL(_wsub_22(rhl, phl));700/* Set outputs. */701MUST_HAVE((rhl[1] == WORD(0)), ret, err);702MUST_HAVE(!(rhl[0] >= (b)), ret, err);703(*q) = (WLSHIFT(qh, HWORD_BITS) | ql);704(*r) = rhl[0];705MUST_HAVE(!((word_t) ((*q)*(b) + (*r)) != (al)), ret, err);706ret = 0;707708err:709return ret;710}711712/*713* Compute the reciprocal of d as714* floor(B^3/(d+1)) - B715* which is used to perform approximate small division using a multiplication.716*717* No attempt was made to make it constant time. Indeed, such values are usually718* precomputed in contexts where constant time is wanted, e.g. in the fp layer.719*720* Returns 0 on success, -1 on error.721*/722int wreciprocal(word_t dh, word_t dl, word_t *reciprocal)723{724word_t q, carry, r[2], t[2];725int ret;726727MUST_HAVE((reciprocal != NULL), ret, err);728729if (((word_t)(dh + WORD(1)) == WORD(0)) &&730((word_t)(dl + WORD(1)) == WORD(0))) {731(*reciprocal) = WORD(0);732ret = 0;733goto err;734}735736if ((word_t)(dh + WORD(1)) == WORD(0)) {737q = (word_t)(~dh);738r[1] = (word_t)(~dl);739} else {740t[1] = (word_t)(~dh);741t[0] = (word_t)(~dl);742ret = _word_divrem(&q, r+1, t[1], t[0],743(word_t)(dh + WORD(1))); EG(ret, err);744}745746if ((word_t)(dl + WORD(1)) == WORD(0)) {747(*reciprocal) = q;748ret = 0;749goto err;750}751752r[0] = WORD(0);753754WORD_MUL(t[1], t[0], q, (word_t)~dl);755carry = _wadd_22(r, t);756757t[0] = (word_t)(dl + WORD(1));758t[1] = dh;759while (carry || (_wcmp_22(r, t) >= 0)) {760q++;761carry = (word_t)(carry - _wsub_22(r, t));762}763764(*reciprocal) = q;765ret = 0;766767err:768return ret;769}770771/*772* Given an odd number p, compute division coefficients p_normalized,773* p_shift and p_reciprocal so that:774* - p_shift = p_rounded_bitlen - bitsizeof(p), where775* o p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of776* minimum number of words required to store p) and777* o p_bitlen is the real bit size of p778* - p_normalized = p << p_shift779* - p_reciprocal = B^3 / ((p_normalized >> (pbitlen - 2*WORDSIZE)) + 1) - B780* with B = 2^WORDSIZE781*782* These coefficients are useful for the optimized shifted variants of NN783* division and modular functions. Because we have two word_t outputs784* (p_shift and p_reciprocal), these are passed through word_t pointers.785* Aliasing of outputs with the input is possible since p_in is copied in786* local p at the beginning of the function.787*788* The function does not support aliasing.789*790* The function returns 0 on success, -1 on error.791*/792int nn_compute_div_coefs(nn_t p_normalized, word_t *p_shift,793word_t *p_reciprocal, nn_src_t p_in)794{795bitcnt_t p_rounded_bitlen, p_bitlen;796nn p, tmp_nn;797int ret;798p.magic = tmp_nn.magic = WORD(0);799800ret = nn_check_initialized(p_in); EG(ret, err);801802MUST_HAVE((p_shift != NULL), ret, err);803MUST_HAVE((p_reciprocal != NULL), ret, err);804805/* Unsupported aliasing */806MUST_HAVE((p_normalized != p_in), ret, err);807808ret = nn_init(&p, 0); EG(ret, err);809ret = nn_copy(&p, p_in); EG(ret, err);810811/*812* In order for our reciprocal division routines to work, it is expected813* that the bit length (including leading zeroes) of input prime814* p is >= 2 * wlen where wlen is the number of bits of a word size.815*/816if (p.wlen < 2) {817ret = nn_set_wlen(&p, 2); EG(ret, err);818}819820ret = nn_init(p_normalized, 0); EG(ret, err);821ret = nn_init(&tmp_nn, 0); EG(ret, err);822823/* p_rounded_bitlen = bitlen of p rounded to word size */824p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen);825826/* p_shift */827ret = nn_bitlen(&p, &p_bitlen); EG(ret, err);828(*p_shift) = (word_t)(p_rounded_bitlen - p_bitlen);829830/* p_normalized = p << pshift */831ret = nn_lshift(p_normalized, &p, (bitcnt_t)(*p_shift)); EG(ret, err);832833/* Sanity check to protect the p_reciprocal computation */834MUST_HAVE((p_rounded_bitlen >= (2 * WORDSIZE)), ret, err);835836/*837* p_reciprocal = B^3 / ((p_normalized >> (p_rounded_bitlen - 2 * wlen)) + 1) - B838* where B = 2^wlen where wlen = word size in bits. We use our NN839* helper to compute it.840*/841ret = nn_rshift(&tmp_nn, p_normalized, (bitcnt_t)(p_rounded_bitlen - (2 * WORDSIZE))); EG(ret, err);842ret = wreciprocal(tmp_nn.val[1], tmp_nn.val[0], p_reciprocal);843844err:845nn_uninit(&p);846nn_uninit(&tmp_nn);847848return ret;849}850851/*852* Compute quotient remainder of Euclidean division.853*854* This function is a wrapper to normalize the divisor, i.e. shift it so that855* the MSB of its MSW is set, and precompute the reciprocal of this MSW to be856* used to perform small divisions using multiplications during the long857* schoolbook division. It uses the helper functions/macros above.858*859* This is NOT constant time with regards to the word length of a and b,860* but also the actual bitlength of b as we need to normalize b at the861* bit level.862* Moreover the precomputation of the reciprocal is not constant time at all.863*864* r need not be initialized, the function does it for the the caller.865*866* This function does not support aliasing. This is an internal helper, which867* expects caller to check parameters.868*869* It returns 0 on sucess, -1 on error.870*/871ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b)872{873nn b_large, b_normalized;874bitcnt_t cnt;875word_t v;876nn_src_t ptr = b;877int ret, iszero;878b_large.magic = b_normalized.magic = WORD(0);879880ret = nn_init(r, 0); EG(ret, err);881ret = nn_init(q, 0); EG(ret, err);882ret = nn_init(&b_large, 0); EG(ret, err);883884MUST_HAVE(!nn_iszero(b, &iszero) && !iszero, ret, err);885886if (b->wlen == 1) {887ret = nn_copy(&b_large, b); EG(ret, err);888889/* Expand our big number with zeroes */890ret = nn_set_wlen(&b_large, 2); EG(ret, err);891892/*893* This cast could seem inappropriate, but we are894* sure here that we won't touch ptr since it is only895* given as a const parameter to sub functions.896*/897ptr = (nn_src_t) &b_large;898}899900/* After this, we only handle >= 2 words big numbers */901MUST_HAVE(!(ptr->wlen < 2), ret, err);902903ret = nn_init(&b_normalized, (u16)((ptr->wlen) * WORD_BYTES)); EG(ret, err);904ret = nn_clz(ptr, &cnt); EG(ret, err);905ret = nn_lshift_fixedlen(&b_normalized, ptr, cnt); EG(ret, err);906ret = wreciprocal(b_normalized.val[ptr->wlen - 1],907b_normalized.val[ptr->wlen - 2],908&v); /* Not constant time. */ EG(ret, err);909910ret = _nn_divrem_unshifted(q, r, a, &b_normalized, v, cnt);911912err:913nn_uninit(&b_normalized);914nn_uninit(&b_large);915916return ret;917}918919/*920* Returns 0 on succes, -1 on error. Internal helper. Checks on params921* expected from the caller.922*/923ATTRIBUTE_WARN_UNUSED_RET static int __nn_divrem_notrim_alias(nn_t q, nn_t r, nn_src_t a, nn_src_t b)924{925nn a_cpy, b_cpy;926int ret;927a_cpy.magic = b_cpy.magic = WORD(0);928929ret = nn_init(&a_cpy, 0); EG(ret, err);930ret = nn_init(&b_cpy, 0); EG(ret, err);931ret = nn_copy(&a_cpy, a); EG(ret, err);932ret = nn_copy(&b_cpy, b); EG(ret, err);933ret = _nn_divrem(q, r, &a_cpy, &b_cpy);934935err:936nn_uninit(&b_cpy);937nn_uninit(&a_cpy);938939return ret;940}941942943944/*945* Compute quotient and remainder and normalize them.946* Not constant time, see documentation of _nn_divrem.947*948* Aliased version of _nn_divrem. Returns 0 on success,949* -1 on error.950*/951int nn_divrem_notrim(nn_t q, nn_t r, nn_src_t a, nn_src_t b)952{953int ret;954955/* _nn_divrem initializes q and r */956ret = nn_check_initialized(a); EG(ret, err);957ret = nn_check_initialized(b); EG(ret, err);958MUST_HAVE(((q != NULL) && (r != NULL)), ret, err);959960/*961* Handle aliasing whenever any of the inputs is962* used as an output.963*/964if ((a == q) || (a == r) || (b == q) || (b == r)) {965ret = __nn_divrem_notrim_alias(q, r, a, b);966} else {967ret = _nn_divrem(q, r, a, b);968}969970err:971return ret;972}973974/*975* Compute quotient and remainder and normalize them.976* Not constant time, see documentation of _nn_divrem().977* Returns 0 on success, -1 on error.978*979* Aliasing is supported.980*/981int nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b)982{983int ret;984985ret = nn_divrem_notrim(q, r, a, b); EG(ret, err);986987/* Normalize (trim) the quotient and rest to avoid size overflow */988ret = nn_normalize(q); EG(ret, err);989ret = nn_normalize(r);990991err:992return ret;993}994995/*996* Compute remainder only and do not normalize it. Not constant time, see997* documentation of _nn_divrem(). Returns 0 on success, -1 on error.998*999* Aliasing is supported.1000*/1001int nn_mod_notrim(nn_t r, nn_src_t a, nn_src_t b)1002{1003int ret;1004nn q;1005q.magic = WORD(0);10061007/* nn_divrem() will init q. */1008ret = nn_divrem_notrim(&q, r, a, b);10091010nn_uninit(&q);10111012return ret;1013}10141015/*1016* Compute remainder only and normalize it. Not constant time, see1017* documentation of _nn_divrem(). r is initialized by the function.1018* Returns 0 on success, -1 on error.1019*1020* Aliasing is supported.1021*/1022int nn_mod(nn_t r, nn_src_t a, nn_src_t b)1023{1024int ret;1025nn q;1026q.magic = WORD(0);10271028/* nn_divrem will init q. */1029ret = nn_divrem(&q, r, a, b);10301031nn_uninit(&q);10321033return ret;1034}10351036/*1037* Below follow gcd and xgcd non constant time functions for the user ease.1038*/10391040/*1041* Unaliased version of xgcd, and we suppose that a >= b. Badly non-constant1042* time per the algorithm used. The function returns 0 on success, -1 on1043* error. internal helper: expect caller to check parameters.1044*/1045ATTRIBUTE_WARN_UNUSED_RET static int _nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b,1046int *sign)1047{1048nn_t c, d, q, r, u1, v1, u2, v2;1049nn scratch[8];1050int ret, swap, iszero;1051u8 i;10521053for (i = 0; i < 8; i++){1054scratch[i].magic = WORD(0);1055}10561057/*1058* Maintain:1059* |u1 v1| |c| = |a|1060* |u2 v2| |d| |b|1061* u1, v1, u2, v2 >= 01062* c >= d1063*1064* Initially:1065* |1 0 | |a| = |a|1066* |0 1 | |b| |b|1067*1068* At each iteration:1069* c >= d1070* c = q*d + r1071* |u1 v1| = |q*u1+v1 u1|1072* |u2 v2| |q*u2+v2 u2|1073*1074* Finally, after i steps:1075* |u1 v1| |g| = |a|1076* |u2 v2| |0| = |b|1077*1078* Inverting the matrix:1079* |g| = (-1)^i | v2 -v1| |a|1080* |0| |-u2 u1| |b|1081*/10821083/*1084* Initialization.1085*/1086ret = nn_init(g, 0); EG(ret, err);1087ret = nn_init(u, 0); EG(ret, err);1088ret = nn_init(v, 0); EG(ret, err);1089ret = nn_iszero(b, &iszero); EG(ret, err);1090if (iszero) {1091/* gcd(0, a) = a, and 1*a + 0*b = a */1092ret = nn_copy(g, a); EG(ret, err);1093ret = nn_one(u); EG(ret, err);1094ret = nn_zero(v); EG(ret, err);1095(*sign) = 1;1096goto err;1097}10981099for (i = 0; i < 8; i++){1100ret = nn_init(&scratch[i], 0); EG(ret, err);1101}11021103u1 = &(scratch[0]);1104v1 = &(scratch[1]);1105u2 = &(scratch[2]);1106v2 = &(scratch[3]);1107ret = nn_one(u1); EG(ret, err);1108ret = nn_zero(v1); EG(ret, err);1109ret = nn_zero(u2); EG(ret, err);1110ret = nn_one(v2); EG(ret, err);1111c = &(scratch[4]);1112d = &(scratch[5]);1113ret = nn_copy(c, a); EG(ret, err); /* Copy could be skipped. */1114ret = nn_copy(d, b); EG(ret, err); /* Copy could be skipped. */1115q = &(scratch[6]);1116r = &(scratch[7]);1117swap = 0;11181119/*1120* Loop.1121*/1122ret = nn_iszero(d, &iszero); EG(ret, err);1123while (!iszero) {1124ret = nn_divrem(q, r, c, d); EG(ret, err);1125ret = nn_normalize(q); EG(ret, err);1126ret = nn_normalize(r); EG(ret, err);1127ret = nn_copy(c, r); EG(ret, err);1128ret = nn_mul(r, q, u1); EG(ret, err);1129ret = nn_normalize(r); EG(ret, err);1130ret = nn_add(v1, v1, r); EG(ret, err);1131ret = nn_mul(r, q, u2); EG(ret, err);1132ret = nn_normalize(r); EG(ret, err);1133ret = nn_add(v2, v2, r); EG(ret, err);1134ret = nn_normalize(v1); EG(ret, err);1135ret = nn_normalize(v2); EG(ret, err);1136swap = 1;1137ret = nn_iszero(c, &iszero); EG(ret, err);1138if (iszero) {1139break;1140}1141ret = nn_divrem(q, r, d, c); EG(ret, err);1142ret = nn_normalize(q); EG(ret, err);1143ret = nn_normalize(r); EG(ret, err);1144ret = nn_copy(d, r); EG(ret, err);1145ret = nn_mul(r, q, v1); EG(ret, err);1146ret = nn_normalize(r); EG(ret, err);1147ret = nn_add(u1, u1, r); EG(ret, err);1148ret = nn_mul(r, q, v2); EG(ret, err);1149ret = nn_normalize(r); EG(ret, err);1150ret = nn_add(u2, u2, r); EG(ret, err);1151ret = nn_normalize(u1); EG(ret, err);1152ret = nn_normalize(u2); EG(ret, err);1153swap = 0;11541155/* refresh loop condition */1156ret = nn_iszero(d, &iszero); EG(ret, err);1157}11581159/* Copies could be skipped. */1160if (swap) {1161ret = nn_copy(g, d); EG(ret, err);1162ret = nn_copy(u, u2); EG(ret, err);1163ret = nn_copy(v, u1); EG(ret, err);1164} else {1165ret = nn_copy(g, c); EG(ret, err);1166ret = nn_copy(u, v2); EG(ret, err);1167ret = nn_copy(v, v1); EG(ret, err);1168}11691170/* swap = -1 means u <= 0; = 1 means v <= 0 */1171(*sign) = swap ? -1 : 1;1172ret = 0;11731174err:1175/*1176* We uninit scratch elements in all cases, i.e. whether or not1177* we return an error or not.1178*/1179for (i = 0; i < 8; i++){1180nn_uninit(&scratch[i]);1181}1182/* Unitialize output in case of error */1183if (ret){1184nn_uninit(v);1185nn_uninit(u);1186nn_uninit(g);1187}11881189return ret;1190}11911192/*1193* Aliased version of xgcd, and no assumption on a and b. Not constant time at1194* all. returns 0 on success, -1 on error. XXX document 'sign'1195*1196* Aliasing is supported.1197*/1198int nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b, int *sign)1199{1200/* Handle aliasing1201* Note: in order to properly handle aliasing, we accept to lose1202* some "space" on the stack with copies.1203*/1204nn a_cpy, b_cpy;1205nn_src_t a_, b_;1206int ret, cmp, _sign;1207a_cpy.magic = b_cpy.magic = WORD(0);12081209/* The internal _nn_xgcd function initializes g, u and v */1210ret = nn_check_initialized(a); EG(ret, err);1211ret = nn_check_initialized(b); EG(ret, err);1212MUST_HAVE((sign != NULL), ret, err);12131214ret = nn_init(&b_cpy, 0); EG(ret, err);12151216/* Aliasing of a */1217if ((g == a) || (u == a) || (v == a)){1218ret = nn_copy(&a_cpy, a); EG(ret, err);1219a_ = &a_cpy;1220} else {1221a_ = a;1222}1223/* Aliasing of b */1224if ((g == b) || (u == b) || (v == b)) {1225ret = nn_copy(&b_cpy, b); EG(ret, err);1226b_ = &b_cpy;1227} else {1228b_ = b;1229}12301231ret = nn_cmp(a_, b_, &cmp); EG(ret, err);1232if (cmp < 0) {1233/* If a < b, swap the inputs */1234ret = _nn_xgcd(g, v, u, b_, a_, &_sign); EG(ret, err);1235(*sign) = -(_sign);1236} else {1237ret = _nn_xgcd(g, u, v, a_, b_, &_sign); EG(ret, err);1238(*sign) = _sign;1239}12401241err:1242nn_uninit(&b_cpy);1243nn_uninit(&a_cpy);12441245return ret;1246}12471248/*1249* Compute g = gcd(a, b). Internally use the xgcd and drop u and v.1250* Not constant time at all. Returns 0 on success, -1 on error.1251* XXX document 'sign'.1252*1253* Aliasing is supported.1254*/1255int nn_gcd(nn_t g, nn_src_t a, nn_src_t b, int *sign)1256{1257nn u, v;1258int ret;1259u.magic = v.magic = WORD(0);12601261/* nn_xgcd will initialize g, u and v and1262* check if a and b are indeed initialized.1263*/1264ret = nn_xgcd(g, &u, &v, a, b, sign);12651266nn_uninit(&u);1267nn_uninit(&v);12681269return ret;1270}127112721273