Path: blob/main/crypto/libecc/src/nn/nn_mul_redc1.c
34878 views
/*1* Copyright (C) 2017 - This file is part of libecc project2*3* Authors:4* Ryad BENADJILA <[email protected]>5* Arnaud EBALARD <[email protected]>6* Jean-Pierre FLORI <[email protected]>7*8* Contributors:9* Nicolas VIVET <[email protected]>10* Karim KHALFALLAH <[email protected]>11*12* This software is licensed under a dual BSD and GPL v2 license.13* See LICENSE file at the root folder of the project.14*/15#include <libecc/nn/nn_mul_redc1.h>16#include <libecc/nn/nn_mul_public.h>17#include <libecc/nn/nn_add.h>18#include <libecc/nn/nn_logical.h>19#include <libecc/nn/nn_div_public.h>20#include <libecc/nn/nn_modinv.h>21#include <libecc/nn/nn.h>2223/*24* Given an odd number p, compute Montgomery coefficients r, r_square25* as well as mpinv so that:26*27* - r = 2^p_rounded_bitlen mod (p), where28* p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of29* minimum number of words required to store p)30* - r_square = r^2 mod (p)31* - mpinv = -p^-1 mod (2^WORDSIZE).32*33* Aliasing of outputs with the input is possible since p_in is34* copied in local p at the beginning of the function.35*36* The function returns 0 on success, -1 on error. out parameters 'r',37* 'r_square' and 'mpinv' are only meaningful on success.38*/39int nn_compute_redc1_coefs(nn_t r, nn_t r_square, nn_src_t p_in, word_t *mpinv)40{41bitcnt_t p_rounded_bitlen;42nn p, tmp_nn1, tmp_nn2;43word_t _mpinv;44int ret, isodd;45p.magic = tmp_nn1.magic = tmp_nn2.magic = WORD(0);4647ret = nn_check_initialized(p_in); EG(ret, err);48ret = nn_init(&p, 0); EG(ret, err);49ret = nn_copy(&p, p_in); EG(ret, err);50MUST_HAVE((mpinv != NULL), ret, err);5152/*53* In order for our reciprocal division routines to work, it is54* expected that the bit length (including leading zeroes) of55* input prime p is >= 2 * wlen where wlen is the number of bits56* of a word size.57*/58if (p.wlen < 2) {59ret = nn_set_wlen(&p, 2); EG(ret, err);60}6162ret = nn_init(r, 0); EG(ret, err);63ret = nn_init(r_square, 0); EG(ret, err);64ret = nn_init(&tmp_nn1, 0); EG(ret, err);65ret = nn_init(&tmp_nn2, 0); EG(ret, err);6667/* p_rounded_bitlen = bitlen of p rounded to word size */68p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen);6970/* _mpinv = 2^wlen - (modinv(prime, 2^wlen)) */71ret = nn_set_wlen(&tmp_nn1, 2); EG(ret, err);72tmp_nn1.val[1] = WORD(1);73ret = nn_copy(&tmp_nn2, &tmp_nn1); EG(ret, err);74ret = nn_modinv_2exp(&tmp_nn1, &p, WORD_BITS, &isodd); EG(ret, err);75ret = nn_sub(&tmp_nn1, &tmp_nn2, &tmp_nn1); EG(ret, err);76_mpinv = tmp_nn1.val[0];7778/* r = (0x1 << p_rounded_bitlen) (p) */79ret = nn_one(r); EG(ret, err);80ret = nn_lshift(r, r, p_rounded_bitlen); EG(ret, err);81ret = nn_mod(r, r, &p); EG(ret, err);8283/*84* r_square = (0x1 << (2*p_rounded_bitlen)) (p)85* We are supposed to handle NN numbers of size at least two times86* the biggest prime we use. Thus, we should be able to compute r_square87* with a multiplication followed by a reduction. (NB: we cannot use our88* Montgomery primitives at this point since we are computing its89* constants!)90*/91/* Check we have indeed enough space for our r_square computation */92MUST_HAVE(!(NN_MAX_BIT_LEN < (2 * p_rounded_bitlen)), ret, err);9394ret = nn_sqr(r_square, r); EG(ret, err);95ret = nn_mod(r_square, r_square, &p); EG(ret, err);9697(*mpinv) = _mpinv;9899err:100nn_uninit(&p);101nn_uninit(&tmp_nn1);102nn_uninit(&tmp_nn2);103104return ret;105}106107/*108* Perform Montgomery multiplication, that is usual multiplication109* followed by reduction modulo p.110*111* Inputs are supposed to be < p (i.e. taken modulo p).112*113* This uses the CIOS algorithm from Koc et al.114*115* The p input is the modulo number of the Montgomery multiplication,116* and mpinv is -p^(-1) mod (2^WORDSIZE).117*118* The function does not check input parameters are initialized. This119* MUST be done by the caller.120*121* The function returns 0 on success, -1 on error.122*/123ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p,124word_t mpinv)125{126word_t prod_high, prod_low, carry, acc, m;127unsigned int i, j, len, len_mul;128/* a and b inputs such that len(b) <= len(a) */129nn_src_t a, b;130int ret, cmp;131u8 old_wlen;132133/*134* These comparisons are input hypothesis and does not "break"135* the following computation. However performance loss exists136* when this check is always done, this is why we use our137* SHOULD_HAVE primitive.138*/139SHOULD_HAVE((!nn_cmp(in1, p, &cmp)) && (cmp < 0), ret, err);140SHOULD_HAVE((!nn_cmp(in2, p, &cmp)) && (cmp < 0), ret, err);141142ret = nn_init(out, 0); EG(ret, err);143144/* Check which one of in1 or in2 is the biggest */145a = (in1->wlen <= in2->wlen) ? in2 : in1;146b = (in1->wlen <= in2->wlen) ? in1 : in2;147148/*149* The inputs might have been reduced due to trimming150* because of leading zeroes. It is important for our151* Montgomery algorithm to work on sizes consistent with152* out prime p real bit size. Thus, we expand the output153* size to the size of p.154*/155ret = nn_set_wlen(out, p->wlen); EG(ret, err);156157len = out->wlen;158len_mul = b->wlen;159/*160* We extend out to store carries. We first check that we161* do not have an overflow on the NN size.162*/163MUST_HAVE(((WORD_BITS * (out->wlen + 1)) <= NN_MAX_BIT_LEN), ret, err);164old_wlen = out->wlen;165out->wlen = (u8)(out->wlen + 1);166167/*168* This can be skipped if the first iteration of the for loop169* is separated.170*/171for (i = 0; i < out->wlen; i++) {172out->val[i] = 0;173}174for (i = 0; i < len; i++) {175carry = WORD(0);176for (j = 0; j < len_mul; j++) {177WORD_MUL(prod_high, prod_low, a->val[i], b->val[j]);178prod_low = (word_t)(prod_low + carry);179prod_high = (word_t)(prod_high + (prod_low < carry));180out->val[j] = (word_t)(out->val[j] + prod_low);181carry = (word_t)(prod_high + (out->val[j] < prod_low));182}183for (; j < len; j++) {184out->val[j] = (word_t)(out->val[j] + carry);185carry = (word_t)(out->val[j] < carry);186}187out->val[j] = (word_t)(out->val[j] + carry);188acc = (word_t)(out->val[j] < carry);189190m = (word_t)(out->val[0] * mpinv);191WORD_MUL(prod_high, prod_low, m, p->val[0]);192prod_low = (word_t)(prod_low + out->val[0]);193carry = (word_t)(prod_high + (prod_low < out->val[0]));194for (j = 1; j < len; j++) {195WORD_MUL(prod_high, prod_low, m, p->val[j]);196prod_low = (word_t)(prod_low + carry);197prod_high = (word_t)(prod_high + (prod_low < carry));198out->val[j - 1] = (word_t)(prod_low + out->val[j]);199carry = (word_t)(prod_high + (out->val[j - 1] < prod_low));200}201out->val[j - 1] = (word_t)(carry + out->val[j]);202carry = (word_t)(out->val[j - 1] < out->val[j]);203out->val[j] = (word_t)(acc + carry);204}205/*206* Note that at this stage the msw of out is either 0 or 1.207* If out > p we need to subtract p from out.208*/209ret = nn_cmp(out, p, &cmp); EG(ret, err);210ret = nn_cnd_sub(cmp >= 0, out, out, p); EG(ret, err);211MUST_HAVE((!nn_cmp(out, p, &cmp)) && (cmp < 0), ret, err);212/* We restore out wlen. */213out->wlen = old_wlen;214215err:216return ret;217}218219/*220* Wrapper for previous function handling aliasing of one of the input221* paramter with out, through a copy. The function does not check222* input parameters are initialized. This MUST be done by the caller.223*/224ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1_aliased(nn_t out, nn_src_t in1, nn_src_t in2,225nn_src_t p, word_t mpinv)226{227nn out_cpy;228int ret;229out_cpy.magic = WORD(0);230231ret = _nn_mul_redc1(&out_cpy, in1, in2, p, mpinv); EG(ret, err);232ret = nn_init(out, out_cpy.wlen); EG(ret, err);233ret = nn_copy(out, &out_cpy);234235err:236nn_uninit(&out_cpy);237238return ret;239}240241/*242* Public version, handling possible aliasing of out parameter with243* one of the input parameters.244*/245int nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p,246word_t mpinv)247{248int ret;249250ret = nn_check_initialized(in1); EG(ret, err);251ret = nn_check_initialized(in2); EG(ret, err);252ret = nn_check_initialized(p); EG(ret, err);253254/* Handle possible output aliasing */255if ((out == in1) || (out == in2) || (out == p)) {256ret = _nn_mul_redc1_aliased(out, in1, in2, p, mpinv);257} else {258ret = _nn_mul_redc1(out, in1, in2, p, mpinv);259}260261err:262return ret;263}264265/*266* Compute in1 * in2 mod p where in1 and in2 are numbers < p.267* When p is an odd number, the function redcifies in1 and in2268* parameters, does the computation and then unredcifies the269* result. When p is an even number, we use an unoptimized mul270* then mod operations sequence.271*272* From a mathematical standpoint, the computation is equivalent273* to performing:274*275* nn_mul(&tmp2, in1, in2);276* nn_mod(&out, &tmp2, q);277*278* but the modular reduction is done progressively during279* Montgomery reduction when p is odd (which brings more efficiency).280*281* Inputs are supposed to be < p (i.e. taken modulo p).282*283* The function returns 0 on success, -1 on error.284*/285int nn_mod_mul(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p_in)286{287nn r_square, p;288nn in1_tmp, in2_tmp;289word_t mpinv;290int ret, isodd;291r_square.magic = in1_tmp.magic = in2_tmp.magic = p.magic = WORD(0);292293/* When p_in is even, we cannot work with Montgomery multiplication */294ret = nn_isodd(p_in, &isodd); EG(ret, err);295if(!isodd){296/* When p_in is even, we fallback to less efficient mul then mod */297ret = nn_mul(out, in1, in2); EG(ret, err);298ret = nn_mod(out, out, p_in); EG(ret, err);299}300else{301/* Here, p_in is odd and we can use redcification */302ret = nn_copy(&p, p_in); EG(ret, err);303304/*305* In order for our reciprocal division routines to work, it is306* expected that the bit length (including leading zeroes) of307* input prime p is >= 2 * wlen where wlen is the number of bits308* of a word size.309*/310if (p.wlen < 2) {311ret = nn_set_wlen(&p, 2); EG(ret, err);312}313314/* Compute Mongtomery coefs.315* NOTE: in1_tmp holds a dummy value here after the operation.316*/317ret = nn_compute_redc1_coefs(&in1_tmp, &r_square, &p, &mpinv); EG(ret, err);318319/* redcify in1 and in2 */320ret = nn_mul_redc1(&in1_tmp, in1, &r_square, &p, mpinv); EG(ret, err);321ret = nn_mul_redc1(&in2_tmp, in2, &r_square, &p, mpinv); EG(ret, err);322323/* Compute in1 * in2 mod p in montgomery world.324* NOTE: r_square holds the result after the operation.325*/326ret = nn_mul_redc1(&r_square, &in1_tmp, &in2_tmp, &p, mpinv); EG(ret, err);327328/* Come back to real world by unredcifying result */329ret = nn_init(&in1_tmp, 0); EG(ret, err);330ret = nn_one(&in1_tmp); EG(ret, err);331ret = nn_mul_redc1(out, &r_square, &in1_tmp, &p, mpinv); EG(ret, err);332}333334err:335nn_uninit(&p);336nn_uninit(&r_square);337nn_uninit(&in1_tmp);338nn_uninit(&in2_tmp);339340return ret;341}342343344