Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/crypto/libecc/src/nn/nn_mul_redc1.c
34878 views
1
/*
2
* Copyright (C) 2017 - This file is part of libecc project
3
*
4
* Authors:
5
* Ryad BENADJILA <[email protected]>
6
* Arnaud EBALARD <[email protected]>
7
* Jean-Pierre FLORI <[email protected]>
8
*
9
* Contributors:
10
* Nicolas VIVET <[email protected]>
11
* Karim KHALFALLAH <[email protected]>
12
*
13
* This software is licensed under a dual BSD and GPL v2 license.
14
* See LICENSE file at the root folder of the project.
15
*/
16
#include <libecc/nn/nn_mul_redc1.h>
17
#include <libecc/nn/nn_mul_public.h>
18
#include <libecc/nn/nn_add.h>
19
#include <libecc/nn/nn_logical.h>
20
#include <libecc/nn/nn_div_public.h>
21
#include <libecc/nn/nn_modinv.h>
22
#include <libecc/nn/nn.h>
23
24
/*
25
* Given an odd number p, compute Montgomery coefficients r, r_square
26
* as well as mpinv so that:
27
*
28
* - r = 2^p_rounded_bitlen mod (p), where
29
* p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of
30
* minimum number of words required to store p)
31
* - r_square = r^2 mod (p)
32
* - mpinv = -p^-1 mod (2^WORDSIZE).
33
*
34
* Aliasing of outputs with the input is possible since p_in is
35
* copied in local p at the beginning of the function.
36
*
37
* The function returns 0 on success, -1 on error. out parameters 'r',
38
* 'r_square' and 'mpinv' are only meaningful on success.
39
*/
40
int nn_compute_redc1_coefs(nn_t r, nn_t r_square, nn_src_t p_in, word_t *mpinv)
41
{
42
bitcnt_t p_rounded_bitlen;
43
nn p, tmp_nn1, tmp_nn2;
44
word_t _mpinv;
45
int ret, isodd;
46
p.magic = tmp_nn1.magic = tmp_nn2.magic = WORD(0);
47
48
ret = nn_check_initialized(p_in); EG(ret, err);
49
ret = nn_init(&p, 0); EG(ret, err);
50
ret = nn_copy(&p, p_in); EG(ret, err);
51
MUST_HAVE((mpinv != NULL), ret, err);
52
53
/*
54
* In order for our reciprocal division routines to work, it is
55
* expected that the bit length (including leading zeroes) of
56
* input prime p is >= 2 * wlen where wlen is the number of bits
57
* of a word size.
58
*/
59
if (p.wlen < 2) {
60
ret = nn_set_wlen(&p, 2); EG(ret, err);
61
}
62
63
ret = nn_init(r, 0); EG(ret, err);
64
ret = nn_init(r_square, 0); EG(ret, err);
65
ret = nn_init(&tmp_nn1, 0); EG(ret, err);
66
ret = nn_init(&tmp_nn2, 0); EG(ret, err);
67
68
/* p_rounded_bitlen = bitlen of p rounded to word size */
69
p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen);
70
71
/* _mpinv = 2^wlen - (modinv(prime, 2^wlen)) */
72
ret = nn_set_wlen(&tmp_nn1, 2); EG(ret, err);
73
tmp_nn1.val[1] = WORD(1);
74
ret = nn_copy(&tmp_nn2, &tmp_nn1); EG(ret, err);
75
ret = nn_modinv_2exp(&tmp_nn1, &p, WORD_BITS, &isodd); EG(ret, err);
76
ret = nn_sub(&tmp_nn1, &tmp_nn2, &tmp_nn1); EG(ret, err);
77
_mpinv = tmp_nn1.val[0];
78
79
/* r = (0x1 << p_rounded_bitlen) (p) */
80
ret = nn_one(r); EG(ret, err);
81
ret = nn_lshift(r, r, p_rounded_bitlen); EG(ret, err);
82
ret = nn_mod(r, r, &p); EG(ret, err);
83
84
/*
85
* r_square = (0x1 << (2*p_rounded_bitlen)) (p)
86
* We are supposed to handle NN numbers of size at least two times
87
* the biggest prime we use. Thus, we should be able to compute r_square
88
* with a multiplication followed by a reduction. (NB: we cannot use our
89
* Montgomery primitives at this point since we are computing its
90
* constants!)
91
*/
92
/* Check we have indeed enough space for our r_square computation */
93
MUST_HAVE(!(NN_MAX_BIT_LEN < (2 * p_rounded_bitlen)), ret, err);
94
95
ret = nn_sqr(r_square, r); EG(ret, err);
96
ret = nn_mod(r_square, r_square, &p); EG(ret, err);
97
98
(*mpinv) = _mpinv;
99
100
err:
101
nn_uninit(&p);
102
nn_uninit(&tmp_nn1);
103
nn_uninit(&tmp_nn2);
104
105
return ret;
106
}
107
108
/*
109
* Perform Montgomery multiplication, that is usual multiplication
110
* followed by reduction modulo p.
111
*
112
* Inputs are supposed to be < p (i.e. taken modulo p).
113
*
114
* This uses the CIOS algorithm from Koc et al.
115
*
116
* The p input is the modulo number of the Montgomery multiplication,
117
* and mpinv is -p^(-1) mod (2^WORDSIZE).
118
*
119
* The function does not check input parameters are initialized. This
120
* MUST be done by the caller.
121
*
122
* The function returns 0 on success, -1 on error.
123
*/
124
ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p,
125
word_t mpinv)
126
{
127
word_t prod_high, prod_low, carry, acc, m;
128
unsigned int i, j, len, len_mul;
129
/* a and b inputs such that len(b) <= len(a) */
130
nn_src_t a, b;
131
int ret, cmp;
132
u8 old_wlen;
133
134
/*
135
* These comparisons are input hypothesis and does not "break"
136
* the following computation. However performance loss exists
137
* when this check is always done, this is why we use our
138
* SHOULD_HAVE primitive.
139
*/
140
SHOULD_HAVE((!nn_cmp(in1, p, &cmp)) && (cmp < 0), ret, err);
141
SHOULD_HAVE((!nn_cmp(in2, p, &cmp)) && (cmp < 0), ret, err);
142
143
ret = nn_init(out, 0); EG(ret, err);
144
145
/* Check which one of in1 or in2 is the biggest */
146
a = (in1->wlen <= in2->wlen) ? in2 : in1;
147
b = (in1->wlen <= in2->wlen) ? in1 : in2;
148
149
/*
150
* The inputs might have been reduced due to trimming
151
* because of leading zeroes. It is important for our
152
* Montgomery algorithm to work on sizes consistent with
153
* out prime p real bit size. Thus, we expand the output
154
* size to the size of p.
155
*/
156
ret = nn_set_wlen(out, p->wlen); EG(ret, err);
157
158
len = out->wlen;
159
len_mul = b->wlen;
160
/*
161
* We extend out to store carries. We first check that we
162
* do not have an overflow on the NN size.
163
*/
164
MUST_HAVE(((WORD_BITS * (out->wlen + 1)) <= NN_MAX_BIT_LEN), ret, err);
165
old_wlen = out->wlen;
166
out->wlen = (u8)(out->wlen + 1);
167
168
/*
169
* This can be skipped if the first iteration of the for loop
170
* is separated.
171
*/
172
for (i = 0; i < out->wlen; i++) {
173
out->val[i] = 0;
174
}
175
for (i = 0; i < len; i++) {
176
carry = WORD(0);
177
for (j = 0; j < len_mul; j++) {
178
WORD_MUL(prod_high, prod_low, a->val[i], b->val[j]);
179
prod_low = (word_t)(prod_low + carry);
180
prod_high = (word_t)(prod_high + (prod_low < carry));
181
out->val[j] = (word_t)(out->val[j] + prod_low);
182
carry = (word_t)(prod_high + (out->val[j] < prod_low));
183
}
184
for (; j < len; j++) {
185
out->val[j] = (word_t)(out->val[j] + carry);
186
carry = (word_t)(out->val[j] < carry);
187
}
188
out->val[j] = (word_t)(out->val[j] + carry);
189
acc = (word_t)(out->val[j] < carry);
190
191
m = (word_t)(out->val[0] * mpinv);
192
WORD_MUL(prod_high, prod_low, m, p->val[0]);
193
prod_low = (word_t)(prod_low + out->val[0]);
194
carry = (word_t)(prod_high + (prod_low < out->val[0]));
195
for (j = 1; j < len; j++) {
196
WORD_MUL(prod_high, prod_low, m, p->val[j]);
197
prod_low = (word_t)(prod_low + carry);
198
prod_high = (word_t)(prod_high + (prod_low < carry));
199
out->val[j - 1] = (word_t)(prod_low + out->val[j]);
200
carry = (word_t)(prod_high + (out->val[j - 1] < prod_low));
201
}
202
out->val[j - 1] = (word_t)(carry + out->val[j]);
203
carry = (word_t)(out->val[j - 1] < out->val[j]);
204
out->val[j] = (word_t)(acc + carry);
205
}
206
/*
207
* Note that at this stage the msw of out is either 0 or 1.
208
* If out > p we need to subtract p from out.
209
*/
210
ret = nn_cmp(out, p, &cmp); EG(ret, err);
211
ret = nn_cnd_sub(cmp >= 0, out, out, p); EG(ret, err);
212
MUST_HAVE((!nn_cmp(out, p, &cmp)) && (cmp < 0), ret, err);
213
/* We restore out wlen. */
214
out->wlen = old_wlen;
215
216
err:
217
return ret;
218
}
219
220
/*
221
* Wrapper for previous function handling aliasing of one of the input
222
* paramter with out, through a copy. The function does not check
223
* input parameters are initialized. This MUST be done by the caller.
224
*/
225
ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1_aliased(nn_t out, nn_src_t in1, nn_src_t in2,
226
nn_src_t p, word_t mpinv)
227
{
228
nn out_cpy;
229
int ret;
230
out_cpy.magic = WORD(0);
231
232
ret = _nn_mul_redc1(&out_cpy, in1, in2, p, mpinv); EG(ret, err);
233
ret = nn_init(out, out_cpy.wlen); EG(ret, err);
234
ret = nn_copy(out, &out_cpy);
235
236
err:
237
nn_uninit(&out_cpy);
238
239
return ret;
240
}
241
242
/*
243
* Public version, handling possible aliasing of out parameter with
244
* one of the input parameters.
245
*/
246
int nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p,
247
word_t mpinv)
248
{
249
int ret;
250
251
ret = nn_check_initialized(in1); EG(ret, err);
252
ret = nn_check_initialized(in2); EG(ret, err);
253
ret = nn_check_initialized(p); EG(ret, err);
254
255
/* Handle possible output aliasing */
256
if ((out == in1) || (out == in2) || (out == p)) {
257
ret = _nn_mul_redc1_aliased(out, in1, in2, p, mpinv);
258
} else {
259
ret = _nn_mul_redc1(out, in1, in2, p, mpinv);
260
}
261
262
err:
263
return ret;
264
}
265
266
/*
267
* Compute in1 * in2 mod p where in1 and in2 are numbers < p.
268
* When p is an odd number, the function redcifies in1 and in2
269
* parameters, does the computation and then unredcifies the
270
* result. When p is an even number, we use an unoptimized mul
271
* then mod operations sequence.
272
*
273
* From a mathematical standpoint, the computation is equivalent
274
* to performing:
275
*
276
* nn_mul(&tmp2, in1, in2);
277
* nn_mod(&out, &tmp2, q);
278
*
279
* but the modular reduction is done progressively during
280
* Montgomery reduction when p is odd (which brings more efficiency).
281
*
282
* Inputs are supposed to be < p (i.e. taken modulo p).
283
*
284
* The function returns 0 on success, -1 on error.
285
*/
286
int nn_mod_mul(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p_in)
287
{
288
nn r_square, p;
289
nn in1_tmp, in2_tmp;
290
word_t mpinv;
291
int ret, isodd;
292
r_square.magic = in1_tmp.magic = in2_tmp.magic = p.magic = WORD(0);
293
294
/* When p_in is even, we cannot work with Montgomery multiplication */
295
ret = nn_isodd(p_in, &isodd); EG(ret, err);
296
if(!isodd){
297
/* When p_in is even, we fallback to less efficient mul then mod */
298
ret = nn_mul(out, in1, in2); EG(ret, err);
299
ret = nn_mod(out, out, p_in); EG(ret, err);
300
}
301
else{
302
/* Here, p_in is odd and we can use redcification */
303
ret = nn_copy(&p, p_in); EG(ret, err);
304
305
/*
306
* In order for our reciprocal division routines to work, it is
307
* expected that the bit length (including leading zeroes) of
308
* input prime p is >= 2 * wlen where wlen is the number of bits
309
* of a word size.
310
*/
311
if (p.wlen < 2) {
312
ret = nn_set_wlen(&p, 2); EG(ret, err);
313
}
314
315
/* Compute Mongtomery coefs.
316
* NOTE: in1_tmp holds a dummy value here after the operation.
317
*/
318
ret = nn_compute_redc1_coefs(&in1_tmp, &r_square, &p, &mpinv); EG(ret, err);
319
320
/* redcify in1 and in2 */
321
ret = nn_mul_redc1(&in1_tmp, in1, &r_square, &p, mpinv); EG(ret, err);
322
ret = nn_mul_redc1(&in2_tmp, in2, &r_square, &p, mpinv); EG(ret, err);
323
324
/* Compute in1 * in2 mod p in montgomery world.
325
* NOTE: r_square holds the result after the operation.
326
*/
327
ret = nn_mul_redc1(&r_square, &in1_tmp, &in2_tmp, &p, mpinv); EG(ret, err);
328
329
/* Come back to real world by unredcifying result */
330
ret = nn_init(&in1_tmp, 0); EG(ret, err);
331
ret = nn_one(&in1_tmp); EG(ret, err);
332
ret = nn_mul_redc1(out, &r_square, &in1_tmp, &p, mpinv); EG(ret, err);
333
}
334
335
err:
336
nn_uninit(&p);
337
nn_uninit(&r_square);
338
nn_uninit(&in1_tmp);
339
nn_uninit(&in2_tmp);
340
341
return ret;
342
}
343
344