CoCalc -- ec_prime

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/bearssl/src/ec/ec_prime_i15.c
³⁹⁵⁰⁷ views
1
/*
2
 * Copyright (c) 2017 Thomas Pornin <[email protected]>
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining 
5
 * a copy of this software and associated documentation files (the
6
 * "Software"), to deal in the Software without restriction, including
7
 * without limitation the rights to use, copy, modify, merge, publish,
8
 * distribute, sublicense, and/or sell copies of the Software, and to
9
 * permit persons to whom the Software is furnished to do so, subject to
10
 * the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be 
13
 * included in all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
16
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
18
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 */
24

25
#include "inner.h"
26

27
/*
28
 * Parameters for supported curves:
29
 *   - field modulus p
30
 *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
31
 *   - b*R mod p (b is the second curve equation parameter)
32
 */
33

34
static const uint16_t P256_P[] = {
35
	0x0111,
36
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
37
	0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
38
	0x7FFF, 0x0001
39
};
40

41
static const uint16_t P256_R2[] = {
42
	0x0111,
43
	0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
44
	0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
45
	0x4FFF, 0x0000
46
};
47

48
static const uint16_t P256_B[] = {
49
	0x0111,
50
	0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
51
	0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
52
	0x0187, 0x0000
53
};
54

55
static const uint16_t P384_P[] = {
56
	0x0199,
57
	0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
58
	0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
59
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
60
	0x7FFF, 0x01FF
61
};
62

63
static const uint16_t P384_R2[] = {
64
	0x0199,
65
	0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
66
	0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
67
	0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
68
	0x0000, 0x0000
69
};
70

71
static const uint16_t P384_B[] = {
72
	0x0199,
73
	0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
74
	0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
75
	0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
76
	0x0452, 0x0084
77
};
78

79
static const uint16_t P521_P[] = {
80
	0x022B,
81
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
82
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
83
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
84
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
85
	0x7FFF, 0x7FFF, 0x07FF
86
};
87

88
static const uint16_t P521_R2[] = {
89
	0x022B,
90
	0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
91
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
92
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
93
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
94
	0x0000, 0x0000, 0x0000
95
};
96

97
static const uint16_t P521_B[] = {
98
	0x022B,
99
	0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
100
	0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
101
	0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
102
	0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
103
	0x1618, 0x27D7, 0x0465
104
};
105

106
typedef struct {
107
	const uint16_t *p;
108
	const uint16_t *b;
109
	const uint16_t *R2;
110
	uint16_t p0i;
111
	size_t point_len;
112
} curve_params;
113

114
static inline const curve_params *
115
id_to_curve(int curve)
116
{
117
	static const curve_params pp[] = {
118
		{ P256_P, P256_B, P256_R2, 0x0001,  65 },
119
		{ P384_P, P384_B, P384_R2, 0x0001,  97 },
120
		{ P521_P, P521_B, P521_R2, 0x0001, 133 }
121
	};
122

123
	return &pp[curve - BR_EC_secp256r1];
124
}
125

126
#define I15_LEN   ((BR_MAX_EC_SIZE + 29) / 15)
127

128
/*
129
 * Type for a point in Jacobian coordinates:
130
 * -- three values, x, y and z, in Montgomery representation
131
 * -- affine coordinates are X = x / z^2 and Y = y / z^3
132
 * -- for the point at infinity, z = 0
133
 */
134
typedef struct {
135
	uint16_t c[3][I15_LEN];
136
} jacobian;
137

138
/*
139
 * We use a custom interpreter that uses a dozen registers, and
140
 * only six operations:
141
 *    MSET(d, a)       copy a into d
142
 *    MADD(d, a)       d = d+a (modular)
143
 *    MSUB(d, a)       d = d-a (modular)
144
 *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
145
 *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
146
 *    MTZ(d)           clear return value if d = 0
147
 * Destination of MMUL (d) must be distinct from operands (a and b).
148
 * There is no such constraint for MSUB and MADD.
149
 *
150
 * Registers include the operand coordinates, and temporaries.
151
 */
152
#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
153
#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
154
#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
155
#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
156
#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
157
#define MTZ(d)          (0x5000 + ((d) << 8))
158
#define ENDCODE         0
159

160
/*
161
 * Registers for the input operands.
162
 */
163
#define P1x    0
164
#define P1y    1
165
#define P1z    2
166
#define P2x    3
167
#define P2y    4
168
#define P2z    5
169

170
/*
171
 * Alternate names for the first input operand.
172
 */
173
#define Px     0
174
#define Py     1
175
#define Pz     2
176

177
/*
178
 * Temporaries.
179
 */
180
#define t1     6
181
#define t2     7
182
#define t3     8
183
#define t4     9
184
#define t5    10
185
#define t6    11
186
#define t7    12
187

188
/*
189
 * Extra scratch registers available when there is no second operand (e.g.
190
 * for "double" and "affine").
191
 */
192
#define t8     3
193
#define t9     4
194
#define t10    5
195

196
/*
197
 * Doubling formulas are:
198
 *
199
 *   s = 4*x*y^2
200
 *   m = 3*(x + z^2)*(x - z^2)
201
 *   x' = m^2 - 2*s
202
 *   y' = m*(s - x') - 8*y^4
203
 *   z' = 2*y*z
204
 *
205
 * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
206
 * should. This case should not happen anyway, because our curves have
207
 * prime order, and thus do not contain any point of order 2.
208
 *
209
 * If P is infinity (z = 0), then again the formulas yield infinity,
210
 * which is correct. Thus, this code works for all points.
211
 *
212
 * Cost: 8 multiplications
213
 */
214
static const uint16_t code_double[] = {
215
	/*
216
	 * Compute z^2 (in t1).
217
	 */
218
	MMUL(t1, Pz, Pz),
219

220
	/*
221
	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
222
	 */
223
	MSET(t2, Px),
224
	MSUB(t2, t1),
225
	MADD(t1, Px),
226

227
	/*
228
	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
229
	 */
230
	MMUL(t3, t1, t2),
231
	MSET(t1, t3),
232
	MADD(t1, t3),
233
	MADD(t1, t3),
234

235
	/*
236
	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
237
	 */
238
	MMUL(t3, Py, Py),
239
	MADD(t3, t3),
240
	MMUL(t2, Px, t3),
241
	MADD(t2, t2),
242

243
	/*
244
	 * Compute x' = m^2 - 2*s.
245
	 */
246
	MMUL(Px, t1, t1),
247
	MSUB(Px, t2),
248
	MSUB(Px, t2),
249

250
	/*
251
	 * Compute z' = 2*y*z.
252
	 */
253
	MMUL(t4, Py, Pz),
254
	MSET(Pz, t4),
255
	MADD(Pz, t4),
256

257
	/*
258
	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
259
	 * 2*y^2 in t3.
260
	 */
261
	MSUB(t2, Px),
262
	MMUL(Py, t1, t2),
263
	MMUL(t4, t3, t3),
264
	MSUB(Py, t4),
265
	MSUB(Py, t4),
266

267
	ENDCODE
268
};
269

270
/*
271
 * Addtions formulas are:
272
 *
273
 *   u1 = x1 * z2^2
274
 *   u2 = x2 * z1^2
275
 *   s1 = y1 * z2^3
276
 *   s2 = y2 * z1^3
277
 *   h = u2 - u1
278
 *   r = s2 - s1
279
 *   x3 = r^2 - h^3 - 2 * u1 * h^2
280
 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
281
 *   z3 = h * z1 * z2
282
 *
283
 * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
284
 * z3 == 0, so the result is correct.
285
 * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
286
 * not correct.
287
 * h == 0 only if u1 == u2; this happens in two cases:
288
 * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
289
 * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
290
 *
291
 * Thus, the following situations are not handled correctly:
292
 * -- P1 = 0 and P2 != 0
293
 * -- P1 != 0 and P2 = 0
294
 * -- P1 = P2
295
 * All other cases are properly computed. However, even in "incorrect"
296
 * situations, the three coordinates still are properly formed field
297
 * elements.
298
 *
299
 * The returned flag is cleared if r == 0. This happens in the following
300
 * cases:
301
 * -- Both points are on the same horizontal line (same Y coordinate).
302
 * -- Both points are infinity.
303
 * -- One point is infinity and the other is on line Y = 0.
304
 * The third case cannot happen with our curves (there is no valid point
305
 * on line Y = 0 since that would be a point of order 2). If the two
306
 * source points are non-infinity, then remains only the case where the
307
 * two points are on the same horizontal line.
308
 *
309
 * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
310
 * P2 != 0:
311
 * -- If the returned value is not the point at infinity, then it was properly
312
 * computed.
313
 * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
314
 * is indeed the point at infinity.
315
 * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
316
 * use the 'double' code.
317
 *
318
 * Cost: 16 multiplications
319
 */
320
static const uint16_t code_add[] = {
321
	/*
322
	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
323
	 */
324
	MMUL(t3, P2z, P2z),
325
	MMUL(t1, P1x, t3),
326
	MMUL(t4, P2z, t3),
327
	MMUL(t3, P1y, t4),
328

329
	/*
330
	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
331
	 */
332
	MMUL(t4, P1z, P1z),
333
	MMUL(t2, P2x, t4),
334
	MMUL(t5, P1z, t4),
335
	MMUL(t4, P2y, t5),
336

337
	/*
338
	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
339
	 */
340
	MSUB(t2, t1),
341
	MSUB(t4, t3),
342

343
	/*
344
	 * Report cases where r = 0 through the returned flag.
345
	 */
346
	MTZ(t4),
347

348
	/*
349
	 * Compute u1*h^2 (in t6) and h^3 (in t5).
350
	 */
351
	MMUL(t7, t2, t2),
352
	MMUL(t6, t1, t7),
353
	MMUL(t5, t7, t2),
354

355
	/*
356
	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
357
	 * t1 and t7 can be used as scratch registers.
358
	 */
359
	MMUL(P1x, t4, t4),
360
	MSUB(P1x, t5),
361
	MSUB(P1x, t6),
362
	MSUB(P1x, t6),
363

364
	/*
365
	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
366
	 */
367
	MSUB(t6, P1x),
368
	MMUL(P1y, t4, t6),
369
	MMUL(t1, t5, t3),
370
	MSUB(P1y, t1),
371

372
	/*
373
	 * Compute z3 = h*z1*z2.
374
	 */
375
	MMUL(t1, P1z, P2z),
376
	MMUL(P1z, t1, t2),
377

378
	ENDCODE
379
};
380

381
/*
382
 * Check that the point is on the curve. This code snippet assumes the
383
 * following conventions:
384
 * -- Coordinates x and y have been freshly decoded in P1 (but not
385
 * converted to Montgomery coordinates yet).
386
 * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
387
 */
388
static const uint16_t code_check[] = {
389

390
	/* Convert x and y to Montgomery representation. */
391
	MMUL(t1, P1x, P2x),
392
	MMUL(t2, P1y, P2x),
393
	MSET(P1x, t1),
394
	MSET(P1y, t2),
395

396
	/* Compute x^3 in t1. */
397
	MMUL(t2, P1x, P1x),
398
	MMUL(t1, P1x, t2),
399

400
	/* Subtract 3*x from t1. */
401
	MSUB(t1, P1x),
402
	MSUB(t1, P1x),
403
	MSUB(t1, P1x),
404

405
	/* Add b. */
406
	MADD(t1, P2y),
407

408
	/* Compute y^2 in t2. */
409
	MMUL(t2, P1y, P1y),
410

411
	/* Compare y^2 with x^3 - 3*x + b; they must match. */
412
	MSUB(t1, t2),
413
	MTZ(t1),
414

415
	/* Set z to 1 (in Montgomery representation). */
416
	MMUL(P1z, P2x, P2z),
417

418
	ENDCODE
419
};
420

421
/*
422
 * Conversion back to affine coordinates. This code snippet assumes that
423
 * the z coordinate of P2 is set to 1 (not in Montgomery representation).
424
 */
425
static const uint16_t code_affine[] = {
426

427
	/* Save z*R in t1. */
428
	MSET(t1, P1z),
429

430
	/* Compute z^3 in t2. */
431
	MMUL(t2, P1z, P1z),
432
	MMUL(t3, P1z, t2),
433
	MMUL(t2, t3, P2z),
434

435
	/* Invert to (1/z^3) in t2. */
436
	MINV(t2, t3, t4),
437

438
	/* Compute y. */
439
	MSET(t3, P1y),
440
	MMUL(P1y, t2, t3),
441

442
	/* Compute (1/z^2) in t3. */
443
	MMUL(t3, t2, t1),
444

445
	/* Compute x. */
446
	MSET(t2, P1x),
447
	MMUL(P1x, t2, t3),
448

449
	ENDCODE
450
};
451

452
static uint32_t
453
run_code(jacobian *P1, const jacobian *P2,
454
	const curve_params *cc, const uint16_t *code)
455
{
456
	uint32_t r;
457
	uint16_t t[13][I15_LEN];
458
	size_t u;
459

460
	r = 1;
461

462
	/*
463
	 * Copy the two operands in the dedicated registers.
464
	 */
465
	memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
466
	memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
467

468
	/*
469
	 * Run formulas.
470
	 */
471
	for (u = 0;; u ++) {
472
		unsigned op, d, a, b;
473

474
		op = code[u];
475
		if (op == 0) {
476
			break;
477
		}
478
		d = (op >> 8) & 0x0F;
479
		a = (op >> 4) & 0x0F;
480
		b = op & 0x0F;
481
		op >>= 12;
482
		switch (op) {
483
			uint32_t ctl;
484
			size_t plen;
485
			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
486

487
		case 0:
488
			memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
489
			break;
490
		case 1:
491
			ctl = br_i15_add(t[d], t[a], 1);
492
			ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
493
			br_i15_sub(t[d], cc->p, ctl);
494
			break;
495
		case 2:
496
			br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
497
			break;
498
		case 3:
499
			br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
500
			break;
501
		case 4:
502
			plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
503
			br_i15_encode(tp, plen, cc->p);
504
			tp[plen - 1] -= 2;
505
			br_i15_modpow(t[d], tp, plen,
506
				cc->p, cc->p0i, t[a], t[b]);
507
			break;
508
		default:
509
			r &= ~br_i15_iszero(t[d]);
510
			break;
511
		}
512
	}
513

514
	/*
515
	 * Copy back result.
516
	 */
517
	memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
518
	return r;
519
}
520

521
static void
522
set_one(uint16_t *x, const uint16_t *p)
523
{
524
	size_t plen;
525

526
	plen = (p[0] + 31) >> 4;
527
	memset(x, 0, plen * sizeof *x);
528
	x[0] = p[0];
529
	x[1] = 0x0001;
530
}
531

532
static void
533
point_zero(jacobian *P, const curve_params *cc)
534
{
535
	memset(P, 0, sizeof *P);
536
	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
537
}
538

539
static inline void
540
point_double(jacobian *P, const curve_params *cc)
541
{
542
	run_code(P, P, cc, code_double);
543
}
544

545
static inline uint32_t
546
point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
547
{
548
	return run_code(P1, P2, cc, code_add);
549
}
550

551
static void
552
point_mul(jacobian *P, const unsigned char *x, size_t xlen,
553
	const curve_params *cc)
554
{
555
	/*
556
	 * We do a simple double-and-add ladder with a 2-bit window
557
	 * to make only one add every two doublings. We thus first
558
	 * precompute 2P and 3P in some local buffers.
559
	 *
560
	 * We always perform two doublings and one addition; the
561
	 * addition is with P, 2P and 3P and is done in a temporary
562
	 * array.
563
	 *
564
	 * The addition code cannot handle cases where one of the
565
	 * operands is infinity, which is the case at the start of the
566
	 * ladder. We therefore need to maintain a flag that controls
567
	 * this situation.
568
	 */
569
	uint32_t qz;
570
	jacobian P2, P3, Q, T, U;
571

572
	memcpy(&P2, P, sizeof P2);
573
	point_double(&P2, cc);
574
	memcpy(&P3, P, sizeof P3);
575
	point_add(&P3, &P2, cc);
576

577
	point_zero(&Q, cc);
578
	qz = 1;
579
	while (xlen -- > 0) {
580
		int k;
581

582
		for (k = 6; k >= 0; k -= 2) {
583
			uint32_t bits;
584
			uint32_t bnz;
585

586
			point_double(&Q, cc);
587
			point_double(&Q, cc);
588
			memcpy(&T, P, sizeof T);
589
			memcpy(&U, &Q, sizeof U);
590
			bits = (*x >> k) & (uint32_t)3;
591
			bnz = NEQ(bits, 0);
592
			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
593
			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
594
			point_add(&U, &T, cc);
595
			CCOPY(bnz & qz, &Q, &T, sizeof Q);
596
			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
597
			qz &= ~bnz;
598
		}
599
		x ++;
600
	}
601
	memcpy(P, &Q, sizeof Q);
602
}
603

604
/*
605
 * Decode point into Jacobian coordinates. This function does not support
606
 * the point at infinity. If the point is invalid then this returns 0, but
607
 * the coordinates are still set to properly formed field elements.
608
 */
609
static uint32_t
610
point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
611
{
612
	/*
613
	 * Points must use uncompressed format:
614
	 * -- first byte is 0x04;
615
	 * -- coordinates X and Y use unsigned big-endian, with the same
616
	 *    length as the field modulus.
617
	 *
618
	 * We don't support hybrid format (uncompressed, but first byte
619
	 * has value 0x06 or 0x07, depending on the least significant bit
620
	 * of Y) because it is rather useless, and explicitly forbidden
621
	 * by PKIX (RFC 5480, section 2.2).
622
	 *
623
	 * We don't support compressed format either, because it is not
624
	 * much used in practice (there are or were patent-related
625
	 * concerns about point compression, which explains the lack of
626
	 * generalised support). Also, point compression support would
627
	 * need a bit more code.
628
	 */
629
	const unsigned char *buf;
630
	size_t plen, zlen;
631
	uint32_t r;
632
	jacobian Q;
633

634
	buf = src;
635
	point_zero(P, cc);
636
	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
637
	if (len != 1 + (plen << 1)) {
638
		return 0;
639
	}
640
	r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
641
	r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
642

643
	/*
644
	 * Check first byte.
645
	 */
646
	r &= EQ(buf[0], 0x04);
647
	/* obsolete
648
	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
649
		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
650
	*/
651

652
	/*
653
	 * Convert coordinates and check that the point is valid.
654
	 */
655
	zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
656
	memcpy(Q.c[0], cc->R2, zlen);
657
	memcpy(Q.c[1], cc->b, zlen);
658
	set_one(Q.c[2], cc->p);
659
	r &= ~run_code(P, &Q, cc, code_check);
660
	return r;
661
}
662

663
/*
664
 * Encode a point. This method assumes that the point is correct and is
665
 * not the point at infinity. Encoded size is always 1+2*plen, where
666
 * plen is the field modulus length, in bytes.
667
 */
668
static void
669
point_encode(void *dst, const jacobian *P, const curve_params *cc)
670
{
671
	unsigned char *buf;
672
	size_t plen;
673
	jacobian Q, T;
674

675
	buf = dst;
676
	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
677
	buf[0] = 0x04;
678
	memcpy(&Q, P, sizeof *P);
679
	set_one(T.c[2], cc->p);
680
	run_code(&Q, &T, cc, code_affine);
681
	br_i15_encode(buf + 1, plen, Q.c[0]);
682
	br_i15_encode(buf + 1 + plen, plen, Q.c[1]);
683
}
684

685
static const br_ec_curve_def *
686
id_to_curve_def(int curve)
687
{
688
	switch (curve) {
689
	case BR_EC_secp256r1:
690
		return &br_secp256r1;
691
	case BR_EC_secp384r1:
692
		return &br_secp384r1;
693
	case BR_EC_secp521r1:
694
		return &br_secp521r1;
695
	}
696
	return NULL;
697
}
698

699
static const unsigned char *
700
api_generator(int curve, size_t *len)
701
{
702
	const br_ec_curve_def *cd;
703

704
	cd = id_to_curve_def(curve);
705
	*len = cd->generator_len;
706
	return cd->generator;
707
}
708

709
static const unsigned char *
710
api_order(int curve, size_t *len)
711
{
712
	const br_ec_curve_def *cd;
713

714
	cd = id_to_curve_def(curve);
715
	*len = cd->order_len;
716
	return cd->order;
717
}
718

719
static size_t
720
api_xoff(int curve, size_t *len)
721
{
722
	api_generator(curve, len);
723
	*len >>= 1;
724
	return 1;
725
}
726

727
static uint32_t
728
api_mul(unsigned char *G, size_t Glen,
729
	const unsigned char *x, size_t xlen, int curve)
730
{
731
	uint32_t r;
732
	const curve_params *cc;
733
	jacobian P;
734

735
	cc = id_to_curve(curve);
736
	if (Glen != cc->point_len) {
737
		return 0;
738
	}
739
	r = point_decode(&P, G, Glen, cc);
740
	point_mul(&P, x, xlen, cc);
741
	point_encode(G, &P, cc);
742
	return r;
743
}
744

745
static size_t
746
api_mulgen(unsigned char *R,
747
	const unsigned char *x, size_t xlen, int curve)
748
{
749
	const unsigned char *G;
750
	size_t Glen;
751

752
	G = api_generator(curve, &Glen);
753
	memcpy(R, G, Glen);
754
	api_mul(R, Glen, x, xlen, curve);
755
	return Glen;
756
}
757

758
static uint32_t
759
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
760
	const unsigned char *x, size_t xlen,
761
	const unsigned char *y, size_t ylen, int curve)
762
{
763
	uint32_t r, t, z;
764
	const curve_params *cc;
765
	jacobian P, Q;
766

767
	/*
768
	 * TODO: see about merging the two ladders. Right now, we do
769
	 * two independent point multiplications, which is a bit
770
	 * wasteful of CPU resources (but yields short code).
771
	 */
772

773
	cc = id_to_curve(curve);
774
	if (len != cc->point_len) {
775
		return 0;
776
	}
777
	r = point_decode(&P, A, len, cc);
778
	if (B == NULL) {
779
		size_t Glen;
780

781
		B = api_generator(curve, &Glen);
782
	}
783
	r &= point_decode(&Q, B, len, cc);
784
	point_mul(&P, x, xlen, cc);
785
	point_mul(&Q, y, ylen, cc);
786

787
	/*
788
	 * We want to compute P+Q. Since the base points A and B are distinct
789
	 * from infinity, and the multipliers are non-zero and lower than the
790
	 * curve order, then we know that P and Q are non-infinity. This
791
	 * leaves two special situations to test for:
792
	 * -- If P = Q then we must use point_double().
793
	 * -- If P+Q = 0 then we must report an error.
794
	 */
795
	t = point_add(&P, &Q, cc);
796
	point_double(&Q, cc);
797
	z = br_i15_iszero(P.c[2]);
798

799
	/*
800
	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
801
	 * have the following:
802
	 *
803
	 *   z = 0, t = 0   return P (normal addition)
804
	 *   z = 0, t = 1   return P (normal addition)
805
	 *   z = 1, t = 0   return Q (a 'double' case)
806
	 *   z = 1, t = 1   report an error (P+Q = 0)
807
	 */
808
	CCOPY(z & ~t, &P, &Q, sizeof Q);
809
	point_encode(A, &P, cc);
810
	r &= ~(z & t);
811

812
	return r;
813
}
814

815
/* see bearssl_ec.h */
816
const br_ec_impl br_ec_prime_i15 = {
817
	(uint32_t)0x03800000,
818
	&api_generator,
819
	&api_order,
820
	&api_xoff,
821
	&api_mul,
822
	&api_mulgen,
823
	&api_muladd
824
};
825

826
Product

Resources

Company