CoCalc -- edwards25519.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/crypto/krb5/src/plugins/preauth/spake/edwards25519.c
³⁴⁸⁸⁹ views
1
/* -*- mode: c; c-basic-offset: 2; indent-tabs-mode: nil -*- */
2
/* This file is adapted from the SPAKE edwards25519 code in BoringSSL. */
3
/*
4
 * The MIT License (MIT)
5
 *
6
 * Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining a copy
9
 * of this software and associated documentation files (the "Software"), to
10
 * deal in the Software without restriction, including without limitation the
11
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12
 * sell copies of the Software, and to permit persons to whom the Software is
13
 * furnished to do so, subject to the following conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be included in
16
 * all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24
 * IN THE SOFTWARE.
25
 */
26
/*
27
 * Copyright (c) 2015-2016, Google Inc.
28
 *
29
 * Permission to use, copy, modify, and/or distribute this software for any
30
 * purpose with or without fee is hereby granted, provided that the above
31
 * copyright notice and this permission notice appear in all copies.
32
 *
33
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
34
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
35
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
37
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
38
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
39
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
40
 */
41

42
/*
43
 * This code is adapted from the BoringSSL edwards25519 SPAKE2 implementation
44
 * from third_party/fiat and crypto/spake25519.c, with the following
45
 * adaptations:
46
 *
47
 * - The M and N points are the ones from draft-irtf-cfrg-spake2-05.  The
48
 *   BoringSSL M and N points were determined similarly, but were not
49
 *   restricted to members of the generator subgroup, so they use only one hash
50
 *   iteration for both points.  The intent in BoringSSL had been to multiply w
51
 *   by the cofactor so that wM and wN would be in the subgroup, but as that
52
 *   step was accidentally omitted, a hack had to be introduced after the fact
53
 *   to add multiples of the prime order to the scalar.  That hack is not
54
 *   present in this code, and the SPAKE preauth spec does not multiply w by
55
 *   the cofactor as it is unnecessary if M and N are chosen from the subgroup.
56
 *
57
 * - The SPAKE code is modified to fit the groups.h interface and the SPAKE
58
 *   preauth spec.
59
 *
60
 * - The required declarations and code are all here in one file (except for
61
 *   the generator point table, which is still in a separate header), so all of
62
 *   the functions are declared static.
63
 *
64
 * - BORINGSSL_CURVE25519_64BIT is defined here using autoconf tests.
65
 *
66
 * - curve25519_32.h and curve25519_64.h are combined into edwards25519_fiat.h
67
 *   (conditionalized on BORINGSSL_CURVE25519_64BIT) for predictable dependency
68
 *   generation.  The fiat_25519_selectznz and fiat_25519_carry_scmul_121666
69
 *   functions were removed from both branches as they are not used here (the
70
 *   former because it is not used by the BoringSSL code and the latter because
71
 *   it is only used by the X25519 code).  The fiat_25519_int128 and
72
 *   fiat_25519_uint128 typedefs were adjusted to work with older versions of
73
 *   gcc.
74
 *
75
 * - fe_cmov() has the initial "Silence an unused function warning" part
76
 *   removed, as we removed fiat_25519_selectznz instead.
77
 *
78
 * - The field element bounds assertion checks are disabled by default, as they
79
 *   slow the code down by roughly a factor of two.  The
80
 *   OPENSSL_COMPILE_ASSERT() in fe_copy_lt() is changed to a regular assert
81
 *   and is also conditionalized.  Do a build and "make check" with
82
 *   EDWARDS25519_ASSERTS defined when updating this code.
83
 *
84
 * - The copyright comments at the top are formatted the way we do so in other
85
 *   source files, for ease of extraction.
86
 *
87
 * - Declarations in for loops conflict with our compiler configuration in
88
 *   older versions of gcc, so they are moved outside of the for loop.
89
 *
90
 * - The preprocessor symbol OPENSSL_SMALL is changed to CONFIG_SMALL.
91
 *
92
 * - OPENSSL_memset and OPENSSL_memmove are changed to memset and memmove, in
93
 *   each case verifying that they are used with nonzero length arguments.
94
 *
95
 * - CRYPTO_memcmp is changed to k5_bcmp.
96
 *
97
 * - Functions used only by X25519 or Ed25519 interfaces but not SPAKE are
98
 *   removed, taking care to check for unused functions in both the 64-bit and
99
 *   32-bit preprocessor branches.  ge_p3_dbl() is unused here if CONFIG_SMALL
100
 *   is defined, so it is placed inside #ifndef CONFIG_SMALL.
101
 */
102

103
// Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP
104
// 20141124 (https://bench.cr.yp.to/supercop.html). That code is released as
105
// public domain but parts have been replaced with code generated by Fiat
106
// (https://github.com/mit-plv/fiat-crypto), which is MIT licensed.
107

108
#include "groups.h"
109
#include "iana.h"
110

111
#ifdef __GNUC__
112
#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
113
#endif
114

115
#if SIZEOF_SIZE_T >= 8 && defined(HAVE___INT128_T) && defined(HAVE___UINT128_T)
116
#define BORINGSSL_CURVE25519_64BIT
117
typedef __int128_t int128_t;
118
typedef __uint128_t uint128_t;
119
#endif
120

121
/* From BoringSSL crypto/curve25519/internal.h */
122

123
#if defined(BORINGSSL_CURVE25519_64BIT)
124
// fe means field element. Here the field is \Z/(2^255-19). An element t,
125
// entries t[0]...t[4], represents the integer t[0]+2^51 t[1]+2^102 t[2]+2^153
126
// t[3]+2^204 t[4].
127
// fe limbs are bounded by 1.125*2^51.
128
// Multiplication and carrying produce fe from fe_loose.
129
typedef struct fe { uint64_t v[5]; } fe;
130

131
// fe_loose limbs are bounded by 3.375*2^51.
132
// Addition and subtraction produce fe_loose from (fe, fe).
133
typedef struct fe_loose { uint64_t v[5]; } fe_loose;
134
#else
135
// fe means field element. Here the field is \Z/(2^255-19). An element t,
136
// entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
137
// t[3]+2^102 t[4]+...+2^230 t[9].
138
// fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
139
// Multiplication and carrying produce fe from fe_loose.
140
typedef struct fe { uint32_t v[10]; } fe;
141

142
// fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc.
143
// Addition and subtraction produce fe_loose from (fe, fe).
144
typedef struct fe_loose { uint32_t v[10]; } fe_loose;
145
#endif
146

147
// ge means group element.
148
//
149
// Here the group is the set of pairs (x,y) of field elements (see fe.h)
150
// satisfying -x^2 + y^2 = 1 + d x^2y^2
151
// where d = -121665/121666.
152
//
153
// Representations:
154
//   ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
155
//   ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
156
//   ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
157
//   ge_precomp (Duif): (y+x,y-x,2dxy)
158

159
typedef struct {
160
  fe X;
161
  fe Y;
162
  fe Z;
163
} ge_p2;
164

165
typedef struct {
166
  fe X;
167
  fe Y;
168
  fe Z;
169
  fe T;
170
} ge_p3;
171

172
typedef struct {
173
  fe_loose X;
174
  fe_loose Y;
175
  fe_loose Z;
176
  fe_loose T;
177
} ge_p1p1;
178

179
typedef struct {
180
  fe_loose yplusx;
181
  fe_loose yminusx;
182
  fe_loose xy2d;
183
} ge_precomp;
184

185
typedef struct {
186
  fe_loose YplusX;
187
  fe_loose YminusX;
188
  fe_loose Z;
189
  fe_loose T2d;
190
} ge_cached;
191

192
#include "edwards25519_tables.h"
193
#include "edwards25519_fiat.h"
194

195
/* From BoringSSL third-party/fiat/curve25519.c */
196

197
static uint64_t load_3(const uint8_t *in) {
198
  uint64_t result;
199
  result = (uint64_t)in[0];
200
  result |= ((uint64_t)in[1]) << 8;
201
  result |= ((uint64_t)in[2]) << 16;
202
  return result;
203
}
204

205
static uint64_t load_4(const uint8_t *in) {
206
  uint64_t result;
207
  result = (uint64_t)in[0];
208
  result |= ((uint64_t)in[1]) << 8;
209
  result |= ((uint64_t)in[2]) << 16;
210
  result |= ((uint64_t)in[3]) << 24;
211
  return result;
212
}
213

214

215
// Field operations.
216

217
#if defined(BORINGSSL_CURVE25519_64BIT)
218

219
typedef uint64_t fe_limb_t;
220
#define FE_NUM_LIMBS 5
221

222
// assert_fe asserts that |f| satisfies bounds:
223
//
224
//  [[0x0 ~> 0x8cccccccccccc],
225
//   [0x0 ~> 0x8cccccccccccc],
226
//   [0x0 ~> 0x8cccccccccccc],
227
//   [0x0 ~> 0x8cccccccccccc],
228
//   [0x0 ~> 0x8cccccccccccc]]
229
//
230
// See comments in edwards25519_fiat.h for which functions use these bounds for
231
// inputs or outputs.
232
#define assert_fe(f)                                                    \
233
  do {                                                                  \
234
    for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
235
      assert(f[_assert_fe_i] <= UINT64_C(0x8cccccccccccc));             \
236
    }                                                                   \
237
  } while (0)
238

239
// assert_fe_loose asserts that |f| satisfies bounds:
240
//
241
//  [[0x0 ~> 0x1a666666666664],
242
//   [0x0 ~> 0x1a666666666664],
243
//   [0x0 ~> 0x1a666666666664],
244
//   [0x0 ~> 0x1a666666666664],
245
//   [0x0 ~> 0x1a666666666664]]
246
//
247
// See comments in edwards25519_fiat.h for which functions use these bounds for
248
// inputs or outputs.
249
#define assert_fe_loose(f)                                              \
250
  do {                                                                  \
251
    for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
252
      assert(f[_assert_fe_i] <= UINT64_C(0x1a666666666664));            \
253
    }                                                                   \
254
  } while (0)
255

256
#else
257

258
typedef uint32_t fe_limb_t;
259
#define FE_NUM_LIMBS 10
260

261
// assert_fe asserts that |f| satisfies bounds:
262
//
263
//  [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
264
//   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
265
//   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
266
//   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
267
//   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
268
//
269
// See comments in edwards25519_fiat.h for which functions use these bounds for
270
// inputs or outputs.
271
#define assert_fe(f)                                                     \
272
  do {                                                                   \
273
    for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
274
      assert(f[_assert_fe_i] <=                                          \
275
             ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u));            \
276
    }                                                                    \
277
  } while (0)
278

279
// assert_fe_loose asserts that |f| satisfies bounds:
280
//
281
//  [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
282
//   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
283
//   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
284
//   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
285
//   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
286
//
287
// See comments in edwards25519_fiat.h for which functions use these bounds for
288
// inputs or outputs.
289
#define assert_fe_loose(f)                                               \
290
  do {                                                                   \
291
    for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
292
      assert(f[_assert_fe_i] <=                                          \
293
             ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u));            \
294
    }                                                                    \
295
  } while (0)
296

297
#endif  // BORINGSSL_CURVE25519_64BIT
298

299
#ifndef EDWARDS25519_ASSERTS
300
#undef assert_fe
301
#undef assert_fe_loose
302
#define assert_fe(f)
303
#define assert_fe_loose(f)
304
#endif
305

306
static void fe_frombytes_strict(fe *h, const uint8_t s[32]) {
307
  // |fiat_25519_from_bytes| requires the top-most bit be clear.
308
  assert((s[31] & 0x80) == 0);
309
  fiat_25519_from_bytes(h->v, s);
310
  assert_fe(h->v);
311
}
312

313
static void fe_frombytes(fe *h, const uint8_t s[32]) {
314
  uint8_t s_copy[32];
315
  memcpy(s_copy, s, 32);
316
  s_copy[31] &= 0x7f;
317
  fe_frombytes_strict(h, s_copy);
318
}
319

320
static void fe_tobytes(uint8_t s[32], const fe *f) {
321
  assert_fe(f->v);
322
  fiat_25519_to_bytes(s, f->v);
323
}
324

325
// h = 0
326
static void fe_0(fe *h) {
327
  memset(h, 0, sizeof(fe));
328
}
329

330
static void fe_loose_0(fe_loose *h) {
331
  memset(h, 0, sizeof(fe_loose));
332
}
333

334
// h = 1
335
static void fe_1(fe *h) {
336
  memset(h, 0, sizeof(fe));
337
  h->v[0] = 1;
338
}
339

340
static void fe_loose_1(fe_loose *h) {
341
  memset(h, 0, sizeof(fe_loose));
342
  h->v[0] = 1;
343
}
344

345
// h = f + g
346
// Can overlap h with f or g.
347
static void fe_add(fe_loose *h, const fe *f, const fe *g) {
348
  assert_fe(f->v);
349
  assert_fe(g->v);
350
  fiat_25519_add(h->v, f->v, g->v);
351
  assert_fe_loose(h->v);
352
}
353

354
// h = f - g
355
// Can overlap h with f or g.
356
static void fe_sub(fe_loose *h, const fe *f, const fe *g) {
357
  assert_fe(f->v);
358
  assert_fe(g->v);
359
  fiat_25519_sub(h->v, f->v, g->v);
360
  assert_fe_loose(h->v);
361
}
362

363
static void fe_carry(fe *h, const fe_loose* f) {
364
  assert_fe_loose(f->v);
365
  fiat_25519_carry(h->v, f->v);
366
  assert_fe(h->v);
367
}
368

369
static void fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],
370
                        const fe_limb_t in1[FE_NUM_LIMBS],
371
                        const fe_limb_t in2[FE_NUM_LIMBS]) {
372
  assert_fe_loose(in1);
373
  assert_fe_loose(in2);
374
  fiat_25519_carry_mul(out, in1, in2);
375
  assert_fe(out);
376
}
377

378
static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) {
379
  fe_mul_impl(h->v, f->v, g->v);
380
}
381

382
static void fe_mul_llt(fe_loose *h, const fe_loose *f, const fe *g) {
383
  fe_mul_impl(h->v, f->v, g->v);
384
}
385

386
static void fe_mul_ttt(fe *h, const fe *f, const fe *g) {
387
  fe_mul_impl(h->v, f->v, g->v);
388
}
389

390
static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) {
391
  fe_mul_impl(h->v, f->v, g->v);
392
}
393

394
static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) {
395
  fe_mul_impl(h->v, f->v, g->v);
396
}
397

398
static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) {
399
  fe_mul_impl(h->v, f->v, g->v);
400
}
401

402
static void fe_sq_tl(fe *h, const fe_loose *f) {
403
  assert_fe_loose(f->v);
404
  fiat_25519_carry_square(h->v, f->v);
405
  assert_fe(h->v);
406
}
407

408
static void fe_sq_tt(fe *h, const fe *f) {
409
  assert_fe_loose(f->v);
410
  fiat_25519_carry_square(h->v, f->v);
411
  assert_fe(h->v);
412
}
413

414
// h = -f
415
static void fe_neg(fe_loose *h, const fe *f) {
416
  assert_fe(f->v);
417
  fiat_25519_opp(h->v, f->v);
418
  assert_fe_loose(h->v);
419
}
420

421
// Replace (f,g) with (g,g) if b == 1;
422
// replace (f,g) with (f,g) if b == 0.
423
//
424
// Preconditions: b in {0,1}.
425
static void fe_cmov(fe_loose *f, const fe_loose *g, fe_limb_t b) {
426
  b = 0-b;
427
  unsigned i;
428
  for (i = 0; i < FE_NUM_LIMBS; i++) {
429
    fe_limb_t x = f->v[i] ^ g->v[i];
430
    x &= b;
431
    f->v[i] ^= x;
432
  }
433
}
434

435
// h = f
436
static void fe_copy(fe *h, const fe *f) {
437
  memmove(h, f, sizeof(fe));
438
}
439

440
static void fe_copy_lt(fe_loose *h, const fe *f) {
441
#ifdef EDWARDS25519_ASSERTS
442
  assert(sizeof(fe_loose) == sizeof(fe));
443
#endif
444
  memmove(h, f, sizeof(fe));
445
}
446
#if !defined(CONFIG_SMALL)
447
static void fe_copy_ll(fe_loose *h, const fe_loose *f) {
448
  memmove(h, f, sizeof(fe_loose));
449
}
450
#endif // !defined(CONFIG_SMALL)
451

452
static void fe_loose_invert(fe *out, const fe_loose *z) {
453
  fe t0;
454
  fe t1;
455
  fe t2;
456
  fe t3;
457
  int i;
458

459
  fe_sq_tl(&t0, z);
460
  fe_sq_tt(&t1, &t0);
461
  for (i = 1; i < 2; ++i) {
462
    fe_sq_tt(&t1, &t1);
463
  }
464
  fe_mul_tlt(&t1, z, &t1);
465
  fe_mul_ttt(&t0, &t0, &t1);
466
  fe_sq_tt(&t2, &t0);
467
  fe_mul_ttt(&t1, &t1, &t2);
468
  fe_sq_tt(&t2, &t1);
469
  for (i = 1; i < 5; ++i) {
470
    fe_sq_tt(&t2, &t2);
471
  }
472
  fe_mul_ttt(&t1, &t2, &t1);
473
  fe_sq_tt(&t2, &t1);
474
  for (i = 1; i < 10; ++i) {
475
    fe_sq_tt(&t2, &t2);
476
  }
477
  fe_mul_ttt(&t2, &t2, &t1);
478
  fe_sq_tt(&t3, &t2);
479
  for (i = 1; i < 20; ++i) {
480
    fe_sq_tt(&t3, &t3);
481
  }
482
  fe_mul_ttt(&t2, &t3, &t2);
483
  fe_sq_tt(&t2, &t2);
484
  for (i = 1; i < 10; ++i) {
485
    fe_sq_tt(&t2, &t2);
486
  }
487
  fe_mul_ttt(&t1, &t2, &t1);
488
  fe_sq_tt(&t2, &t1);
489
  for (i = 1; i < 50; ++i) {
490
    fe_sq_tt(&t2, &t2);
491
  }
492
  fe_mul_ttt(&t2, &t2, &t1);
493
  fe_sq_tt(&t3, &t2);
494
  for (i = 1; i < 100; ++i) {
495
    fe_sq_tt(&t3, &t3);
496
  }
497
  fe_mul_ttt(&t2, &t3, &t2);
498
  fe_sq_tt(&t2, &t2);
499
  for (i = 1; i < 50; ++i) {
500
    fe_sq_tt(&t2, &t2);
501
  }
502
  fe_mul_ttt(&t1, &t2, &t1);
503
  fe_sq_tt(&t1, &t1);
504
  for (i = 1; i < 5; ++i) {
505
    fe_sq_tt(&t1, &t1);
506
  }
507
  fe_mul_ttt(out, &t1, &t0);
508
}
509

510
static void fe_invert(fe *out, const fe *z) {
511
  fe_loose l;
512
  fe_copy_lt(&l, z);
513
  fe_loose_invert(out, &l);
514
}
515

516
// return 0 if f == 0
517
// return 1 if f != 0
518
static int fe_isnonzero(const fe_loose *f) {
519
  fe tight;
520
  fe_carry(&tight, f);
521
  uint8_t s[32];
522
  fe_tobytes(s, &tight);
523

524
  static const uint8_t zero[32] = {0};
525
  return k5_bcmp(s, zero, sizeof(zero)) != 0;
526
}
527

528
// return 1 if f is in {1,3,5,...,q-2}
529
// return 0 if f is in {0,2,4,...,q-1}
530
static int fe_isnegative(const fe *f) {
531
  uint8_t s[32];
532
  fe_tobytes(s, f);
533
  return s[0] & 1;
534
}
535

536
static void fe_sq2_tt(fe *h, const fe *f) {
537
  // h = f^2
538
  fe_sq_tt(h, f);
539

540
  // h = h + h
541
  fe_loose tmp;
542
  fe_add(&tmp, h, h);
543
  fe_carry(h, &tmp);
544
}
545

546
static void fe_pow22523(fe *out, const fe *z) {
547
  fe t0;
548
  fe t1;
549
  fe t2;
550
  int i;
551

552
  fe_sq_tt(&t0, z);
553
  fe_sq_tt(&t1, &t0);
554
  for (i = 1; i < 2; ++i) {
555
    fe_sq_tt(&t1, &t1);
556
  }
557
  fe_mul_ttt(&t1, z, &t1);
558
  fe_mul_ttt(&t0, &t0, &t1);
559
  fe_sq_tt(&t0, &t0);
560
  fe_mul_ttt(&t0, &t1, &t0);
561
  fe_sq_tt(&t1, &t0);
562
  for (i = 1; i < 5; ++i) {
563
    fe_sq_tt(&t1, &t1);
564
  }
565
  fe_mul_ttt(&t0, &t1, &t0);
566
  fe_sq_tt(&t1, &t0);
567
  for (i = 1; i < 10; ++i) {
568
    fe_sq_tt(&t1, &t1);
569
  }
570
  fe_mul_ttt(&t1, &t1, &t0);
571
  fe_sq_tt(&t2, &t1);
572
  for (i = 1; i < 20; ++i) {
573
    fe_sq_tt(&t2, &t2);
574
  }
575
  fe_mul_ttt(&t1, &t2, &t1);
576
  fe_sq_tt(&t1, &t1);
577
  for (i = 1; i < 10; ++i) {
578
    fe_sq_tt(&t1, &t1);
579
  }
580
  fe_mul_ttt(&t0, &t1, &t0);
581
  fe_sq_tt(&t1, &t0);
582
  for (i = 1; i < 50; ++i) {
583
    fe_sq_tt(&t1, &t1);
584
  }
585
  fe_mul_ttt(&t1, &t1, &t0);
586
  fe_sq_tt(&t2, &t1);
587
  for (i = 1; i < 100; ++i) {
588
    fe_sq_tt(&t2, &t2);
589
  }
590
  fe_mul_ttt(&t1, &t2, &t1);
591
  fe_sq_tt(&t1, &t1);
592
  for (i = 1; i < 50; ++i) {
593
    fe_sq_tt(&t1, &t1);
594
  }
595
  fe_mul_ttt(&t0, &t1, &t0);
596
  fe_sq_tt(&t0, &t0);
597
  for (i = 1; i < 2; ++i) {
598
    fe_sq_tt(&t0, &t0);
599
  }
600
  fe_mul_ttt(out, &t0, z);
601
}
602

603

604
// Group operations.
605

606
static void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) {
607
  fe recip;
608
  fe x;
609
  fe y;
610

611
  fe_invert(&recip, &h->Z);
612
  fe_mul_ttt(&x, &h->X, &recip);
613
  fe_mul_ttt(&y, &h->Y, &recip);
614
  fe_tobytes(s, &y);
615
  s[31] ^= fe_isnegative(&x) << 7;
616
}
617

618
static int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) {
619
  fe u;
620
  fe_loose v;
621
  fe v3;
622
  fe vxx;
623
  fe_loose check;
624

625
  fe_frombytes(&h->Y, s);
626
  fe_1(&h->Z);
627
  fe_sq_tt(&v3, &h->Y);
628
  fe_mul_ttt(&vxx, &v3, &d);
629
  fe_sub(&v, &v3, &h->Z);  // u = y^2-1
630
  fe_carry(&u, &v);
631
  fe_add(&v, &vxx, &h->Z);  // v = dy^2+1
632

633
  fe_sq_tl(&v3, &v);
634
  fe_mul_ttl(&v3, &v3, &v);  // v3 = v^3
635
  fe_sq_tt(&h->X, &v3);
636
  fe_mul_ttl(&h->X, &h->X, &v);
637
  fe_mul_ttt(&h->X, &h->X, &u);  // x = uv^7
638

639
  fe_pow22523(&h->X, &h->X);  // x = (uv^7)^((q-5)/8)
640
  fe_mul_ttt(&h->X, &h->X, &v3);
641
  fe_mul_ttt(&h->X, &h->X, &u);  // x = uv^3(uv^7)^((q-5)/8)
642

643
  fe_sq_tt(&vxx, &h->X);
644
  fe_mul_ttl(&vxx, &vxx, &v);
645
  fe_sub(&check, &vxx, &u);
646
  if (fe_isnonzero(&check)) {
647
    fe_add(&check, &vxx, &u);
648
    if (fe_isnonzero(&check)) {
649
      return 0;
650
    }
651
    fe_mul_ttt(&h->X, &h->X, &sqrtm1);
652
  }
653

654
  if (fe_isnegative(&h->X) != (s[31] >> 7)) {
655
    fe_loose t;
656
    fe_neg(&t, &h->X);
657
    fe_carry(&h->X, &t);
658
  }
659

660
  fe_mul_ttt(&h->T, &h->X, &h->Y);
661
  return 1;
662
}
663

664
static void ge_p2_0(ge_p2 *h) {
665
  fe_0(&h->X);
666
  fe_1(&h->Y);
667
  fe_1(&h->Z);
668
}
669

670
static void ge_p3_0(ge_p3 *h) {
671
  fe_0(&h->X);
672
  fe_1(&h->Y);
673
  fe_1(&h->Z);
674
  fe_0(&h->T);
675
}
676

677
static void ge_cached_0(ge_cached *h) {
678
  fe_loose_1(&h->YplusX);
679
  fe_loose_1(&h->YminusX);
680
  fe_loose_1(&h->Z);
681
  fe_loose_0(&h->T2d);
682
}
683

684
static void ge_precomp_0(ge_precomp *h) {
685
  fe_loose_1(&h->yplusx);
686
  fe_loose_1(&h->yminusx);
687
  fe_loose_0(&h->xy2d);
688
}
689

690
// r = p
691
static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
692
  fe_copy(&r->X, &p->X);
693
  fe_copy(&r->Y, &p->Y);
694
  fe_copy(&r->Z, &p->Z);
695
}
696

697
// r = p
698
static void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
699
  fe_add(&r->YplusX, &p->Y, &p->X);
700
  fe_sub(&r->YminusX, &p->Y, &p->X);
701
  fe_copy_lt(&r->Z, &p->Z);
702
  fe_mul_ltt(&r->T2d, &p->T, &d2);
703
}
704

705
// r = p
706
static void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
707
  fe_mul_tll(&r->X, &p->X, &p->T);
708
  fe_mul_tll(&r->Y, &p->Y, &p->Z);
709
  fe_mul_tll(&r->Z, &p->Z, &p->T);
710
}
711

712
// r = p
713
static void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
714
  fe_mul_tll(&r->X, &p->X, &p->T);
715
  fe_mul_tll(&r->Y, &p->Y, &p->Z);
716
  fe_mul_tll(&r->Z, &p->Z, &p->T);
717
  fe_mul_tll(&r->T, &p->X, &p->Y);
718
}
719

720
// r = p
721
static void ge_p1p1_to_cached(ge_cached *r, const ge_p1p1 *p) {
722
  ge_p3 t;
723
  x25519_ge_p1p1_to_p3(&t, p);
724
  x25519_ge_p3_to_cached(r, &t);
725
}
726

727
// r = 2 * p
728
static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
729
  fe trX, trZ, trT;
730
  fe t0;
731

732
  fe_sq_tt(&trX, &p->X);
733
  fe_sq_tt(&trZ, &p->Y);
734
  fe_sq2_tt(&trT, &p->Z);
735
  fe_add(&r->Y, &p->X, &p->Y);
736
  fe_sq_tl(&t0, &r->Y);
737

738
  fe_add(&r->Y, &trZ, &trX);
739
  fe_sub(&r->Z, &trZ, &trX);
740
  fe_carry(&trZ, &r->Y);
741
  fe_sub(&r->X, &t0, &trZ);
742
  fe_carry(&trZ, &r->Z);
743
  fe_sub(&r->T, &trT, &trZ);
744
}
745

746
#ifndef CONFIG_SMALL
747
// r = 2 * p
748
static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
749
  ge_p2 q;
750
  ge_p3_to_p2(&q, p);
751
  ge_p2_dbl(r, &q);
752
}
753
#endif
754

755
// r = p + q
756
static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
757
  fe trY, trZ, trT;
758

759
  fe_add(&r->X, &p->Y, &p->X);
760
  fe_sub(&r->Y, &p->Y, &p->X);
761
  fe_mul_tll(&trZ, &r->X, &q->yplusx);
762
  fe_mul_tll(&trY, &r->Y, &q->yminusx);
763
  fe_mul_tlt(&trT, &q->xy2d, &p->T);
764
  fe_add(&r->T, &p->Z, &p->Z);
765
  fe_sub(&r->X, &trZ, &trY);
766
  fe_add(&r->Y, &trZ, &trY);
767
  fe_carry(&trZ, &r->T);
768
  fe_add(&r->Z, &trZ, &trT);
769
  fe_sub(&r->T, &trZ, &trT);
770
}
771

772
// r = p + q
773
static void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
774
  fe trX, trY, trZ, trT;
775

776
  fe_add(&r->X, &p->Y, &p->X);
777
  fe_sub(&r->Y, &p->Y, &p->X);
778
  fe_mul_tll(&trZ, &r->X, &q->YplusX);
779
  fe_mul_tll(&trY, &r->Y, &q->YminusX);
780
  fe_mul_tlt(&trT, &q->T2d, &p->T);
781
  fe_mul_ttl(&trX, &p->Z, &q->Z);
782
  fe_add(&r->T, &trX, &trX);
783
  fe_sub(&r->X, &trZ, &trY);
784
  fe_add(&r->Y, &trZ, &trY);
785
  fe_carry(&trZ, &r->T);
786
  fe_add(&r->Z, &trZ, &trT);
787
  fe_sub(&r->T, &trZ, &trT);
788
}
789

790
// r = p - q
791
static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
792
  fe trX, trY, trZ, trT;
793

794
  fe_add(&r->X, &p->Y, &p->X);
795
  fe_sub(&r->Y, &p->Y, &p->X);
796
  fe_mul_tll(&trZ, &r->X, &q->YminusX);
797
  fe_mul_tll(&trY, &r->Y, &q->YplusX);
798
  fe_mul_tlt(&trT, &q->T2d, &p->T);
799
  fe_mul_ttl(&trX, &p->Z, &q->Z);
800
  fe_add(&r->T, &trX, &trX);
801
  fe_sub(&r->X, &trZ, &trY);
802
  fe_add(&r->Y, &trZ, &trY);
803
  fe_carry(&trZ, &r->T);
804
  fe_sub(&r->Z, &trZ, &trT);
805
  fe_add(&r->T, &trZ, &trT);
806
}
807

808
static uint8_t equal(signed char b, signed char c) {
809
  uint8_t ub = b;
810
  uint8_t uc = c;
811
  uint8_t x = ub ^ uc;  // 0: yes; 1..255: no
812
  uint32_t y = x;       // 0: yes; 1..255: no
813
  y -= 1;               // 4294967295: yes; 0..254: no
814
  y >>= 31;             // 1: yes; 0: no
815
  return y;
816
}
817

818
static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
819
  fe_cmov(&t->yplusx, &u->yplusx, b);
820
  fe_cmov(&t->yminusx, &u->yminusx, b);
821
  fe_cmov(&t->xy2d, &u->xy2d, b);
822
}
823

824
static void x25519_ge_scalarmult_small_precomp(
825
    ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) {
826
  // precomp_table is first expanded into matching |ge_precomp|
827
  // elements.
828
  ge_precomp multiples[15];
829

830
  unsigned i;
831
  for (i = 0; i < 15; i++) {
832
    // The precomputed table is assumed to already clear the top bit, so
833
    // |fe_frombytes_strict| may be used directly.
834
    const uint8_t *bytes = &precomp_table[i*(2 * 32)];
835
    fe x, y;
836
    fe_frombytes_strict(&x, bytes);
837
    fe_frombytes_strict(&y, bytes + 32);
838

839
    ge_precomp *out = &multiples[i];
840
    fe_add(&out->yplusx, &y, &x);
841
    fe_sub(&out->yminusx, &y, &x);
842
    fe_mul_ltt(&out->xy2d, &x, &y);
843
    fe_mul_llt(&out->xy2d, &out->xy2d, &d2);
844
  }
845

846
  // See the comment above |k25519SmallPrecomp| about the structure of the
847
  // precomputed elements. This loop does 64 additions and 64 doublings to
848
  // calculate the result.
849
  ge_p3_0(h);
850

851
  for (i = 63; i < 64; i--) {
852
    unsigned j;
853
    signed char index = 0;
854

855
    for (j = 0; j < 4; j++) {
856
      const uint8_t bit = 1 & (a[(8 * j) + (i / 8)] >> (i & 7));
857
      index |= (bit << j);
858
    }
859

860
    ge_precomp e;
861
    ge_precomp_0(&e);
862

863
    for (j = 1; j < 16; j++) {
864
      cmov(&e, &multiples[j-1], equal(index, j));
865
    }
866

867
    ge_cached cached;
868
    ge_p1p1 r;
869
    x25519_ge_p3_to_cached(&cached, h);
870
    x25519_ge_add(&r, h, &cached);
871
    x25519_ge_p1p1_to_p3(h, &r);
872

873
    ge_madd(&r, h, &e);
874
    x25519_ge_p1p1_to_p3(h, &r);
875
  }
876
}
877

878
#if defined(CONFIG_SMALL)
879

880
static void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
881
  x25519_ge_scalarmult_small_precomp(h, a, k25519SmallPrecomp);
882
}
883

884
#else
885

886
static uint8_t negative(signed char b) {
887
  uint32_t x = b;
888
  x >>= 31;  // 1: yes; 0: no
889
  return x;
890
}
891

892
static void table_select(ge_precomp *t, int pos, signed char b) {
893
  ge_precomp minust;
894
  uint8_t bnegative = negative(b);
895
  uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
896

897
  ge_precomp_0(t);
898
  cmov(t, &k25519Precomp[pos][0], equal(babs, 1));
899
  cmov(t, &k25519Precomp[pos][1], equal(babs, 2));
900
  cmov(t, &k25519Precomp[pos][2], equal(babs, 3));
901
  cmov(t, &k25519Precomp[pos][3], equal(babs, 4));
902
  cmov(t, &k25519Precomp[pos][4], equal(babs, 5));
903
  cmov(t, &k25519Precomp[pos][5], equal(babs, 6));
904
  cmov(t, &k25519Precomp[pos][6], equal(babs, 7));
905
  cmov(t, &k25519Precomp[pos][7], equal(babs, 8));
906
  fe_copy_ll(&minust.yplusx, &t->yminusx);
907
  fe_copy_ll(&minust.yminusx, &t->yplusx);
908

909
  // NOTE: the input table is canonical, but types don't encode it
910
  fe tmp;
911
  fe_carry(&tmp, &t->xy2d);
912
  fe_neg(&minust.xy2d, &tmp);
913

914
  cmov(t, &minust, bnegative);
915
}
916

917
// h = a * B
918
// where a = a[0]+256*a[1]+...+256^31 a[31]
919
// B is the Ed25519 base point (x,4/5) with x positive.
920
//
921
// Preconditions:
922
//   a[31] <= 127
923
static void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
924
  signed char e[64];
925
  signed char carry;
926
  ge_p1p1 r;
927
  ge_p2 s;
928
  ge_precomp t;
929
  int i;
930

931
  for (i = 0; i < 32; ++i) {
932
    e[2 * i + 0] = (a[i] >> 0) & 15;
933
    e[2 * i + 1] = (a[i] >> 4) & 15;
934
  }
935
  // each e[i] is between 0 and 15
936
  // e[63] is between 0 and 7
937

938
  carry = 0;
939
  for (i = 0; i < 63; ++i) {
940
    e[i] += carry;
941
    carry = e[i] + 8;
942
    carry >>= 4;
943
    e[i] -= carry << 4;
944
  }
945
  e[63] += carry;
946
  // each e[i] is between -8 and 8
947

948
  ge_p3_0(h);
949
  for (i = 1; i < 64; i += 2) {
950
    table_select(&t, i / 2, e[i]);
951
    ge_madd(&r, h, &t);
952
    x25519_ge_p1p1_to_p3(h, &r);
953
  }
954

955
  ge_p3_dbl(&r, h);
956
  x25519_ge_p1p1_to_p2(&s, &r);
957
  ge_p2_dbl(&r, &s);
958
  x25519_ge_p1p1_to_p2(&s, &r);
959
  ge_p2_dbl(&r, &s);
960
  x25519_ge_p1p1_to_p2(&s, &r);
961
  ge_p2_dbl(&r, &s);
962
  x25519_ge_p1p1_to_p3(h, &r);
963

964
  for (i = 0; i < 64; i += 2) {
965
    table_select(&t, i / 2, e[i]);
966
    ge_madd(&r, h, &t);
967
    x25519_ge_p1p1_to_p3(h, &r);
968
  }
969
}
970

971
#endif
972

973
static void cmov_cached(ge_cached *t, ge_cached *u, uint8_t b) {
974
  fe_cmov(&t->YplusX, &u->YplusX, b);
975
  fe_cmov(&t->YminusX, &u->YminusX, b);
976
  fe_cmov(&t->Z, &u->Z, b);
977
  fe_cmov(&t->T2d, &u->T2d, b);
978
}
979

980
// r = scalar * A.
981
// where a = a[0]+256*a[1]+...+256^31 a[31].
982
static void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar,
983
                                 const ge_p3 *A) {
984
  ge_p2 Ai_p2[8];
985
  ge_cached Ai[16];
986
  ge_p1p1 t;
987

988
  ge_cached_0(&Ai[0]);
989
  x25519_ge_p3_to_cached(&Ai[1], A);
990
  ge_p3_to_p2(&Ai_p2[1], A);
991

992
  unsigned i;
993
  for (i = 2; i < 16; i += 2) {
994
    ge_p2_dbl(&t, &Ai_p2[i / 2]);
995
    ge_p1p1_to_cached(&Ai[i], &t);
996
    if (i < 8) {
997
      x25519_ge_p1p1_to_p2(&Ai_p2[i], &t);
998
    }
999
    x25519_ge_add(&t, A, &Ai[i]);
1000
    ge_p1p1_to_cached(&Ai[i + 1], &t);
1001
    if (i < 7) {
1002
      x25519_ge_p1p1_to_p2(&Ai_p2[i + 1], &t);
1003
    }
1004
  }
1005

1006
  ge_p2_0(r);
1007
  ge_p3 u;
1008

1009
  for (i = 0; i < 256; i += 4) {
1010
    ge_p2_dbl(&t, r);
1011
    x25519_ge_p1p1_to_p2(r, &t);
1012
    ge_p2_dbl(&t, r);
1013
    x25519_ge_p1p1_to_p2(r, &t);
1014
    ge_p2_dbl(&t, r);
1015
    x25519_ge_p1p1_to_p2(r, &t);
1016
    ge_p2_dbl(&t, r);
1017
    x25519_ge_p1p1_to_p3(&u, &t);
1018

1019
    uint8_t index = scalar[31 - i/8];
1020
    index >>= 4 - (i & 4);
1021
    index &= 0xf;
1022

1023
    unsigned j;
1024
    ge_cached selected;
1025
    ge_cached_0(&selected);
1026
    for (j = 0; j < 16; j++) {
1027
      cmov_cached(&selected, &Ai[j], equal(j, index));
1028
    }
1029

1030
    x25519_ge_add(&t, &u, &selected);
1031
    x25519_ge_p1p1_to_p2(r, &t);
1032
  }
1033
}
1034

1035
// int64_lshift21 returns |a << 21| but is defined when shifting bits into the
1036
// sign bit. This works around a language flaw in C.
1037
static inline int64_t int64_lshift21(int64_t a) {
1038
  return (int64_t)((uint64_t)a << 21);
1039
}
1040

1041
// The set of scalars is \Z/l
1042
// where l = 2^252 + 27742317777372353535851937790883648493.
1043

1044
// Input:
1045
//   s[0]+256*s[1]+...+256^63*s[63] = s
1046
//
1047
// Output:
1048
//   s[0]+256*s[1]+...+256^31*s[31] = s mod l
1049
//   where l = 2^252 + 27742317777372353535851937790883648493.
1050
//   Overwrites s in place.
1051
static void x25519_sc_reduce(uint8_t s[64]) {
1052
  int64_t s0 = 2097151 & load_3(s);
1053
  int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
1054
  int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
1055
  int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
1056
  int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
1057
  int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
1058
  int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
1059
  int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
1060
  int64_t s8 = 2097151 & load_3(s + 21);
1061
  int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
1062
  int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
1063
  int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
1064
  int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
1065
  int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
1066
  int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
1067
  int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
1068
  int64_t s16 = 2097151 & load_3(s + 42);
1069
  int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
1070
  int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
1071
  int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
1072
  int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
1073
  int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
1074
  int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
1075
  int64_t s23 = (load_4(s + 60) >> 3);
1076
  int64_t carry0;
1077
  int64_t carry1;
1078
  int64_t carry2;
1079
  int64_t carry3;
1080
  int64_t carry4;
1081
  int64_t carry5;
1082
  int64_t carry6;
1083
  int64_t carry7;
1084
  int64_t carry8;
1085
  int64_t carry9;
1086
  int64_t carry10;
1087
  int64_t carry11;
1088
  int64_t carry12;
1089
  int64_t carry13;
1090
  int64_t carry14;
1091
  int64_t carry15;
1092
  int64_t carry16;
1093

1094
  s11 += s23 * 666643;
1095
  s12 += s23 * 470296;
1096
  s13 += s23 * 654183;
1097
  s14 -= s23 * 997805;
1098
  s15 += s23 * 136657;
1099
  s16 -= s23 * 683901;
1100
  s23 = 0;
1101

1102
  s10 += s22 * 666643;
1103
  s11 += s22 * 470296;
1104
  s12 += s22 * 654183;
1105
  s13 -= s22 * 997805;
1106
  s14 += s22 * 136657;
1107
  s15 -= s22 * 683901;
1108
  s22 = 0;
1109

1110
  s9 += s21 * 666643;
1111
  s10 += s21 * 470296;
1112
  s11 += s21 * 654183;
1113
  s12 -= s21 * 997805;
1114
  s13 += s21 * 136657;
1115
  s14 -= s21 * 683901;
1116
  s21 = 0;
1117

1118
  s8 += s20 * 666643;
1119
  s9 += s20 * 470296;
1120
  s10 += s20 * 654183;
1121
  s11 -= s20 * 997805;
1122
  s12 += s20 * 136657;
1123
  s13 -= s20 * 683901;
1124
  s20 = 0;
1125

1126
  s7 += s19 * 666643;
1127
  s8 += s19 * 470296;
1128
  s9 += s19 * 654183;
1129
  s10 -= s19 * 997805;
1130
  s11 += s19 * 136657;
1131
  s12 -= s19 * 683901;
1132
  s19 = 0;
1133

1134
  s6 += s18 * 666643;
1135
  s7 += s18 * 470296;
1136
  s8 += s18 * 654183;
1137
  s9 -= s18 * 997805;
1138
  s10 += s18 * 136657;
1139
  s11 -= s18 * 683901;
1140
  s18 = 0;
1141

1142
  carry6 = (s6 + (1 << 20)) >> 21;
1143
  s7 += carry6;
1144
  s6 -= int64_lshift21(carry6);
1145
  carry8 = (s8 + (1 << 20)) >> 21;
1146
  s9 += carry8;
1147
  s8 -= int64_lshift21(carry8);
1148
  carry10 = (s10 + (1 << 20)) >> 21;
1149
  s11 += carry10;
1150
  s10 -= int64_lshift21(carry10);
1151
  carry12 = (s12 + (1 << 20)) >> 21;
1152
  s13 += carry12;
1153
  s12 -= int64_lshift21(carry12);
1154
  carry14 = (s14 + (1 << 20)) >> 21;
1155
  s15 += carry14;
1156
  s14 -= int64_lshift21(carry14);
1157
  carry16 = (s16 + (1 << 20)) >> 21;
1158
  s17 += carry16;
1159
  s16 -= int64_lshift21(carry16);
1160

1161
  carry7 = (s7 + (1 << 20)) >> 21;
1162
  s8 += carry7;
1163
  s7 -= int64_lshift21(carry7);
1164
  carry9 = (s9 + (1 << 20)) >> 21;
1165
  s10 += carry9;
1166
  s9 -= int64_lshift21(carry9);
1167
  carry11 = (s11 + (1 << 20)) >> 21;
1168
  s12 += carry11;
1169
  s11 -= int64_lshift21(carry11);
1170
  carry13 = (s13 + (1 << 20)) >> 21;
1171
  s14 += carry13;
1172
  s13 -= int64_lshift21(carry13);
1173
  carry15 = (s15 + (1 << 20)) >> 21;
1174
  s16 += carry15;
1175
  s15 -= int64_lshift21(carry15);
1176

1177
  s5 += s17 * 666643;
1178
  s6 += s17 * 470296;
1179
  s7 += s17 * 654183;
1180
  s8 -= s17 * 997805;
1181
  s9 += s17 * 136657;
1182
  s10 -= s17 * 683901;
1183
  s17 = 0;
1184

1185
  s4 += s16 * 666643;
1186
  s5 += s16 * 470296;
1187
  s6 += s16 * 654183;
1188
  s7 -= s16 * 997805;
1189
  s8 += s16 * 136657;
1190
  s9 -= s16 * 683901;
1191
  s16 = 0;
1192

1193
  s3 += s15 * 666643;
1194
  s4 += s15 * 470296;
1195
  s5 += s15 * 654183;
1196
  s6 -= s15 * 997805;
1197
  s7 += s15 * 136657;
1198
  s8 -= s15 * 683901;
1199
  s15 = 0;
1200

1201
  s2 += s14 * 666643;
1202
  s3 += s14 * 470296;
1203
  s4 += s14 * 654183;
1204
  s5 -= s14 * 997805;
1205
  s6 += s14 * 136657;
1206
  s7 -= s14 * 683901;
1207
  s14 = 0;
1208

1209
  s1 += s13 * 666643;
1210
  s2 += s13 * 470296;
1211
  s3 += s13 * 654183;
1212
  s4 -= s13 * 997805;
1213
  s5 += s13 * 136657;
1214
  s6 -= s13 * 683901;
1215
  s13 = 0;
1216

1217
  s0 += s12 * 666643;
1218
  s1 += s12 * 470296;
1219
  s2 += s12 * 654183;
1220
  s3 -= s12 * 997805;
1221
  s4 += s12 * 136657;
1222
  s5 -= s12 * 683901;
1223
  s12 = 0;
1224

1225
  carry0 = (s0 + (1 << 20)) >> 21;
1226
  s1 += carry0;
1227
  s0 -= int64_lshift21(carry0);
1228
  carry2 = (s2 + (1 << 20)) >> 21;
1229
  s3 += carry2;
1230
  s2 -= int64_lshift21(carry2);
1231
  carry4 = (s4 + (1 << 20)) >> 21;
1232
  s5 += carry4;
1233
  s4 -= int64_lshift21(carry4);
1234
  carry6 = (s6 + (1 << 20)) >> 21;
1235
  s7 += carry6;
1236
  s6 -= int64_lshift21(carry6);
1237
  carry8 = (s8 + (1 << 20)) >> 21;
1238
  s9 += carry8;
1239
  s8 -= int64_lshift21(carry8);
1240
  carry10 = (s10 + (1 << 20)) >> 21;
1241
  s11 += carry10;
1242
  s10 -= int64_lshift21(carry10);
1243

1244
  carry1 = (s1 + (1 << 20)) >> 21;
1245
  s2 += carry1;
1246
  s1 -= int64_lshift21(carry1);
1247
  carry3 = (s3 + (1 << 20)) >> 21;
1248
  s4 += carry3;
1249
  s3 -= int64_lshift21(carry3);
1250
  carry5 = (s5 + (1 << 20)) >> 21;
1251
  s6 += carry5;
1252
  s5 -= int64_lshift21(carry5);
1253
  carry7 = (s7 + (1 << 20)) >> 21;
1254
  s8 += carry7;
1255
  s7 -= int64_lshift21(carry7);
1256
  carry9 = (s9 + (1 << 20)) >> 21;
1257
  s10 += carry9;
1258
  s9 -= int64_lshift21(carry9);
1259
  carry11 = (s11 + (1 << 20)) >> 21;
1260
  s12 += carry11;
1261
  s11 -= int64_lshift21(carry11);
1262

1263
  s0 += s12 * 666643;
1264
  s1 += s12 * 470296;
1265
  s2 += s12 * 654183;
1266
  s3 -= s12 * 997805;
1267
  s4 += s12 * 136657;
1268
  s5 -= s12 * 683901;
1269
  s12 = 0;
1270

1271
  carry0 = s0 >> 21;
1272
  s1 += carry0;
1273
  s0 -= int64_lshift21(carry0);
1274
  carry1 = s1 >> 21;
1275
  s2 += carry1;
1276
  s1 -= int64_lshift21(carry1);
1277
  carry2 = s2 >> 21;
1278
  s3 += carry2;
1279
  s2 -= int64_lshift21(carry2);
1280
  carry3 = s3 >> 21;
1281
  s4 += carry3;
1282
  s3 -= int64_lshift21(carry3);
1283
  carry4 = s4 >> 21;
1284
  s5 += carry4;
1285
  s4 -= int64_lshift21(carry4);
1286
  carry5 = s5 >> 21;
1287
  s6 += carry5;
1288
  s5 -= int64_lshift21(carry5);
1289
  carry6 = s6 >> 21;
1290
  s7 += carry6;
1291
  s6 -= int64_lshift21(carry6);
1292
  carry7 = s7 >> 21;
1293
  s8 += carry7;
1294
  s7 -= int64_lshift21(carry7);
1295
  carry8 = s8 >> 21;
1296
  s9 += carry8;
1297
  s8 -= int64_lshift21(carry8);
1298
  carry9 = s9 >> 21;
1299
  s10 += carry9;
1300
  s9 -= int64_lshift21(carry9);
1301
  carry10 = s10 >> 21;
1302
  s11 += carry10;
1303
  s10 -= int64_lshift21(carry10);
1304
  carry11 = s11 >> 21;
1305
  s12 += carry11;
1306
  s11 -= int64_lshift21(carry11);
1307

1308
  s0 += s12 * 666643;
1309
  s1 += s12 * 470296;
1310
  s2 += s12 * 654183;
1311
  s3 -= s12 * 997805;
1312
  s4 += s12 * 136657;
1313
  s5 -= s12 * 683901;
1314
  s12 = 0;
1315

1316
  carry0 = s0 >> 21;
1317
  s1 += carry0;
1318
  s0 -= int64_lshift21(carry0);
1319
  carry1 = s1 >> 21;
1320
  s2 += carry1;
1321
  s1 -= int64_lshift21(carry1);
1322
  carry2 = s2 >> 21;
1323
  s3 += carry2;
1324
  s2 -= int64_lshift21(carry2);
1325
  carry3 = s3 >> 21;
1326
  s4 += carry3;
1327
  s3 -= int64_lshift21(carry3);
1328
  carry4 = s4 >> 21;
1329
  s5 += carry4;
1330
  s4 -= int64_lshift21(carry4);
1331
  carry5 = s5 >> 21;
1332
  s6 += carry5;
1333
  s5 -= int64_lshift21(carry5);
1334
  carry6 = s6 >> 21;
1335
  s7 += carry6;
1336
  s6 -= int64_lshift21(carry6);
1337
  carry7 = s7 >> 21;
1338
  s8 += carry7;
1339
  s7 -= int64_lshift21(carry7);
1340
  carry8 = s8 >> 21;
1341
  s9 += carry8;
1342
  s8 -= int64_lshift21(carry8);
1343
  carry9 = s9 >> 21;
1344
  s10 += carry9;
1345
  s9 -= int64_lshift21(carry9);
1346
  carry10 = s10 >> 21;
1347
  s11 += carry10;
1348
  s10 -= int64_lshift21(carry10);
1349

1350
  s[0] = s0 >> 0;
1351
  s[1] = s0 >> 8;
1352
  s[2] = (s0 >> 16) | (s1 << 5);
1353
  s[3] = s1 >> 3;
1354
  s[4] = s1 >> 11;
1355
  s[5] = (s1 >> 19) | (s2 << 2);
1356
  s[6] = s2 >> 6;
1357
  s[7] = (s2 >> 14) | (s3 << 7);
1358
  s[8] = s3 >> 1;
1359
  s[9] = s3 >> 9;
1360
  s[10] = (s3 >> 17) | (s4 << 4);
1361
  s[11] = s4 >> 4;
1362
  s[12] = s4 >> 12;
1363
  s[13] = (s4 >> 20) | (s5 << 1);
1364
  s[14] = s5 >> 7;
1365
  s[15] = (s5 >> 15) | (s6 << 6);
1366
  s[16] = s6 >> 2;
1367
  s[17] = s6 >> 10;
1368
  s[18] = (s6 >> 18) | (s7 << 3);
1369
  s[19] = s7 >> 5;
1370
  s[20] = s7 >> 13;
1371
  s[21] = s8 >> 0;
1372
  s[22] = s8 >> 8;
1373
  s[23] = (s8 >> 16) | (s9 << 5);
1374
  s[24] = s9 >> 3;
1375
  s[25] = s9 >> 11;
1376
  s[26] = (s9 >> 19) | (s10 << 2);
1377
  s[27] = s10 >> 6;
1378
  s[28] = (s10 >> 14) | (s11 << 7);
1379
  s[29] = s11 >> 1;
1380
  s[30] = s11 >> 9;
1381
  s[31] = s11 >> 17;
1382
}
1383

1384
/* Loosely from BoringSSL crypto/curve25519/spake25519.c */
1385

1386
/*
1387
 * Here BoringSSL uses different points, not restricted to the generator
1388
 * subgroup, while we use the draft-irtf-cfrg-spake2-05 points.  The Python
1389
 * code is modified to add the subgroup restriction.
1390
 */
1391

1392
// The following precomputation tables are for the following
1393
// points:
1394
//
1395
// N (found in 7 iterations):
1396
//   x: 10742253510813957597047979962966927467575235974254765187031601461055699024931
1397
//   y: 19796686047937480651099107989427797822652529149428697746066532921705571401683
1398
//   encoded: d3bfb518f44f3430f29d0c92af503865a1ed3281dc69b35dd868ba85f886c4ab
1399
//
1400
// M (found in 21 iterations):
1401
//   x: 8158688967149231307266666683326742915289288280191350817196911733632187385319
1402
//   y: 21622333750659878624441478467798461427617029906629724657331223068277098105040
1403
//   encoded: d048032c6ea0b6d697ddc2e86bda85a33adac920f1bf18e1b0c6d166a5cecdaf
1404
//
1405
// These points and their precomputation tables are generated with the
1406
// following Python code.
1407

1408
/*
1409
import hashlib
1410
import ed25519 as E  # https://ed25519.cr.yp.to/python/ed25519.py
1411

1412
SEED_N = 'edwards25519 point generation seed (N)'
1413
SEED_M = 'edwards25519 point generation seed (M)'
1414

1415
def genpoint(seed):
1416
    v = hashlib.sha256(seed).digest()
1417
    it = 1
1418
    while True:
1419
        try:
1420
            x,y = E.decodepoint(v)
1421
            if E.scalarmult((x,y), E.l) != [0, 1]:
1422
                raise Exception('point has wrong order')
1423
        except Exception, e:
1424
            print e
1425
            it += 1
1426
            v = hashlib.sha256(v).digest()
1427
            continue
1428
        print "Found in %d iterations:" % it
1429
        print "  x = %d" % x
1430
        print "  y = %d" % y
1431
        print " Encoded (hex)"
1432
        print E.encodepoint((x,y)).encode('hex')
1433
        return (x,y)
1434

1435
def gentable(P):
1436
    t = []
1437
    for i in range(1,16):
1438
        k = (i >> 3 & 1) * (1 << 192) + \
1439
            (i >> 2 & 1) * (1 << 128) + \
1440
            (i >> 1 & 1) * (1 <<  64) + \
1441
            (i      & 1)
1442
        t.append(E.scalarmult(P, k))
1443
    return ''.join(E.encodeint(x) + E.encodeint(y) for (x,y) in t)
1444

1445
def printtable(table, name):
1446
    print "static const uint8_t %s[15 * 2 * 32] = {" % name,
1447
    for i in range(15 * 2 * 32):
1448
        if i % 12 == 0:
1449
            print "\n   ",
1450
        print " 0x%02x," % ord(table[i]),
1451
    print "\n};"
1452

1453
if __name__ == "__main__":
1454
    print "Searching for N"
1455
    N = genpoint(SEED_N)
1456
    print "Generating precomputation table for N"
1457
    Ntable = gentable(N)
1458
    printtable(Ntable, "kSpakeNSmallPrecomp")
1459

1460
    print "Searching for M"
1461
    M = genpoint(SEED_M)
1462
    print "Generating precomputation table for M"
1463
    Mtable = gentable(M)
1464
    printtable(Mtable, "kSpakeMSmallPrecomp")
1465
*/
1466

1467
static const uint8_t kSpakeNSmallPrecomp[15 * 2 * 32] = {
1468
    0x23, 0xfc, 0x27, 0x6c, 0x55, 0xaf, 0xb3, 0x9c, 0xd8, 0x99, 0x3a, 0x0d,
1469
    0x7f, 0x08, 0xc9, 0xeb, 0x4d, 0x6e, 0x90, 0x99, 0x2f, 0x3c, 0x15, 0x2b,
1470
    0x89, 0x5a, 0x0f, 0xf2, 0x67, 0xe6, 0xbf, 0x17, 0xd3, 0xbf, 0xb5, 0x18,
1471
    0xf4, 0x4f, 0x34, 0x30, 0xf2, 0x9d, 0x0c, 0x92, 0xaf, 0x50, 0x38, 0x65,
1472
    0xa1, 0xed, 0x32, 0x81, 0xdc, 0x69, 0xb3, 0x5d, 0xd8, 0x68, 0xba, 0x85,
1473
    0xf8, 0x86, 0xc4, 0x2b, 0x53, 0x93, 0xb1, 0x99, 0x90, 0x30, 0xca, 0xb0,
1474
    0xbd, 0xea, 0x14, 0x4c, 0x6f, 0x2b, 0x81, 0x1e, 0x23, 0x45, 0xb2, 0x32,
1475
    0x2e, 0x2d, 0xe6, 0xb8, 0x5d, 0xc5, 0x15, 0x91, 0x63, 0x39, 0x18, 0x5b,
1476
    0x62, 0x63, 0x9b, 0xf4, 0x8b, 0xe0, 0x34, 0xa2, 0x95, 0x11, 0x92, 0x68,
1477
    0x54, 0xb7, 0xf3, 0x91, 0xca, 0x22, 0xad, 0x08, 0xd8, 0x9c, 0xa2, 0xf0,
1478
    0xdc, 0x9c, 0x2c, 0x84, 0x32, 0x26, 0xe0, 0x17, 0x89, 0x53, 0x6b, 0xfd,
1479
    0x76, 0x97, 0x25, 0xea, 0x99, 0x94, 0xf8, 0x29, 0x7c, 0xc4, 0x53, 0xc0,
1480
    0x98, 0x9a, 0x20, 0xdc, 0x70, 0x01, 0x50, 0xaa, 0x05, 0xa3, 0x40, 0x50,
1481
    0x66, 0x87, 0x30, 0x19, 0x12, 0xc3, 0xb8, 0x2d, 0x28, 0x8b, 0x7b, 0x48,
1482
    0xf7, 0x7b, 0xab, 0x45, 0x70, 0x2e, 0xbb, 0x85, 0xc1, 0x6c, 0xdd, 0x35,
1483
    0x00, 0x83, 0x20, 0x13, 0x82, 0x08, 0xaa, 0xa3, 0x03, 0x0f, 0xca, 0x27,
1484
    0x3e, 0x8b, 0x52, 0xc2, 0xd7, 0xb1, 0x8c, 0x22, 0xfe, 0x04, 0x4a, 0xf2,
1485
    0xe8, 0xac, 0xee, 0x2e, 0xd7, 0x77, 0x34, 0x49, 0xf2, 0xe9, 0xeb, 0x8c,
1486
    0xa6, 0xc8, 0xc6, 0xcd, 0x8a, 0x8f, 0x7c, 0x5d, 0x51, 0xc8, 0xfa, 0x6f,
1487
    0xb3, 0x93, 0xdb, 0x71, 0xef, 0x3e, 0x6e, 0xa7, 0x85, 0xc7, 0xd4, 0x3e,
1488
    0xa2, 0xe2, 0xc0, 0xaa, 0x17, 0xb3, 0xa4, 0x7c, 0xc2, 0x3f, 0x7c, 0x7a,
1489
    0xdd, 0x26, 0xde, 0x3e, 0xf1, 0x99, 0x06, 0xf7, 0x69, 0x1b, 0xc9, 0x20,
1490
    0x55, 0x4f, 0x86, 0x7a, 0x93, 0x89, 0x68, 0xe9, 0x2b, 0x2d, 0xbc, 0x08,
1491
    0x15, 0x5d, 0x2d, 0x0b, 0x4f, 0x1a, 0xb3, 0xd4, 0x8e, 0x77, 0x79, 0x2a,
1492
    0x25, 0xf9, 0xb6, 0x46, 0xfb, 0x87, 0x02, 0xa6, 0xe0, 0xd3, 0xba, 0x84,
1493
    0xea, 0x3e, 0x58, 0xa5, 0x7f, 0x8f, 0x8c, 0x39, 0x79, 0x28, 0xb5, 0xcf,
1494
    0xe4, 0xca, 0x63, 0xdc, 0xac, 0xed, 0x4b, 0x74, 0x1e, 0x94, 0x85, 0x8c,
1495
    0xe5, 0xf4, 0x76, 0x6f, 0x20, 0x67, 0x8b, 0xd8, 0xd6, 0x4b, 0xe7, 0x2d,
1496
    0xa0, 0xbd, 0xcc, 0x1f, 0xdf, 0x46, 0x9c, 0xa2, 0x49, 0x64, 0xdf, 0x24,
1497
    0x00, 0x11, 0x11, 0x45, 0x62, 0x5c, 0xd7, 0x8a, 0x00, 0x02, 0xf5, 0x9b,
1498
    0x4f, 0x53, 0x42, 0xc5, 0xd5, 0x55, 0x80, 0x73, 0x9a, 0x5b, 0x31, 0x5a,
1499
    0xbd, 0x3a, 0x43, 0xe9, 0x33, 0xe5, 0xaf, 0x1d, 0x92, 0x5e, 0x59, 0x37,
1500
    0xae, 0x57, 0xfa, 0x3b, 0xd2, 0x31, 0xae, 0xa6, 0xf9, 0xc9, 0xc1, 0x82,
1501
    0xa6, 0xa5, 0xed, 0x24, 0x53, 0x4b, 0x38, 0x22, 0xf2, 0x85, 0x8d, 0x13,
1502
    0xa6, 0x5e, 0xd6, 0x57, 0x17, 0xd3, 0x33, 0x38, 0x8d, 0x65, 0xd3, 0xcb,
1503
    0x1a, 0xa2, 0x3a, 0x2b, 0xbb, 0x61, 0x53, 0xd7, 0xff, 0xcd, 0x20, 0xb6,
1504
    0xbb, 0x8c, 0xab, 0x63, 0xef, 0xb8, 0x26, 0x7e, 0x81, 0x65, 0xaf, 0x90,
1505
    0xfc, 0xd2, 0xb6, 0x72, 0xdb, 0xe9, 0x23, 0x78, 0x12, 0x04, 0xc0, 0x03,
1506
    0x82, 0xa8, 0x7a, 0x0f, 0x48, 0x6f, 0x82, 0x7f, 0x81, 0xcd, 0xa7, 0x89,
1507
    0xdd, 0x86, 0xea, 0x5e, 0xa1, 0x50, 0x14, 0x34, 0x17, 0x64, 0x82, 0x0f,
1508
    0xc4, 0x40, 0x20, 0x1d, 0x8f, 0xfe, 0xfa, 0x99, 0xaf, 0x5b, 0xc1, 0x5d,
1509
    0xc8, 0x47, 0x07, 0x54, 0x4a, 0x22, 0x56, 0x57, 0xf1, 0x2c, 0x3b, 0x62,
1510
    0x7f, 0x12, 0x62, 0xaf, 0xfd, 0xf8, 0x04, 0x11, 0xa8, 0x51, 0xf0, 0x46,
1511
    0x5d, 0x79, 0x66, 0xff, 0x8a, 0x06, 0xef, 0x54, 0x64, 0x1b, 0x84, 0x3e,
1512
    0x41, 0xf3, 0xfe, 0x19, 0x51, 0xf7, 0x44, 0x9c, 0x16, 0xd3, 0x7a, 0x09,
1513
    0x59, 0xf5, 0x47, 0x45, 0xd0, 0x31, 0xef, 0x96, 0x2c, 0xc5, 0xc0, 0xd0,
1514
    0x56, 0xef, 0x3f, 0x07, 0x2b, 0xb7, 0x28, 0x49, 0xf5, 0xb1, 0x42, 0x18,
1515
    0xcf, 0x77, 0xd8, 0x2b, 0x71, 0x74, 0x80, 0xba, 0x34, 0x52, 0xce, 0x11,
1516
    0xfe, 0xc4, 0xb9, 0xeb, 0xf9, 0xc4, 0x5e, 0x1f, 0xd3, 0xde, 0x4b, 0x14,
1517
    0xe3, 0x6e, 0xe7, 0xd7, 0x83, 0x59, 0x98, 0xe8, 0x3d, 0x8e, 0xd6, 0x7d,
1518
    0xc0, 0x9a, 0x79, 0xb9, 0x83, 0xf1, 0xc1, 0x00, 0x5d, 0x16, 0x1b, 0x44,
1519
    0xe9, 0x02, 0xce, 0x99, 0x1e, 0x77, 0xef, 0xca, 0xbc, 0xf0, 0x6a, 0xb9,
1520
    0x65, 0x3f, 0x3c, 0xd9, 0xe1, 0x63, 0x0b, 0xbf, 0xaa, 0xa7, 0xe6, 0x6d,
1521
    0x6d, 0x3f, 0x44, 0x29, 0xa3, 0x8b, 0x6d, 0xc4, 0x81, 0xa9, 0xc3, 0x5a,
1522
    0x90, 0x55, 0x72, 0x61, 0x17, 0x22, 0x7f, 0x3e, 0x5f, 0xfc, 0xba, 0xb3,
1523
    0x7a, 0x99, 0x76, 0xe9, 0x20, 0xe5, 0xc5, 0xe8, 0x55, 0x56, 0x0f, 0x7a,
1524
    0x48, 0xe7, 0xbc, 0xe1, 0x13, 0xf4, 0x90, 0xef, 0x97, 0x6c, 0x02, 0x89,
1525
    0x4d, 0x22, 0x48, 0xda, 0xd3, 0x52, 0x45, 0x31, 0x26, 0xcc, 0xe8, 0x9e,
1526
    0x5d, 0xdd, 0x75, 0xe4, 0x1d, 0xbc, 0xb1, 0x08, 0x55, 0xaf, 0x54, 0x70,
1527
    0x0d, 0x0c, 0xf3, 0x50, 0xbc, 0x40, 0x83, 0xee, 0xdc, 0x6d, 0x8b, 0x40,
1528
    0x79, 0x62, 0x18, 0x37, 0xc4, 0x78, 0x02, 0x58, 0x7c, 0x78, 0xd3, 0x54,
1529
    0xed, 0x31, 0xbd, 0x7d, 0x48, 0xcf, 0xb6, 0x11, 0x27, 0x37, 0x9c, 0x86,
1530
    0xf7, 0x2e, 0x00, 0x7a, 0x48, 0x1b, 0xa6, 0x72, 0x70, 0x7b, 0x44, 0x45,
1531
    0xeb, 0x49, 0xbf, 0xbe, 0x09, 0x78, 0x66, 0x71, 0x12, 0x7f, 0x3d, 0x78,
1532
    0x51, 0x24, 0x82, 0xa2, 0xf0, 0x1e, 0x83, 0x81, 0x81, 0x45, 0x53, 0xfd,
1533
    0x5e, 0xf3, 0x03, 0x74, 0xbd, 0x23, 0x35, 0xf6, 0x10, 0xdd, 0x7c, 0x73,
1534
    0x46, 0x32, 0x09, 0x54, 0x99, 0x95, 0x91, 0x25, 0xb8, 0x32, 0x09, 0xd8,
1535
    0x2f, 0x97, 0x50, 0xa3, 0xf5, 0xd6, 0xb1, 0xed, 0x97, 0x51, 0x06, 0x42,
1536
    0x12, 0x0c, 0x69, 0x38, 0x09, 0xa0, 0xd8, 0x19, 0x70, 0xf7, 0x8f, 0x61,
1537
    0x0d, 0x56, 0x43, 0x66, 0x22, 0x8b, 0x0e, 0x0e, 0xf9, 0x81, 0x9f, 0xac,
1538
    0x6f, 0xbf, 0x7d, 0x04, 0x13, 0xf2, 0xe4, 0xeb, 0xfd, 0xbe, 0x4e, 0x56,
1539
    0xda, 0xe0, 0x22, 0x6d, 0x1b, 0x25, 0xc8, 0xa5, 0x9c, 0x05, 0x45, 0x52,
1540
    0x3c, 0x3a, 0xde, 0x6b, 0xac, 0x9b, 0xf8, 0x81, 0x97, 0x21, 0x46, 0xac,
1541
    0x7e, 0x89, 0xf8, 0x49, 0x58, 0xbb, 0x45, 0xac, 0xa2, 0xc4, 0x90, 0x1f,
1542
    0xb2, 0xb4, 0xf8, 0xe0, 0xcd, 0xa1, 0x9d, 0x1c, 0xf2, 0xf1, 0xdf, 0xfb,
1543
    0x88, 0x4e, 0xe5, 0x41, 0xd8, 0x6e, 0xac, 0x07, 0x87, 0x95, 0x35, 0xa6,
1544
    0x12, 0x08, 0x5d, 0x57, 0x5e, 0xaf, 0x71, 0x0f, 0x07, 0x4e, 0x81, 0x77,
1545
    0xf1, 0xef, 0xb5, 0x35, 0x5c, 0xfa, 0xf4, 0x4e, 0x42, 0xdc, 0x19, 0xfe,
1546
    0xe4, 0xd2, 0xb4, 0x27, 0xfb, 0x34, 0x1f, 0xb2, 0x6f, 0xf2, 0x95, 0xcc,
1547
    0xd4, 0x47, 0x63, 0xdc, 0x7e, 0x4f, 0x97, 0x2b, 0x7a, 0xe0, 0x80, 0x31,
1548
};
1549

1550
static const uint8_t kSpakeMSmallPrecomp[15 * 2 * 32] = {
1551
    0xe7, 0x45, 0x7e, 0x47, 0x49, 0x69, 0xbd, 0x1b, 0x35, 0x1c, 0x2c, 0x98,
1552
    0x03, 0xf3, 0xb3, 0x37, 0xde, 0x39, 0xa5, 0xda, 0xc0, 0x2e, 0xa4, 0xac,
1553
    0x7d, 0x08, 0x26, 0xfc, 0x80, 0xa7, 0x09, 0x12, 0xd0, 0x48, 0x03, 0x2c,
1554
    0x6e, 0xa0, 0xb6, 0xd6, 0x97, 0xdd, 0xc2, 0xe8, 0x6b, 0xda, 0x85, 0xa3,
1555
    0x3a, 0xda, 0xc9, 0x20, 0xf1, 0xbf, 0x18, 0xe1, 0xb0, 0xc6, 0xd1, 0x66,
1556
    0xa5, 0xce, 0xcd, 0x2f, 0x80, 0xa8, 0x4e, 0xc3, 0x81, 0xae, 0x68, 0x3b,
1557
    0x0d, 0xdb, 0x56, 0x32, 0x2f, 0xa8, 0x97, 0xa0, 0x5c, 0x15, 0xc1, 0xcb,
1558
    0x6f, 0x7a, 0x5f, 0xc5, 0x32, 0xfb, 0x49, 0x17, 0x18, 0xfa, 0x85, 0x08,
1559
    0x85, 0xf1, 0xe3, 0x11, 0x8e, 0x3d, 0x70, 0x20, 0x38, 0x4e, 0x0c, 0x17,
1560
    0xa1, 0xa8, 0x20, 0xd2, 0xb1, 0x1d, 0x05, 0x8d, 0x0f, 0xc9, 0x96, 0x18,
1561
    0x9d, 0x8c, 0x89, 0x8f, 0x46, 0x6a, 0x6c, 0x6e, 0x72, 0x03, 0xb2, 0x75,
1562
    0x87, 0xd8, 0xa9, 0x60, 0x93, 0x2b, 0x8b, 0x66, 0xee, 0xaf, 0xce, 0x98,
1563
    0xcd, 0x6b, 0x7c, 0x6a, 0xbe, 0x19, 0xda, 0x66, 0x7c, 0xda, 0x53, 0xa0,
1564
    0xe3, 0x9a, 0x0e, 0x53, 0x3a, 0x7c, 0x73, 0x4a, 0x37, 0xa6, 0x53, 0x23,
1565
    0x67, 0x31, 0xce, 0x8a, 0xab, 0xee, 0x72, 0x76, 0xc2, 0xb5, 0x54, 0x42,
1566
    0xcf, 0x4b, 0xc7, 0x53, 0x24, 0x59, 0xaf, 0x76, 0x53, 0x10, 0x7e, 0x25,
1567
    0x94, 0x5c, 0x23, 0xa6, 0x5e, 0x05, 0xea, 0x14, 0xad, 0x2b, 0xce, 0x50,
1568
    0x77, 0xb3, 0x7a, 0x88, 0x4c, 0xf7, 0x74, 0x04, 0x35, 0xa4, 0x0c, 0x9e,
1569
    0xee, 0x6a, 0x4c, 0x3c, 0xc1, 0x6a, 0x35, 0x4d, 0x6d, 0x8f, 0x94, 0x95,
1570
    0xe4, 0x10, 0xca, 0x46, 0x4e, 0xfa, 0x38, 0x40, 0xeb, 0x1a, 0x1b, 0x5a,
1571
    0xff, 0x73, 0x4d, 0xe9, 0xf2, 0xbe, 0x89, 0xf5, 0xd1, 0x72, 0xd0, 0x1a,
1572
    0x7b, 0x82, 0x08, 0x19, 0xda, 0x54, 0x44, 0xa5, 0x3d, 0xd8, 0x10, 0x1c,
1573
    0xcf, 0x3b, 0xc7, 0x54, 0xd5, 0x11, 0xd7, 0x2a, 0x69, 0x3f, 0xa6, 0x58,
1574
    0x74, 0xfd, 0x90, 0xb2, 0xf4, 0xc2, 0x0e, 0xf3, 0x19, 0x8f, 0x51, 0x7c,
1575
    0x31, 0x12, 0x79, 0x61, 0x16, 0xb4, 0x2f, 0x2f, 0xd0, 0x88, 0x97, 0xf2,
1576
    0xc3, 0x8c, 0xa6, 0xa3, 0x29, 0xff, 0x7e, 0x12, 0x46, 0x2a, 0x9c, 0x09,
1577
    0x7c, 0x5f, 0x87, 0x07, 0x6b, 0xa1, 0x9a, 0x57, 0x55, 0x8e, 0xb0, 0x56,
1578
    0x5d, 0xc9, 0x4c, 0x5b, 0xae, 0xd3, 0xd0, 0x8e, 0xb8, 0xac, 0xba, 0xe8,
1579
    0x54, 0x45, 0x30, 0x14, 0xf6, 0x59, 0x20, 0xc4, 0x03, 0xb7, 0x7a, 0x5d,
1580
    0x6b, 0x5a, 0xcb, 0x28, 0x60, 0xf8, 0xef, 0x61, 0x60, 0x78, 0x6b, 0xf5,
1581
    0x21, 0x4b, 0x75, 0xc2, 0x77, 0xba, 0x0e, 0x38, 0x98, 0xe0, 0xfb, 0xb7,
1582
    0x5f, 0x75, 0x87, 0x04, 0x0c, 0xb4, 0x5c, 0x09, 0x04, 0x00, 0x38, 0x4e,
1583
    0x4f, 0x7b, 0x73, 0xe5, 0xdb, 0xdb, 0xf1, 0xf4, 0x5c, 0x64, 0x68, 0xfd,
1584
    0xb1, 0x86, 0xe8, 0x89, 0xbe, 0x9c, 0xd4, 0x96, 0x1d, 0xcb, 0xdc, 0x5c,
1585
    0xef, 0xd4, 0x33, 0x28, 0xb9, 0xb6, 0xaf, 0x3b, 0xcf, 0x8d, 0x30, 0xba,
1586
    0xe8, 0x08, 0xcf, 0x84, 0xba, 0x61, 0x10, 0x9b, 0x62, 0xf6, 0x18, 0x79,
1587
    0x66, 0x87, 0x82, 0x7c, 0xaa, 0x71, 0xac, 0xd0, 0xd0, 0x32, 0xb0, 0x54,
1588
    0x03, 0xa4, 0xad, 0x3f, 0x72, 0xca, 0x22, 0xff, 0x01, 0x87, 0x08, 0x36,
1589
    0x61, 0x22, 0xaa, 0x18, 0xab, 0x3a, 0xbc, 0xf2, 0x78, 0x05, 0xe1, 0x99,
1590
    0xa3, 0x59, 0x98, 0xcc, 0x21, 0xc6, 0x2b, 0x51, 0x6d, 0x43, 0x0a, 0x46,
1591
    0x50, 0xae, 0x11, 0x7e, 0xd5, 0x23, 0x56, 0xef, 0x83, 0xc8, 0xbf, 0x42,
1592
    0xf0, 0x45, 0x52, 0x1f, 0x34, 0xbc, 0x2f, 0xb0, 0xf0, 0xce, 0xf0, 0xec,
1593
    0xd0, 0x99, 0x59, 0x2e, 0x1f, 0xab, 0xa8, 0x1e, 0x4b, 0xce, 0x1b, 0x9a,
1594
    0x75, 0xc6, 0xc4, 0x71, 0x86, 0xf0, 0x8d, 0xec, 0xb0, 0x30, 0xb9, 0x62,
1595
    0xb3, 0xb7, 0xdd, 0x96, 0x29, 0xc8, 0xbf, 0xe9, 0xb0, 0x74, 0x78, 0x7b,
1596
    0xf7, 0xea, 0xa3, 0x14, 0x12, 0x56, 0xe0, 0xf3, 0x35, 0x7a, 0x26, 0x4a,
1597
    0x4c, 0xe6, 0xdf, 0x13, 0xb5, 0x52, 0xb0, 0x2a, 0x5f, 0x2e, 0xac, 0x34,
1598
    0xab, 0x5f, 0x1a, 0x01, 0xe4, 0x15, 0x1a, 0xd1, 0xbf, 0xc9, 0x95, 0x0a,
1599
    0xac, 0x1d, 0xe7, 0x53, 0x59, 0x8d, 0xc3, 0x21, 0x78, 0x5e, 0x12, 0x97,
1600
    0x8f, 0x4e, 0x1d, 0xf9, 0xe5, 0xe2, 0xc2, 0xc4, 0xba, 0xfb, 0x50, 0x96,
1601
    0x5b, 0x43, 0xe8, 0xf7, 0x0d, 0x1b, 0x64, 0x58, 0xbe, 0xd3, 0x95, 0x7f,
1602
    0x8e, 0xf1, 0x85, 0x35, 0xba, 0x25, 0x55, 0x2e, 0x02, 0x46, 0x5c, 0xad,
1603
    0x1f, 0xc5, 0x03, 0xcc, 0xd0, 0x43, 0x4c, 0xf2, 0x5e, 0x64, 0x0a, 0x89,
1604
    0xd9, 0xfd, 0x23, 0x7d, 0x4f, 0xbe, 0x2f, 0x0f, 0x1e, 0x12, 0x4a, 0xd9,
1605
    0xf8, 0x82, 0xde, 0x8f, 0x4f, 0x98, 0xb9, 0x90, 0xf6, 0xfa, 0xd1, 0x11,
1606
    0xa6, 0xdc, 0x7e, 0x32, 0x48, 0x6a, 0x8a, 0x14, 0x5e, 0x73, 0xb9, 0x6c,
1607
    0x0e, 0xc2, 0xf9, 0xcc, 0xf0, 0x32, 0xc8, 0xb5, 0x56, 0xaa, 0x5d, 0xd2,
1608
    0x07, 0xf1, 0x6f, 0x33, 0x6f, 0x05, 0x70, 0x49, 0x60, 0x49, 0x23, 0x23,
1609
    0x14, 0x0e, 0x4c, 0x58, 0x92, 0xad, 0xa9, 0x50, 0xb1, 0x59, 0x43, 0x96,
1610
    0x7b, 0xc1, 0x51, 0x45, 0xef, 0x0d, 0xef, 0xd1, 0xe4, 0xd0, 0xce, 0xdf,
1611
    0x6a, 0xbc, 0x1b, 0xbf, 0x7a, 0x87, 0x4e, 0x47, 0x17, 0x9c, 0x34, 0x38,
1612
    0xb0, 0x3c, 0xa1, 0x04, 0xfb, 0xe2, 0x66, 0xce, 0xb6, 0x82, 0xbb, 0xad,
1613
    0xc3, 0x8e, 0x12, 0x35, 0xbc, 0x17, 0xce, 0x01, 0x2d, 0xa3, 0xa6, 0xb9,
1614
    0xfa, 0x84, 0xc2, 0x2f, 0x5a, 0x4a, 0x8c, 0x4c, 0x11, 0x4e, 0xa8, 0x14,
1615
    0xcb, 0xb8, 0x99, 0xaa, 0x2e, 0x8c, 0xa0, 0xc9, 0x5f, 0x62, 0x2a, 0x84,
1616
    0x66, 0x60, 0x0a, 0x7e, 0xdc, 0x93, 0x17, 0x45, 0x19, 0xb3, 0x93, 0x4c,
1617
    0xdc, 0xd0, 0xd5, 0x5c, 0x25, 0xd2, 0xcd, 0x4e, 0x84, 0x4c, 0x73, 0xb3,
1618
    0x90, 0xa4, 0x22, 0x05, 0x2c, 0x7c, 0x39, 0x2b, 0x70, 0xd9, 0x61, 0x76,
1619
    0xb2, 0x03, 0x71, 0xe9, 0x0e, 0xf8, 0x57, 0x85, 0xad, 0xb1, 0x2f, 0x34,
1620
    0xa5, 0x66, 0xb0, 0x0f, 0x75, 0x94, 0x6e, 0x26, 0x79, 0x99, 0xb4, 0xe2,
1621
    0xe2, 0xa3, 0x58, 0xdd, 0xb4, 0xfb, 0x74, 0xf4, 0xa1, 0xca, 0xc3, 0x30,
1622
    0xe7, 0x86, 0xb2, 0xa2, 0x2c, 0x11, 0xc9, 0x58, 0xe3, 0xc1, 0xa6, 0x5f,
1623
    0x86, 0x6a, 0xe7, 0x75, 0xd5, 0xd8, 0x63, 0x95, 0x64, 0x59, 0xbc, 0xb8,
1624
    0xb7, 0xf5, 0x12, 0xe3, 0x03, 0xc6, 0x17, 0xea, 0x4e, 0xcb, 0xee, 0x4c,
1625
    0xae, 0x03, 0xd1, 0x33, 0xd0, 0x39, 0x36, 0x00, 0x0f, 0xf4, 0x9c, 0xbd,
1626
    0x35, 0x96, 0xfd, 0x0d, 0x26, 0xb7, 0x9e, 0xf4, 0x4b, 0x6f, 0x4b, 0xf1,
1627
    0xec, 0x11, 0x00, 0x16, 0x21, 0x1e, 0xd4, 0x43, 0x23, 0x8c, 0x4a, 0xfa,
1628
    0x9e, 0xd4, 0x2b, 0x36, 0x9a, 0x43, 0x1e, 0x58, 0x31, 0xe8, 0x1f, 0x83,
1629
    0x15, 0x20, 0x31, 0x68, 0xfe, 0x27, 0xd3, 0xd8, 0x9b, 0x43, 0x81, 0x8f,
1630
    0x57, 0x32, 0x14, 0xe6, 0x9e, 0xbf, 0xd1, 0xfb, 0xdf, 0xad, 0x7a, 0x52,
1631
};
1632

1633
/* left_shift_3 sets |n| to |n|*8, where |n| is represented in little-endian
1634
 * order. */
1635
static void left_shift_3(uint8_t n[32]) {
1636
  uint8_t carry = 0;
1637
  unsigned i;
1638

1639
  for (i = 0; i < 32; i++) {
1640
    const uint8_t next_carry = n[i] >> 5;
1641
    n[i] = (n[i] << 3) | carry;
1642
    carry = next_carry;
1643
  }
1644
}
1645

1646
static krb5_error_code
1647
builtin_edwards25519_keygen(krb5_context context, groupdata *gdata,
1648
                            const uint8_t *wbytes, krb5_boolean use_m,
1649
                            uint8_t *priv_out, uint8_t *pub_out)
1650
{
1651
  uint8_t private[64];
1652
  krb5_data data = make_data(private, 32);
1653
  krb5_error_code ret;
1654

1655
  /* Pick x or y uniformly from [0, p*h) divisible by h. */
1656
  ret = krb5_c_random_make_octets(context, &data);
1657
  if (ret)
1658
    return ret;
1659
  memset(private + 32, 0, 32);
1660
  x25519_sc_reduce(private);
1661
  left_shift_3(private);
1662

1663
  /* Compute X=x*G or Y=y*G. */
1664
  ge_p3 P;
1665
  x25519_ge_scalarmult_base(&P, private);
1666

1667
  /* Compute w mod p. */
1668
  uint8_t wreduced[64];
1669
  memcpy(wreduced, wbytes, 32);
1670
  memset(wreduced + 32, 0, 32);
1671
  x25519_sc_reduce(wreduced);
1672

1673
  /* Compute the mask, w*M or w*N. */
1674
  ge_p3 mask;
1675
  x25519_ge_scalarmult_small_precomp(&mask, wreduced,
1676
                                     use_m ? kSpakeMSmallPrecomp :
1677
                                     kSpakeNSmallPrecomp);
1678

1679
  /* Compute the masked point T=w*M+X or S=w*N+Y. */
1680
  ge_cached mask_cached;
1681
  x25519_ge_p3_to_cached(&mask_cached, &mask);
1682
  ge_p1p1 Pmasked;
1683
  x25519_ge_add(&Pmasked, &P, &mask_cached);
1684

1685
  /* Encode T or S into pub_out. */
1686
  ge_p2 Pmasked_proj;
1687
  x25519_ge_p1p1_to_p2(&Pmasked_proj, &Pmasked);
1688
  x25519_ge_tobytes(pub_out, &Pmasked_proj);
1689

1690
  /* Remember the private key in priv_out. */
1691
  memcpy(priv_out, private, 32);
1692
  return 0;
1693
}
1694

1695
static krb5_error_code
1696
builtin_edwards25519_result(krb5_context context, groupdata *gdata,
1697
                            const uint8_t *wbytes, const uint8_t *ourpriv,
1698
                            const uint8_t *theirpub, krb5_boolean use_m,
1699
                            uint8_t *elem_out)
1700
{
1701
  /*
1702
   * Check if the point received from peer is on the curve.  This does not
1703
   * verify that it is in the generator subgroup, but since our private key is
1704
   * a multiple of the cofactor, the shared point will be in the generator
1705
   * subgroup even if a rogue peer sends a point which is not.
1706
   */
1707
  ge_p3 Qmasked;
1708
  if (!x25519_ge_frombytes_vartime(&Qmasked, theirpub))
1709
    return EINVAL;
1710

1711
  /* Compute w mod p. */
1712
  uint8_t wreduced[64];
1713
  memcpy(wreduced, wbytes, 32);
1714
  memset(wreduced + 32, 0, 32);
1715
  x25519_sc_reduce(wreduced);
1716

1717
  /* Compute the peer's mask, w*M or w*N. */
1718
  ge_p3 peers_mask;
1719
  x25519_ge_scalarmult_small_precomp(&peers_mask, wreduced,
1720
                                     use_m ? kSpakeMSmallPrecomp :
1721
                                     kSpakeNSmallPrecomp);
1722

1723
  ge_cached peers_mask_cached;
1724
  x25519_ge_p3_to_cached(&peers_mask_cached, &peers_mask);
1725

1726
  /* Compute the peer's unmasked point, T-w*M or S-w*N. */
1727
  ge_p1p1 Qcompl;
1728
  ge_p3 Qunmasked;
1729
  x25519_ge_sub(&Qcompl, &Qmasked, &peers_mask_cached);
1730
  x25519_ge_p1p1_to_p3(&Qunmasked, &Qcompl);
1731

1732
  /* Multiply by our private value to compute K=x*(S-w*N) or K=y*(T-w*M). */
1733
  ge_p2 K;
1734
  x25519_ge_scalarmult(&K, ourpriv, &Qunmasked);
1735

1736
  /* Encode K into elem_out. */
1737
  x25519_ge_tobytes(elem_out, &K);
1738
  return 0;
1739
}
1740

1741
static krb5_error_code
1742
builtin_sha256(krb5_context context, groupdata *gdata, const krb5_data *dlist,
1743
               size_t ndata, uint8_t *result_out)
1744
{
1745
  return k5_sha256(dlist, ndata, result_out);
1746
}
1747

1748
groupdef builtin_edwards25519 = {
1749
  .reg = &spake_iana_edwards25519,
1750
  .keygen = builtin_edwards25519_keygen,
1751
  .result = builtin_edwards25519_result,
1752
  .hash = builtin_sha256
1753
};
1754

1755
Product

Resources

Company