Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/crypto/curve25519-x86_64.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2020 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
 * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
 */
6
7
#include <crypto/curve25519.h>
8
#include <crypto/internal/kpp.h>
9
10
#include <linux/export.h>
11
#include <linux/types.h>
12
#include <linux/jump_label.h>
13
#include <linux/kernel.h>
14
#include <linux/module.h>
15
#include <linux/scatterlist.h>
16
17
#include <asm/cpufeature.h>
18
#include <asm/processor.h>
19
20
/*
 * Branch-free equality test.
 * Returns all ones (0xffffffffffffffff) when a == b, and 0 otherwise.
 */
static __always_inline u64 eq_mask(u64 a, u64 b)
{
	u64 diff = a ^ b;
	/* diff | -diff has its top bit set exactly when diff is non-zero */
	u64 differs = (diff | (~diff + (u64)1U)) >> (u32)63U;

	/* differs is 0 or 1; subtracting 1 turns that into all-ones or 0 */
	return differs - (u64)1U;
}
28
29
/*
 * Branch-free unsigned comparison.
 * Returns all ones (0xffffffffffffffff) when a >= b, and 0 otherwise.
 */
static __always_inline u64 gte_mask(u64 a, u64 b)
{
	u64 a_xor_b = a ^ b;
	u64 a_sub_b = a - b;
	u64 q = a_xor_b | (a_sub_b ^ b);
	/* top bit of (a ^ q) is 1 when a < b, 0 when a >= b */
	u64 below = (a ^ q) >> (u32)63U;

	return below - (u64)1U;
}
41
42
/*
 * Computes out <- f1 + f2, where f1 and out are four 64-bit limbs (limb 0 is
 * the least significant) and f2 is a single 64-bit value.
 * Returns the carry out of the top limb (0 or 1).
 * out may be the same buffer as f1.
 */
static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
{
	u64 carry_r;

	/* Operands: %0 = f2 (read, then reused as limb 0 of the sum),
	 *           %1 = carry_r, %2 = out, %3 = f1 */
	asm volatile(
		/* Clear registers to propagate the carry bit */
		" xor %%r8d, %%r8d;"
		" xor %%r9d, %%r9d;"
		" xor %%r10d, %%r10d;"
		" xor %%r11d, %%r11d;"
		" xor %k1, %k1;"

		/* Begin addition chain */
		" addq 0(%3), %0;"
		" movq %0, 0(%2);"
		" adcxq 8(%3), %%r8;"
		" movq %%r8, 8(%2);"
		" adcxq 16(%3), %%r9;"
		" movq %%r9, 16(%2);"
		" adcxq 24(%3), %%r10;"
		" movq %%r10, 24(%2);"

		/* Return the carry bit in a register */
		" adcx %%r11, %1;"
		: "+&r"(f2), "=&r"(carry_r)
		: "r"(out), "r"(f1)
		: "%r8", "%r9", "%r10", "%r11", "memory", "cc");

	return carry_r;
}
74
75
/*
 * Computes the field addition of two field elements: out <- f1 + f2.
 * All three are four 64-bit limbs, limb 0 least significant. A carry out of
 * bit 256 is folded back in as +38, so the result fits in four limbs.
 * All input limbs are loaded before any limb of out is stored, so out may
 * alias f1 or f2.
 */
static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
{
	/* Operands: %0 = f2 (pointer; overwritten with the constant 38 once
	 *           its limbs are loaded), %1 = out, %2 = f1 */
	asm volatile(
		/* Compute the raw addition of f1 + f2 */
		" movq 0(%0), %%r8;"
		" addq 0(%2), %%r8;"
		" movq 8(%0), %%r9;"
		" adcxq 8(%2), %%r9;"
		" movq 16(%0), %%r10;"
		" adcxq 16(%2), %%r10;"
		" movq 24(%0), %%r11;"
		" adcxq 24(%2), %%r11;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $0, %%rax;"
		" mov $38, %0;"
		" cmovc %0, %%rax;"

		/* Step 2: Add carry*38 to the original sum */
		" xor %%ecx, %%ecx;"
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %0, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"
		: "+&r"(f2)
		: "r"(out), "r"(f1)
		: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
}
115
116
/*
 * Computes the field subtraction of two field elements: out <- f1 - f2.
 * All three are four 64-bit limbs, limb 0 least significant. A borrow out of
 * the top limb is compensated by subtracting 38, so the result fits in four
 * limbs. The result is stored only after every input limb has been read, so
 * out may alias f1 or f2.
 */
static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
{
	/* Operands: %0 = out, %1 = f1, %2 = f2 */
	asm volatile(
		/* Compute the raw subtraction of f1-f2 */
		" movq 0(%1), %%r8;"
		" subq 0(%2), %%r8;"
		" movq 8(%1), %%r9;"
		" sbbq 8(%2), %%r9;"
		" movq 16(%1), %%r10;"
		" sbbq 16(%2), %%r10;"
		" movq 24(%1), %%r11;"
		" sbbq 24(%2), %%r11;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $0, %%rax;"
		" mov $38, %%rcx;"
		" cmovc %%rcx, %%rax;"

		/* Step 2: Subtract carry*38 from the original difference */
		" sub %%rax, %%r8;"
		" sbb $0, %%r9;"
		" sbb $0, %%r10;"
		" sbb $0, %%r11;"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rcx, %%rax;"
		" sub %%rax, %%r8;"

		/* Store the result */
		" movq %%r8, 0(%0);"
		" movq %%r9, 8(%0);"
		" movq %%r10, 16(%0);"
		" movq %%r11, 24(%0);"
		:
		: "r"(out), "r"(f1), "r"(f2)
		: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
}
157
158
/*
 * Computes a field multiplication: out <- f1 * f2
 * f1, f2 and out are four 64-bit limbs, limb 0 least significant.
 * Uses the 8-element buffer tmp for intermediate results: the full 512-bit
 * product is written to tmp first, then the high half is multiplied by 38
 * and folded into the low half. out is written only during that final
 * reduction, so it may alias f1 or f2.
 */
static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
	/*
	 * Operands: %0 = f1, %1 = f2, %2 = tmp, %3 = out.
	 * After "Line up pointers", %0 holds tmp, %2 holds out and %1 is
	 * zeroed and used as a constant-zero register.
	 */
	asm volatile(

		/* Compute the raw multiplication: tmp <- src1 * src2 */

		/* Compute src1[0] * src2 */
		" movq 0(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" movq %%r8, 0(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" movq %%r10, 8(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"

		/* Compute src1[1] * src2 */
		" movq 8(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 8(%2), %%r8;"
		" movq %%r8, 8(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 16(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[2] * src2 */
		" movq 16(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 16(%2), %%r8;"
		" movq %%r8, 16(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 24(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[3] * src2 */
		" movq 24(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 24(%2), %%r8;"
		" movq %%r8, 24(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 32(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" movq %%rbx, 40(%2);"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" movq %%r14, 48(%2);"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"
		" movq %%rax, 56(%2);"

		/* Line up pointers */
		" mov %2, %0;"
		" mov %3, %2;"

		/* Wrap the result back into the field */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%0), %%r8, %%r13;"
		" xor %k1, %k1;"
		" adoxq 0(%0), %%r8;"
		" mulxq 40(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%0), %%r9;"
		" mulxq 48(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%0), %%r10;"
		" mulxq 56(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%0), %%r11;"
		" adcx %1, %%rax;"
		" adox %1, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %1, %%r9;"
		" movq %%r9, 8(%2);"
		" adcx %1, %%r10;"
		" movq %%r10, 16(%2);"
		" adcx %1, %%r11;"
		" movq %%r11, 24(%2);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%2);"
		: "+&r"(f1), "+&r"(f2), "+&r"(tmp)
		: "r"(out)
		: "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
		  "%r14", "memory", "cc");
}
290
291
/*
 * Computes two field multiplications:
 *   out[0] <- f1[0] * f2[0]
 *   out[1] <- f1[1] * f2[1]
 * where each [k] denotes a four-limb field element, i.e. limbs 4k..4k+3 of
 * the eight-limb buffers out, f1 and f2.
 * Uses the 16-element buffer tmp for intermediate results: both raw 512-bit
 * products are written to tmp before anything is stored to out, so out may
 * alias f1 or f2.
 */
static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
	/*
	 * Operands: %0 = f1, %1 = f2, %2 = tmp, %3 = out.
	 * After "Line up pointers", %0 holds tmp, %2 holds out and %1 is
	 * zeroed and used as a constant-zero register.
	 */
	asm volatile(

		/* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */

		/* Compute src1[0] * src2 */
		" movq 0(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" movq %%r8, 0(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" movq %%r10, 8(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"

		/* Compute src1[1] * src2 */
		" movq 8(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 8(%2), %%r8;"
		" movq %%r8, 8(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 16(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[2] * src2 */
		" movq 16(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 16(%2), %%r8;"
		" movq %%r8, 16(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 24(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[3] * src2 */
		" movq 24(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 24(%2), %%r8;"
		" movq %%r8, 24(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 32(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" movq %%rbx, 40(%2);"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" movq %%r14, 48(%2);"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"
		" movq %%rax, 56(%2);"

		/* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */

		/* Compute src1[0] * src2 */
		" movq 32(%0), %%rdx;"
		" mulxq 32(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" movq %%r8, 64(%2);"
		" mulxq 40(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" movq %%r10, 72(%2);"
		" mulxq 48(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" mulxq 56(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"

		/* Compute src1[1] * src2 */
		" movq 40(%0), %%rdx;"
		" mulxq 32(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 72(%2), %%r8;"
		" movq %%r8, 72(%2);"
		" mulxq 40(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 80(%2);"
		" mulxq 48(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 56(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[2] * src2 */
		" movq 48(%0), %%rdx;"
		" mulxq 32(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 80(%2), %%r8;"
		" movq %%r8, 80(%2);"
		" mulxq 40(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 88(%2);"
		" mulxq 48(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 56(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[3] * src2 */
		" movq 56(%0), %%rdx;"
		" mulxq 32(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 88(%2), %%r8;"
		" movq %%r8, 88(%2);"
		" mulxq 40(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 96(%2);"
		" mulxq 48(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" movq %%rbx, 104(%2);"
		" mov $0, %%r8;"
		" mulxq 56(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" movq %%r14, 112(%2);"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"
		" movq %%rax, 120(%2);"

		/* Line up pointers */
		" mov %2, %0;"
		" mov %3, %2;"

		/* Wrap the results back into the field */

		/* First product -> out[0] */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%0), %%r8, %%r13;"
		" xor %k1, %k1;"
		" adoxq 0(%0), %%r8;"
		" mulxq 40(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%0), %%r9;"
		" mulxq 48(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%0), %%r10;"
		" mulxq 56(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%0), %%r11;"
		" adcx %1, %%rax;"
		" adox %1, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %1, %%r9;"
		" movq %%r9, 8(%2);"
		" adcx %1, %%r10;"
		" movq %%r10, 16(%2);"
		" adcx %1, %%r11;"
		" movq %%r11, 24(%2);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%2);"

		/* Second product -> out[1] */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 96(%0), %%r8, %%r13;"
		" xor %k1, %k1;"
		" adoxq 64(%0), %%r8;"
		" mulxq 104(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 72(%0), %%r9;"
		" mulxq 112(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 80(%0), %%r10;"
		" mulxq 120(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 88(%0), %%r11;"
		" adcx %1, %%rax;"
		" adox %1, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %1, %%r9;"
		" movq %%r9, 40(%2);"
		" adcx %1, %%r10;"
		" movq %%r10, 48(%2);"
		" adcx %1, %%r11;"
		" movq %%r11, 56(%2);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 32(%2);"
		: "+&r"(f1), "+&r"(f2), "+&r"(tmp)
		: "r"(out)
		: "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
		  "%r14", "memory", "cc");
}
541
542
/*
 * Computes the field multiplication of four-element f1 with value in f2:
 * out <- f1 * f2, limb 0 least significant.
 * Requires f2 to be smaller than 2^17.
 * The limb that overflows past bit 256 is multiplied by 38 and folded back
 * into the low limbs.
 */
static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
{
	/* mulx takes its implicit multiplicand from rdx, so pin f2 there */
	register u64 f2_r asm("rdx") = f2;

	/* Operands: %0 = f2_r (rdx; later overwritten with 38),
	 *           %1 = out, %2 = f1 */
	asm volatile(
		/* Compute the raw multiplication of f1*f2 */
		" mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
		" mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
		" add %%rcx, %%r9;"
		" mov $0, %%rcx;"
		" mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
		" adcx %%rbx, %%r10;"
		" mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
		" adcx %%r13, %%r11;"
		" adcx %%rcx, %%rax;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $38, %%rdx;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"
		: "+&r"(f2_r)
		: "r"(out), "r"(f1)
		: "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13",
		  "memory", "cc");
}
585
586
/*
 * Conditionally swaps the eight-limb buffers p1 and p2 without branching:
 * when bit is 1 the contents of p1 and p2 are exchanged, when bit is 0 both
 * are left unchanged. The same loads, cmovs and stores execute either way.
 *
 * Note: p1 and p2 are declared const but ARE written by the asm below; the
 * "memory" clobber tells the compiler about those stores.
 */
static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
{
	/* Operands: %0 = bit (consumed), %1 = p1, %2 = p2 */
	asm volatile(
		/* Transfer bit into CF flag */
		" add $18446744073709551615, %0;"

		/* None of the mov/cmov instructions below modify flags, so CF
		 * stays valid for all eight limb swaps. */

		/* cswap p1[0], p2[0] */
		" movq 0(%1), %%r8;"
		" movq 0(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 0(%1);"
		" movq %%r9, 0(%2);"

		/* cswap p1[1], p2[1] */
		" movq 8(%1), %%r8;"
		" movq 8(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 8(%1);"
		" movq %%r9, 8(%2);"

		/* cswap p1[2], p2[2] */
		" movq 16(%1), %%r8;"
		" movq 16(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 16(%1);"
		" movq %%r9, 16(%2);"

		/* cswap p1[3], p2[3] */
		" movq 24(%1), %%r8;"
		" movq 24(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 24(%1);"
		" movq %%r9, 24(%2);"

		/* cswap p1[4], p2[4] */
		" movq 32(%1), %%r8;"
		" movq 32(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 32(%1);"
		" movq %%r9, 32(%2);"

		/* cswap p1[5], p2[5] */
		" movq 40(%1), %%r8;"
		" movq 40(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 40(%1);"
		" movq %%r9, 40(%2);"

		/* cswap p1[6], p2[6] */
		" movq 48(%1), %%r8;"
		" movq 48(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 48(%1);"
		" movq %%r9, 48(%2);"

		/* cswap p1[7], p2[7] */
		" movq 56(%1), %%r8;"
		" movq 56(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 56(%1);"
		" movq %%r9, 56(%2);"
		: "+&r"(bit)
		: "r"(p1), "r"(p2)
		: "%r8", "%r9", "%r10", "memory", "cc");
}
668
669
/*
 * Computes the square of a field element: out <- f * f
 * f and out are four 64-bit limbs, limb 0 least significant.
 * Uses the 8-element buffer tmp for intermediate results: the raw 512-bit
 * square is written to tmp, then the high half is multiplied by 38 and
 * folded into the low half. out is written only during that final
 * reduction, so it may alias f.
 */
static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
{
	/*
	 * Operands: %0 = f, %1 = tmp, %2 = out.
	 * After "Line up pointers", %0 holds tmp and %1 holds out.
	 */
	asm volatile(
		/* Compute the raw multiplication: tmp <- f * f */

		/* Step 1: Compute all partial products */
		" movq 0(%0), %%rdx;" /* rdx = f[0] */
		" mulxq 8(%0), %%r8, %%r14;" /* f[1]*f[0] */
		" xor %%r15d, %%r15d;"
		" mulxq 16(%0), %%r9, %%r10;" /* f[2]*f[0] */
		" adcx %%r14, %%r9;"
		" mulxq 24(%0), %%rax, %%rcx;" /* f[3]*f[0] */
		" adcx %%rax, %%r10;"
		" movq 24(%0), %%rdx;" /* rdx = f[3] */
		" mulxq 8(%0), %%r11, %%rbx;" /* f[1]*f[3] */
		" adcx %%rcx, %%r11;"
		" mulxq 16(%0), %%rax, %%r13;" /* f[2]*f[3] */
		" adcx %%rax, %%rbx;"
		" movq 8(%0), %%rdx;" /* rdx = f[1] */
		" adcx %%r15, %%r13;"
		" mulxq 16(%0), %%rax, %%rcx;" /* f[2]*f[1] */
		" mov $0, %%r14;"

		/* Step 2: Compute two parallel carry chains */
		" xor %%r15d, %%r15d;"
		" adox %%rax, %%r10;"
		" adcx %%r8, %%r8;"
		" adox %%rcx, %%r11;"
		" adcx %%r9, %%r9;"
		" adox %%r15, %%rbx;"
		" adcx %%r10, %%r10;"
		" adox %%r15, %%r13;"
		" adcx %%r11, %%r11;"
		" adox %%r15, %%r14;"
		" adcx %%rbx, %%rbx;"
		" adcx %%r13, %%r13;"
		" adcx %%r14, %%r14;"

		/* Step 3: Compute intermediate squares */
		" movq 0(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
		" movq %%rax, 0(%1);"
		" add %%rcx, %%r8;"
		" movq %%r8, 8(%1);"
		" movq 8(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
		" adcx %%rax, %%r9;"
		" movq %%r9, 16(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 24(%1);"
		" movq 16(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
		" adcx %%rax, %%r11;"
		" movq %%r11, 32(%1);"
		" adcx %%rcx, %%rbx;"
		" movq %%rbx, 40(%1);"
		" movq 24(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
		" adcx %%rax, %%r13;"
		" movq %%r13, 48(%1);"
		" adcx %%rcx, %%r14;"
		" movq %%r14, 56(%1);"

		/* Line up pointers */
		" mov %1, %0;"
		" mov %2, %1;"

		/* Wrap the result back into the field */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%0), %%r8, %%r13;"
		" xor %%ecx, %%ecx;"
		" adoxq 0(%0), %%r8;"
		" mulxq 40(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%0), %%r9;"
		" mulxq 48(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%0), %%r10;"
		" mulxq 56(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%0), %%r11;"
		" adcx %%rcx, %%rax;"
		" adox %%rcx, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"
		: "+&r"(f), "+&r"(tmp)
		: "r"(out)
		: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
		  "%r13", "%r14", "%r15", "memory", "cc");
}
777
778
/*
 * Computes two field squarings:
 *   out[0] <- f[0] * f[0]
 *   out[1] <- f[1] * f[1]
 * where each [k] denotes a four-limb field element, i.e. limbs 4k..4k+3 of
 * the eight-limb buffers out and f.
 * Uses the 16-element buffer tmp for intermediate results: both raw 512-bit
 * squares are written to tmp before anything is stored to out, so out may
 * alias f.
 */
static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
{
	/*
	 * Operands: %0 = f, %1 = tmp, %2 = out.
	 * After "Line up pointers", %0 holds tmp and %1 holds out.
	 * In the per-instruction comments, f[k] is limb k of the element
	 * currently being squared.
	 */
	asm volatile(
		/* First element: tmp[0..7] <- f[0] * f[0] */

		/* Step 1: Compute all partial products */
		" movq 0(%0), %%rdx;" /* rdx = f[0] */
		" mulxq 8(%0), %%r8, %%r14;" /* f[1]*f[0] */
		" xor %%r15d, %%r15d;"
		" mulxq 16(%0), %%r9, %%r10;" /* f[2]*f[0] */
		" adcx %%r14, %%r9;"
		" mulxq 24(%0), %%rax, %%rcx;" /* f[3]*f[0] */
		" adcx %%rax, %%r10;"
		" movq 24(%0), %%rdx;" /* rdx = f[3] */
		" mulxq 8(%0), %%r11, %%rbx;" /* f[1]*f[3] */
		" adcx %%rcx, %%r11;"
		" mulxq 16(%0), %%rax, %%r13;" /* f[2]*f[3] */
		" adcx %%rax, %%rbx;"
		" movq 8(%0), %%rdx;" /* rdx = f[1] */
		" adcx %%r15, %%r13;"
		" mulxq 16(%0), %%rax, %%rcx;" /* f[2]*f[1] */
		" mov $0, %%r14;"

		/* Step 2: Compute two parallel carry chains */
		" xor %%r15d, %%r15d;"
		" adox %%rax, %%r10;"
		" adcx %%r8, %%r8;"
		" adox %%rcx, %%r11;"
		" adcx %%r9, %%r9;"
		" adox %%r15, %%rbx;"
		" adcx %%r10, %%r10;"
		" adox %%r15, %%r13;"
		" adcx %%r11, %%r11;"
		" adox %%r15, %%r14;"
		" adcx %%rbx, %%rbx;"
		" adcx %%r13, %%r13;"
		" adcx %%r14, %%r14;"

		/* Step 3: Compute intermediate squares */
		" movq 0(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
		" movq %%rax, 0(%1);"
		" add %%rcx, %%r8;"
		" movq %%r8, 8(%1);"
		" movq 8(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
		" adcx %%rax, %%r9;"
		" movq %%r9, 16(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 24(%1);"
		" movq 16(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
		" adcx %%rax, %%r11;"
		" movq %%r11, 32(%1);"
		" adcx %%rcx, %%rbx;"
		" movq %%rbx, 40(%1);"
		" movq 24(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
		" adcx %%rax, %%r13;"
		" movq %%r13, 48(%1);"
		" adcx %%rcx, %%r14;"
		" movq %%r14, 56(%1);"

		/* Second element: tmp[8..15] <- f[1] * f[1] */

		/* Step 1: Compute all partial products */
		" movq 32(%0), %%rdx;" /* rdx = f[0] */
		" mulxq 40(%0), %%r8, %%r14;" /* f[1]*f[0] */
		" xor %%r15d, %%r15d;"
		" mulxq 48(%0), %%r9, %%r10;" /* f[2]*f[0] */
		" adcx %%r14, %%r9;"
		" mulxq 56(%0), %%rax, %%rcx;" /* f[3]*f[0] */
		" adcx %%rax, %%r10;"
		" movq 56(%0), %%rdx;" /* rdx = f[3] */
		" mulxq 40(%0), %%r11, %%rbx;" /* f[1]*f[3] */
		" adcx %%rcx, %%r11;"
		" mulxq 48(%0), %%rax, %%r13;" /* f[2]*f[3] */
		" adcx %%rax, %%rbx;"
		" movq 40(%0), %%rdx;" /* rdx = f[1] */
		" adcx %%r15, %%r13;"
		" mulxq 48(%0), %%rax, %%rcx;" /* f[2]*f[1] */
		" mov $0, %%r14;"

		/* Step 2: Compute two parallel carry chains */
		" xor %%r15d, %%r15d;"
		" adox %%rax, %%r10;"
		" adcx %%r8, %%r8;"
		" adox %%rcx, %%r11;"
		" adcx %%r9, %%r9;"
		" adox %%r15, %%rbx;"
		" adcx %%r10, %%r10;"
		" adox %%r15, %%r13;"
		" adcx %%r11, %%r11;"
		" adox %%r15, %%r14;"
		" adcx %%rbx, %%rbx;"
		" adcx %%r13, %%r13;"
		" adcx %%r14, %%r14;"

		/* Step 3: Compute intermediate squares */
		" movq 32(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
		" movq %%rax, 64(%1);"
		" add %%rcx, %%r8;"
		" movq %%r8, 72(%1);"
		" movq 40(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
		" adcx %%rax, %%r9;"
		" movq %%r9, 80(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 88(%1);"
		" movq 48(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
		" adcx %%rax, %%r11;"
		" movq %%r11, 96(%1);"
		" adcx %%rcx, %%rbx;"
		" movq %%rbx, 104(%1);"
		" movq 56(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
		" adcx %%rax, %%r13;"
		" movq %%r13, 112(%1);"
		" adcx %%rcx, %%r14;"
		" movq %%r14, 120(%1);"

		/* Line up pointers */
		" mov %1, %0;"
		" mov %2, %1;"

		/* Wrap the first square back into the field -> out[0] */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%0), %%r8, %%r13;"
		" xor %%ecx, %%ecx;"
		" adoxq 0(%0), %%r8;"
		" mulxq 40(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%0), %%r9;"
		" mulxq 48(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%0), %%r10;"
		" mulxq 56(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%0), %%r11;"
		" adcx %%rcx, %%rax;"
		" adox %%rcx, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"

		/* Wrap the second square back into the field -> out[1] */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 96(%0), %%r8, %%r13;"
		" xor %%ecx, %%ecx;"
		" adoxq 64(%0), %%r8;"
		" mulxq 104(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 72(%0), %%r9;"
		" mulxq 112(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 80(%0), %%r10;"
		" mulxq 120(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 88(%0), %%r11;"
		" adcx %%rcx, %%rax;"
		" adox %%rcx, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 40(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 48(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 56(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 32(%1);"
		: "+&r"(f), "+&r"(tmp)
		: "r"(out)
		: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
		  "%r13", "%r14", "%r15", "memory", "cc");
}
975
976
/*
 * One combined differential-addition-and-doubling step on projective (x : z)
 * pairs.
 *
 * q:        q[0..3] is read as the x-coordinate multiplied into the new
 *           z-coordinate of the sum (x1 below).
 * p01_tmp1: 32-limb working buffer laid out as
 *             [0..7]   nq    = (x2, z2), replaced by its double
 *             [8..15]  nq_p1 = (x3, z3), replaced by the sum nq + nq_p1
 *             [16..31] scratch holding the field elements a, b, d, c
 * tmp2:     scratch passed on to fmul/fmul2/fsqr2 (fmul2 and fsqr2 document
 *           a 16-element requirement).
 */
static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2)
{
	u64 *nq = p01_tmp1;
	u64 *nq_p1 = p01_tmp1 + (u32)8U;
	u64 *tmp1 = p01_tmp1 + (u32)16U;
	u64 *x1 = q;
	u64 *x2 = nq;
	u64 *z2 = nq + (u32)4U;
	u64 *x3 = nq_p1;
	u64 *z3 = nq_p1 + (u32)4U;
	/* a|b and d|c are adjacent so the pairs can go through fmul2/fsqr2 */
	u64 *a = tmp1;
	u64 *b = tmp1 + (u32)4U;
	u64 *d = tmp1 + (u32)8U;
	u64 *c = tmp1 + (u32)12U;
	u64 *ab = tmp1;
	u64 *dc = tmp1 + (u32)8U;

	fadd(a, x2, z2);		/* a = x2 + z2 */
	fsub(b, x2, z2);		/* b = x2 - z2 */
	fadd(c, x3, z3);		/* c = x3 + z3 */
	fsub(d, x3, z3);		/* d = x3 - z3 */
	fmul2(dc, dc, ab, tmp2);	/* d = d * a, c = c * b */
	fadd(x3, d, c);
	fsub(z3, d, c);

	fsqr2(dc, ab, tmp2);		/* d = a^2, c = b^2 */
	fsqr2(nq_p1, nq_p1, tmp2);	/* x3 = x3^2, z3 = z3^2 */

	/* a <- c (= b^2), freeing c for the difference below */
	a[0U] = c[0U];
	a[1U] = c[1U];
	a[2U] = c[2U];
	a[3U] = c[3U];

	fsub(c, d, c);			/* c = d - c */
	fmul_scalar(b, c, (u64)121665U);
	fadd(b, b, d);			/* b = 121665 * c + d */
	fmul2(nq, dc, ab, tmp2);	/* x2 = d * a, z2 = c * b */
	fmul(z3, z3, x1, tmp2);		/* z3 = z3 * x1 */
}
1028
1029
/*
 * Doubles the projective point nq = (x2, z2) in place.
 *
 * nq:   eight limbs, x2 in [0..3] and z2 in [4..7]; overwritten with the
 *       doubled point.
 * tmp1: 16-limb scratch holding the field elements a, b, d, c.
 * tmp2: scratch passed on to fsqr2/fmul2 (both document a 16-element
 *       requirement).
 */
static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2)
{
	u64 *x2 = nq;
	u64 *z2 = nq + (u32)4U;
	/* a|b and d|c are adjacent so the pairs can go through fsqr2/fmul2 */
	u64 *a = tmp1;
	u64 *b = tmp1 + (u32)4U;
	u64 *d = tmp1 + (u32)8U;
	u64 *c = tmp1 + (u32)12U;
	u64 *ab = tmp1;
	u64 *dc = tmp1 + (u32)8U;

	fadd(a, x2, z2);		/* a = x2 + z2 */
	fsub(b, x2, z2);		/* b = x2 - z2 */
	fsqr2(dc, ab, tmp2);		/* d = a^2, c = b^2 */

	/* a <- c (= b^2), freeing c for the difference below */
	a[0U] = c[0U];
	a[1U] = c[1U];
	a[2U] = c[2U];
	a[3U] = c[3U];

	fsub(c, d, c);			/* c = d - c */
	fmul_scalar(b, c, (u64)121665U);
	fadd(b, b, d);			/* b = 121665 * c + d */
	fmul2(nq, dc, ab, tmp2);	/* x2 = d * a, z2 = c * b */
}
1051
1052
/*
 * Montgomery ladder over the bits of key.
 *
 * out:   receives eight limbs: the resulting projective point (x in
 *        out[0..3], z in out[4..7]).
 * key:   scalar bytes; bits 253 down to 3 are read in the loop. The step
 *        before the loop always swaps (as for a set bit) and the three
 *        trailing doublings take the place of bits 2..0, so the caller is
 *        presumably expected to pass a clamped scalar; verify against the
 *        caller.
 * init1: eight limbs, the starting point (x, z); also handed to every
 *        ladder step as the difference point.
 *
 * Working buffer p01_tmp1_swap (33 limbs):
 *   [0..7]   nq     current multiple
 *   [8..15]  nq_p1  current multiple plus init1
 *   [16..31] scratch for point_add_and_double / point_double
 *   [32]     the previously processed key bit
 * Both stack buffers are wiped before returning.
 */
static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1)
{
	u64 tmp2[16U] = { 0U };
	u64 p01_tmp1_swap[33U] = { 0U };
	u64 *nq = p01_tmp1_swap;
	u64 *nq_p1 = p01_tmp1_swap + (u32)8U;
	u64 *tmp1 = p01_tmp1_swap + (u32)16U;
	u64 *swap = p01_tmp1_swap + (u32)32U;
	u64 *x0 = nq;
	u64 *z0 = nq + (u32)4U;
	u32 i;

	memcpy(nq_p1, init1, (u32)8U * sizeof(init1[0U]));

	/* nq starts as (x, z) = (1, 0) */
	x0[0U] = (u64)1U;
	x0[1U] = (u64)0U;
	x0[2U] = (u64)0U;
	x0[3U] = (u64)0U;
	z0[0U] = (u64)0U;
	z0[1U] = (u64)0U;
	z0[2U] = (u64)0U;
	z0[3U] = (u64)0U;

	/* First step: unconditional swap, recorded as "previous bit = 1" */
	cswap2((u64)1U, nq, nq_p1);
	point_add_and_double(init1, p01_tmp1_swap, tmp2);
	swap[0U] = (u64)1U;

	for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) {
		u32 pos = (u32)253U - i;
		u64 bit = (u64)(key[pos / (u32)8U] >> pos % (u32)8U & (u8)1U);
		/* swap only when this bit differs from the previous one */
		u64 sw = swap[0U] ^ bit;

		cswap2(sw, nq, nq_p1);
		point_add_and_double(init1, p01_tmp1_swap, tmp2);
		swap[0U] = bit;
	}

	/* Undo the swap left pending by the last processed bit */
	cswap2(swap[0U], nq, nq_p1);

	point_double(nq, tmp1, tmp2);
	point_double(nq, tmp1, tmp2);
	point_double(nq, tmp1, tmp2);
	memcpy(out, nq, (u32)8U * sizeof(nq[0U]));

	memzero_explicit(tmp2, sizeof(tmp2));
	memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap));
}
1115
1116
/*
 * Squares inp n1 times in the field: o <- inp^(2^n1).
 * tmp is the scratch buffer passed on to fsqr().
 * One squaring is always performed, so n1 == 0 behaves like n1 == 1.
 */
static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1)
{
	u32 i;

	fsqr(o, inp, tmp);
	/*
	 * Count from 1 instead of testing i < n1 - 1: with an unsigned n1 the
	 * subtraction wraps to 0xffffffff for n1 == 0 and the loop would run
	 * for roughly 2^32 squarings.
	 */
	for (i = (u32)1U; i < n1; i = i + (u32)1U)
		fsqr(o, o, tmp);
}
1123
1124
/*
 * Field inversion by exponentiation: o <- i^(2^255 - 21).
 * The exponent follows from the chain below; the trailing comments give the
 * exponent of i held in the destination after each call.
 *
 * o:   four-limb result.
 * i:   four-limb input.
 * tmp: scratch passed on to fsqr/fmul (both document an 8-element
 *      requirement).
 */
static void finv(u64 *o, const u64 *i, u64 *tmp)
{
	u64 t1[16U] = { 0U };
	u64 *a = t1;
	u64 *b = t1 + (u32)4U;
	u64 *c = t1 + (u32)8U;
	u64 *t0 = t1 + (u32)12U;

	fsquare_times(a, i, tmp, (u32)1U);	/* 2 */
	fsquare_times(t0, a, tmp, (u32)2U);	/* 8 */
	fmul(b, t0, i, tmp);			/* 9 */
	fmul(a, b, a, tmp);			/* 11 */
	fsquare_times(t0, a, tmp, (u32)1U);	/* 22 */
	fmul(b, t0, b, tmp);			/* 2^5 - 1 */
	fsquare_times(t0, b, tmp, (u32)5U);	/* 2^10 - 2^5 */
	fmul(b, t0, b, tmp);			/* 2^10 - 1 */
	fsquare_times(t0, b, tmp, (u32)10U);	/* 2^20 - 2^10 */
	fmul(c, t0, b, tmp);			/* 2^20 - 1 */
	fsquare_times(t0, c, tmp, (u32)20U);	/* 2^40 - 2^20 */
	fmul(t0, t0, c, tmp);			/* 2^40 - 1 */
	fsquare_times(t0, t0, tmp, (u32)10U);	/* 2^50 - 2^10 */
	fmul(b, t0, b, tmp);			/* 2^50 - 1 */
	fsquare_times(t0, b, tmp, (u32)50U);	/* 2^100 - 2^50 */
	fmul(c, t0, b, tmp);			/* 2^100 - 1 */
	fsquare_times(t0, c, tmp, (u32)100U);	/* 2^200 - 2^100 */
	fmul(t0, t0, c, tmp);			/* 2^200 - 1 */
	fsquare_times(t0, t0, tmp, (u32)50U);	/* 2^250 - 2^50 */
	fmul(t0, t0, b, tmp);			/* 2^250 - 1 */
	fsquare_times(t0, t0, tmp, (u32)5U);	/* 2^255 - 32 */
	fmul(o, t0, a, tmp);			/* 2^255 - 21 */
}
1159
1160
/*
 * Write the four-limb value in f to b after reducing it.
 *
 * Twice over, bit 63 of the top limb is cleared and 19 times that bit is
 * added back in with add_scalar(); f is modified in place by this. Then,
 * using the branch-free masks from gte_mask()/eq_mask(), the limbs
 * { 0xffffffffffffffed, ~0, ~0, 0x7fffffffffffffff } are subtracted when
 * the low limb is at least 0x...ed and the other three limbs match exactly.
 * All limbs of f are read before any limb of b is written.
 */
static void store_felem(u64 *b, u64 *f)
{
	const u64 low63 = (u64)0x7fffffffffffffffU;
	const u64 all64 = (u64)0xffffffffffffffffU;
	const u64 p_low = (u64)0xffffffffffffffedU;
	u64 l0, l1, l2, l3;
	u64 ge_p;
	u32 pass;

	/* Fold the top bit back into the low limbs; a second pass catches
	 * a top bit produced by the first addition. */
	for (pass = 0; pass < 2; pass++) {
		u64 hi = f[3];

		f[3] = hi & low63;
		add_scalar(f, f, (u64)19U * (hi >> 63));
	}

	l0 = f[0];
	l1 = f[1];
	l2 = f[2];
	l3 = f[3];

	/* All-ones when the final subtraction must happen, zero otherwise. */
	ge_p = gte_mask(l0, p_low) & eq_mask(l1, all64) &
	       eq_mask(l2, all64) & eq_mask(l3, low63);

	b[0] = l0 - (ge_p & p_low);
	b[1] = l1 - (ge_p & all64);
	b[2] = l2 - (ge_p & all64);
	b[3] = l3 - (ge_p & low63);
}
1211
1212
/*
 * Convert the point i (four limbs of X followed by four limbs of Z) into
 * its 32-byte encoding in o: compute X * finv(Z) and write it out with
 * store_felem().
 *
 * The temporaries hold the result value and the inversion intermediates,
 * which are secret when this is called for a shared-secret computation, so
 * they are wiped before returning, as curve25519_ever64_base() does for
 * its own scratch space.
 */
static void encode_point(u8 *o, const u64 *i)
{
	const u64 *x = i;
	const u64 *z = i + 4;
	u64 tmp[4U] = { 0U };
	u64 tmp_w[16U] = { 0U };

	finv(tmp, z, tmp_w);
	fmul(tmp, tmp, x, tmp_w);
	store_felem((u64 *)o, tmp);

	memzero_explicit(tmp, sizeof(tmp));
	memzero_explicit(tmp_w, sizeof(tmp_w));
}
1222
1223
/*
 * Scalar multiplication of the point with x-coordinate pub by priv, written
 * to out as 32 bytes.
 *
 * The ladder input is the point (X : Z) = (pub with bit 255 cleared : 1).
 * montgomery_ladder() overwrites init1 with its result, which
 * encode_point() then serialises.
 *
 * pub is loaded with memcpy() rather than through a casted u64 pointer:
 * the bytes need not be 8-byte aligned and the cast dropped const. The
 * resulting limbs are the same on this (little-endian x86-64) target.
 * init1 ends up holding the projective form of the output, so it is wiped
 * before returning.
 */
static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub)
{
	u64 init1[8U] = { 0U };
	u64 *x = init1;
	u64 *z = init1 + 4;

	memcpy(x, pub, 4 * sizeof(u64));
	x[3] &= (u64)0x7fffffffffffffffU;

	z[0] = 1;
	z[1] = 0;
	z[2] = 0;
	z[3] = 0;

	montgomery_ladder(init1, priv, init1);
	encode_point(out, init1);

	memzero_explicit(init1, sizeof(init1));
}
1256
1257
/* The below constants were generated using this sage script:
1258
*
1259
* #!/usr/bin/env sage
1260
* import sys
1261
* from sage.all import *
1262
* def limbs(n):
1263
* n = int(n)
1264
* l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64)
1265
* return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l
1266
* ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0])
1267
* p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0]
1268
* print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s))
1269
* print("static const u64 table_ladder[] = {")
1270
* p = ec.lift_x(9)
1271
* for i in range(252):
1272
* l = (p[0] + p[2]) / (p[0] - p[2])
1273
* print(("\t%s" + ("," if i != 251 else "")) % limbs(l))
1274
* p = p * 2
1275
* print("};")
1276
*
1277
*/
1278
1279
/*
 * x-coordinate of lift_x(9) - lift_x(1), as produced by the script above,
 * stored as four 64-bit limbs with the least significant limb first.
 * curve25519_ever64_base() copies it into x2 as the starting X coordinate
 * of its second ladder point.
 */
static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL };
1280
1281
static const u64 table_ladder[] = {
1282
0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL,
1283
0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL,
1284
0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL,
1285
0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL,
1286
0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL,
1287
0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL,
1288
0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL,
1289
0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL,
1290
0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL,
1291
0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL,
1292
0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL,
1293
0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL,
1294
0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL,
1295
0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL,
1296
0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL,
1297
0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL,
1298
0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL,
1299
0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL,
1300
0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL,
1301
0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL,
1302
0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL,
1303
0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL,
1304
0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL,
1305
0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL,
1306
0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL,
1307
0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL,
1308
0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL,
1309
0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL,
1310
0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL,
1311
0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL,
1312
0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL,
1313
0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL,
1314
0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL,
1315
0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL,
1316
0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL,
1317
0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL,
1318
0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL,
1319
0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL,
1320
0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL,
1321
0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL,
1322
0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL,
1323
0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL,
1324
0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL,
1325
0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL,
1326
0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL,
1327
0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL,
1328
0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL,
1329
0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL,
1330
0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL,
1331
0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL,
1332
0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL,
1333
0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL,
1334
0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL,
1335
0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL,
1336
0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL,
1337
0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL,
1338
0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL,
1339
0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL,
1340
0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL,
1341
0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL,
1342
0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL,
1343
0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL,
1344
0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL,
1345
0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL,
1346
0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL,
1347
0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL,
1348
0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL,
1349
0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL,
1350
0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL,
1351
0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL,
1352
0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL,
1353
0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL,
1354
0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL,
1355
0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL,
1356
0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL,
1357
0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL,
1358
0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL,
1359
0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL,
1360
0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL,
1361
0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL,
1362
0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL,
1363
0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL,
1364
0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL,
1365
0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL,
1366
0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL,
1367
0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL,
1368
0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL,
1369
0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL,
1370
0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL,
1371
0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL,
1372
0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL,
1373
0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL,
1374
0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL,
1375
0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL,
1376
0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL,
1377
0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL,
1378
0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL,
1379
0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL,
1380
0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL,
1381
0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL,
1382
0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL,
1383
0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL,
1384
0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL,
1385
0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL,
1386
0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL,
1387
0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL,
1388
0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL,
1389
0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL,
1390
0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL,
1391
0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL,
1392
0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL,
1393
0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL,
1394
0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL,
1395
0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL,
1396
0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL,
1397
0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL,
1398
0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL,
1399
0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL,
1400
0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL,
1401
0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL,
1402
0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL,
1403
0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL,
1404
0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL,
1405
0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL,
1406
0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL,
1407
0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL,
1408
0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL,
1409
0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL,
1410
0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL,
1411
0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL,
1412
0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL,
1413
0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL,
1414
0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL,
1415
0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL,
1416
0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL,
1417
0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL,
1418
0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL,
1419
0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL,
1420
0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL,
1421
0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL,
1422
0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL,
1423
0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL,
1424
0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL,
1425
0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL,
1426
0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL,
1427
0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL,
1428
0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL,
1429
0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL,
1430
0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL,
1431
0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL,
1432
0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL,
1433
0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL,
1434
0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL,
1435
0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL,
1436
0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL,
1437
0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL,
1438
0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL,
1439
0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL,
1440
0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL,
1441
0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL,
1442
0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL,
1443
0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL,
1444
0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL,
1445
0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL,
1446
0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL,
1447
0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL,
1448
0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL,
1449
0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL,
1450
0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL,
1451
0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL,
1452
0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL,
1453
0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL,
1454
0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL,
1455
0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL,
1456
0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL,
1457
0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL,
1458
0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL,
1459
0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL,
1460
0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL,
1461
0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL,
1462
0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL,
1463
0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL,
1464
0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL,
1465
0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL,
1466
0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL,
1467
0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL,
1468
0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL,
1469
0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL,
1470
0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL,
1471
0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL,
1472
0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
1473
0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
1474
0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
1475
0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
1476
0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
1477
0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
1478
0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
1479
0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
1480
0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
1481
0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
1482
0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
1483
0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
1484
0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
1485
0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
1486
0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
1487
0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
1488
0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
1489
0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
1490
0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
1491
0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
1492
0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
1493
0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
1494
0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
1495
0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
1496
0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
1497
0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
1498
0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
1499
0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
1500
0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
1501
0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
1502
0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
1503
0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
1504
0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
1505
0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
1506
0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
1507
0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
1508
0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
1509
0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
1510
0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
1511
0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
1512
0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
1513
0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
1514
0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
1515
0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
1516
0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
1517
0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
1518
0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
1519
0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
1520
0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
1521
0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
1522
0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
1523
0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
1524
0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
1525
0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
1526
0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
1527
0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
1528
0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
1529
0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
1530
0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
1531
0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
1532
0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
1533
0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
1534
};
1535
1536
/*
 * Fixed-base scalar multiplication: out = encoding of priv times the point
 * the table_ladder/p_minus_s constants were generated for (lift_x(9) in
 * the generator script).
 *
 * A single scratch array is carved up as follows (in u64 units):
 *   [ 0.. 7]  xz1: x1 | z1
 *   [ 8..15]  xz2: x2 | z2
 *   [16..31]  abcd: a | b | c | (spare), also used as the ab pair
 *   [32..47]  efgh: temporaries for the field routines
 *   [48..51]  key: clamped copy of priv
 *
 * Key bits 3..254 are consumed in ascending order, one table_ladder entry
 * (four limbs) per bit; three point doublings follow. The whole scratch
 * array, including the key copy, is wiped before returning.
 */
static void curve25519_ever64_base(u8 *out, const u8 *priv)
{
	u64 scratch[16 + 32 + 4];
	u64 *xz1 = &scratch[0];
	u64 *xz2 = &scratch[8];
	u64 *x1 = xz1;
	u64 *z1 = xz1 + 4;
	u64 *x2 = xz2;
	u64 *z2 = xz2 + 4;
	u64 *abcd = &scratch[16];
	u64 *a = abcd;
	u64 *b = abcd + 4;
	u64 *c = abcd + 8;
	u64 *efgh = &scratch[32];
	u64 *key = &scratch[48];
	u8 *key_bytes = (u8 *)key;
	u64 swap = 1;
	int pos;

	/* Clamp: clear bits 0-2 and bit 255, set bit 254. */
	memcpy(key, priv, 32);
	key_bytes[0] &= 248;
	key_bytes[31] = (key_bytes[31] & 127) | 64;

	x1[0] = 1;
	x1[1] = 0;
	x1[2] = 0;
	x1[3] = 0;

	z1[0] = 1;
	z1[1] = 0;
	z1[2] = 0;
	z1[3] = 0;

	z2[0] = 1;
	z2[1] = 0;
	z2[2] = 0;
	z2[3] = 0;

	memcpy(x2, p_minus_s, sizeof(p_minus_s));

	for (pos = 3; pos < 255; pos++) {
		u64 bit = (key[pos / 64] >> (pos % 64)) & 1;
		const u64 *entry = &table_ladder[4 * (pos - 3)];

		swap ^= bit;
		cswap2(swap, xz1, xz2);
		swap = bit;

		fsub(b, x1, z1);
		fadd(a, x1, z1);
		fmul(c, entry, b, efgh);
		fsub(b, a, c);
		fadd(a, a, c);
		fsqr2(abcd, abcd, efgh);
		fmul2(xz1, xz2, abcd, efgh);
	}

	point_double(xz1, abcd, efgh);
	point_double(xz1, abcd, efgh);
	point_double(xz1, abcd, efgh);
	encode_point(out, xz1);

	memzero_explicit(scratch, sizeof(scratch));
}
1592
1593
/*
 * Static branch, false by default, that curve25519_mod_init() enables when
 * the boot CPU has both BMI2 and ADX. curve25519_arch() and
 * curve25519_base_arch() test it to choose between the ever64 code in this
 * file and curve25519_generic().
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx);
1594
1595
void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
1596
const u8 secret[CURVE25519_KEY_SIZE],
1597
const u8 basepoint[CURVE25519_KEY_SIZE])
1598
{
1599
if (static_branch_likely(&curve25519_use_bmi2_adx))
1600
curve25519_ever64(mypublic, secret, basepoint);
1601
else
1602
curve25519_generic(mypublic, secret, basepoint);
1603
}
1604
EXPORT_SYMBOL(curve25519_arch);
1605
1606
void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
1607
const u8 secret[CURVE25519_KEY_SIZE])
1608
{
1609
if (static_branch_likely(&curve25519_use_bmi2_adx))
1610
curve25519_ever64_base(pub, secret);
1611
else
1612
curve25519_generic(pub, secret, curve25519_base_point);
1613
}
1614
EXPORT_SYMBOL(curve25519_base_arch);
1615
1616
static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
1617
unsigned int len)
1618
{
1619
u8 *secret = kpp_tfm_ctx(tfm);
1620
1621
if (!len)
1622
curve25519_generate_secret(secret);
1623
else if (len == CURVE25519_KEY_SIZE &&
1624
crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
1625
memcpy(secret, buf, CURVE25519_KEY_SIZE);
1626
else
1627
return -EINVAL;
1628
return 0;
1629
}
1630
1631
static int curve25519_generate_public_key(struct kpp_request *req)
1632
{
1633
struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
1634
const u8 *secret = kpp_tfm_ctx(tfm);
1635
u8 buf[CURVE25519_KEY_SIZE];
1636
int copied, nbytes;
1637
1638
if (req->src)
1639
return -EINVAL;
1640
1641
curve25519_base_arch(buf, secret);
1642
1643
/* might want less than we've got */
1644
nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
1645
copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
1646
nbytes),
1647
buf, nbytes);
1648
if (copied != nbytes)
1649
return -EINVAL;
1650
return 0;
1651
}
1652
1653
static int curve25519_compute_shared_secret(struct kpp_request *req)
1654
{
1655
struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
1656
const u8 *secret = kpp_tfm_ctx(tfm);
1657
u8 public_key[CURVE25519_KEY_SIZE];
1658
u8 buf[CURVE25519_KEY_SIZE];
1659
int copied, nbytes;
1660
1661
if (!req->src)
1662
return -EINVAL;
1663
1664
copied = sg_copy_to_buffer(req->src,
1665
sg_nents_for_len(req->src,
1666
CURVE25519_KEY_SIZE),
1667
public_key, CURVE25519_KEY_SIZE);
1668
if (copied != CURVE25519_KEY_SIZE)
1669
return -EINVAL;
1670
1671
curve25519_arch(buf, secret, public_key);
1672
1673
/* might want less than we've got */
1674
nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
1675
copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
1676
nbytes),
1677
buf, nbytes);
1678
if (copied != nbytes)
1679
return -EINVAL;
1680
return 0;
1681
}
1682
1683
static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
1684
{
1685
return CURVE25519_KEY_SIZE;
1686
}
1687
1688
static struct kpp_alg curve25519_alg = {
1689
.base.cra_name = "curve25519",
1690
.base.cra_driver_name = "curve25519-x86",
1691
.base.cra_priority = 200,
1692
.base.cra_module = THIS_MODULE,
1693
.base.cra_ctxsize = CURVE25519_KEY_SIZE,
1694
1695
.set_secret = curve25519_set_secret,
1696
.generate_public_key = curve25519_generate_public_key,
1697
.compute_shared_secret = curve25519_compute_shared_secret,
1698
.max_size = curve25519_max_size,
1699
};
1700
1701
1702
/*
 * Module init. If the boot CPU lacks BMI2 or ADX, nothing is enabled or
 * registered and 0 is returned. Otherwise the static branch is switched on
 * and, when CONFIG_CRYPTO_KPP is reachable, the kpp algorithm is
 * registered; its registration result becomes the return value.
 */
static int __init curve25519_mod_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_BMI2) || !boot_cpu_has(X86_FEATURE_ADX))
		return 0;

	static_branch_enable(&curve25519_use_bmi2_adx);

	if (!IS_REACHABLE(CONFIG_CRYPTO_KPP))
		return 0;

	return crypto_register_kpp(&curve25519_alg);
}
1711
1712
/*
 * Module exit. Unregisters the kpp algorithm under the same two conditions
 * that curve25519_mod_init() registered it: CONFIG_CRYPTO_KPP reachable
 * and the BMI2/ADX static branch enabled.
 */
static void __exit curve25519_mod_exit(void)
{
	if (!IS_REACHABLE(CONFIG_CRYPTO_KPP))
		return;

	if (static_branch_likely(&curve25519_use_bmi2_adx))
		crypto_unregister_kpp(&curve25519_alg);
}
1718
1719
module_init(curve25519_mod_init);
1720
module_exit(curve25519_mod_exit);
1721
1722
MODULE_ALIAS_CRYPTO("curve25519");
1723
MODULE_ALIAS_CRYPTO("curve25519-x86");
1724
MODULE_DESCRIPTION("Curve25519 algorithm, ADX optimized");
1725
MODULE_LICENSE("GPL v2");
1726
MODULE_AUTHOR("Jason A. Donenfeld <[email protected]>");
1727
1728