Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
folium-app
GitHub Repository: folium-app/Folium
Path: blob/a-new-beginning/SharedDependencies/Sources/cryptopp/donna_32.cpp
2 views
1
// donna_32.cpp - written and placed in public domain by Jeffrey Walton
2
// Crypto++ specific implementation wrapped around Andrew
3
// Moon's public domain curve25519-donna and ed25519-donna,
4
// https://github.com/floodyberry/curve25519-donna and
5
// https://github.com/floodyberry/ed25519-donna.
6
7
// The curve25519 and ed25519 source files multiplex different repos and
8
// architectures using namespaces. The repos are Andrew Moon's
9
// curve25519-donna and ed25519-donna. The architectures are 32-bit, 64-bit
10
// and SSE. For example, 32-bit x25519 uses symbols from Donna::X25519 and
11
// Donna::Arch32.
12
13
// A fair amount of duplication happens below, but we could not directly
14
// use curve25519 for both x25519 and ed25519. A close examination reveals
15
// slight differences in the implementation. For example, look at the
16
// two curve25519_sub functions.
17
18
// If needed, see Moon's commit "Go back to ignoring 256th bit [sic]",
19
// https://github.com/floodyberry/curve25519-donna/commit/57a683d18721a658
20
21
#include "pch.h"
22
23
#include "config.h"
24
#include "donna.h"
25
#include "secblock.h"
26
#include "sha.h"
27
#include "misc.h"
28
#include "cpu.h"
29
30
#include <istream>
31
#include <sstream>
32
33
#if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
34
# pragma GCC diagnostic ignored "-Wunused-function"
35
#endif
36
37
#if CRYPTOPP_MSC_VERSION
38
# pragma warning(disable: 4244)
39
#endif
40
41
// Squash MS LNK4221 and libtool warnings
42
extern const char DONNA32_FNAME[] = __FILE__;
43
44
ANONYMOUS_NAMESPACE_BEGIN
45
46
// Can't use GetAlignmentOf<word32>() because of C++11 and constexpr
47
// Can use 'const unsigned int' because of MSVC 2013
48
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
49
# define ALIGN_SPEC 16
50
#else
51
# define ALIGN_SPEC 4
52
#endif
53
54
ANONYMOUS_NAMESPACE_END
55
56
#if defined(CRYPTOPP_CURVE25519_32BIT)
57
58
#include "donna_32.h"
59
60
ANONYMOUS_NAMESPACE_BEGIN
61
62
using CryptoPP::byte;
63
using CryptoPP::word32;
64
using CryptoPP::GetWord;
65
using CryptoPP::PutWord;
66
using CryptoPP::LITTLE_ENDIAN_ORDER;
67
68
// Read four bytes starting at p and return them as a little-endian word32.
// The leading 'false' is forwarded to GetWord (presumably its
// assume-aligned flag, so p need not be word-aligned; confirm in misc.h).
inline word32 U8TO32_LE(const byte* p)
{
    return GetWord<word32>(false, LITTLE_ENDIAN_ORDER, p);
}
72
73
// Store the word32 w at p in little-endian byte order.
// The leading 'false' is forwarded to PutWord (presumably its
// assume-aligned flag, so p need not be word-aligned; confirm in misc.h).
inline void U32TO8_LE(byte* p, word32 w)
{
    PutWord(false, LITTLE_ENDIAN_ORDER, p, w);
}
77
78
ANONYMOUS_NAMESPACE_END
79
80
NAMESPACE_BEGIN(CryptoPP)
81
NAMESPACE_BEGIN(Donna)
82
NAMESPACE_BEGIN(X25519)
83
ANONYMOUS_NAMESPACE_BEGIN
84
85
using CryptoPP::byte;
86
using CryptoPP::word32;
87
using CryptoPP::sword32;
88
using CryptoPP::word64;
89
using CryptoPP::sword64;
90
91
using CryptoPP::GetBlock;
92
using CryptoPP::LittleEndian;
93
94
// Bring in all the symbols from the 32-bit header
95
using namespace CryptoPP::Donna::Arch32;
96
97
/* out = in */
98
inline void
99
curve25519_copy(bignum25519 out, const bignum25519 in) {
100
out[0] = in[0]; out[1] = in[1];
101
out[2] = in[2]; out[3] = in[3];
102
out[4] = in[4]; out[5] = in[5];
103
out[6] = in[6]; out[7] = in[7];
104
out[8] = in[8]; out[9] = in[9];
105
}
106
107
/* out = a + b */
108
inline void
109
curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
110
out[0] = a[0] + b[0]; out[1] = a[1] + b[1];
111
out[2] = a[2] + b[2]; out[3] = a[3] + b[3];
112
out[4] = a[4] + b[4]; out[5] = a[5] + b[5];
113
out[6] = a[6] + b[6]; out[7] = a[7] + b[7];
114
out[8] = a[8] + b[8]; out[9] = a[9] + b[9];
115
}
116
117
/* out = a - b */
118
inline void
119
curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
120
word32 c;
121
out[0] = 0x7ffffda + a[0] - b[0] ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
122
out[1] = 0x3fffffe + a[1] - b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
123
out[2] = 0x7fffffe + a[2] - b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
124
out[3] = 0x3fffffe + a[3] - b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
125
out[4] = 0x7fffffe + a[4] - b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
126
out[5] = 0x3fffffe + a[5] - b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
127
out[6] = 0x7fffffe + a[6] - b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
128
out[7] = 0x3fffffe + a[7] - b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
129
out[8] = 0x7fffffe + a[8] - b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
130
out[9] = 0x3fffffe + a[9] - b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
131
out[0] += 19 * c;
132
}
133
134
/* out = in * scalar */
135
inline void
136
curve25519_scalar_product(bignum25519 out, const bignum25519 in, const word32 scalar) {
137
word64 a;
138
word32 c;
139
a = mul32x32_64(in[0], scalar); out[0] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
140
a = mul32x32_64(in[1], scalar) + c; out[1] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
141
a = mul32x32_64(in[2], scalar) + c; out[2] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
142
a = mul32x32_64(in[3], scalar) + c; out[3] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
143
a = mul32x32_64(in[4], scalar) + c; out[4] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
144
a = mul32x32_64(in[5], scalar) + c; out[5] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
145
a = mul32x32_64(in[6], scalar) + c; out[6] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
146
a = mul32x32_64(in[7], scalar) + c; out[7] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
147
a = mul32x32_64(in[8], scalar) + c; out[8] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
148
a = mul32x32_64(in[9], scalar) + c; out[9] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
149
out[0] += c * 19;
150
}
151
152
/* out = a * b */
153
inline void
154
curve25519_mul(bignum25519 out, const bignum25519 a, const bignum25519 b) {
155
word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
156
word32 s0,s1,s2,s3,s4,s5,s6,s7,s8,s9;
157
word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
158
word32 p;
159
160
r0 = b[0]; r1 = b[1]; r2 = b[2]; r3 = b[3]; r4 = b[4];
161
r5 = b[5]; r6 = b[6]; r7 = b[7]; r8 = b[8]; r9 = b[9];
162
163
s0 = a[0]; s1 = a[1]; s2 = a[2]; s3 = a[3]; s4 = a[4];
164
s5 = a[5]; s6 = a[6]; s7 = a[7]; s8 = a[8]; s9 = a[9];
165
166
m1 = mul32x32_64(r0, s1) + mul32x32_64(r1, s0);
167
m3 = mul32x32_64(r0, s3) + mul32x32_64(r1, s2) + mul32x32_64(r2, s1) + mul32x32_64(r3, s0);
168
m5 = mul32x32_64(r0, s5) + mul32x32_64(r1, s4) + mul32x32_64(r2, s3) + mul32x32_64(r3, s2) + mul32x32_64(r4, s1) + mul32x32_64(r5, s0);
169
m7 = mul32x32_64(r0, s7) + mul32x32_64(r1, s6) + mul32x32_64(r2, s5) + mul32x32_64(r3, s4) + mul32x32_64(r4, s3) + mul32x32_64(r5, s2) + mul32x32_64(r6, s1) + mul32x32_64(r7, s0);
170
m9 = mul32x32_64(r0, s9) + mul32x32_64(r1, s8) + mul32x32_64(r2, s7) + mul32x32_64(r3, s6) + mul32x32_64(r4, s5) + mul32x32_64(r5, s4) + mul32x32_64(r6, s3) + mul32x32_64(r7, s2) + mul32x32_64(r8, s1) + mul32x32_64(r9, s0);
171
172
r1 *= 2; r3 *= 2; r5 *= 2; r7 *= 2;
173
174
m0 = mul32x32_64(r0, s0);
175
m2 = mul32x32_64(r0, s2) + mul32x32_64(r1, s1) + mul32x32_64(r2, s0);
176
m4 = mul32x32_64(r0, s4) + mul32x32_64(r1, s3) + mul32x32_64(r2, s2) + mul32x32_64(r3, s1) + mul32x32_64(r4, s0);
177
m6 = mul32x32_64(r0, s6) + mul32x32_64(r1, s5) + mul32x32_64(r2, s4) + mul32x32_64(r3, s3) + mul32x32_64(r4, s2) + mul32x32_64(r5, s1) + mul32x32_64(r6, s0);
178
m8 = mul32x32_64(r0, s8) + mul32x32_64(r1, s7) + mul32x32_64(r2, s6) + mul32x32_64(r3, s5) + mul32x32_64(r4, s4) + mul32x32_64(r5, s3) + mul32x32_64(r6, s2) + mul32x32_64(r7, s1) + mul32x32_64(r8, s0);
179
180
r1 *= 19; r2 *= 19;
181
r3 = (r3 / 2) * 19;
182
r4 *= 19;
183
r5 = (r5 / 2) * 19;
184
r6 *= 19;
185
r7 = (r7 / 2) * 19;
186
r8 *= 19; r9 *= 19;
187
188
m1 += (mul32x32_64(r9, s2) + mul32x32_64(r8, s3) + mul32x32_64(r7, s4) + mul32x32_64(r6, s5) + mul32x32_64(r5, s6) + mul32x32_64(r4, s7) + mul32x32_64(r3, s8) + mul32x32_64(r2, s9));
189
m3 += (mul32x32_64(r9, s4) + mul32x32_64(r8, s5) + mul32x32_64(r7, s6) + mul32x32_64(r6, s7) + mul32x32_64(r5, s8) + mul32x32_64(r4, s9));
190
m5 += (mul32x32_64(r9, s6) + mul32x32_64(r8, s7) + mul32x32_64(r7, s8) + mul32x32_64(r6, s9));
191
m7 += (mul32x32_64(r9, s8) + mul32x32_64(r8, s9));
192
193
r3 *= 2; r5 *= 2; r7 *= 2; r9 *= 2;
194
195
m0 += (mul32x32_64(r9, s1) + mul32x32_64(r8, s2) + mul32x32_64(r7, s3) + mul32x32_64(r6, s4) + mul32x32_64(r5, s5) + mul32x32_64(r4, s6) + mul32x32_64(r3, s7) + mul32x32_64(r2, s8) + mul32x32_64(r1, s9));
196
m2 += (mul32x32_64(r9, s3) + mul32x32_64(r8, s4) + mul32x32_64(r7, s5) + mul32x32_64(r6, s6) + mul32x32_64(r5, s7) + mul32x32_64(r4, s8) + mul32x32_64(r3, s9));
197
m4 += (mul32x32_64(r9, s5) + mul32x32_64(r8, s6) + mul32x32_64(r7, s7) + mul32x32_64(r6, s8) + mul32x32_64(r5, s9));
198
m6 += (mul32x32_64(r9, s7) + mul32x32_64(r8, s8) + mul32x32_64(r7, s9));
199
m8 += (mul32x32_64(r9, s9));
200
201
r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
202
m1 += c; r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
203
m2 += c; r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
204
m3 += c; r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
205
m4 += c; r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
206
m5 += c; r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
207
m6 += c; r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
208
m7 += c; r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
209
m8 += c; r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
210
m9 += c; r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
211
m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
212
r1 += p;
213
214
out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
215
out[5] = r5; out[6] = r6; out[7] = r7; out[8] = r8; out[9] = r9;
216
}
217
218
/* out = in * in */
219
inline void
220
curve25519_square(bignum25519 out, const bignum25519 in) {
221
word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
222
word32 d6,d7,d8,d9;
223
word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
224
word32 p;
225
226
r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4];
227
r5 = in[5]; r6 = in[6]; r7 = in[7]; r8 = in[8]; r9 = in[9];
228
229
m0 = mul32x32_64(r0, r0);
230
r0 *= 2;
231
m1 = mul32x32_64(r0, r1);
232
m2 = mul32x32_64(r0, r2) + mul32x32_64(r1, r1 * 2);
233
r1 *= 2;
234
m3 = mul32x32_64(r0, r3) + mul32x32_64(r1, r2 );
235
m4 = mul32x32_64(r0, r4) + mul32x32_64(r1, r3 * 2) + mul32x32_64(r2, r2);
236
r2 *= 2;
237
m5 = mul32x32_64(r0, r5) + mul32x32_64(r1, r4 ) + mul32x32_64(r2, r3);
238
m6 = mul32x32_64(r0, r6) + mul32x32_64(r1, r5 * 2) + mul32x32_64(r2, r4) + mul32x32_64(r3, r3 * 2);
239
r3 *= 2;
240
m7 = mul32x32_64(r0, r7) + mul32x32_64(r1, r6 ) + mul32x32_64(r2, r5) + mul32x32_64(r3, r4 );
241
m8 = mul32x32_64(r0, r8) + mul32x32_64(r1, r7 * 2) + mul32x32_64(r2, r6) + mul32x32_64(r3, r5 * 2) + mul32x32_64(r4, r4 );
242
m9 = mul32x32_64(r0, r9) + mul32x32_64(r1, r8 ) + mul32x32_64(r2, r7) + mul32x32_64(r3, r6 ) + mul32x32_64(r4, r5 * 2);
243
244
d6 = r6 * 19; d7 = r7 * 2 * 19;
245
d8 = r8 * 19; d9 = r9 * 2 * 19;
246
247
m0 += (mul32x32_64(d9, r1 ) + mul32x32_64(d8, r2 ) + mul32x32_64(d7, r3 ) + mul32x32_64(d6, r4 * 2) + mul32x32_64(r5, r5 * 2 * 19));
248
m1 += (mul32x32_64(d9, r2 / 2) + mul32x32_64(d8, r3 ) + mul32x32_64(d7, r4 ) + mul32x32_64(d6, r5 * 2));
249
m2 += (mul32x32_64(d9, r3 ) + mul32x32_64(d8, r4 * 2) + mul32x32_64(d7, r5 * 2) + mul32x32_64(d6, r6 ));
250
m3 += (mul32x32_64(d9, r4 ) + mul32x32_64(d8, r5 * 2) + mul32x32_64(d7, r6 ));
251
m4 += (mul32x32_64(d9, r5 * 2) + mul32x32_64(d8, r6 * 2) + mul32x32_64(d7, r7 ));
252
m5 += (mul32x32_64(d9, r6 ) + mul32x32_64(d8, r7 * 2));
253
m6 += (mul32x32_64(d9, r7 * 2) + mul32x32_64(d8, r8 ));
254
m7 += (mul32x32_64(d9, r8 ));
255
m8 += (mul32x32_64(d9, r9 ));
256
257
r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
258
m1 += c; r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
259
m2 += c; r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
260
m3 += c; r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
261
m4 += c; r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
262
m5 += c; r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
263
m6 += c; r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
264
m7 += c; r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
265
m8 += c; r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
266
m9 += c; r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
267
m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
268
r1 += p;
269
270
out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
271
out[5] = r5; out[6] = r6; out[7] = r7; out[8] = r8; out[9] = r9;
272
}
273
274
/* out = in^(2 * count) */
275
void
276
curve25519_square_times(bignum25519 out, const bignum25519 in, int count) {
277
word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
278
word32 d6,d7,d8,d9;
279
word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
280
word32 p;
281
282
r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4];
283
r5 = in[5]; r6 = in[6]; r7 = in[7]; r8 = in[8]; r9 = in[9];
284
285
do {
286
m0 = mul32x32_64(r0, r0);
287
r0 *= 2;
288
m1 = mul32x32_64(r0, r1);
289
m2 = mul32x32_64(r0, r2) + mul32x32_64(r1, r1 * 2);
290
r1 *= 2;
291
m3 = mul32x32_64(r0, r3) + mul32x32_64(r1, r2 );
292
m4 = mul32x32_64(r0, r4) + mul32x32_64(r1, r3 * 2) + mul32x32_64(r2, r2);
293
r2 *= 2;
294
m5 = mul32x32_64(r0, r5) + mul32x32_64(r1, r4 ) + mul32x32_64(r2, r3);
295
m6 = mul32x32_64(r0, r6) + mul32x32_64(r1, r5 * 2) + mul32x32_64(r2, r4) + mul32x32_64(r3, r3 * 2);
296
r3 *= 2;
297
m7 = mul32x32_64(r0, r7) + mul32x32_64(r1, r6 ) + mul32x32_64(r2, r5) + mul32x32_64(r3, r4 );
298
m8 = mul32x32_64(r0, r8) + mul32x32_64(r1, r7 * 2) + mul32x32_64(r2, r6) + mul32x32_64(r3, r5 * 2) + mul32x32_64(r4, r4 );
299
m9 = mul32x32_64(r0, r9) + mul32x32_64(r1, r8 ) + mul32x32_64(r2, r7) + mul32x32_64(r3, r6 ) + mul32x32_64(r4, r5 * 2);
300
301
d6 = r6 * 19; d7 = r7 * 2 * 19;
302
d8 = r8 * 19; d9 = r9 * 2 * 19;
303
304
m0 += (mul32x32_64(d9, r1 ) + mul32x32_64(d8, r2 ) + mul32x32_64(d7, r3 ) + mul32x32_64(d6, r4 * 2) + mul32x32_64(r5, r5 * 2 * 19));
305
m1 += (mul32x32_64(d9, r2 / 2) + mul32x32_64(d8, r3 ) + mul32x32_64(d7, r4 ) + mul32x32_64(d6, r5 * 2));
306
m2 += (mul32x32_64(d9, r3 ) + mul32x32_64(d8, r4 * 2) + mul32x32_64(d7, r5 * 2) + mul32x32_64(d6, r6 ));
307
m3 += (mul32x32_64(d9, r4 ) + mul32x32_64(d8, r5 * 2) + mul32x32_64(d7, r6 ));
308
m4 += (mul32x32_64(d9, r5 * 2) + mul32x32_64(d8, r6 * 2) + mul32x32_64(d7, r7 ));
309
m5 += (mul32x32_64(d9, r6 ) + mul32x32_64(d8, r7 * 2));
310
m6 += (mul32x32_64(d9, r7 * 2) + mul32x32_64(d8, r8 ));
311
m7 += (mul32x32_64(d9, r8 ));
312
m8 += (mul32x32_64(d9, r9 ));
313
314
r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
315
m1 += c; r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
316
m2 += c; r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
317
m3 += c; r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
318
m4 += c; r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
319
m5 += c; r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
320
m6 += c; r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
321
m7 += c; r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
322
m8 += c; r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
323
m9 += c; r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
324
m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
325
r1 += p;
326
} while (--count);
327
328
out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
329
out[5] = r5; out[6] = r6; out[7] = r7; out[8] = r8; out[9] = r9;
330
}
331
332
/* Take a little-endian, 32-byte number and expand it into polynomial form */
333
void
334
curve25519_expand(bignum25519 out, const byte in[32]) {
335
word32 x0,x1,x2,x3,x4,x5,x6,x7;
336
GetBlock<word32, LittleEndian> block(in);
337
block(x0)(x1)(x2)(x3)(x4)(x5)(x6)(x7);
338
339
out[0] = ( x0 ) & reduce_mask_26;
340
out[1] = ((((word64)x1 << 32) | x0) >> 26) & reduce_mask_25;
341
out[2] = ((((word64)x2 << 32) | x1) >> 19) & reduce_mask_26;
342
out[3] = ((((word64)x3 << 32) | x2) >> 13) & reduce_mask_25;
343
out[4] = (( x3) >> 6) & reduce_mask_26;
344
out[5] = ( x4 ) & reduce_mask_25;
345
out[6] = ((((word64)x5 << 32) | x4) >> 25) & reduce_mask_26;
346
out[7] = ((((word64)x6 << 32) | x5) >> 19) & reduce_mask_25;
347
out[8] = ((((word64)x7 << 32) | x6) >> 12) & reduce_mask_26;
348
out[9] = (( x7) >> 6) & reduce_mask_25; /* ignore the top bit */
349
}
350
351
/* Take a fully reduced polynomial form number and contract it into a little-endian, 32-byte array */
352
void
353
curve25519_contract(byte out[32], const bignum25519 in) {
354
bignum25519 f;
355
curve25519_copy(f, in);
356
357
#define carry_pass() \
358
f[1] += f[0] >> 26; f[0] &= reduce_mask_26; \
359
f[2] += f[1] >> 25; f[1] &= reduce_mask_25; \
360
f[3] += f[2] >> 26; f[2] &= reduce_mask_26; \
361
f[4] += f[3] >> 25; f[3] &= reduce_mask_25; \
362
f[5] += f[4] >> 26; f[4] &= reduce_mask_26; \
363
f[6] += f[5] >> 25; f[5] &= reduce_mask_25; \
364
f[7] += f[6] >> 26; f[6] &= reduce_mask_26; \
365
f[8] += f[7] >> 25; f[7] &= reduce_mask_25; \
366
f[9] += f[8] >> 26; f[8] &= reduce_mask_26;
367
368
#define carry_pass_full() \
369
carry_pass() \
370
f[0] += 19 * (f[9] >> 25); f[9] &= reduce_mask_25;
371
372
#define carry_pass_final() \
373
carry_pass() \
374
f[9] &= reduce_mask_25;
375
376
carry_pass_full()
377
carry_pass_full()
378
379
/* now t is between 0 and 2^255-1, properly carried. */
380
/* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
381
f[0] += 19;
382
carry_pass_full()
383
384
/* now between 19 and 2^255-1 in both cases, and offset by 19. */
385
f[0] += (1 << 26) - 19;
386
f[1] += (1 << 25) - 1;
387
f[2] += (1 << 26) - 1;
388
f[3] += (1 << 25) - 1;
389
f[4] += (1 << 26) - 1;
390
f[5] += (1 << 25) - 1;
391
f[6] += (1 << 26) - 1;
392
f[7] += (1 << 25) - 1;
393
f[8] += (1 << 26) - 1;
394
f[9] += (1 << 25) - 1;
395
396
/* now between 2^255 and 2^256-20, and offset by 2^255. */
397
carry_pass_final()
398
399
#undef carry_pass
400
#undef carry_full
401
#undef carry_final
402
403
f[1] <<= 2;
404
f[2] <<= 3;
405
f[3] <<= 5;
406
f[4] <<= 6;
407
f[6] <<= 1;
408
f[7] <<= 3;
409
f[8] <<= 4;
410
f[9] <<= 6;
411
412
#define F(i, s) \
413
out[s+0] |= (byte)( f[i] & 0xff); \
414
out[s+1] = (byte)((f[i] >> 8) & 0xff); \
415
out[s+2] = (byte)((f[i] >> 16) & 0xff); \
416
out[s+3] = (byte)((f[i] >> 24) & 0xff);
417
418
out[0] = out[16] = 0;
419
F(0,0); F(1,3);
420
F(2,6); F(3,9);
421
F(4,12); F(5,16);
422
F(6,19); F(7,22);
423
F(8,25); F(9,28);
424
#undef F
425
}
426
427
inline void
428
curve25519_swap_conditional(bignum25519 x, bignum25519 qpx, word32 iswap) {
429
const word32 swap = (word32)(-(sword32)iswap);
430
word32 x0,x1,x2,x3,x4,x5,x6,x7,x8,x9;
431
432
x0 = swap & (x[0] ^ qpx[0]); x[0] ^= x0; qpx[0] ^= x0;
433
x1 = swap & (x[1] ^ qpx[1]); x[1] ^= x1; qpx[1] ^= x1;
434
x2 = swap & (x[2] ^ qpx[2]); x[2] ^= x2; qpx[2] ^= x2;
435
x3 = swap & (x[3] ^ qpx[3]); x[3] ^= x3; qpx[3] ^= x3;
436
x4 = swap & (x[4] ^ qpx[4]); x[4] ^= x4; qpx[4] ^= x4;
437
x5 = swap & (x[5] ^ qpx[5]); x[5] ^= x5; qpx[5] ^= x5;
438
x6 = swap & (x[6] ^ qpx[6]); x[6] ^= x6; qpx[6] ^= x6;
439
x7 = swap & (x[7] ^ qpx[7]); x[7] ^= x7; qpx[7] ^= x7;
440
x8 = swap & (x[8] ^ qpx[8]); x[8] ^= x8; qpx[8] ^= x8;
441
x9 = swap & (x[9] ^ qpx[9]); x[9] ^= x9; qpx[9] ^= x9;
442
}
443
444
/*
445
* In: b = 2^5 - 2^0
446
* Out: b = 2^250 - 2^0
447
*/
448
void
449
curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) {
450
ALIGN(ALIGN_SPEC) bignum25519 t0,c;
451
452
/* 2^5 - 2^0 */ /* b */
453
/* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5);
454
/* 2^10 - 2^0 */ curve25519_mul(b, t0, b);
455
/* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10);
456
/* 2^20 - 2^0 */ curve25519_mul(c, t0, b);
457
/* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20);
458
/* 2^40 - 2^0 */ curve25519_mul(t0, t0, c);
459
/* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10);
460
/* 2^50 - 2^0 */ curve25519_mul(b, t0, b);
461
/* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50);
462
/* 2^100 - 2^0 */ curve25519_mul(c, t0, b);
463
/* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100);
464
/* 2^200 - 2^0 */ curve25519_mul(t0, t0, c);
465
/* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50);
466
/* 2^250 - 2^0 */ curve25519_mul(b, t0, b);
467
}
468
469
/*
470
* z^(p - 2) = z(2^255 - 21)
471
*/
472
void
473
curve25519_recip(bignum25519 out, const bignum25519 z) {
474
ALIGN(ALIGN_SPEC) bignum25519 a, t0, b;
475
476
/* 2 */ curve25519_square(a, z); /* a = 2 */
477
/* 8 */ curve25519_square_times(t0, a, 2);
478
/* 9 */ curve25519_mul(b, t0, z); /* b = 9 */
479
/* 11 */ curve25519_mul(a, b, a); /* a = 11 */
480
/* 22 */ curve25519_square(t0, a);
481
/* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b);
482
/* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
483
/* 2^255 - 2^5 */ curve25519_square_times(b, b, 5);
484
/* 2^255 - 21 */ curve25519_mul(out, b, a);
485
}
486
487
ANONYMOUS_NAMESPACE_END
488
NAMESPACE_END // X25519
489
NAMESPACE_END // Donna
490
NAMESPACE_END // CryptoPP
491
492
//******************************* ed25519 *******************************//
493
494
NAMESPACE_BEGIN(CryptoPP)
495
NAMESPACE_BEGIN(Donna)
496
NAMESPACE_BEGIN(Ed25519)
497
ANONYMOUS_NAMESPACE_BEGIN
498
499
using CryptoPP::byte;
500
using CryptoPP::word32;
501
using CryptoPP::sword32;
502
using CryptoPP::word64;
503
using CryptoPP::sword64;
504
505
using CryptoPP::GetBlock;
506
using CryptoPP::LittleEndian;
507
508
using CryptoPP::SHA512;
509
510
// Bring in all the symbols from the 32-bit header
511
using namespace CryptoPP::Donna::Arch32;
512
513
/* out = in */
514
inline void
515
curve25519_copy(bignum25519 out, const bignum25519 in) {
516
out[0] = in[0]; out[1] = in[1];
517
out[2] = in[2]; out[3] = in[3];
518
out[4] = in[4]; out[5] = in[5];
519
out[6] = in[6]; out[7] = in[7];
520
out[8] = in[8]; out[9] = in[9];
521
}
522
523
/* out = a + b */
524
inline void
525
curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
526
out[0] = a[0] + b[0]; out[1] = a[1] + b[1];
527
out[2] = a[2] + b[2]; out[3] = a[3] + b[3];
528
out[4] = a[4] + b[4]; out[5] = a[5] + b[5];
529
out[6] = a[6] + b[6]; out[7] = a[7] + b[7];
530
out[8] = a[8] + b[8]; out[9] = a[9] + b[9];
531
}
532
533
inline void
534
curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
535
word32 c;
536
out[0] = a[0] + b[0] ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
537
out[1] = a[1] + b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
538
out[2] = a[2] + b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
539
out[3] = a[3] + b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
540
out[4] = a[4] + b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
541
out[5] = a[5] + b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
542
out[6] = a[6] + b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
543
out[7] = a[7] + b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
544
out[8] = a[8] + b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
545
out[9] = a[9] + b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
546
out[0] += 19 * c;
547
}
548
549
inline void
550
curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
551
word32 c;
552
out[0] = a[0] + b[0] ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
553
out[1] = a[1] + b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
554
out[2] = a[2] + b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
555
out[3] = a[3] + b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
556
out[4] = a[4] + b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
557
out[5] = a[5] + b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
558
out[6] = a[6] + b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
559
out[7] = a[7] + b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
560
out[8] = a[8] + b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
561
out[9] = a[9] + b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
562
out[0] += 19 * c;
563
}
564
565
/* out = a - b */
566
inline void
567
curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
568
word32 c;
569
out[0] = twoP0 + a[0] - b[0] ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
570
out[1] = twoP13579 + a[1] - b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
571
out[2] = twoP2468 + a[2] - b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
572
out[3] = twoP13579 + a[3] - b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
573
out[4] = twoP2468 + a[4] - b[4] + c;
574
out[5] = twoP13579 + a[5] - b[5] ;
575
out[6] = twoP2468 + a[6] - b[6] ;
576
out[7] = twoP13579 + a[7] - b[7] ;
577
out[8] = twoP2468 + a[8] - b[8] ;
578
out[9] = twoP13579 + a[9] - b[9] ;
579
}
580
581
/* out = a - b, where a is the result of a basic op (add,sub) */
582
inline void
583
curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
584
word32 c;
585
out[0] = fourP0 + a[0] - b[0] ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
586
out[1] = fourP13579 + a[1] - b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
587
out[2] = fourP2468 + a[2] - b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
588
out[3] = fourP13579 + a[3] - b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
589
out[4] = fourP2468 + a[4] - b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
590
out[5] = fourP13579 + a[5] - b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
591
out[6] = fourP2468 + a[6] - b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
592
out[7] = fourP13579 + a[7] - b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
593
out[8] = fourP2468 + a[8] - b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
594
out[9] = fourP13579 + a[9] - b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
595
out[0] += 19 * c;
596
}
597
598
inline void
599
curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
600
word32 c;
601
out[0] = fourP0 + a[0] - b[0] ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
602
out[1] = fourP13579 + a[1] - b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
603
out[2] = fourP2468 + a[2] - b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
604
out[3] = fourP13579 + a[3] - b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
605
out[4] = fourP2468 + a[4] - b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
606
out[5] = fourP13579 + a[5] - b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
607
out[6] = fourP2468 + a[6] - b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
608
out[7] = fourP13579 + a[7] - b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
609
out[8] = fourP2468 + a[8] - b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
610
out[9] = fourP13579 + a[9] - b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
611
out[0] += 19 * c;
612
}
613
614
/* out = -a */
615
inline void
616
curve25519_neg(bignum25519 out, const bignum25519 a) {
617
word32 c;
618
out[0] = twoP0 - a[0] ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
619
out[1] = twoP13579 - a[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
620
out[2] = twoP2468 - a[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
621
out[3] = twoP13579 - a[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
622
out[4] = twoP2468 - a[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
623
out[5] = twoP13579 - a[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
624
out[6] = twoP2468 - a[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
625
out[7] = twoP13579 - a[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
626
out[8] = twoP2468 - a[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
627
out[9] = twoP13579 - a[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
628
out[0] += 19 * c;
629
}
630
631
/* out = a * b */
632
void
633
curve25519_mul(bignum25519 out, const bignum25519 a, const bignum25519 b) {
634
word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
635
word32 s0,s1,s2,s3,s4,s5,s6,s7,s8,s9;
636
word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
637
word32 p;
638
639
r0 = b[0]; r1 = b[1];
640
r2 = b[2]; r3 = b[3];
641
r4 = b[4]; r5 = b[5];
642
r6 = b[6]; r7 = b[7];
643
r8 = b[8]; r9 = b[9];
644
645
s0 = a[0]; s1 = a[1];
646
s2 = a[2]; s3 = a[3];
647
s4 = a[4]; s5 = a[5];
648
s6 = a[6]; s7 = a[7];
649
s8 = a[8]; s9 = a[9];
650
651
m1 = mul32x32_64(r0, s1) + mul32x32_64(r1, s0);
652
m3 = mul32x32_64(r0, s3) + mul32x32_64(r1, s2) + mul32x32_64(r2, s1) + mul32x32_64(r3, s0);
653
m5 = mul32x32_64(r0, s5) + mul32x32_64(r1, s4) + mul32x32_64(r2, s3) + mul32x32_64(r3, s2) + mul32x32_64(r4, s1) + mul32x32_64(r5, s0);
654
m7 = mul32x32_64(r0, s7) + mul32x32_64(r1, s6) + mul32x32_64(r2, s5) + mul32x32_64(r3, s4) + mul32x32_64(r4, s3) + mul32x32_64(r5, s2) + mul32x32_64(r6, s1) + mul32x32_64(r7, s0);
655
m9 = mul32x32_64(r0, s9) + mul32x32_64(r1, s8) + mul32x32_64(r2, s7) + mul32x32_64(r3, s6) + mul32x32_64(r4, s5) + mul32x32_64(r5, s4) + mul32x32_64(r6, s3) + mul32x32_64(r7, s2) + mul32x32_64(r8, s1) + mul32x32_64(r9, s0);
656
657
r1 *= 2; r3 *= 2;
658
r5 *= 2; r7 *= 2;
659
660
m0 = mul32x32_64(r0, s0);
661
m2 = mul32x32_64(r0, s2) + mul32x32_64(r1, s1) + mul32x32_64(r2, s0);
662
m4 = mul32x32_64(r0, s4) + mul32x32_64(r1, s3) + mul32x32_64(r2, s2) + mul32x32_64(r3, s1) + mul32x32_64(r4, s0);
663
m6 = mul32x32_64(r0, s6) + mul32x32_64(r1, s5) + mul32x32_64(r2, s4) + mul32x32_64(r3, s3) + mul32x32_64(r4, s2) + mul32x32_64(r5, s1) + mul32x32_64(r6, s0);
664
m8 = mul32x32_64(r0, s8) + mul32x32_64(r1, s7) + mul32x32_64(r2, s6) + mul32x32_64(r3, s5) + mul32x32_64(r4, s4) + mul32x32_64(r5, s3) + mul32x32_64(r6, s2) + mul32x32_64(r7, s1) + mul32x32_64(r8, s0);
665
666
r1 *= 19; r2 *= 19;
667
r3 = (r3 / 2) * 19;
668
r4 *= 19;
669
r5 = (r5 / 2) * 19;
670
r6 *= 19;
671
r7 = (r7 / 2) * 19;
672
r8 *= 19; r9 *= 19;
673
674
m1 += (mul32x32_64(r9, s2) + mul32x32_64(r8, s3) + mul32x32_64(r7, s4) + mul32x32_64(r6, s5) + mul32x32_64(r5, s6) + mul32x32_64(r4, s7) + mul32x32_64(r3, s8) + mul32x32_64(r2, s9));
675
m3 += (mul32x32_64(r9, s4) + mul32x32_64(r8, s5) + mul32x32_64(r7, s6) + mul32x32_64(r6, s7) + mul32x32_64(r5, s8) + mul32x32_64(r4, s9));
676
m5 += (mul32x32_64(r9, s6) + mul32x32_64(r8, s7) + mul32x32_64(r7, s8) + mul32x32_64(r6, s9));
677
m7 += (mul32x32_64(r9, s8) + mul32x32_64(r8, s9));
678
679
r3 *= 2; r5 *= 2;
680
r7 *= 2; r9 *= 2;
681
682
m0 += (mul32x32_64(r9, s1) + mul32x32_64(r8, s2) + mul32x32_64(r7, s3) + mul32x32_64(r6, s4) + mul32x32_64(r5, s5) + mul32x32_64(r4, s6) + mul32x32_64(r3, s7) + mul32x32_64(r2, s8) + mul32x32_64(r1, s9));
683
m2 += (mul32x32_64(r9, s3) + mul32x32_64(r8, s4) + mul32x32_64(r7, s5) + mul32x32_64(r6, s6) + mul32x32_64(r5, s7) + mul32x32_64(r4, s8) + mul32x32_64(r3, s9));
684
m4 += (mul32x32_64(r9, s5) + mul32x32_64(r8, s6) + mul32x32_64(r7, s7) + mul32x32_64(r6, s8) + mul32x32_64(r5, s9));
685
m6 += (mul32x32_64(r9, s7) + mul32x32_64(r8, s8) + mul32x32_64(r7, s9));
686
m8 += (mul32x32_64(r9, s9));
687
688
r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
689
m1 += c; r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
690
m2 += c; r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
691
m3 += c; r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
692
m4 += c; r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
693
m5 += c; r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
694
m6 += c; r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
695
m7 += c; r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
696
m8 += c; r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
697
m9 += c; r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
698
m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
699
r1 += p;
700
701
out[0] = r0; out[1] = r1;
702
out[2] = r2; out[3] = r3;
703
out[4] = r4; out[5] = r5;
704
out[6] = r6; out[7] = r7;
705
out[8] = r8; out[9] = r9;
706
}
707
708
/* out = in*in
 *
 * Squares the ten-limb value `in` and writes the carried result to `out`.
 * All limbs of `in` are loaded into locals before anything is stored, and
 * `out` is only written at the very end.
 *
 * The carry chain below masks even limbs with reduce_mask_26 and odd limbs
 * with reduce_mask_25, and folds the carry out of limb 9 back into limb 0
 * multiplied by 19. mul32x32_64 is defined elsewhere in this file
 * (presumably a widening 32x32->64 multiply - confirm).
 */
709
void
710
curve25519_square(bignum25519 out, const bignum25519 in) {
711
word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
712
word32 d6,d7,d8,d9;
713
word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
714
word32 p;
715
716
r0 = in[0]; r1 = in[1];
717
r2 = in[2]; r3 = in[3];
718
r4 = in[4]; r5 = in[5];
719
r6 = in[6]; r7 = in[7];
720
r8 = in[8]; r9 = in[9];
721
722
/* Column sums m0..m9. Statement order matters: r0..r3 are doubled in place
 * right after the column holding their own square term, so the cross terms
 * that follow pick up the factor of two from the doubled limb. */
m0 = mul32x32_64(r0, r0);
723
r0 *= 2;
724
m1 = mul32x32_64(r0, r1);
725
m2 = mul32x32_64(r0, r2) + mul32x32_64(r1, r1 * 2);
726
r1 *= 2;
727
m3 = mul32x32_64(r0, r3) + mul32x32_64(r1, r2 );
728
m4 = mul32x32_64(r0, r4) + mul32x32_64(r1, r3 * 2) + mul32x32_64(r2, r2);
729
r2 *= 2;
730
m5 = mul32x32_64(r0, r5) + mul32x32_64(r1, r4 ) + mul32x32_64(r2, r3);
731
m6 = mul32x32_64(r0, r6) + mul32x32_64(r1, r5 * 2) + mul32x32_64(r2, r4) + mul32x32_64(r3, r3 * 2);
732
r3 *= 2;
733
m7 = mul32x32_64(r0, r7) + mul32x32_64(r1, r6 ) + mul32x32_64(r2, r5) + mul32x32_64(r3, r4 );
734
m8 = mul32x32_64(r0, r8) + mul32x32_64(r1, r7 * 2) + mul32x32_64(r2, r6) + mul32x32_64(r3, r5 * 2) + mul32x32_64(r4, r4 );
735
m9 = mul32x32_64(r0, r9) + mul32x32_64(r1, r8 ) + mul32x32_64(r2, r7) + mul32x32_64(r3, r6 ) + mul32x32_64(r4, r5 * 2);
736
737
/* Upper limbs pre-scaled by 19 (d7 and d9 additionally by 2); they feed the
 * terms that are added back into the low columns below. */
d6 = r6 * 19;
738
d7 = r7 * 2 * 19;
739
d8 = r8 * 19;
740
d9 = r9 * 2 * 19;
741
742
/* Note: r1, r2 and r3 still hold their doubled values here; `r2 / 2` in the
 * m1 line undoes the doubling for that one term. */
m0 += (mul32x32_64(d9, r1 ) + mul32x32_64(d8, r2 ) + mul32x32_64(d7, r3 ) + mul32x32_64(d6, r4 * 2) + mul32x32_64(r5, r5 * 2 * 19));
743
m1 += (mul32x32_64(d9, r2 / 2) + mul32x32_64(d8, r3 ) + mul32x32_64(d7, r4 ) + mul32x32_64(d6, r5 * 2));
744
m2 += (mul32x32_64(d9, r3 ) + mul32x32_64(d8, r4 * 2) + mul32x32_64(d7, r5 * 2) + mul32x32_64(d6, r6 ));
745
m3 += (mul32x32_64(d9, r4 ) + mul32x32_64(d8, r5 * 2) + mul32x32_64(d7, r6 ));
746
m4 += (mul32x32_64(d9, r5 * 2) + mul32x32_64(d8, r6 * 2) + mul32x32_64(d7, r7 ));
747
m5 += (mul32x32_64(d9, r6 ) + mul32x32_64(d8, r7 * 2));
748
m6 += (mul32x32_64(d9, r7 * 2) + mul32x32_64(d8, r8 ));
749
m7 += (mul32x32_64(d9, r8 ));
750
m8 += (mul32x32_64(d9, r9 ));
751
752
/* Carry chain: keep 26 bits in even limbs and 25 bits in odd limbs, pushing
 * the overflow of each column into the next one. */
r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
753
m1 += c; r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
754
m2 += c; r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
755
m3 += c; r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
756
m4 += c; r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
757
m5 += c; r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
758
m6 += c; r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
759
m7 += c; r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
760
m8 += c; r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
761
m9 += c; r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
762
/* The carry out of limb 9 re-enters limb 0 multiplied by 19; the single
 * follow-up carry is added to r1 without masking r1 again. */
m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
763
r1 += p;
764
765
out[0] = r0; out[1] = r1;
766
out[2] = r2; out[3] = r3;
767
out[4] = r4; out[5] = r5;
768
out[6] = r6; out[7] = r7;
769
out[8] = r8; out[9] = r9;
770
}
771
772
/* out = in ^ (2 ^ count), i.e. `in` squared `count` times in a row.
 *
 * The limbs are loaded once, the squaring-and-carry body is repeated inside a
 * do/while, and `out` is written once after the loop.
 *
 * `count` is expected to be at least 1: the body always runs once and the loop
 * only stops when --count reaches zero.
 */
773
void
774
curve25519_square_times(bignum25519 out, const bignum25519 in, int count) {
775
word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
776
word32 d6,d7,d8,d9,p;
777
word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
778
779
r0 = in[0]; r1 = in[1];
780
r2 = in[2]; r3 = in[3];
781
r4 = in[4]; r5 = in[5];
782
r6 = in[6]; r7 = in[7];
783
r8 = in[8]; r9 = in[9];
784
785
do {
786
/* Column sums m0..m9. Statement order matters: r0..r3 are doubled in place
 * right after the column holding their own square term. */
m0 = mul32x32_64(r0, r0);
787
r0 *= 2;
788
m1 = mul32x32_64(r0, r1);
789
m2 = mul32x32_64(r0, r2) + mul32x32_64(r1, r1 * 2);
790
r1 *= 2;
791
m3 = mul32x32_64(r0, r3) + mul32x32_64(r1, r2 );
792
m4 = mul32x32_64(r0, r4) + mul32x32_64(r1, r3 * 2) + mul32x32_64(r2, r2);
793
r2 *= 2;
794
m5 = mul32x32_64(r0, r5) + mul32x32_64(r1, r4 ) + mul32x32_64(r2, r3);
795
m6 = mul32x32_64(r0, r6) + mul32x32_64(r1, r5 * 2) + mul32x32_64(r2, r4) + mul32x32_64(r3, r3 * 2);
796
r3 *= 2;
797
m7 = mul32x32_64(r0, r7) + mul32x32_64(r1, r6 ) + mul32x32_64(r2, r5) + mul32x32_64(r3, r4 );
798
m8 = mul32x32_64(r0, r8) + mul32x32_64(r1, r7 * 2) + mul32x32_64(r2, r6) + mul32x32_64(r3, r5 * 2) + mul32x32_64(r4, r4 );
799
m9 = mul32x32_64(r0, r9) + mul32x32_64(r1, r8 ) + mul32x32_64(r2, r7) + mul32x32_64(r3, r6 ) + mul32x32_64(r4, r5 * 2);
800
801
/* Upper limbs pre-scaled by 19 (d7 and d9 additionally by 2). */
d6 = r6 * 19;
802
d7 = r7 * 2 * 19;
803
d8 = r8 * 19;
804
d9 = r9 * 2 * 19;
805
806
/* r1, r2 and r3 still hold doubled values here; `r2 / 2` undoes the doubling
 * for that one term. */
m0 += (mul32x32_64(d9, r1 ) + mul32x32_64(d8, r2 ) + mul32x32_64(d7, r3 ) + mul32x32_64(d6, r4 * 2) + mul32x32_64(r5, r5 * 2 * 19));
807
m1 += (mul32x32_64(d9, r2 / 2) + mul32x32_64(d8, r3 ) + mul32x32_64(d7, r4 ) + mul32x32_64(d6, r5 * 2));
808
m2 += (mul32x32_64(d9, r3 ) + mul32x32_64(d8, r4 * 2) + mul32x32_64(d7, r5 * 2) + mul32x32_64(d6, r6 ));
809
m3 += (mul32x32_64(d9, r4 ) + mul32x32_64(d8, r5 * 2) + mul32x32_64(d7, r6 ));
810
m4 += (mul32x32_64(d9, r5 * 2) + mul32x32_64(d8, r6 * 2) + mul32x32_64(d7, r7 ));
811
m5 += (mul32x32_64(d9, r6 ) + mul32x32_64(d8, r7 * 2));
812
m6 += (mul32x32_64(d9, r7 * 2) + mul32x32_64(d8, r8 ));
813
m7 += (mul32x32_64(d9, r8 ));
814
m8 += (mul32x32_64(d9, r9 ));
815
816
/* Carry chain: 26 bits kept in even limbs, 25 bits in odd limbs. The carried
 * r0..r9 are the input of the next loop iteration. */
r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
817
m1 += c; r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
818
m2 += c; r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
819
m3 += c; r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
820
m4 += c; r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
821
m5 += c; r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
822
m6 += c; r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
823
m7 += c; r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
824
m8 += c; r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
825
m9 += c; r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
826
/* The carry out of limb 9 re-enters limb 0 multiplied by 19. */
m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
827
r1 += p;
828
} while (--count);
829
830
out[0] = r0; out[1] = r1;
831
out[2] = r2; out[3] = r3;
832
out[4] = r4; out[5] = r5;
833
out[6] = r6; out[7] = r7;
834
out[8] = r8; out[9] = r9;
835
}
836
837
/* Take a little-endian, 32-byte number and expand it into polynomial form */
838
void
839
curve25519_expand(bignum25519 out, const byte in[32]) {
840
word32 x0,x1,x2,x3,x4,x5,x6,x7;
841
GetBlock<word32, LittleEndian> block(in);
842
block(x0)(x1)(x2)(x3)(x4)(x5)(x6)(x7);
843
844
out[0] = ( x0 ) & 0x3ffffff;
845
out[1] = ((((word64)x1 << 32) | x0) >> 26) & 0x1ffffff;
846
out[2] = ((((word64)x2 << 32) | x1) >> 19) & 0x3ffffff;
847
out[3] = ((((word64)x3 << 32) | x2) >> 13) & 0x1ffffff;
848
out[4] = (( x3) >> 6) & 0x3ffffff;
849
out[5] = ( x4 ) & 0x1ffffff;
850
out[6] = ((((word64)x5 << 32) | x4) >> 25) & 0x3ffffff;
851
out[7] = ((((word64)x6 << 32) | x5) >> 19) & 0x1ffffff;
852
out[8] = ((((word64)x7 << 32) | x6) >> 12) & 0x3ffffff;
853
out[9] = (( x7) >> 6) & 0x1ffffff;
854
}
855
856
/* Take a fully reduced polynomial form number and contract it into a
857
* little-endian, 32-byte array
858
*/
859
void
860
curve25519_contract(byte out[32], const bignum25519 in) {
861
bignum25519 f;
862
curve25519_copy(f, in);
863
864
#define carry_pass() \
865
f[1] += f[0] >> 26; f[0] &= reduce_mask_26; \
866
f[2] += f[1] >> 25; f[1] &= reduce_mask_25; \
867
f[3] += f[2] >> 26; f[2] &= reduce_mask_26; \
868
f[4] += f[3] >> 25; f[3] &= reduce_mask_25; \
869
f[5] += f[4] >> 26; f[4] &= reduce_mask_26; \
870
f[6] += f[5] >> 25; f[5] &= reduce_mask_25; \
871
f[7] += f[6] >> 26; f[6] &= reduce_mask_26; \
872
f[8] += f[7] >> 25; f[7] &= reduce_mask_25; \
873
f[9] += f[8] >> 26; f[8] &= reduce_mask_26;
874
875
#define carry_pass_full() \
876
carry_pass() \
877
f[0] += 19 * (f[9] >> 25); f[9] &= reduce_mask_25;
878
879
#define carry_pass_final() \
880
carry_pass() \
881
f[9] &= reduce_mask_25;
882
883
carry_pass_full()
884
carry_pass_full()
885
886
/* now t is between 0 and 2^255-1, properly carried. */
887
/* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
888
f[0] += 19;
889
carry_pass_full()
890
891
/* now between 19 and 2^255-1 in both cases, and offset by 19. */
892
f[0] += (reduce_mask_26 + 1) - 19;
893
f[1] += (reduce_mask_25 + 1) - 1;
894
f[2] += (reduce_mask_26 + 1) - 1;
895
f[3] += (reduce_mask_25 + 1) - 1;
896
f[4] += (reduce_mask_26 + 1) - 1;
897
f[5] += (reduce_mask_25 + 1) - 1;
898
f[6] += (reduce_mask_26 + 1) - 1;
899
f[7] += (reduce_mask_25 + 1) - 1;
900
f[8] += (reduce_mask_26 + 1) - 1;
901
f[9] += (reduce_mask_25 + 1) - 1;
902
903
/* now between 2^255 and 2^256-20, and offset by 2^255. */
904
carry_pass_final()
905
906
#undef carry_pass
907
#undef carry_full
908
#undef carry_final
909
910
f[1] <<= 2; f[2] <<= 3;
911
f[3] <<= 5; f[4] <<= 6;
912
f[6] <<= 1; f[7] <<= 3;
913
f[8] <<= 4; f[9] <<= 6;
914
915
#define F(i, s) \
916
out[s+0] |= (byte)( f[i] & 0xff); \
917
out[s+1] = (byte)((f[i] >> 8) & 0xff); \
918
out[s+2] = (byte)((f[i] >> 16) & 0xff); \
919
out[s+3] = (byte)((f[i] >> 24) & 0xff);
920
921
out[0] = out[16] = 0;
922
F(0,0); F(1,3);
923
F(2,6); F(3,9);
924
F(4,12); F(5,16);
925
F(6,19); F(7,22);
926
F(8,25); F(9,28);
927
#undef F
928
}
929
930
/* out = (flag) ? in : out */
931
inline void
932
curve25519_move_conditional_bytes(byte out[96], const byte in[96], word32 flag)
933
{
934
// TODO: enable this code path once we can test and benchmark it.
935
// It is about 48 insns shorter, it avoids punning which may be UB,
936
// and it is guaranteed constant time.
937
#if defined(__GNUC__) && defined(__i686__) && 0
938
const word32 iter = 96/sizeof(word32);
939
word32* outl = reinterpret_cast<word32*>(out);
940
const word32* inl = reinterpret_cast<const word32*>(in);
941
word32 idx=0, val;
942
943
__asm__ __volatile__ (
944
".att_syntax ;\n"
945
"cmpl $0, %[flag] ;\n" // compare, set ZERO flag
946
"movl %[iter], %%ecx ;\n" // load iteration count
947
"1: ;\n"
948
" movl (%[idx],%[out]), %[val] ;\n" // val = out[idx]
949
" cmovnzl (%[idx],%[in]), %[val] ;\n" // copy in[idx] to val if NZ
950
" movl %[val], (%[idx],%[out]) ;\n" // out[idx] = val
951
" leal 4(%[idx]), %[idx] ;\n" // increment index
952
" loopnz 1b ;\n" // does not affect flags
953
: [out] "+S" (outl), [in] "+D" (inl),
954
[idx] "+b" (idx), [val] "=r" (val)
955
: [flag] "g" (flag), [iter] "I" (iter)
956
: "ecx", "memory", "cc"
957
);
958
#else
959
const word32 nb = flag - 1, b = ~nb;
960
const word32 *inl = (const word32 *)in;
961
word32 *outl = (word32 *)out;
962
outl[0] = (outl[0] & nb) | (inl[0] & b);
963
outl[1] = (outl[1] & nb) | (inl[1] & b);
964
outl[2] = (outl[2] & nb) | (inl[2] & b);
965
outl[3] = (outl[3] & nb) | (inl[3] & b);
966
outl[4] = (outl[4] & nb) | (inl[4] & b);
967
outl[5] = (outl[5] & nb) | (inl[5] & b);
968
outl[6] = (outl[6] & nb) | (inl[6] & b);
969
outl[7] = (outl[7] & nb) | (inl[7] & b);
970
outl[8] = (outl[8] & nb) | (inl[8] & b);
971
outl[9] = (outl[9] & nb) | (inl[9] & b);
972
outl[10] = (outl[10] & nb) | (inl[10] & b);
973
outl[11] = (outl[11] & nb) | (inl[11] & b);
974
outl[12] = (outl[12] & nb) | (inl[12] & b);
975
outl[13] = (outl[13] & nb) | (inl[13] & b);
976
outl[14] = (outl[14] & nb) | (inl[14] & b);
977
outl[15] = (outl[15] & nb) | (inl[15] & b);
978
outl[16] = (outl[16] & nb) | (inl[16] & b);
979
outl[17] = (outl[17] & nb) | (inl[17] & b);
980
outl[18] = (outl[18] & nb) | (inl[18] & b);
981
outl[19] = (outl[19] & nb) | (inl[19] & b);
982
outl[20] = (outl[20] & nb) | (inl[20] & b);
983
outl[21] = (outl[21] & nb) | (inl[21] & b);
984
outl[22] = (outl[22] & nb) | (inl[22] & b);
985
outl[23] = (outl[23] & nb) | (inl[23] & b);
986
#endif
987
}
988
989
/* if (iswap) swap(a, b) */
990
inline void
991
curve25519_swap_conditional(bignum25519 a, bignum25519 b, word32 iswap) {
992
const word32 swap = (word32)(-(sword32)iswap);
993
word32 x0,x1,x2,x3,x4,x5,x6,x7,x8,x9;
994
995
x0 = swap & (a[0] ^ b[0]); a[0] ^= x0; b[0] ^= x0;
996
x1 = swap & (a[1] ^ b[1]); a[1] ^= x1; b[1] ^= x1;
997
x2 = swap & (a[2] ^ b[2]); a[2] ^= x2; b[2] ^= x2;
998
x3 = swap & (a[3] ^ b[3]); a[3] ^= x3; b[3] ^= x3;
999
x4 = swap & (a[4] ^ b[4]); a[4] ^= x4; b[4] ^= x4;
1000
x5 = swap & (a[5] ^ b[5]); a[5] ^= x5; b[5] ^= x5;
1001
x6 = swap & (a[6] ^ b[6]); a[6] ^= x6; b[6] ^= x6;
1002
x7 = swap & (a[7] ^ b[7]); a[7] ^= x7; b[7] ^= x7;
1003
x8 = swap & (a[8] ^ b[8]); a[8] ^= x8; b[8] ^= x8;
1004
x9 = swap & (a[9] ^ b[9]); a[9] ^= x9; b[9] ^= x9;
1005
}
1006
1007
/*
1008
* In: b = 2^5 - 2^0
1009
* Out: b = 2^250 - 2^0
1010
*/
1011
void
1012
curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) {
1013
ALIGN(ALIGN_SPEC) bignum25519 t0,c;
1014
1015
/* 2^5 - 2^0 */ /* b */
1016
/* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5);
1017
/* 2^10 - 2^0 */ curve25519_mul(b, t0, b);
1018
/* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10);
1019
/* 2^20 - 2^0 */ curve25519_mul(c, t0, b);
1020
/* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20);
1021
/* 2^40 - 2^0 */ curve25519_mul(t0, t0, c);
1022
/* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10);
1023
/* 2^50 - 2^0 */ curve25519_mul(b, t0, b);
1024
/* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50);
1025
/* 2^100 - 2^0 */ curve25519_mul(c, t0, b);
1026
/* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100);
1027
/* 2^200 - 2^0 */ curve25519_mul(t0, t0, c);
1028
/* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50);
1029
/* 2^250 - 2^0 */ curve25519_mul(b, t0, b);
1030
}
1031
1032
/*
1033
* z^(p - 2) = z(2^255 - 21)
1034
*/
1035
void
1036
curve25519_recip(bignum25519 out, const bignum25519 z) {
1037
ALIGN(ALIGN_SPEC) bignum25519 a,t0,b;
1038
1039
/* 2 */ curve25519_square_times(a, z, 1); /* a = 2 */
1040
/* 8 */ curve25519_square_times(t0, a, 2);
1041
/* 9 */ curve25519_mul(b, t0, z); /* b = 9 */
1042
/* 11 */ curve25519_mul(a, b, a); /* a = 11 */
1043
/* 22 */ curve25519_square_times(t0, a, 1);
1044
/* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b);
1045
/* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
1046
/* 2^255 - 2^5 */ curve25519_square_times(b, b, 5);
1047
/* 2^255 - 21 */ curve25519_mul(out, b, a);
1048
}
1049
1050
/*
1051
* z^((p-5)/8) = z^(2^252 - 3)
1052
*/
1053
void
1054
curve25519_pow_two252m3(bignum25519 two252m3, const bignum25519 z) {
1055
ALIGN(ALIGN_SPEC) bignum25519 b,c,t0;
1056
1057
/* 2 */ curve25519_square_times(c, z, 1); /* c = 2 */
1058
/* 8 */ curve25519_square_times(t0, c, 2); /* t0 = 8 */
1059
/* 9 */ curve25519_mul(b, t0, z); /* b = 9 */
1060
/* 11 */ curve25519_mul(c, b, c); /* c = 11 */
1061
/* 22 */ curve25519_square_times(t0, c, 1);
1062
/* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b);
1063
/* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
1064
/* 2^252 - 2^2 */ curve25519_square_times(b, b, 2);
1065
/* 2^252 - 3 */ curve25519_mul(two252m3, b, z);
1066
}
1067
1068
inline void
1069
ed25519_hash(byte *hash, const byte *in, size_t inlen) {
1070
SHA512().CalculateDigest(hash, in, inlen);
1071
}
1072
1073
inline void
1074
ed25519_extsk(hash_512bits extsk, const byte sk[32]) {
1075
ed25519_hash(extsk, sk, 32);
1076
extsk[0] &= 248;
1077
extsk[31] &= 127;
1078
extsk[31] |= 64;
1079
}
1080
1081
void
1082
UpdateFromStream(HashTransformation& hash, std::istream& stream)
1083
{
1084
SecByteBlock block(4096);
1085
while (stream.read((char*)block.begin(), block.size()))
1086
hash.Update(block, block.size());
1087
1088
std::streamsize rem = stream.gcount();
1089
if (rem)
1090
hash.Update(block, (size_t)rem);
1091
1092
block.SetMark(0);
1093
}
1094
1095
void
1096
ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], const byte *m, size_t mlen) {
1097
SHA512 hash;
1098
hash.Update(RS, 32);
1099
hash.Update(pk, 32);
1100
hash.Update(m, mlen);
1101
hash.Final(hram);
1102
}
1103
1104
void
1105
ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], std::istream& stream) {
1106
SHA512 hash;
1107
hash.Update(RS, 32);
1108
hash.Update(pk, 32);
1109
UpdateFromStream(hash, stream);
1110
hash.Final(hram);
1111
}
1112
1113
/* Returns bit 31 of (a - b). For operands below 2^31 that bit is 1 exactly
 * when a < b, so this serves as a branch-free less-than test. */
inline bignum256modm_element_t
lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
    const bignum256modm_element_t diff = a - b;
    return diff >> 31;
}
1117
1118
/* see HAC, Alg. 14.42 Step 4 */
1119
void
1120
reduce256_modm(bignum256modm r) {
1121
bignum256modm t;
1122
bignum256modm_element_t b = 0, pb, mask;
1123
1124
/* t = r - m */
1125
pb = 0;
1126
pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 30)); pb = b;
1127
pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 30)); pb = b;
1128
pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 30)); pb = b;
1129
pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 30)); pb = b;
1130
pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 30)); pb = b;
1131
pb += modm_m[5]; b = lt_modm(r[5], pb); t[5] = (r[5] - pb + (b << 30)); pb = b;
1132
pb += modm_m[6]; b = lt_modm(r[6], pb); t[6] = (r[6] - pb + (b << 30)); pb = b;
1133
pb += modm_m[7]; b = lt_modm(r[7], pb); t[7] = (r[7] - pb + (b << 30)); pb = b;
1134
pb += modm_m[8]; b = lt_modm(r[8], pb); t[8] = (r[8] - pb + (b << 16));
1135
1136
/* keep r if r was smaller than m */
1137
mask = b - 1;
1138
r[0] ^= mask & (r[0] ^ t[0]);
1139
r[1] ^= mask & (r[1] ^ t[1]);
1140
r[2] ^= mask & (r[2] ^ t[2]);
1141
r[3] ^= mask & (r[3] ^ t[3]);
1142
r[4] ^= mask & (r[4] ^ t[4]);
1143
r[5] ^= mask & (r[5] ^ t[5]);
1144
r[6] ^= mask & (r[6] ^ t[6]);
1145
r[7] ^= mask & (r[7] ^ t[7]);
1146
r[8] ^= mask & (r[8] ^ t[8]);
1147
}
1148
1149
/* Barrett reduction, see HAC, Alg. 14.42
 *
 * r:  receives the reduced result (9 limbs).
 * q1: the value being reduced shifted right by 248 bits, as 9 limbs.
 * r1: the low 264 bits of the value being reduced, as 9 limbs.
 *
 * Each r[i] is written only after r1[i] has been read; expand256_modm relies
 * on this by passing the same array for r and r1.
 */
1150
void
1151
barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
1152
bignum256modm q3, r2;
1153
word64 c;
1154
bignum256modm_element_t f, b, pb;
1155
1156
/* q1 = x >> 248 = 264 bits = 9 30 bit elements
 * q2 = mu * q1
 * q3 = (q2 / 256^(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264
 *
 * The first column sum below is computed only for its carry. From then on
 * every column contributes its top 6 bits (f >> 24) to one limb of q3 and
 * its low 24 bits (f << 6) to the limb before it.
 */
c = mul32x32_64(modm_mu[0], q1[7]) + mul32x32_64(modm_mu[1], q1[6]) + mul32x32_64(modm_mu[2], q1[5]) + mul32x32_64(modm_mu[3], q1[4]) + mul32x32_64(modm_mu[4], q1[3]) + mul32x32_64(modm_mu[5], q1[2]) + mul32x32_64(modm_mu[6], q1[1]) + mul32x32_64(modm_mu[7], q1[0]);
1161
c >>= 30;
1162
c += mul32x32_64(modm_mu[0], q1[8]) + mul32x32_64(modm_mu[1], q1[7]) + mul32x32_64(modm_mu[2], q1[6]) + mul32x32_64(modm_mu[3], q1[5]) + mul32x32_64(modm_mu[4], q1[4]) + mul32x32_64(modm_mu[5], q1[3]) + mul32x32_64(modm_mu[6], q1[2]) + mul32x32_64(modm_mu[7], q1[1]) + mul32x32_64(modm_mu[8], q1[0]);
1163
f = (bignum256modm_element_t)c; q3[0] = (f >> 24) & 0x3f; c >>= 30;
1164
c += mul32x32_64(modm_mu[1], q1[8]) + mul32x32_64(modm_mu[2], q1[7]) + mul32x32_64(modm_mu[3], q1[6]) + mul32x32_64(modm_mu[4], q1[5]) + mul32x32_64(modm_mu[5], q1[4]) + mul32x32_64(modm_mu[6], q1[3]) + mul32x32_64(modm_mu[7], q1[2]) + mul32x32_64(modm_mu[8], q1[1]);
1165
f = (bignum256modm_element_t)c; q3[0] |= (f << 6) & 0x3fffffff; q3[1] = (f >> 24) & 0x3f; c >>= 30;
1166
c += mul32x32_64(modm_mu[2], q1[8]) + mul32x32_64(modm_mu[3], q1[7]) + mul32x32_64(modm_mu[4], q1[6]) + mul32x32_64(modm_mu[5], q1[5]) + mul32x32_64(modm_mu[6], q1[4]) + mul32x32_64(modm_mu[7], q1[3]) + mul32x32_64(modm_mu[8], q1[2]);
1167
f = (bignum256modm_element_t)c; q3[1] |= (f << 6) & 0x3fffffff; q3[2] = (f >> 24) & 0x3f; c >>= 30;
1168
c += mul32x32_64(modm_mu[3], q1[8]) + mul32x32_64(modm_mu[4], q1[7]) + mul32x32_64(modm_mu[5], q1[6]) + mul32x32_64(modm_mu[6], q1[5]) + mul32x32_64(modm_mu[7], q1[4]) + mul32x32_64(modm_mu[8], q1[3]);
1169
f = (bignum256modm_element_t)c; q3[2] |= (f << 6) & 0x3fffffff; q3[3] = (f >> 24) & 0x3f; c >>= 30;
1170
c += mul32x32_64(modm_mu[4], q1[8]) + mul32x32_64(modm_mu[5], q1[7]) + mul32x32_64(modm_mu[6], q1[6]) + mul32x32_64(modm_mu[7], q1[5]) + mul32x32_64(modm_mu[8], q1[4]);
1171
f = (bignum256modm_element_t)c; q3[3] |= (f << 6) & 0x3fffffff; q3[4] = (f >> 24) & 0x3f; c >>= 30;
1172
c += mul32x32_64(modm_mu[5], q1[8]) + mul32x32_64(modm_mu[6], q1[7]) + mul32x32_64(modm_mu[7], q1[6]) + mul32x32_64(modm_mu[8], q1[5]);
1173
f = (bignum256modm_element_t)c; q3[4] |= (f << 6) & 0x3fffffff; q3[5] = (f >> 24) & 0x3f; c >>= 30;
1174
c += mul32x32_64(modm_mu[6], q1[8]) + mul32x32_64(modm_mu[7], q1[7]) + mul32x32_64(modm_mu[8], q1[6]);
1175
f = (bignum256modm_element_t)c; q3[5] |= (f << 6) & 0x3fffffff; q3[6] = (f >> 24) & 0x3f; c >>= 30;
1176
c += mul32x32_64(modm_mu[7], q1[8]) + mul32x32_64(modm_mu[8], q1[7]);
1177
f = (bignum256modm_element_t)c; q3[6] |= (f << 6) & 0x3fffffff; q3[7] = (f >> 24) & 0x3f; c >>= 30;
1178
c += mul32x32_64(modm_mu[8], q1[8]);
1179
/* last column: everything above bit 24 goes into q3[8], unmasked */
f = (bignum256modm_element_t)c; q3[7] |= (f << 6) & 0x3fffffff; q3[8] = (bignum256modm_element_t)(c >> 24);
1180
1181
/* r1 = (x mod 256^(32+1)) = x mod (2^8)^(32+1) = x & ((1 << 264) - 1)
 * r2 = (q3 * m) mod (256^(32+1)) = (q3 * m) & ((1 << 264) - 1)
 *
 * Only the nine low columns of q3 * m are formed; the top limb keeps 24 bits.
 */
c = mul32x32_64(modm_m[0], q3[0]);
1185
r2[0] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1186
c += mul32x32_64(modm_m[0], q3[1]) + mul32x32_64(modm_m[1], q3[0]);
1187
r2[1] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1188
c += mul32x32_64(modm_m[0], q3[2]) + mul32x32_64(modm_m[1], q3[1]) + mul32x32_64(modm_m[2], q3[0]);
1189
r2[2] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1190
c += mul32x32_64(modm_m[0], q3[3]) + mul32x32_64(modm_m[1], q3[2]) + mul32x32_64(modm_m[2], q3[1]) + mul32x32_64(modm_m[3], q3[0]);
1191
r2[3] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1192
c += mul32x32_64(modm_m[0], q3[4]) + mul32x32_64(modm_m[1], q3[3]) + mul32x32_64(modm_m[2], q3[2]) + mul32x32_64(modm_m[3], q3[1]) + mul32x32_64(modm_m[4], q3[0]);
1193
r2[4] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1194
c += mul32x32_64(modm_m[0], q3[5]) + mul32x32_64(modm_m[1], q3[4]) + mul32x32_64(modm_m[2], q3[3]) + mul32x32_64(modm_m[3], q3[2]) + mul32x32_64(modm_m[4], q3[1]) + mul32x32_64(modm_m[5], q3[0]);
1195
r2[5] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1196
c += mul32x32_64(modm_m[0], q3[6]) + mul32x32_64(modm_m[1], q3[5]) + mul32x32_64(modm_m[2], q3[4]) + mul32x32_64(modm_m[3], q3[3]) + mul32x32_64(modm_m[4], q3[2]) + mul32x32_64(modm_m[5], q3[1]) + mul32x32_64(modm_m[6], q3[0]);
1197
r2[6] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1198
c += mul32x32_64(modm_m[0], q3[7]) + mul32x32_64(modm_m[1], q3[6]) + mul32x32_64(modm_m[2], q3[5]) + mul32x32_64(modm_m[3], q3[4]) + mul32x32_64(modm_m[4], q3[3]) + mul32x32_64(modm_m[5], q3[2]) + mul32x32_64(modm_m[6], q3[1]) + mul32x32_64(modm_m[7], q3[0]);
1199
r2[7] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1200
c += mul32x32_64(modm_m[0], q3[8]) + mul32x32_64(modm_m[1], q3[7]) + mul32x32_64(modm_m[2], q3[6]) + mul32x32_64(modm_m[3], q3[5]) + mul32x32_64(modm_m[4], q3[4]) + mul32x32_64(modm_m[5], q3[3]) + mul32x32_64(modm_m[6], q3[2]) + mul32x32_64(modm_m[7], q3[1]) + mul32x32_64(modm_m[8], q3[0]);
1201
r2[8] = (bignum256modm_element_t)(c & 0xffffff);
1202
1203
/* r = r1 - r2
 * if (r < 0) r += (1 << 264)
 *
 * Limb-wise subtraction with borrow: b is 1 when a limb underflows, and the
 * limb is then topped up by 2^30 (2^24 for the top limb).
 */
pb = 0;
1206
pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 30)); pb = b;
1207
pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 30)); pb = b;
1208
pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 30)); pb = b;
1209
pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 30)); pb = b;
1210
pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 30)); pb = b;
1211
pb += r2[5]; b = lt_modm(r1[5], pb); r[5] = (r1[5] - pb + (b << 30)); pb = b;
1212
pb += r2[6]; b = lt_modm(r1[6], pb); r[6] = (r1[6] - pb + (b << 30)); pb = b;
1213
pb += r2[7]; b = lt_modm(r1[7], pb); r[7] = (r1[7] - pb + (b << 30)); pb = b;
1214
pb += r2[8]; b = lt_modm(r1[8], pb); r[8] = (r1[8] - pb + (b << 24));
1215
1216
/* two conditional subtractions of m finish the reduction */
reduce256_modm(r);
1217
reduce256_modm(r);
1218
}
1219
1220
/* addition modulo m */
1221
void
1222
add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
1223
bignum256modm_element_t c;
1224
1225
c = x[0] + y[0]; r[0] = c & 0x3fffffff; c >>= 30;
1226
c += x[1] + y[1]; r[1] = c & 0x3fffffff; c >>= 30;
1227
c += x[2] + y[2]; r[2] = c & 0x3fffffff; c >>= 30;
1228
c += x[3] + y[3]; r[3] = c & 0x3fffffff; c >>= 30;
1229
c += x[4] + y[4]; r[4] = c & 0x3fffffff; c >>= 30;
1230
c += x[5] + y[5]; r[5] = c & 0x3fffffff; c >>= 30;
1231
c += x[6] + y[6]; r[6] = c & 0x3fffffff; c >>= 30;
1232
c += x[7] + y[7]; r[7] = c & 0x3fffffff; c >>= 30;
1233
c += x[8] + y[8]; r[8] = c;
1234
1235
reduce256_modm(r);
1236
}
1237
1238
/* multiplication modulo m
 *
 * r = x * y reduced by barrett_reduce256_modm.
 *
 * The product is built column by column in the 64-bit accumulator c. While
 * doing so it is split into the two inputs the Barrett step takes:
 *   r1 - the low 264 bits (eight 30-bit limbs plus a 24-bit top limb)
 *   q1 - the product shifted right by 248 bits, repacked into 30-bit limbs
 */
1239
void
1240
mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
1241
bignum256modm r1, q1;
1242
word64 c;
1243
bignum256modm_element_t f;
1244
1245
/* columns 0..7 only feed r1 */
c = mul32x32_64(x[0], y[0]);
1246
f = (bignum256modm_element_t)c; r1[0] = (f & 0x3fffffff); c >>= 30;
1247
c += mul32x32_64(x[0], y[1]) + mul32x32_64(x[1], y[0]);
1248
f = (bignum256modm_element_t)c; r1[1] = (f & 0x3fffffff); c >>= 30;
1249
c += mul32x32_64(x[0], y[2]) + mul32x32_64(x[1], y[1]) + mul32x32_64(x[2], y[0]);
1250
f = (bignum256modm_element_t)c; r1[2] = (f & 0x3fffffff); c >>= 30;
1251
c += mul32x32_64(x[0], y[3]) + mul32x32_64(x[1], y[2]) + mul32x32_64(x[2], y[1]) + mul32x32_64(x[3], y[0]);
1252
f = (bignum256modm_element_t)c; r1[3] = (f & 0x3fffffff); c >>= 30;
1253
c += mul32x32_64(x[0], y[4]) + mul32x32_64(x[1], y[3]) + mul32x32_64(x[2], y[2]) + mul32x32_64(x[3], y[1]) + mul32x32_64(x[4], y[0]);
1254
f = (bignum256modm_element_t)c; r1[4] = (f & 0x3fffffff); c >>= 30;
1255
c += mul32x32_64(x[0], y[5]) + mul32x32_64(x[1], y[4]) + mul32x32_64(x[2], y[3]) + mul32x32_64(x[3], y[2]) + mul32x32_64(x[4], y[1]) + mul32x32_64(x[5], y[0]);
1256
f = (bignum256modm_element_t)c; r1[5] = (f & 0x3fffffff); c >>= 30;
1257
c += mul32x32_64(x[0], y[6]) + mul32x32_64(x[1], y[5]) + mul32x32_64(x[2], y[4]) + mul32x32_64(x[3], y[3]) + mul32x32_64(x[4], y[2]) + mul32x32_64(x[5], y[1]) + mul32x32_64(x[6], y[0]);
1258
f = (bignum256modm_element_t)c; r1[6] = (f & 0x3fffffff); c >>= 30;
1259
c += mul32x32_64(x[0], y[7]) + mul32x32_64(x[1], y[6]) + mul32x32_64(x[2], y[5]) + mul32x32_64(x[3], y[4]) + mul32x32_64(x[4], y[3]) + mul32x32_64(x[5], y[2]) + mul32x32_64(x[6], y[1]) + mul32x32_64(x[7], y[0]);
1260
f = (bignum256modm_element_t)c; r1[7] = (f & 0x3fffffff); c >>= 30;
1261
c += mul32x32_64(x[0], y[8]) + mul32x32_64(x[1], y[7]) + mul32x32_64(x[2], y[6]) + mul32x32_64(x[3], y[5]) + mul32x32_64(x[4], y[4]) + mul32x32_64(x[5], y[3]) + mul32x32_64(x[6], y[2]) + mul32x32_64(x[7], y[1]) + mul32x32_64(x[8], y[0]);
1262
/* column 8 is shared: its low 24 bits finish r1, the bits from bit 8 upwards
 * start q1 */
f = (bignum256modm_element_t)c; r1[8] = (f & 0x00ffffff); q1[0] = (f >> 8) & 0x3fffff; c >>= 30;
1263
/* columns 9..16 only feed q1: each supplies the top 8 bits of one q1 limb
 * (f << 22) and the low 22 bits of the next one (f >> 8) */
c += mul32x32_64(x[1], y[8]) + mul32x32_64(x[2], y[7]) + mul32x32_64(x[3], y[6]) + mul32x32_64(x[4], y[5]) + mul32x32_64(x[5], y[4]) + mul32x32_64(x[6], y[3]) + mul32x32_64(x[7], y[2]) + mul32x32_64(x[8], y[1]);
1264
f = (bignum256modm_element_t)c; q1[0] = (q1[0] | (f << 22)) & 0x3fffffff; q1[1] = (f >> 8) & 0x3fffff; c >>= 30;
1265
c += mul32x32_64(x[2], y[8]) + mul32x32_64(x[3], y[7]) + mul32x32_64(x[4], y[6]) + mul32x32_64(x[5], y[5]) + mul32x32_64(x[6], y[4]) + mul32x32_64(x[7], y[3]) + mul32x32_64(x[8], y[2]);
1266
f = (bignum256modm_element_t)c; q1[1] = (q1[1] | (f << 22)) & 0x3fffffff; q1[2] = (f >> 8) & 0x3fffff; c >>= 30;
1267
c += mul32x32_64(x[3], y[8]) + mul32x32_64(x[4], y[7]) + mul32x32_64(x[5], y[6]) + mul32x32_64(x[6], y[5]) + mul32x32_64(x[7], y[4]) + mul32x32_64(x[8], y[3]);
1268
f = (bignum256modm_element_t)c; q1[2] = (q1[2] | (f << 22)) & 0x3fffffff; q1[3] = (f >> 8) & 0x3fffff; c >>= 30;
1269
c += mul32x32_64(x[4], y[8]) + mul32x32_64(x[5], y[7]) + mul32x32_64(x[6], y[6]) + mul32x32_64(x[7], y[5]) + mul32x32_64(x[8], y[4]);
1270
f = (bignum256modm_element_t)c; q1[3] = (q1[3] | (f << 22)) & 0x3fffffff; q1[4] = (f >> 8) & 0x3fffff; c >>= 30;
1271
c += mul32x32_64(x[5], y[8]) + mul32x32_64(x[6], y[7]) + mul32x32_64(x[7], y[6]) + mul32x32_64(x[8], y[5]);
1272
f = (bignum256modm_element_t)c; q1[4] = (q1[4] | (f << 22)) & 0x3fffffff; q1[5] = (f >> 8) & 0x3fffff; c >>= 30;
1273
c += mul32x32_64(x[6], y[8]) + mul32x32_64(x[7], y[7]) + mul32x32_64(x[8], y[6]);
1274
f = (bignum256modm_element_t)c; q1[5] = (q1[5] | (f << 22)) & 0x3fffffff; q1[6] = (f >> 8) & 0x3fffff; c >>= 30;
1275
c += mul32x32_64(x[7], y[8]) + mul32x32_64(x[8], y[7]);
1276
f = (bignum256modm_element_t)c; q1[6] = (q1[6] | (f << 22)) & 0x3fffffff; q1[7] = (f >> 8) & 0x3fffff; c >>= 30;
1277
c += mul32x32_64(x[8], y[8]);
1278
f = (bignum256modm_element_t)c; q1[7] = (q1[7] | (f << 22)) & 0x3fffffff; q1[8] = (f >> 8) & 0x3fffff;
1279
1280
barrett_reduce256_modm(r, q1, r1);
1281
}
1282
1283
void
1284
expand256_modm(bignum256modm out, const byte *in, size_t len) {
1285
byte work[64] = {0};
1286
bignum256modm_element_t x[16];
1287
bignum256modm q1;
1288
1289
std::memcpy(work, in, len);
1290
x[0] = U8TO32_LE(work + 0);
1291
x[1] = U8TO32_LE(work + 4);
1292
x[2] = U8TO32_LE(work + 8);
1293
x[3] = U8TO32_LE(work + 12);
1294
x[4] = U8TO32_LE(work + 16);
1295
x[5] = U8TO32_LE(work + 20);
1296
x[6] = U8TO32_LE(work + 24);
1297
x[7] = U8TO32_LE(work + 28);
1298
x[8] = U8TO32_LE(work + 32);
1299
x[9] = U8TO32_LE(work + 36);
1300
x[10] = U8TO32_LE(work + 40);
1301
x[11] = U8TO32_LE(work + 44);
1302
x[12] = U8TO32_LE(work + 48);
1303
x[13] = U8TO32_LE(work + 52);
1304
x[14] = U8TO32_LE(work + 56);
1305
x[15] = U8TO32_LE(work + 60);
1306
1307
/* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
1308
out[0] = ( x[0]) & 0x3fffffff;
1309
out[1] = ((x[ 0] >> 30) | (x[ 1] << 2)) & 0x3fffffff;
1310
out[2] = ((x[ 1] >> 28) | (x[ 2] << 4)) & 0x3fffffff;
1311
out[3] = ((x[ 2] >> 26) | (x[ 3] << 6)) & 0x3fffffff;
1312
out[4] = ((x[ 3] >> 24) | (x[ 4] << 8)) & 0x3fffffff;
1313
out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
1314
out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
1315
out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
1316
out[8] = ((x[ 7] >> 16) | (x[ 8] << 16)) & 0x00ffffff;
1317
1318
/* 8*31 = 248 bits, no need to reduce */
1319
if (len < 32)
1320
return;
1321
1322
/* q1 = x >> 248 = 264 bits = 9 30 bit elements */
1323
q1[0] = ((x[ 7] >> 24) | (x[ 8] << 8)) & 0x3fffffff;
1324
q1[1] = ((x[ 8] >> 22) | (x[ 9] << 10)) & 0x3fffffff;
1325
q1[2] = ((x[ 9] >> 20) | (x[10] << 12)) & 0x3fffffff;
1326
q1[3] = ((x[10] >> 18) | (x[11] << 14)) & 0x3fffffff;
1327
q1[4] = ((x[11] >> 16) | (x[12] << 16)) & 0x3fffffff;
1328
q1[5] = ((x[12] >> 14) | (x[13] << 18)) & 0x3fffffff;
1329
q1[6] = ((x[13] >> 12) | (x[14] << 20)) & 0x3fffffff;
1330
q1[7] = ((x[14] >> 10) | (x[15] << 22)) & 0x3fffffff;
1331
q1[8] = ((x[15] >> 8) );
1332
1333
barrett_reduce256_modm(out, q1, out);
1334
}
1335
1336
void
1337
expand_raw256_modm(bignum256modm out, const byte in[32]) {
1338
bignum256modm_element_t x[8];
1339
1340
x[0] = U8TO32_LE(in + 0);
1341
x[1] = U8TO32_LE(in + 4);
1342
x[2] = U8TO32_LE(in + 8);
1343
x[3] = U8TO32_LE(in + 12);
1344
x[4] = U8TO32_LE(in + 16);
1345
x[5] = U8TO32_LE(in + 20);
1346
x[6] = U8TO32_LE(in + 24);
1347
x[7] = U8TO32_LE(in + 28);
1348
1349
out[0] = ( x[0]) & 0x3fffffff;
1350
out[1] = ((x[ 0] >> 30) | (x[ 1] << 2)) & 0x3fffffff;
1351
out[2] = ((x[ 1] >> 28) | (x[ 2] << 4)) & 0x3fffffff;
1352
out[3] = ((x[ 2] >> 26) | (x[ 3] << 6)) & 0x3fffffff;
1353
out[4] = ((x[ 3] >> 24) | (x[ 4] << 8)) & 0x3fffffff;
1354
out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
1355
out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
1356
out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
1357
out[8] = ((x[ 7] >> 16) ) & 0x0000ffff;
1358
}
1359
1360
void
1361
contract256_modm(byte out[32], const bignum256modm in) {
1362
U32TO8_LE(out + 0, (in[0] ) | (in[1] << 30));
1363
U32TO8_LE(out + 4, (in[1] >> 2) | (in[2] << 28));
1364
U32TO8_LE(out + 8, (in[2] >> 4) | (in[3] << 26));
1365
U32TO8_LE(out + 12, (in[3] >> 6) | (in[4] << 24));
1366
U32TO8_LE(out + 16, (in[4] >> 8) | (in[5] << 22));
1367
U32TO8_LE(out + 20, (in[5] >> 10) | (in[6] << 20));
1368
U32TO8_LE(out + 24, (in[6] >> 12) | (in[7] << 18));
1369
U32TO8_LE(out + 28, (in[7] >> 14) | (in[8] << 16));
1370
}
1371
1372
void
1373
contract256_window4_modm(signed char r[64], const bignum256modm in) {
1374
char carry;
1375
signed char *quads = r;
1376
bignum256modm_element_t i, j, v;
1377
1378
for (i = 0; i < 8; i += 2) {
1379
v = in[i];
1380
for (j = 0; j < 7; j++) {
1381
*quads++ = (v & 15);
1382
v >>= 4;
1383
}
1384
v |= (in[i+1] << 2);
1385
for (j = 0; j < 8; j++) {
1386
*quads++ = (v & 15);
1387
v >>= 4;
1388
}
1389
}
1390
1391
v = in[8];
1392
*quads++ = (v & 15); v >>= 4;
1393
*quads++ = (v & 15); v >>= 4;
1394
*quads++ = (v & 15); v >>= 4;
1395
*quads++ = (v & 15); v >>= 4;
1396
1397
/* making it signed */
1398
carry = 0;
1399
for(i = 0; i < 63; i++) {
1400
r[i] += carry;
1401
r[i+1] += (r[i] >> 4);
1402
r[i] &= 15;
1403
carry = (r[i] >> 3);
1404
r[i] -= (carry << 4);
1405
}
1406
r[63] += carry;
1407
}
1408
1409
void
1410
contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
1411
int i,j,k,b;
1412
int m = (1 << (windowsize - 1)) - 1, soplen = 256;
1413
signed char *bits = r;
1414
bignum256modm_element_t v;
1415
1416
/* first put the binary expansion into r */
1417
for (i = 0; i < 8; i++) {
1418
v = s[i];
1419
for (j = 0; j < 30; j++, v >>= 1)
1420
*bits++ = (v & 1);
1421
}
1422
v = s[8];
1423
for (j = 0; j < 16; j++, v >>= 1)
1424
*bits++ = (v & 1);
1425
1426
/* Making it sliding window */
1427
for (j = 0; j < soplen; j++) {
1428
if (!r[j])
1429
continue;
1430
1431
for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
1432
if ((r[j] + (r[j + b] << b)) <= m) {
1433
r[j] += r[j + b] << b;
1434
r[j + b] = 0;
1435
} else if ((r[j] - (r[j + b] << b)) >= -m) {
1436
r[j] -= r[j + b] << b;
1437
for (k = j + b; k < soplen; k++) {
1438
if (!r[k]) {
1439
r[k] = 1;
1440
break;
1441
}
1442
r[k] = 0;
1443
}
1444
} else if (r[j + b]) {
1445
break;
1446
}
1447
}
1448
}
1449
}
1450
1451
inline void
1452
ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {
1453
curve25519_mul(r->x, p->x, p->t);
1454
curve25519_mul(r->y, p->y, p->z);
1455
curve25519_mul(r->z, p->z, p->t);
1456
}
1457
1458
inline void
1459
ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {
1460
curve25519_mul(r->x, p->x, p->t);
1461
curve25519_mul(r->y, p->y, p->z);
1462
curve25519_mul(r->z, p->z, p->t);
1463
curve25519_mul(r->t, p->x, p->y);
1464
}
1465
1466
void
1467
ge25519_full_to_pniels(ge25519_pniels *p, const ge25519 *r) {
1468
curve25519_sub(p->ysubx, r->y, r->x);
1469
curve25519_add(p->xaddy, r->y, r->x);
1470
curve25519_copy(p->z, r->z);
1471
curve25519_mul(p->t2d, r->t, ge25519_ec2d);
1472
}
1473
1474
void
1475
ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519 *q) {
1476
bignum25519 a,b,c,d,t,u;
1477
1478
curve25519_sub(a, p->y, p->x);
1479
curve25519_add(b, p->y, p->x);
1480
curve25519_sub(t, q->y, q->x);
1481
curve25519_add(u, q->y, q->x);
1482
curve25519_mul(a, a, t);
1483
curve25519_mul(b, b, u);
1484
curve25519_mul(c, p->t, q->t);
1485
curve25519_mul(c, c, ge25519_ec2d);
1486
curve25519_mul(d, p->z, q->z);
1487
curve25519_add(d, d, d);
1488
curve25519_sub(r->x, b, a);
1489
curve25519_add(r->y, b, a);
1490
curve25519_add_after_basic(r->z, d, c);
1491
curve25519_sub_after_basic(r->t, d, c);
1492
}
1493
1494
void
1495
ge25519_double_p1p1(ge25519_p1p1 *r, const ge25519 *p) {
1496
bignum25519 a,b,c;
1497
1498
curve25519_square(a, p->x);
1499
curve25519_square(b, p->y);
1500
curve25519_square(c, p->z);
1501
curve25519_add_reduce(c, c, c);
1502
curve25519_add(r->x, p->x, p->y);
1503
curve25519_square(r->x, r->x);
1504
curve25519_add(r->y, b, a);
1505
curve25519_sub(r->z, b, a);
1506
curve25519_sub_after_basic(r->x, r->x, r->y);
1507
curve25519_sub_after_basic(r->t, c, r->z);
1508
}
1509
1510
void
1511
ge25519_nielsadd2_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_niels *q, byte signbit) {
1512
const bignum25519 *qb = (const bignum25519 *)q;
1513
bignum25519 *rb = (bignum25519 *)r;
1514
bignum25519 a,b,c;
1515
1516
curve25519_sub(a, p->y, p->x);
1517
curve25519_add(b, p->y, p->x);
1518
curve25519_mul(a, a, qb[signbit]); /* x for +, y for - */
1519
curve25519_mul(r->x, b, qb[signbit^1]); /* y for +, x for - */
1520
curve25519_add(r->y, r->x, a);
1521
curve25519_sub(r->x, r->x, a);
1522
curve25519_mul(c, p->t, q->t2d);
1523
curve25519_add_reduce(r->t, p->z, p->z);
1524
curve25519_copy(r->z, r->t);
1525
curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */
1526
curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */
1527
}
1528
1529
void
1530
ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_pniels *q, byte signbit) {
1531
const bignum25519 *qb = (const bignum25519 *)q;
1532
bignum25519 *rb = (bignum25519 *)r;
1533
bignum25519 a,b,c;
1534
1535
curve25519_sub(a, p->y, p->x);
1536
curve25519_add(b, p->y, p->x);
1537
curve25519_mul(a, a, qb[signbit]); /* ysubx for +, xaddy for - */
1538
curve25519_mul(r->x, b, qb[signbit^1]); /* xaddy for +, ysubx for - */
1539
curve25519_add(r->y, r->x, a);
1540
curve25519_sub(r->x, r->x, a);
1541
curve25519_mul(c, p->t, q->t2d);
1542
curve25519_mul(r->t, p->z, q->z);
1543
curve25519_add_reduce(r->t, r->t, r->t);
1544
curve25519_copy(r->z, r->t);
1545
curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */
1546
curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */
1547
}
1548
1549
void
1550
ge25519_double_partial(ge25519 *r, const ge25519 *p) {
1551
ge25519_p1p1 t;
1552
ge25519_double_p1p1(&t, p);
1553
ge25519_p1p1_to_partial(r, &t);
1554
}
1555
1556
void
1557
ge25519_double(ge25519 *r, const ge25519 *p) {
1558
ge25519_p1p1 t;
1559
ge25519_double_p1p1(&t, p);
1560
ge25519_p1p1_to_full(r, &t);
1561
}
1562
1563
void
1564
ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q) {
1565
ge25519_p1p1 t;
1566
ge25519_add_p1p1(&t, p, q);
1567
ge25519_p1p1_to_full(r, &t);
1568
}
1569
1570
void
1571
ge25519_nielsadd2(ge25519 *r, const ge25519_niels *q) {
1572
bignum25519 a,b,c,e,f,g,h;
1573
1574
curve25519_sub(a, r->y, r->x);
1575
curve25519_add(b, r->y, r->x);
1576
curve25519_mul(a, a, q->ysubx);
1577
curve25519_mul(e, b, q->xaddy);
1578
curve25519_add(h, e, a);
1579
curve25519_sub(e, e, a);
1580
curve25519_mul(c, r->t, q->t2d);
1581
curve25519_add(f, r->z, r->z);
1582
curve25519_add_after_basic(g, f, c);
1583
curve25519_sub_after_basic(f, f, c);
1584
curve25519_mul(r->x, e, f);
1585
curve25519_mul(r->y, h, g);
1586
curve25519_mul(r->z, g, f);
1587
curve25519_mul(r->t, e, h);
1588
}
1589
1590
void
1591
ge25519_pnielsadd(ge25519_pniels *r, const ge25519 *p, const ge25519_pniels *q) {
1592
bignum25519 a,b,c,x,y,z,t;
1593
1594
curve25519_sub(a, p->y, p->x);
1595
curve25519_add(b, p->y, p->x);
1596
curve25519_mul(a, a, q->ysubx);
1597
curve25519_mul(x, b, q->xaddy);
1598
curve25519_add(y, x, a);
1599
curve25519_sub(x, x, a);
1600
curve25519_mul(c, p->t, q->t2d);
1601
curve25519_mul(t, p->z, q->z);
1602
curve25519_add(t, t, t);
1603
curve25519_add_after_basic(z, t, c);
1604
curve25519_sub_after_basic(t, t, c);
1605
curve25519_mul(r->xaddy, x, t);
1606
curve25519_mul(r->ysubx, y, z);
1607
curve25519_mul(r->z, z, t);
1608
curve25519_mul(r->t2d, x, y);
1609
curve25519_copy(y, r->ysubx);
1610
curve25519_sub(r->ysubx, r->ysubx, r->xaddy);
1611
curve25519_add(r->xaddy, r->xaddy, y);
1612
curve25519_mul(r->t2d, r->t2d, ge25519_ec2d);
1613
}
1614
1615
void
1616
ge25519_pack(byte r[32], const ge25519 *p) {
1617
bignum25519 tx, ty, zi;
1618
byte parity[32];
1619
curve25519_recip(zi, p->z);
1620
curve25519_mul(tx, p->x, zi);
1621
curve25519_mul(ty, p->y, zi);
1622
curve25519_contract(r, ty);
1623
curve25519_contract(parity, tx);
1624
r[31] ^= ((parity[0] & 1) << 7);
1625
}
1626
1627
int
1628
ed25519_verify(const byte *x, const byte *y, size_t len) {
1629
size_t differentbits = 0;
1630
while (len--)
1631
differentbits |= (*x++ ^ *y++);
1632
return (int) (1 & ((differentbits - 1) >> 8));
1633
}
1634
1635
int
1636
ge25519_unpack_negative_vartime(ge25519 *r, const byte p[32]) {
1637
const byte zero[32] = {0};
1638
const bignum25519 one = {1};
1639
byte parity = p[31] >> 7;
1640
byte check[32];
1641
bignum25519 t, root, num, den, d3;
1642
1643
curve25519_expand(r->y, p);
1644
curve25519_copy(r->z, one);
1645
curve25519_square(num, r->y); /* x = y^2 */
1646
curve25519_mul(den, num, ge25519_ecd); /* den = dy^2 */
1647
curve25519_sub_reduce(num, num, r->z); /* x = y^1 - 1 */
1648
curve25519_add(den, den, r->z); /* den = dy^2 + 1 */
1649
1650
/* Computation of sqrt(num/den) */
1651
/* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */
1652
curve25519_square(t, den);
1653
curve25519_mul(d3, t, den);
1654
curve25519_square(r->x, d3);
1655
curve25519_mul(r->x, r->x, den);
1656
curve25519_mul(r->x, r->x, num);
1657
curve25519_pow_two252m3(r->x, r->x);
1658
1659
/* 2. computation of r->x = num * den^3 * (num*den^7)^((p-5)/8) */
1660
curve25519_mul(r->x, r->x, d3);
1661
curve25519_mul(r->x, r->x, num);
1662
1663
/* 3. Check if either of the roots works: */
1664
curve25519_square(t, r->x);
1665
curve25519_mul(t, t, den);
1666
curve25519_sub_reduce(root, t, num);
1667
curve25519_contract(check, root);
1668
if (!ed25519_verify(check, zero, 32)) {
1669
curve25519_add_reduce(t, t, num);
1670
curve25519_contract(check, t);
1671
if (!ed25519_verify(check, zero, 32))
1672
return 0;
1673
curve25519_mul(r->x, r->x, ge25519_sqrtneg1);
1674
}
1675
1676
curve25519_contract(check, r->x);
1677
if ((check[0] & 1) == parity) {
1678
curve25519_copy(t, r->x);
1679
curve25519_neg(r->x, t);
1680
}
1681
curve25519_mul(r->t, r->x, r->y);
1682
return 1;
1683
}
1684
1685
/* computes [s1]p1 + [s2]basepoint */
1686
void
1687
ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) {
1688
signed char slide1[256], slide2[256];
1689
ge25519_pniels pre1[S1_TABLE_SIZE];
1690
ge25519 d1;
1691
ge25519_p1p1 t;
1692
sword32 i;
1693
1694
contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE);
1695
contract256_slidingwindow_modm(slide2, s2, S2_SWINDOWSIZE);
1696
1697
ge25519_double(&d1, p1);
1698
ge25519_full_to_pniels(pre1, p1);
1699
for (i = 0; i < S1_TABLE_SIZE - 1; i++)
1700
ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]);
1701
1702
/* set neutral */
1703
std::memset(r, 0, sizeof(ge25519));
1704
r->y[0] = 1;
1705
r->z[0] = 1;
1706
1707
i = 255;
1708
while ((i >= 0) && !(slide1[i] | slide2[i]))
1709
i--;
1710
1711
for (; i >= 0; i--) {
1712
ge25519_double_p1p1(&t, r);
1713
1714
if (slide1[i]) {
1715
ge25519_p1p1_to_full(r, &t);
1716
ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (byte)slide1[i] >> 7);
1717
}
1718
1719
if (slide2[i]) {
1720
ge25519_p1p1_to_full(r, &t);
1721
ge25519_nielsadd2_p1p1(&t, r, &ge25519_niels_sliding_multiples[abs(slide2[i]) / 2], (byte)slide2[i] >> 7);
1722
}
1723
1724
ge25519_p1p1_to_partial(r, &t);
1725
}
1726
}
1727
1728
#if !defined(HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS)
1729
1730
/* Branch-free equality test: returns 1 when b == c and 0 otherwise.
   The result is only meaningful while (b ^ c) stays at or below 2^31;
   the caller in this file passes small window indices. */
word32
ge25519_windowb_equal(word32 b, word32 c) {
    const word32 difference = b ^ c;

    /* difference == 0 wraps to 0xffffffff, whose top bit is set */
    return (difference - 1) >> 31;
}
1734
1735
void
1736
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const byte table[256][96], word32 pos, signed char b) {
1737
bignum25519 neg;
1738
word32 sign = (word32)((byte)b >> 7);
1739
word32 mask = ~(sign - 1);
1740
word32 u = (b + mask) ^ mask;
1741
word32 i;
1742
1743
/* ysubx, xaddy, t2d in packed form. initialize to ysubx = 1, xaddy = 1, t2d = 0 */
1744
byte packed[96] = {0};
1745
packed[0] = 1;
1746
packed[32] = 1;
1747
1748
for (i = 0; i < 8; i++)
1749
curve25519_move_conditional_bytes(packed, table[(pos * 8) + i], ge25519_windowb_equal(u, i + 1));
1750
1751
/* expand in to t */
1752
curve25519_expand(t->ysubx, packed + 0);
1753
curve25519_expand(t->xaddy, packed + 32);
1754
curve25519_expand(t->t2d , packed + 64);
1755
1756
/* adjust for sign */
1757
curve25519_swap_conditional(t->ysubx, t->xaddy, sign);
1758
curve25519_neg(neg, t->t2d);
1759
curve25519_swap_conditional(t->t2d, neg, sign);
1760
}
1761
1762
#endif /* HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS */
1763
1764
/* computes [s]basepoint */
1765
void
1766
ge25519_scalarmult_base_niels(ge25519 *r, const byte basepoint_table[256][96], const bignum256modm s) {
1767
signed char b[64];
1768
word32 i;
1769
ge25519_niels t;
1770
1771
contract256_window4_modm(b, s);
1772
1773
ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[1]);
1774
curve25519_sub_reduce(r->x, t.xaddy, t.ysubx);
1775
curve25519_add_reduce(r->y, t.xaddy, t.ysubx);
1776
std::memset(r->z, 0, sizeof(bignum25519));
1777
curve25519_copy(r->t, t.t2d);
1778
r->z[0] = 2;
1779
for (i = 3; i < 64; i += 2) {
1780
ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);
1781
ge25519_nielsadd2(r, &t);
1782
}
1783
ge25519_double_partial(r, r);
1784
ge25519_double_partial(r, r);
1785
ge25519_double_partial(r, r);
1786
ge25519_double(r, r);
1787
ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[0]);
1788
curve25519_mul(t.t2d, t.t2d, ge25519_ecd);
1789
ge25519_nielsadd2(r, &t);
1790
for(i = 2; i < 64; i += 2) {
1791
ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);
1792
ge25519_nielsadd2(r, &t);
1793
}
1794
}
1795
1796
ANONYMOUS_NAMESPACE_END
1797
NAMESPACE_END // Ed25519
1798
NAMESPACE_END // Donna
1799
NAMESPACE_END // CryptoPP
1800
1801
//***************************** curve25519 *****************************//
1802
1803
NAMESPACE_BEGIN(CryptoPP)
1804
NAMESPACE_BEGIN(Donna)
1805
1806
/// \brief Portable X25519 scalar multiplication
/// \param sharedKey receives the 32-byte result
/// \param secretKey the 32-byte scalar; it is copied and clamped locally
/// \param othersKey the 32-byte input point
/// \return always 0
/// \details The scalar bits are consumed from bit 253 down to bit 2 using
///  conditional swaps, followed by three plain doublings for the three
///  cleared low bits, and a final division of x by z.
int curve25519_mult_CXX(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32])
{
    using namespace CryptoPP::Donna::X25519;

    // Work on a clamped copy of the scalar held in a SecBlock
    FixedSizeSecBlock<byte, 32> e;
    for (size_t k = 0; k < 32; ++k)
        e[k] = secretKey[k];
    e[0] &= 0xf8;
    e[31] &= 0x7f;
    e[31] |= 0x40;

    bignum25519 nqpqx = {1}, nqpqz = {0}, nqz = {1}, nqx;
    bignum25519 q, qx, qpqx, qqx, zzz, zmone;
    size_t bit, lastbit;

    curve25519_expand(q, othersKey);
    curve25519_copy(nqx, q);

    /* bit 255 is always 0 and bit 254 is always 1, so bit 255 is skipped and
       the state starts out pre-swapped for bit 254 */
    lastbit = 1;

    /* the loop processes bits 254..3 while swapping in bits 253..2 */
    for (int pos = 253; pos >= 2; pos--) {
        /* combined differential addition and doubling step */
        curve25519_add(qx, nqx, nqz);
        curve25519_sub(nqz, nqx, nqz);
        curve25519_add(qpqx, nqpqx, nqpqz);
        curve25519_sub(nqpqz, nqpqx, nqpqz);
        curve25519_mul(nqpqx, qpqx, nqz);
        curve25519_mul(nqpqz, qx, nqpqz);
        curve25519_add(qqx, nqpqx, nqpqz);
        curve25519_sub(nqpqz, nqpqx, nqpqz);
        curve25519_square(nqpqz, nqpqz);
        curve25519_square(nqpqx, qqx);
        curve25519_mul(nqpqz, nqpqz, q);
        curve25519_square(qx, qx);
        curve25519_square(nqz, nqz);
        curve25519_mul(nqx, qx, nqz);
        curve25519_sub(nqz, qx, nqz);
        curve25519_scalar_product(zzz, nqz, 121665);
        curve25519_add(zzz, zzz, qx);
        curve25519_mul(nqz, nqz, zzz);

        /* swap only when the current bit differs from the previous one */
        bit = (e[pos/8] >> (pos & 7)) & 1;
        const word32 doSwap = (word32)(bit ^ lastbit);
        curve25519_swap_conditional(nqx, nqpqx, doSwap);
        curve25519_swap_conditional(nqz, nqpqz, doSwap);
        lastbit = bit;
    }

    /* the low three bits are always zero, so only doublings remain */
    int remaining = 3;
    while (remaining-- > 0) {
        curve25519_add(qx, nqx, nqz);
        curve25519_sub(nqz, nqx, nqz);
        curve25519_square(qx, qx);
        curve25519_square(nqz, nqz);
        curve25519_mul(nqx, qx, nqz);
        curve25519_sub(nqz, qx, nqz);
        curve25519_scalar_product(zzz, nqz, 121665);
        curve25519_add(zzz, zzz, qx);
        curve25519_mul(nqz, nqz, zzz);
    }

    /* result = x / z, serialised into sharedKey */
    curve25519_recip(zmone, nqz);
    curve25519_mul(nqz, nqx, zmone);
    curve25519_contract(sharedKey, nqz);

    return 0;
}
1872
1873
/// \brief Derives a public key by multiplying the secret key with basePoint
/// \param publicKey receives the 32-byte result
/// \param secretKey the 32-byte secret key
/// \return the return value of the selected implementation
/// \details Uses the SSE2 implementation when it is compiled in and
///  HasSSE2() reports support; otherwise uses the portable C++ code.
int curve25519_mult(byte publicKey[32], const byte secretKey[32])
{
    using namespace CryptoPP::Donna::X25519;

#if (CRYPTOPP_CURVE25519_SSE2)
    if (HasSSE2())
        return curve25519_mult_SSE2(publicKey, secretKey, basePoint);
#endif

    return curve25519_mult_CXX(publicKey, secretKey, basePoint);
}
1885
1886
/// \brief Multiplies the other party's key by the secret key
/// \param sharedKey receives the 32-byte result
/// \param secretKey the 32-byte secret key
/// \param othersKey the 32-byte key of the other party
/// \return the return value of the selected implementation
/// \details Uses the SSE2 implementation when it is compiled in and
///  HasSSE2() reports support; otherwise uses the portable C++ code.
int curve25519_mult(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32])
{
#if (CRYPTOPP_CURVE25519_SSE2)
    if (HasSSE2())
        return curve25519_mult_SSE2(sharedKey, secretKey, othersKey);
#endif

    return curve25519_mult_CXX(sharedKey, secretKey, othersKey);
}
1896
1897
NAMESPACE_END // Donna
1898
NAMESPACE_END // CryptoPP
1899
1900
//******************************* ed25519 *******************************//
1901
1902
NAMESPACE_BEGIN(CryptoPP)
1903
NAMESPACE_BEGIN(Donna)
1904
1905
int
1906
ed25519_publickey_CXX(byte publicKey[32], const byte secretKey[32])
1907
{
1908
using namespace CryptoPP::Donna::Ed25519;
1909
1910
bignum256modm a;
1911
ALIGN(ALIGN_SPEC) ge25519 A;
1912
hash_512bits extsk;
1913
1914
/* A = aB */
1915
ed25519_extsk(extsk, secretKey);
1916
expand256_modm(a, extsk, 32);
1917
ge25519_scalarmult_base_niels(&A, ge25519_niels_base_multiples, a);
1918
ge25519_pack(publicKey, &A);
1919
1920
return 0;
1921
}
1922
1923
int
1924
ed25519_publickey(byte publicKey[32], const byte secretKey[32])
1925
{
1926
return ed25519_publickey_CXX(publicKey, secretKey);
1927
}
1928
1929
int
1930
ed25519_sign_CXX(std::istream& stream, const byte sk[32], const byte pk[32], byte RS[64])
1931
{
1932
using namespace CryptoPP::Donna::Ed25519;
1933
1934
bignum256modm r, S, a;
1935
ALIGN(ALIGN_SPEC) ge25519 R;
1936
hash_512bits extsk, hashr, hram;
1937
1938
// Unfortunately we need to read the stream twice. The first time calculates
1939
// 'r = H(aExt[32..64], m)'. The second time calculates 'S = H(R,A,m)'. There
1940
// is a data dependency due to hashing 'RS' with 'R = [r]B' that does not
1941
// allow us to read the stream once.
1942
std::streampos where = stream.tellg();
1943
1944
ed25519_extsk(extsk, sk);
1945
1946
/* r = H(aExt[32..64], m) */
1947
SHA512 hash;
1948
hash.Update(extsk + 32, 32);
1949
UpdateFromStream(hash, stream);
1950
hash.Final(hashr);
1951
expand256_modm(r, hashr, 64);
1952
1953
/* R = rB */
1954
ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r);
1955
ge25519_pack(RS, &R);
1956
1957
// Reset stream for the second digest
1958
stream.clear();
1959
stream.seekg(where);
1960
1961
/* S = H(R,A,m).. */
1962
ed25519_hram(hram, RS, pk, stream);
1963
expand256_modm(S, hram, 64);
1964
1965
/* S = H(R,A,m)a */
1966
expand256_modm(a, extsk, 32);
1967
mul256_modm(S, S, a);
1968
1969
/* S = (r + H(R,A,m)a) */
1970
add256_modm(S, S, r);
1971
1972
/* S = (r + H(R,A,m)a) mod L */
1973
contract256_modm(RS + 32, S);
1974
1975
return 0;
1976
}
1977
1978
int
1979
ed25519_sign_CXX(const byte *m, size_t mlen, const byte sk[32], const byte pk[32], byte RS[64])
1980
{
1981
using namespace CryptoPP::Donna::Ed25519;
1982
1983
bignum256modm r, S, a;
1984
ALIGN(ALIGN_SPEC) ge25519 R;
1985
hash_512bits extsk, hashr, hram;
1986
1987
ed25519_extsk(extsk, sk);
1988
1989
/* r = H(aExt[32..64], m) */
1990
SHA512 hash;
1991
hash.Update(extsk + 32, 32);
1992
hash.Update(m, mlen);
1993
hash.Final(hashr);
1994
expand256_modm(r, hashr, 64);
1995
1996
/* R = rB */
1997
ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r);
1998
ge25519_pack(RS, &R);
1999
2000
/* S = H(R,A,m).. */
2001
ed25519_hram(hram, RS, pk, m, mlen);
2002
expand256_modm(S, hram, 64);
2003
2004
/* S = H(R,A,m)a */
2005
expand256_modm(a, extsk, 32);
2006
mul256_modm(S, S, a);
2007
2008
/* S = (r + H(R,A,m)a) */
2009
add256_modm(S, S, r);
2010
2011
/* S = (r + H(R,A,m)a) mod L */
2012
contract256_modm(RS + 32, S);
2013
2014
return 0;
2015
}
2016
2017
int
2018
ed25519_sign(std::istream& stream, const byte secretKey[32], const byte publicKey[32],
2019
byte signature[64])
2020
{
2021
return ed25519_sign_CXX(stream, secretKey, publicKey, signature);
2022
}
2023
2024
int
2025
ed25519_sign(const byte* message, size_t messageLength, const byte secretKey[32],
2026
const byte publicKey[32], byte signature[64])
2027
{
2028
return ed25519_sign_CXX(message, messageLength, secretKey, publicKey, signature);
2029
}
2030
2031
int
2032
ed25519_sign_open_CXX(std::istream& stream, const byte pk[32], const byte RS[64]) {
2033
2034
using namespace CryptoPP::Donna::Ed25519;
2035
2036
ALIGN(ALIGN_SPEC) ge25519 R, A;
2037
hash_512bits hash;
2038
bignum256modm hram, S;
2039
byte checkR[32];
2040
2041
if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk))
2042
return -1;
2043
2044
/* hram = H(R,A,m) */
2045
ed25519_hram(hash, RS, pk, stream);
2046
expand256_modm(hram, hash, 64);
2047
2048
/* S */
2049
expand256_modm(S, RS + 32, 32);
2050
2051
/* SB - H(R,A,m)A */
2052
ge25519_double_scalarmult_vartime(&R, &A, hram, S);
2053
ge25519_pack(checkR, &R);
2054
2055
/* check that R = SB - H(R,A,m)A */
2056
return ed25519_verify(RS, checkR, 32) ? 0 : -1;
2057
}
2058
2059
int
2060
ed25519_sign_open_CXX(const byte *m, size_t mlen, const byte pk[32], const byte RS[64]) {
2061
2062
using namespace CryptoPP::Donna::Ed25519;
2063
2064
ALIGN(ALIGN_SPEC) ge25519 R, A;
2065
hash_512bits hash;
2066
bignum256modm hram, S;
2067
byte checkR[32];
2068
2069
if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk))
2070
return -1;
2071
2072
/* hram = H(R,A,m) */
2073
ed25519_hram(hash, RS, pk, m, mlen);
2074
expand256_modm(hram, hash, 64);
2075
2076
/* S */
2077
expand256_modm(S, RS + 32, 32);
2078
2079
/* SB - H(R,A,m)A */
2080
ge25519_double_scalarmult_vartime(&R, &A, hram, S);
2081
ge25519_pack(checkR, &R);
2082
2083
/* check that R = SB - H(R,A,m)A */
2084
return ed25519_verify(RS, checkR, 32) ? 0 : -1;
2085
}
2086
2087
int
2088
ed25519_sign_open(const byte *message, size_t messageLength, const byte publicKey[32], const byte signature[64])
2089
{
2090
return ed25519_sign_open_CXX(message, messageLength, publicKey, signature);
2091
}
2092
2093
int
2094
ed25519_sign_open(std::istream& stream, const byte publicKey[32], const byte signature[64])
2095
{
2096
return ed25519_sign_open_CXX(stream, publicKey, signature);
2097
}
2098
2099
NAMESPACE_END // Donna
2100
NAMESPACE_END // CryptoPP
2101
2102
#endif // CRYPTOPP_CURVE25519_32BIT
2103
2104