CoCalc -- donna

GitHub Repository: folium-app/Folium
Path: blob/a-new-beginning/SharedDependencies/Sources/cryptopp/donna_32.cpp
² views
1
// donna_32.cpp - written and placed in public domain by Jeffrey Walton
2
//                Crypto++ specific implementation wrapped around Andrew
3
//                Moon's public domain curve25519-donna and ed25519-donna,
4
//                https://github.com/floodyberry/curve25519-donna and
5
//                https://github.com/floodyberry/ed25519-donna.
6

7
// The curve25519 and ed25519 source files multiplex different repos and
8
// architectures using namespaces. The repos are Andrew Moon's
9
// curve25519-donna and ed25519-donna. The architectures are 32-bit, 64-bit
10
// and SSE. For example, 32-bit x25519 uses symbols from Donna::X25519 and
11
// Donna::Arch32.
12

13
// A fair amount of duplication happens below, but we could not directly
14
// use curve25519 for both x25519 and ed25519. A close examination reveals
15
// slight differences in the implementation. For example, look at the
16
// two curve25519_sub functions.
17

18
// If needed, see Moon's commit "Go back to ignoring 256th bit [sic]",
19
// https://github.com/floodyberry/curve25519-donna/commit/57a683d18721a658
20

21
#include "pch.h"
22

23
#include "config.h"
24
#include "donna.h"
25
#include "secblock.h"
26
#include "sha.h"
27
#include "misc.h"
28
#include "cpu.h"
29

30
#include <istream>
31
#include <sstream>
32

33
#if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
34
# pragma GCC diagnostic ignored "-Wunused-function"
35
#endif
36

37
#if CRYPTOPP_MSC_VERSION
38
# pragma warning(disable: 4244)
39
#endif
40

41
// Squash MS LNK4221 and libtool warnings
42
extern const char DONNA32_FNAME[] = __FILE__;
43

44
ANONYMOUS_NAMESPACE_BEGIN
45

46
// Can't use GetAlignmentOf<word32>() because of C++11 and constexpr
47
// Can't use 'const unsigned int' because of MSVC 2013
48
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
49
# define ALIGN_SPEC 16
50
#else
51
# define ALIGN_SPEC 4
52
#endif
53

54
ANONYMOUS_NAMESPACE_END
55

56
#if defined(CRYPTOPP_CURVE25519_32BIT)
57

58
#include "donna_32.h"
59

60
ANONYMOUS_NAMESPACE_BEGIN
61

62
using CryptoPP::byte;
63
using CryptoPP::word32;
64
using CryptoPP::GetWord;
65
using CryptoPP::PutWord;
66
using CryptoPP::LITTLE_ENDIAN_ORDER;
67

68
/* Decode four bytes at p, stored least-significant byte first, into a word32.
   The leading 'false' is GetWord's first flag argument (presumably
   "assume aligned" -- confirm against misc.h). */
inline word32 U8TO32_LE(const byte* p)
{
    const word32 value = GetWord<word32>(false, LITTLE_ENDIAN_ORDER, p);
    return value;
}
72

73
/* Encode w into the four bytes at p, least-significant byte first.
   The leading 'false' is PutWord's first flag argument (presumably
   "assume aligned" -- confirm against misc.h). */
inline void U32TO8_LE(byte* p, word32 w)
{
    PutWord(false, LITTLE_ENDIAN_ORDER, p, w); return;
}
77

78
ANONYMOUS_NAMESPACE_END
79

80
NAMESPACE_BEGIN(CryptoPP)
81
NAMESPACE_BEGIN(Donna)
82
NAMESPACE_BEGIN(X25519)
83
ANONYMOUS_NAMESPACE_BEGIN
84

85
using CryptoPP::byte;
86
using CryptoPP::word32;
87
using CryptoPP::sword32;
88
using CryptoPP::word64;
89
using CryptoPP::sword64;
90

91
using CryptoPP::GetBlock;
92
using CryptoPP::LittleEndian;
93

94
// Bring in all the symbols from the 32-bit header
95
using namespace CryptoPP::Donna::Arch32;
96

97
/* out = in */
98
inline void
99
curve25519_copy(bignum25519 out, const bignum25519 in) {
100
    out[0] = in[0]; out[1] = in[1];
101
    out[2] = in[2]; out[3] = in[3];
102
    out[4] = in[4]; out[5] = in[5];
103
    out[6] = in[6]; out[7] = in[7];
104
    out[8] = in[8]; out[9] = in[9];
105
}
106

107
/* out = a + b */
108
inline void
109
curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
110
    out[0] = a[0] + b[0]; out[1] = a[1] + b[1];
111
    out[2] = a[2] + b[2]; out[3] = a[3] + b[3];
112
    out[4] = a[4] + b[4]; out[5] = a[5] + b[5];
113
    out[6] = a[6] + b[6]; out[7] = a[7] + b[7];
114
    out[8] = a[8] + b[8]; out[9] = a[9] + b[9];
115
}
116

117
/* out = a - b */
118
inline void
119
curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
120
    word32 c;
121
    out[0] = 0x7ffffda + a[0] - b[0]    ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
122
    out[1] = 0x3fffffe + a[1] - b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
123
    out[2] = 0x7fffffe + a[2] - b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
124
    out[3] = 0x3fffffe + a[3] - b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
125
    out[4] = 0x7fffffe + a[4] - b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
126
    out[5] = 0x3fffffe + a[5] - b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
127
    out[6] = 0x7fffffe + a[6] - b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
128
    out[7] = 0x3fffffe + a[7] - b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
129
    out[8] = 0x7fffffe + a[8] - b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
130
    out[9] = 0x3fffffe + a[9] - b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
131
    out[0] += 19 * c;
132
}
133

134
/* out = in * scalar */
135
inline void
136
curve25519_scalar_product(bignum25519 out, const bignum25519 in, const word32 scalar) {
137
    word64 a;
138
    word32 c;
139
    a = mul32x32_64(in[0], scalar);     out[0] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
140
    a = mul32x32_64(in[1], scalar) + c; out[1] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
141
    a = mul32x32_64(in[2], scalar) + c; out[2] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
142
    a = mul32x32_64(in[3], scalar) + c; out[3] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
143
    a = mul32x32_64(in[4], scalar) + c; out[4] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
144
    a = mul32x32_64(in[5], scalar) + c; out[5] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
145
    a = mul32x32_64(in[6], scalar) + c; out[6] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
146
    a = mul32x32_64(in[7], scalar) + c; out[7] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
147
    a = mul32x32_64(in[8], scalar) + c; out[8] = (word32)a & reduce_mask_26; c = (word32)(a >> 26);
148
    a = mul32x32_64(in[9], scalar) + c; out[9] = (word32)a & reduce_mask_25; c = (word32)(a >> 25);
149
                                        out[0] += c * 19;
150
}
151

152
/*
 * out = a * b
 *
 * Fully unrolled multiplication of two ten-limb values. Both operands are
 * loaded into locals (r* from b, s* from a) before anything is written to
 * out, so out may be the same array as a or b.
 *
 * m0..m9 are the 64-bit accumulators for result limbs 0..9. The products
 * are added in four groups (odd limbs, even limbs, then the wrapped-around
 * terms for each), with r* rescaled in place between groups; the statement
 * order therefore matters.
 *
 * NOTE(review): the bounds on the input limbs that keep the 64-bit sums and
 * the 32-bit rescalings from overflowing are not visible in this function.
 */
153
inline void
154
curve25519_mul(bignum25519 out, const bignum25519 a, const bignum25519 b) {
155
    word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
156
    word32 s0,s1,s2,s3,s4,s5,s6,s7,s8,s9;
157
    word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
158
    word32 p;
159

160
    r0 = b[0]; r1 = b[1]; r2 = b[2]; r3 = b[3]; r4 = b[4];
161
    r5 = b[5]; r6 = b[6]; r7 = b[7]; r8 = b[8]; r9 = b[9];
162

163
    s0 = a[0]; s1 = a[1]; s2 = a[2]; s3 = a[3]; s4 = a[4];
164
    s5 = a[5]; s6 = a[6]; s7 = a[7]; s8 = a[8]; s9 = a[9];
165

166
    /* odd result limbs: plain products r_i * s_j with i + j == k */
    m1 = mul32x32_64(r0, s1) + mul32x32_64(r1, s0);
167
    m3 = mul32x32_64(r0, s3) + mul32x32_64(r1, s2) + mul32x32_64(r2, s1) + mul32x32_64(r3, s0);
168
    m5 = mul32x32_64(r0, s5) + mul32x32_64(r1, s4) + mul32x32_64(r2, s3) + mul32x32_64(r3, s2) + mul32x32_64(r4, s1) + mul32x32_64(r5, s0);
169
    m7 = mul32x32_64(r0, s7) + mul32x32_64(r1, s6) + mul32x32_64(r2, s5) + mul32x32_64(r3, s4) + mul32x32_64(r4, s3) + mul32x32_64(r5, s2) + mul32x32_64(r6, s1) + mul32x32_64(r7, s0);
170
    m9 = mul32x32_64(r0, s9) + mul32x32_64(r1, s8) + mul32x32_64(r2, s7) + mul32x32_64(r3, s6) + mul32x32_64(r4, s5) + mul32x32_64(r5, s4) + mul32x32_64(r6, s3) + mul32x32_64(r7, s2) + mul32x32_64(r8, s1) + mul32x32_64(r9, s0);
171

172
    /* in the even result limbs every odd-index r_i is paired with an
       odd-index s_j; those products are counted twice, so double r1,r3,r5,r7 */
    r1 *= 2; r3 *= 2; r5 *= 2; r7 *= 2;
173

174
    m0 = mul32x32_64(r0, s0);
175
    m2 = mul32x32_64(r0, s2) + mul32x32_64(r1, s1) + mul32x32_64(r2, s0);
176
    m4 = mul32x32_64(r0, s4) + mul32x32_64(r1, s3) + mul32x32_64(r2, s2) + mul32x32_64(r3, s1) + mul32x32_64(r4, s0);
177
    m6 = mul32x32_64(r0, s6) + mul32x32_64(r1, s5) + mul32x32_64(r2, s4) + mul32x32_64(r3, s3) + mul32x32_64(r4, s2) + mul32x32_64(r5, s1) + mul32x32_64(r6, s0);
178
    m8 = mul32x32_64(r0, s8) + mul32x32_64(r1, s7) + mul32x32_64(r2, s6) + mul32x32_64(r3, s5) + mul32x32_64(r4, s4) + mul32x32_64(r5, s3) + mul32x32_64(r6, s2) + mul32x32_64(r7, s1) + mul32x32_64(r8, s0);
179

180
    /* wrapped-around terms (i + j >= 10) are scaled by 19. r3,r5,r7 have
       their doubling undone first; r1 keeps it because it is only used in
       the m0 sum below */
    r1 *= 19; r2 *= 19;
181
    r3 = (r3 / 2) * 19;
182
    r4 *= 19;
183
    r5 = (r5 / 2) * 19;
184
    r6 *= 19;
185
    r7 = (r7 / 2) * 19;
186
    r8 *= 19; r9 *= 19;
187

188
    m1 += (mul32x32_64(r9, s2) + mul32x32_64(r8, s3) + mul32x32_64(r7, s4) + mul32x32_64(r6, s5) + mul32x32_64(r5, s6) + mul32x32_64(r4, s7) + mul32x32_64(r3, s8) + mul32x32_64(r2, s9));
189
    m3 += (mul32x32_64(r9, s4) + mul32x32_64(r8, s5) + mul32x32_64(r7, s6) + mul32x32_64(r6, s7) + mul32x32_64(r5, s8) + mul32x32_64(r4, s9));
190
    m5 += (mul32x32_64(r9, s6) + mul32x32_64(r8, s7) + mul32x32_64(r7, s8) + mul32x32_64(r6, s9));
191
    m7 += (mul32x32_64(r9, s8) + mul32x32_64(r8, s9));
192

193
    /* re-apply the doubling of the odd-index r_i for the even result limbs */
    r3 *= 2; r5 *= 2; r7 *= 2; r9 *= 2;
194

195
    m0 += (mul32x32_64(r9, s1) + mul32x32_64(r8, s2) + mul32x32_64(r7, s3) + mul32x32_64(r6, s4) + mul32x32_64(r5, s5) + mul32x32_64(r4, s6) + mul32x32_64(r3, s7) + mul32x32_64(r2, s8) + mul32x32_64(r1, s9));
196
    m2 += (mul32x32_64(r9, s3) + mul32x32_64(r8, s4) + mul32x32_64(r7, s5) + mul32x32_64(r6, s6) + mul32x32_64(r5, s7) + mul32x32_64(r4, s8) + mul32x32_64(r3, s9));
197
    m4 += (mul32x32_64(r9, s5) + mul32x32_64(r8, s6) + mul32x32_64(r7, s7) + mul32x32_64(r6, s8) + mul32x32_64(r5, s9));
198
    m6 += (mul32x32_64(r9, s7) + mul32x32_64(r8, s8) + mul32x32_64(r7, s9));
199
    m8 += (mul32x32_64(r9, s9));
200

201
    /* carry chain: keep 26 bits in even limbs and 25 bits in odd limbs,
       pushing the excess into the next accumulator */
                                 r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
202
    m1 += c;                     r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
203
    m2 += c;                     r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
204
    m3 += c;                     r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
205
    m4 += c;                     r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
206
    m5 += c;                     r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
207
    m6 += c;                     r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
208
    m7 += c;                     r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
209
    m8 += c;                     r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
210
    m9 += c;                     r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
211
    /* the carry out of limb 9 is multiplied by 19 and folded into limb 0;
       one more carry goes into limb 1, which is left unmasked */
    m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
212
    r1 += p;
213

214
    out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
215
    out[5] = r5; out[6] = r6; out[7] = r7; out[8] = r8; out[9] = r9;
216
}
217

218
/*
 * out = in * in
 *
 * Fully unrolled squaring of a ten-limb value. All limbs are loaded into
 * locals before out is written, so out may be the same array as in.
 *
 * m0..m9 are the 64-bit accumulators for result limbs 0..9. r0..r3 are
 * doubled in place part-way through so that each cross product r_i * r_j
 * (i != j) is counted twice; the interleaving of those doublings with the
 * sums is significant, so the statement order must not change.
 *
 * NOTE(review): the bounds on the input limbs that keep the intermediate
 * values from overflowing are not visible in this function.
 */
219
inline void
220
curve25519_square(bignum25519 out, const bignum25519 in) {
221
    word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
222
    word32 d6,d7,d8,d9;
223
    word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
224
    word32 p;
225

226
    r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4];
227
    r5 = in[5]; r6 = in[6]; r7 = in[7]; r8 = in[8]; r9 = in[9];
228

229
    /* non-wrapping products (i + j <= 9); each r_i is doubled right after
       its last use as an un-doubled value */
    m0 = mul32x32_64(r0, r0);
230
    r0 *= 2;
231
    m1 = mul32x32_64(r0, r1);
232
    m2 = mul32x32_64(r0, r2) + mul32x32_64(r1, r1 * 2);
233
    r1 *= 2;
234
    m3 = mul32x32_64(r0, r3) + mul32x32_64(r1, r2    );
235
    m4 = mul32x32_64(r0, r4) + mul32x32_64(r1, r3 * 2) + mul32x32_64(r2, r2);
236
    r2 *= 2;
237
    m5 = mul32x32_64(r0, r5) + mul32x32_64(r1, r4    ) + mul32x32_64(r2, r3);
238
    m6 = mul32x32_64(r0, r6) + mul32x32_64(r1, r5 * 2) + mul32x32_64(r2, r4) + mul32x32_64(r3, r3 * 2);
239
    r3 *= 2;
240
    m7 = mul32x32_64(r0, r7) + mul32x32_64(r1, r6    ) + mul32x32_64(r2, r5) + mul32x32_64(r3, r4    );
241
    m8 = mul32x32_64(r0, r8) + mul32x32_64(r1, r7 * 2) + mul32x32_64(r2, r6) + mul32x32_64(r3, r5 * 2) + mul32x32_64(r4, r4    );
242
    m9 = mul32x32_64(r0, r9) + mul32x32_64(r1, r8    ) + mul32x32_64(r2, r7) + mul32x32_64(r3, r6    ) + mul32x32_64(r4, r5 * 2);
243

244
    /* upper limbs pre-scaled by 19 for the wrapped-around terms; the
       odd-index ones (d7, d9) also carry a factor 2 */
    d6 = r6 * 19; d7 = r7 * 2 * 19;
245
    d8 = r8 * 19; d9 = r9 * 2 * 19;
246

247
    /* wrapped-around products (i + j >= 10). r1..r3 are still doubled here;
       'r2 / 2' in the m1 line undoes that where d9 already holds the 2 */
    m0 += (mul32x32_64(d9, r1    ) + mul32x32_64(d8, r2    ) + mul32x32_64(d7, r3    ) + mul32x32_64(d6, r4 * 2) + mul32x32_64(r5, r5 * 2 * 19));
248
    m1 += (mul32x32_64(d9, r2 / 2) + mul32x32_64(d8, r3    ) + mul32x32_64(d7, r4    ) + mul32x32_64(d6, r5 * 2));
249
    m2 += (mul32x32_64(d9, r3    ) + mul32x32_64(d8, r4 * 2) + mul32x32_64(d7, r5 * 2) + mul32x32_64(d6, r6    ));
250
    m3 += (mul32x32_64(d9, r4    ) + mul32x32_64(d8, r5 * 2) + mul32x32_64(d7, r6    ));
251
    m4 += (mul32x32_64(d9, r5 * 2) + mul32x32_64(d8, r6 * 2) + mul32x32_64(d7, r7    ));
252
    m5 += (mul32x32_64(d9, r6    ) + mul32x32_64(d8, r7 * 2));
253
    m6 += (mul32x32_64(d9, r7 * 2) + mul32x32_64(d8, r8    ));
254
    m7 += (mul32x32_64(d9, r8    ));
255
    m8 += (mul32x32_64(d9, r9    ));
256

257
    /* carry chain: keep 26 bits in even limbs and 25 bits in odd limbs,
       pushing the excess into the next accumulator */
                                 r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
258
    m1 += c;                     r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
259
    m2 += c;                     r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
260
    m3 += c;                     r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
261
    m4 += c;                     r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
262
    m5 += c;                     r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
263
    m6 += c;                     r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
264
    m7 += c;                     r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
265
    m8 += c;                     r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
266
    m9 += c;                     r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
267
    /* the carry out of limb 9 is multiplied by 19 and folded into limb 0;
       one more carry goes into limb 1, which is left unmasked */
    m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
268
    r1 += p;
269

270
    out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
271
    out[5] = r5; out[6] = r6; out[7] = r7; out[8] = r8; out[9] = r9;
272
}
273

274
/*
 * out = in^(2^count)
 *
 * Squares the value count times in a row, keeping the ten limbs in locals
 * between iterations and writing out only once at the end (so out may be
 * the same array as in). The loop body is the same sequence of statements
 * as curve25519_square.
 *
 * count must be >= 1: the do/while runs the body once before testing, and
 * '--count' only stops the loop when it reaches exactly zero.
 */
275
void
276
curve25519_square_times(bignum25519 out, const bignum25519 in, int count) {
277
    word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
278
    word32 d6,d7,d8,d9;
279
    word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
280
    word32 p;
281

282
    r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4];
283
    r5 = in[5]; r6 = in[6]; r7 = in[7]; r8 = in[8]; r9 = in[9];
284

285
    do {
286
        /* non-wrapping products (i + j <= 9); each r_i is doubled right
           after its last use as an un-doubled value */
        m0 = mul32x32_64(r0, r0);
287
        r0 *= 2;
288
        m1 = mul32x32_64(r0, r1);
289
        m2 = mul32x32_64(r0, r2) + mul32x32_64(r1, r1 * 2);
290
        r1 *= 2;
291
        m3 = mul32x32_64(r0, r3) + mul32x32_64(r1, r2    );
292
        m4 = mul32x32_64(r0, r4) + mul32x32_64(r1, r3 * 2) + mul32x32_64(r2, r2);
293
        r2 *= 2;
294
        m5 = mul32x32_64(r0, r5) + mul32x32_64(r1, r4    ) + mul32x32_64(r2, r3);
295
        m6 = mul32x32_64(r0, r6) + mul32x32_64(r1, r5 * 2) + mul32x32_64(r2, r4) + mul32x32_64(r3, r3 * 2);
296
        r3 *= 2;
297
        m7 = mul32x32_64(r0, r7) + mul32x32_64(r1, r6    ) + mul32x32_64(r2, r5) + mul32x32_64(r3, r4    );
298
        m8 = mul32x32_64(r0, r8) + mul32x32_64(r1, r7 * 2) + mul32x32_64(r2, r6) + mul32x32_64(r3, r5 * 2) + mul32x32_64(r4, r4    );
299
        m9 = mul32x32_64(r0, r9) + mul32x32_64(r1, r8    ) + mul32x32_64(r2, r7) + mul32x32_64(r3, r6    ) + mul32x32_64(r4, r5 * 2);
300

301
        /* upper limbs pre-scaled by 19 for the wrapped-around terms; the
           odd-index ones (d7, d9) also carry a factor 2 */
        d6 = r6 * 19; d7 = r7 * 2 * 19;
302
        d8 = r8 * 19; d9 = r9 * 2 * 19;
303

304
        /* wrapped-around products (i + j >= 10) */
        m0 += (mul32x32_64(d9, r1    ) + mul32x32_64(d8, r2    ) + mul32x32_64(d7, r3    ) + mul32x32_64(d6, r4 * 2) + mul32x32_64(r5, r5 * 2 * 19));
305
        m1 += (mul32x32_64(d9, r2 / 2) + mul32x32_64(d8, r3    ) + mul32x32_64(d7, r4    ) + mul32x32_64(d6, r5 * 2));
306
        m2 += (mul32x32_64(d9, r3    ) + mul32x32_64(d8, r4 * 2) + mul32x32_64(d7, r5 * 2) + mul32x32_64(d6, r6    ));
307
        m3 += (mul32x32_64(d9, r4    ) + mul32x32_64(d8, r5 * 2) + mul32x32_64(d7, r6    ));
308
        m4 += (mul32x32_64(d9, r5 * 2) + mul32x32_64(d8, r6 * 2) + mul32x32_64(d7, r7    ));
309
        m5 += (mul32x32_64(d9, r6    ) + mul32x32_64(d8, r7 * 2));
310
        m6 += (mul32x32_64(d9, r7 * 2) + mul32x32_64(d8, r8    ));
311
        m7 += (mul32x32_64(d9, r8    ));
312
        m8 += (mul32x32_64(d9, r9    ));
313

314
        /* carry chain; the reduced limbs land back in r0..r9 and feed the
           next iteration */
                                     r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
315
        m1 += c;                     r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
316
        m2 += c;                     r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
317
        m3 += c;                     r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
318
        m4 += c;                     r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
319
        m5 += c;                     r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
320
        m6 += c;                     r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
321
        m7 += c;                     r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
322
        m8 += c;                     r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
323
        m9 += c;                     r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
324
        m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
325
        r1 += p;
326
    } while (--count);
327

328
    out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;
329
    out[5] = r5; out[6] = r6; out[7] = r7; out[8] = r8; out[9] = r9;
330
}
331

332
/* Take a little-endian, 32-byte number and expand it into polynomial form */
333
void
334
curve25519_expand(bignum25519 out, const byte in[32]) {
335
    word32 x0,x1,x2,x3,x4,x5,x6,x7;
336
    GetBlock<word32, LittleEndian> block(in);
337
    block(x0)(x1)(x2)(x3)(x4)(x5)(x6)(x7);
338

339
    out[0] = (                      x0       ) & reduce_mask_26;
340
    out[1] = ((((word64)x1 << 32) | x0) >> 26) & reduce_mask_25;
341
    out[2] = ((((word64)x2 << 32) | x1) >> 19) & reduce_mask_26;
342
    out[3] = ((((word64)x3 << 32) | x2) >> 13) & reduce_mask_25;
343
    out[4] = ((                     x3) >>  6) & reduce_mask_26;
344
    out[5] = (                      x4       ) & reduce_mask_25;
345
    out[6] = ((((word64)x5 << 32) | x4) >> 25) & reduce_mask_26;
346
    out[7] = ((((word64)x6 << 32) | x5) >> 19) & reduce_mask_25;
347
    out[8] = ((((word64)x7 << 32) | x6) >> 12) & reduce_mask_26;
348
    out[9] = ((                     x7) >>  6) & reduce_mask_25; /* ignore the top bit */
349
}
350

351
/* Take a fully reduced polynomial form number and contract it into a little-endian, 32-byte array */
352
void
353
curve25519_contract(byte out[32], const bignum25519 in) {
354
    bignum25519 f;
355
    curve25519_copy(f, in);
356

357
    #define carry_pass() \
358
        f[1] += f[0] >> 26; f[0] &= reduce_mask_26; \
359
        f[2] += f[1] >> 25; f[1] &= reduce_mask_25; \
360
        f[3] += f[2] >> 26; f[2] &= reduce_mask_26; \
361
        f[4] += f[3] >> 25; f[3] &= reduce_mask_25; \
362
        f[5] += f[4] >> 26; f[4] &= reduce_mask_26; \
363
        f[6] += f[5] >> 25; f[5] &= reduce_mask_25; \
364
        f[7] += f[6] >> 26; f[6] &= reduce_mask_26; \
365
        f[8] += f[7] >> 25; f[7] &= reduce_mask_25; \
366
        f[9] += f[8] >> 26; f[8] &= reduce_mask_26;
367

368
    #define carry_pass_full() \
369
        carry_pass() \
370
        f[0] += 19 * (f[9] >> 25); f[9] &= reduce_mask_25;
371

372
    #define carry_pass_final() \
373
        carry_pass() \
374
        f[9] &= reduce_mask_25;
375

376
    carry_pass_full()
377
    carry_pass_full()
378

379
    /* now t is between 0 and 2^255-1, properly carried. */
380
    /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
381
    f[0] += 19;
382
    carry_pass_full()
383

384
    /* now between 19 and 2^255-1 in both cases, and offset by 19. */
385
    f[0] += (1 << 26) - 19;
386
    f[1] += (1 << 25) - 1;
387
    f[2] += (1 << 26) - 1;
388
    f[3] += (1 << 25) - 1;
389
    f[4] += (1 << 26) - 1;
390
    f[5] += (1 << 25) - 1;
391
    f[6] += (1 << 26) - 1;
392
    f[7] += (1 << 25) - 1;
393
    f[8] += (1 << 26) - 1;
394
    f[9] += (1 << 25) - 1;
395

396
    /* now between 2^255 and 2^256-20, and offset by 2^255. */
397
    carry_pass_final()
398

399
    #undef carry_pass
400
    #undef carry_full
401
    #undef carry_final
402

403
    f[1] <<= 2;
404
    f[2] <<= 3;
405
    f[3] <<= 5;
406
    f[4] <<= 6;
407
    f[6] <<= 1;
408
    f[7] <<= 3;
409
    f[8] <<= 4;
410
    f[9] <<= 6;
411

412
    #define F(i, s) \
413
        out[s+0] |= (byte)( f[i] & 0xff); \
414
        out[s+1]  = (byte)((f[i] >>  8) & 0xff); \
415
        out[s+2]  = (byte)((f[i] >> 16) & 0xff); \
416
        out[s+3]  = (byte)((f[i] >> 24) & 0xff);
417

418
    out[0] = out[16] = 0;
419
    F(0,0); F(1,3);
420
    F(2,6); F(3,9);
421
    F(4,12); F(5,16);
422
    F(6,19); F(7,22);
423
    F(8,25); F(9,28);
424
    #undef F
425
}
426

427
inline void
428
curve25519_swap_conditional(bignum25519 x, bignum25519 qpx, word32 iswap) {
429
    const word32 swap = (word32)(-(sword32)iswap);
430
    word32 x0,x1,x2,x3,x4,x5,x6,x7,x8,x9;
431

432
    x0 = swap & (x[0] ^ qpx[0]); x[0] ^= x0; qpx[0] ^= x0;
433
    x1 = swap & (x[1] ^ qpx[1]); x[1] ^= x1; qpx[1] ^= x1;
434
    x2 = swap & (x[2] ^ qpx[2]); x[2] ^= x2; qpx[2] ^= x2;
435
    x3 = swap & (x[3] ^ qpx[3]); x[3] ^= x3; qpx[3] ^= x3;
436
    x4 = swap & (x[4] ^ qpx[4]); x[4] ^= x4; qpx[4] ^= x4;
437
    x5 = swap & (x[5] ^ qpx[5]); x[5] ^= x5; qpx[5] ^= x5;
438
    x6 = swap & (x[6] ^ qpx[6]); x[6] ^= x6; qpx[6] ^= x6;
439
    x7 = swap & (x[7] ^ qpx[7]); x[7] ^= x7; qpx[7] ^= x7;
440
    x8 = swap & (x[8] ^ qpx[8]); x[8] ^= x8; qpx[8] ^= x8;
441
    x9 = swap & (x[9] ^ qpx[9]); x[9] ^= x9; qpx[9] ^= x9;
442
}
443

444
/*
445
 * In:  b =   2^5 - 2^0
446
 * Out: b = 2^250 - 2^0
447
 */
448
void
449
curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) {
450
    ALIGN(ALIGN_SPEC) bignum25519 t0,c;
451

452
    /* 2^5  - 2^0 */ /* b */
453
    /* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5);
454
    /* 2^10 - 2^0 */ curve25519_mul(b, t0, b);
455
    /* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10);
456
    /* 2^20 - 2^0 */ curve25519_mul(c, t0, b);
457
    /* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20);
458
    /* 2^40 - 2^0 */ curve25519_mul(t0, t0, c);
459
    /* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10);
460
    /* 2^50 - 2^0 */ curve25519_mul(b, t0, b);
461
    /* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50);
462
    /* 2^100 - 2^0 */ curve25519_mul(c, t0, b);
463
    /* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100);
464
    /* 2^200 - 2^0 */ curve25519_mul(t0, t0, c);
465
    /* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50);
466
    /* 2^250 - 2^0 */ curve25519_mul(b, t0, b);
467
}
468

469
/*
470
 * z^(p - 2) = z(2^255 - 21)
471
 */
472
void
473
curve25519_recip(bignum25519 out, const bignum25519 z) {
474
    ALIGN(ALIGN_SPEC) bignum25519 a, t0, b;
475

476
    /* 2 */ curve25519_square(a, z); /* a = 2 */
477
    /* 8 */ curve25519_square_times(t0, a, 2);
478
    /* 9 */ curve25519_mul(b, t0, z); /* b = 9 */
479
    /* 11 */ curve25519_mul(a, b, a); /* a = 11 */
480
    /* 22 */ curve25519_square(t0, a);
481
    /* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b);
482
    /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
483
    /* 2^255 - 2^5 */ curve25519_square_times(b, b, 5);
484
    /* 2^255 - 21 */  curve25519_mul(out, b, a);
485
}
486

487
ANONYMOUS_NAMESPACE_END
488
NAMESPACE_END  // X25519
489
NAMESPACE_END  // Donna
490
NAMESPACE_END  // CryptoPP
491

492
//******************************* ed25519 *******************************//
493

494
NAMESPACE_BEGIN(CryptoPP)
495
NAMESPACE_BEGIN(Donna)
496
NAMESPACE_BEGIN(Ed25519)
497
ANONYMOUS_NAMESPACE_BEGIN
498

499
using CryptoPP::byte;
500
using CryptoPP::word32;
501
using CryptoPP::sword32;
502
using CryptoPP::word64;
503
using CryptoPP::sword64;
504

505
using CryptoPP::GetBlock;
506
using CryptoPP::LittleEndian;
507

508
using CryptoPP::SHA512;
509

510
// Bring in all the symbols from the 32-bit header
511
using namespace CryptoPP::Donna::Arch32;
512

513
/* out = in */
514
inline void
515
curve25519_copy(bignum25519 out, const bignum25519 in) {
516
    out[0] = in[0]; out[1] = in[1];
517
    out[2] = in[2]; out[3] = in[3];
518
    out[4] = in[4]; out[5] = in[5];
519
    out[6] = in[6]; out[7] = in[7];
520
    out[8] = in[8]; out[9] = in[9];
521
}
522

523
/* out = a + b */
524
inline void
525
curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
526
    out[0] = a[0] + b[0]; out[1] = a[1] + b[1];
527
    out[2] = a[2] + b[2]; out[3] = a[3] + b[3];
528
    out[4] = a[4] + b[4]; out[5] = a[5] + b[5];
529
    out[6] = a[6] + b[6]; out[7] = a[7] + b[7];
530
    out[8] = a[8] + b[8]; out[9] = a[9] + b[9];
531
}
532

533
inline void
534
curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
535
    word32 c;
536
    out[0] = a[0] + b[0]    ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
537
    out[1] = a[1] + b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
538
    out[2] = a[2] + b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
539
    out[3] = a[3] + b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
540
    out[4] = a[4] + b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
541
    out[5] = a[5] + b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
542
    out[6] = a[6] + b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
543
    out[7] = a[7] + b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
544
    out[8] = a[8] + b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
545
    out[9] = a[9] + b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
546
    out[0] += 19 * c;
547
}
548

549
inline void
550
curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
551
    word32 c;
552
    out[0] = a[0] + b[0]    ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
553
    out[1] = a[1] + b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
554
    out[2] = a[2] + b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
555
    out[3] = a[3] + b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
556
    out[4] = a[4] + b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
557
    out[5] = a[5] + b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
558
    out[6] = a[6] + b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
559
    out[7] = a[7] + b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
560
    out[8] = a[8] + b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
561
    out[9] = a[9] + b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
562
    out[0] += 19 * c;
563
}
564

565
/* out = a - b */
566
inline void
567
curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
568
    word32 c;
569
    out[0] = twoP0     + a[0] - b[0]    ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
570
    out[1] = twoP13579 + a[1] - b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
571
    out[2] = twoP2468  + a[2] - b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
572
    out[3] = twoP13579 + a[3] - b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
573
    out[4] = twoP2468  + a[4] - b[4] + c;
574
    out[5] = twoP13579 + a[5] - b[5]    ;
575
    out[6] = twoP2468  + a[6] - b[6]    ;
576
    out[7] = twoP13579 + a[7] - b[7]    ;
577
    out[8] = twoP2468  + a[8] - b[8]    ;
578
    out[9] = twoP13579 + a[9] - b[9]    ;
579
}
580

581
/* out = a - b, where a is the result of a basic op (add,sub) */
582
inline void
583
curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
584
    word32 c;
585
    out[0] = fourP0     + a[0] - b[0]    ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
586
    out[1] = fourP13579 + a[1] - b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
587
    out[2] = fourP2468  + a[2] - b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
588
    out[3] = fourP13579 + a[3] - b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
589
    out[4] = fourP2468  + a[4] - b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
590
    out[5] = fourP13579 + a[5] - b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
591
    out[6] = fourP2468  + a[6] - b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
592
    out[7] = fourP13579 + a[7] - b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
593
    out[8] = fourP2468  + a[8] - b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
594
    out[9] = fourP13579 + a[9] - b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
595
    out[0] += 19 * c;
596
}
597

598
inline void
599
curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
600
    word32 c;
601
    out[0] = fourP0     + a[0] - b[0]    ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
602
    out[1] = fourP13579 + a[1] - b[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
603
    out[2] = fourP2468  + a[2] - b[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
604
    out[3] = fourP13579 + a[3] - b[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
605
    out[4] = fourP2468  + a[4] - b[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
606
    out[5] = fourP13579 + a[5] - b[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
607
    out[6] = fourP2468  + a[6] - b[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
608
    out[7] = fourP13579 + a[7] - b[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
609
    out[8] = fourP2468  + a[8] - b[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
610
    out[9] = fourP13579 + a[9] - b[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
611
    out[0] += 19 * c;
612
}
613

614
/* out = -a */
615
inline void
616
curve25519_neg(bignum25519 out, const bignum25519 a) {
617
    word32 c;
618
    out[0] = twoP0     - a[0]    ; c = (out[0] >> 26); out[0] &= reduce_mask_26;
619
    out[1] = twoP13579 - a[1] + c; c = (out[1] >> 25); out[1] &= reduce_mask_25;
620
    out[2] = twoP2468  - a[2] + c; c = (out[2] >> 26); out[2] &= reduce_mask_26;
621
    out[3] = twoP13579 - a[3] + c; c = (out[3] >> 25); out[3] &= reduce_mask_25;
622
    out[4] = twoP2468  - a[4] + c; c = (out[4] >> 26); out[4] &= reduce_mask_26;
623
    out[5] = twoP13579 - a[5] + c; c = (out[5] >> 25); out[5] &= reduce_mask_25;
624
    out[6] = twoP2468  - a[6] + c; c = (out[6] >> 26); out[6] &= reduce_mask_26;
625
    out[7] = twoP13579 - a[7] + c; c = (out[7] >> 25); out[7] &= reduce_mask_25;
626
    out[8] = twoP2468  - a[8] + c; c = (out[8] >> 26); out[8] &= reduce_mask_26;
627
    out[9] = twoP13579 - a[9] + c; c = (out[9] >> 25); out[9] &= reduce_mask_25;
628
    out[0] += 19 * c;
629
}
630

631
/* out = a * b */
632
void
633
curve25519_mul(bignum25519 out, const bignum25519 a, const bignum25519 b) {
634
    word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
635
    word32 s0,s1,s2,s3,s4,s5,s6,s7,s8,s9;
636
    word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
637
    word32 p;
638

639
    r0 = b[0]; r1 = b[1];
640
    r2 = b[2]; r3 = b[3];
641
    r4 = b[4]; r5 = b[5];
642
    r6 = b[6]; r7 = b[7];
643
    r8 = b[8]; r9 = b[9];
644

645
    s0 = a[0]; s1 = a[1];
646
    s2 = a[2]; s3 = a[3];
647
    s4 = a[4]; s5 = a[5];
648
    s6 = a[6]; s7 = a[7];
649
    s8 = a[8]; s9 = a[9];
650

651
    m1 = mul32x32_64(r0, s1) + mul32x32_64(r1, s0);
652
    m3 = mul32x32_64(r0, s3) + mul32x32_64(r1, s2) + mul32x32_64(r2, s1) + mul32x32_64(r3, s0);
653
    m5 = mul32x32_64(r0, s5) + mul32x32_64(r1, s4) + mul32x32_64(r2, s3) + mul32x32_64(r3, s2) + mul32x32_64(r4, s1) + mul32x32_64(r5, s0);
654
    m7 = mul32x32_64(r0, s7) + mul32x32_64(r1, s6) + mul32x32_64(r2, s5) + mul32x32_64(r3, s4) + mul32x32_64(r4, s3) + mul32x32_64(r5, s2) + mul32x32_64(r6, s1) + mul32x32_64(r7, s0);
655
    m9 = mul32x32_64(r0, s9) + mul32x32_64(r1, s8) + mul32x32_64(r2, s7) + mul32x32_64(r3, s6) + mul32x32_64(r4, s5) + mul32x32_64(r5, s4) + mul32x32_64(r6, s3) + mul32x32_64(r7, s2) + mul32x32_64(r8, s1) + mul32x32_64(r9, s0);
656

657
    r1 *= 2; r3 *= 2;
658
    r5 *= 2; r7 *= 2;
659

660
    m0 = mul32x32_64(r0, s0);
661
    m2 = mul32x32_64(r0, s2) + mul32x32_64(r1, s1) + mul32x32_64(r2, s0);
662
    m4 = mul32x32_64(r0, s4) + mul32x32_64(r1, s3) + mul32x32_64(r2, s2) + mul32x32_64(r3, s1) + mul32x32_64(r4, s0);
663
    m6 = mul32x32_64(r0, s6) + mul32x32_64(r1, s5) + mul32x32_64(r2, s4) + mul32x32_64(r3, s3) + mul32x32_64(r4, s2) + mul32x32_64(r5, s1) + mul32x32_64(r6, s0);
664
    m8 = mul32x32_64(r0, s8) + mul32x32_64(r1, s7) + mul32x32_64(r2, s6) + mul32x32_64(r3, s5) + mul32x32_64(r4, s4) + mul32x32_64(r5, s3) + mul32x32_64(r6, s2) + mul32x32_64(r7, s1) + mul32x32_64(r8, s0);
665

666
    r1 *= 19; r2 *= 19;
667
    r3 = (r3 / 2) * 19;
668
    r4 *= 19;
669
    r5 = (r5 / 2) * 19;
670
    r6 *= 19;
671
    r7 = (r7 / 2) * 19;
672
    r8 *= 19; r9 *= 19;
673

674
    m1 += (mul32x32_64(r9, s2) + mul32x32_64(r8, s3) + mul32x32_64(r7, s4) + mul32x32_64(r6, s5) + mul32x32_64(r5, s6) + mul32x32_64(r4, s7) + mul32x32_64(r3, s8) + mul32x32_64(r2, s9));
675
    m3 += (mul32x32_64(r9, s4) + mul32x32_64(r8, s5) + mul32x32_64(r7, s6) + mul32x32_64(r6, s7) + mul32x32_64(r5, s8) + mul32x32_64(r4, s9));
676
    m5 += (mul32x32_64(r9, s6) + mul32x32_64(r8, s7) + mul32x32_64(r7, s8) + mul32x32_64(r6, s9));
677
    m7 += (mul32x32_64(r9, s8) + mul32x32_64(r8, s9));
678

679
    r3 *= 2; r5 *= 2;
680
    r7 *= 2; r9 *= 2;
681

682
    m0 += (mul32x32_64(r9, s1) + mul32x32_64(r8, s2) + mul32x32_64(r7, s3) + mul32x32_64(r6, s4) + mul32x32_64(r5, s5) + mul32x32_64(r4, s6) + mul32x32_64(r3, s7) + mul32x32_64(r2, s8) + mul32x32_64(r1, s9));
683
    m2 += (mul32x32_64(r9, s3) + mul32x32_64(r8, s4) + mul32x32_64(r7, s5) + mul32x32_64(r6, s6) + mul32x32_64(r5, s7) + mul32x32_64(r4, s8) + mul32x32_64(r3, s9));
684
    m4 += (mul32x32_64(r9, s5) + mul32x32_64(r8, s6) + mul32x32_64(r7, s7) + mul32x32_64(r6, s8) + mul32x32_64(r5, s9));
685
    m6 += (mul32x32_64(r9, s7) + mul32x32_64(r8, s8) + mul32x32_64(r7, s9));
686
    m8 += (mul32x32_64(r9, s9));
687

688
                                 r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
689
    m1 += c;                     r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
690
    m2 += c;                     r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
691
    m3 += c;                     r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
692
    m4 += c;                     r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
693
    m5 += c;                     r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
694
    m6 += c;                     r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
695
    m7 += c;                     r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
696
    m8 += c;                     r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
697
    m9 += c;                     r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
698
    m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
699
    r1 += p;
700

701
    out[0] = r0; out[1] = r1;
702
    out[2] = r2; out[3] = r3;
703
    out[4] = r4; out[5] = r5;
704
    out[6] = r6; out[7] = r7;
705
    out[8] = r8; out[9] = r9;
706
}
707

708
/* out = in*in */
709
void
710
curve25519_square(bignum25519 out, const bignum25519 in) {
711
    word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
712
    word32 d6,d7,d8,d9;
713
    word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
714
    word32 p;
715

716
    r0 = in[0]; r1 = in[1];
717
    r2 = in[2]; r3 = in[3];
718
    r4 = in[4]; r5 = in[5];
719
    r6 = in[6]; r7 = in[7];
720
    r8 = in[8]; r9 = in[9];
721

722
    m0 = mul32x32_64(r0, r0);
723
    r0 *= 2;
724
    m1 = mul32x32_64(r0, r1);
725
    m2 = mul32x32_64(r0, r2) + mul32x32_64(r1, r1 * 2);
726
    r1 *= 2;
727
    m3 = mul32x32_64(r0, r3) + mul32x32_64(r1, r2    );
728
    m4 = mul32x32_64(r0, r4) + mul32x32_64(r1, r3 * 2) + mul32x32_64(r2, r2);
729
    r2 *= 2;
730
    m5 = mul32x32_64(r0, r5) + mul32x32_64(r1, r4    ) + mul32x32_64(r2, r3);
731
    m6 = mul32x32_64(r0, r6) + mul32x32_64(r1, r5 * 2) + mul32x32_64(r2, r4) + mul32x32_64(r3, r3 * 2);
732
    r3 *= 2;
733
    m7 = mul32x32_64(r0, r7) + mul32x32_64(r1, r6    ) + mul32x32_64(r2, r5) + mul32x32_64(r3, r4    );
734
    m8 = mul32x32_64(r0, r8) + mul32x32_64(r1, r7 * 2) + mul32x32_64(r2, r6) + mul32x32_64(r3, r5 * 2) + mul32x32_64(r4, r4    );
735
    m9 = mul32x32_64(r0, r9) + mul32x32_64(r1, r8    ) + mul32x32_64(r2, r7) + mul32x32_64(r3, r6    ) + mul32x32_64(r4, r5 * 2);
736

737
    d6 = r6 * 19;
738
    d7 = r7 * 2 * 19;
739
    d8 = r8 * 19;
740
    d9 = r9 * 2 * 19;
741

742
    m0 += (mul32x32_64(d9, r1    ) + mul32x32_64(d8, r2    ) + mul32x32_64(d7, r3    ) + mul32x32_64(d6, r4 * 2) + mul32x32_64(r5, r5 * 2 * 19));
743
    m1 += (mul32x32_64(d9, r2 / 2) + mul32x32_64(d8, r3    ) + mul32x32_64(d7, r4    ) + mul32x32_64(d6, r5 * 2));
744
    m2 += (mul32x32_64(d9, r3    ) + mul32x32_64(d8, r4 * 2) + mul32x32_64(d7, r5 * 2) + mul32x32_64(d6, r6    ));
745
    m3 += (mul32x32_64(d9, r4    ) + mul32x32_64(d8, r5 * 2) + mul32x32_64(d7, r6    ));
746
    m4 += (mul32x32_64(d9, r5 * 2) + mul32x32_64(d8, r6 * 2) + mul32x32_64(d7, r7    ));
747
    m5 += (mul32x32_64(d9, r6    ) + mul32x32_64(d8, r7 * 2));
748
    m6 += (mul32x32_64(d9, r7 * 2) + mul32x32_64(d8, r8    ));
749
    m7 += (mul32x32_64(d9, r8    ));
750
    m8 += (mul32x32_64(d9, r9    ));
751

752
                                 r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
753
    m1 += c;                     r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
754
    m2 += c;                     r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
755
    m3 += c;                     r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
756
    m4 += c;                     r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
757
    m5 += c;                     r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
758
    m6 += c;                     r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
759
    m7 += c;                     r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
760
    m8 += c;                     r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
761
    m9 += c;                     r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
762
    m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
763
    r1 += p;
764

765
    out[0] = r0; out[1] = r1;
766
    out[2] = r2; out[3] = r3;
767
    out[4] = r4; out[5] = r5;
768
    out[6] = r6; out[7] = r7;
769
    out[8] = r8; out[9] = r9;
770
}
771

772
/* out = in ^ (2 * count) */
773
void
774
curve25519_square_times(bignum25519 out, const bignum25519 in, int count) {
775
    word32 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
776
    word32 d6,d7,d8,d9,p;
777
    word64 m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,c;
778

779
    r0 = in[0]; r1 = in[1];
780
    r2 = in[2]; r3 = in[3];
781
    r4 = in[4]; r5 = in[5];
782
    r6 = in[6]; r7 = in[7];
783
    r8 = in[8]; r9 = in[9];
784

785
    do {
786
        m0 = mul32x32_64(r0, r0);
787
        r0 *= 2;
788
        m1 = mul32x32_64(r0, r1);
789
        m2 = mul32x32_64(r0, r2) + mul32x32_64(r1, r1 * 2);
790
        r1 *= 2;
791
        m3 = mul32x32_64(r0, r3) + mul32x32_64(r1, r2    );
792
        m4 = mul32x32_64(r0, r4) + mul32x32_64(r1, r3 * 2) + mul32x32_64(r2, r2);
793
        r2 *= 2;
794
        m5 = mul32x32_64(r0, r5) + mul32x32_64(r1, r4    ) + mul32x32_64(r2, r3);
795
        m6 = mul32x32_64(r0, r6) + mul32x32_64(r1, r5 * 2) + mul32x32_64(r2, r4) + mul32x32_64(r3, r3 * 2);
796
        r3 *= 2;
797
        m7 = mul32x32_64(r0, r7) + mul32x32_64(r1, r6    ) + mul32x32_64(r2, r5) + mul32x32_64(r3, r4    );
798
        m8 = mul32x32_64(r0, r8) + mul32x32_64(r1, r7 * 2) + mul32x32_64(r2, r6) + mul32x32_64(r3, r5 * 2) + mul32x32_64(r4, r4    );
799
        m9 = mul32x32_64(r0, r9) + mul32x32_64(r1, r8    ) + mul32x32_64(r2, r7) + mul32x32_64(r3, r6    ) + mul32x32_64(r4, r5 * 2);
800

801
        d6 = r6 * 19;
802
        d7 = r7 * 2 * 19;
803
        d8 = r8 * 19;
804
        d9 = r9 * 2 * 19;
805

806
        m0 += (mul32x32_64(d9, r1    ) + mul32x32_64(d8, r2    ) + mul32x32_64(d7, r3    ) + mul32x32_64(d6, r4 * 2) + mul32x32_64(r5, r5 * 2 * 19));
807
        m1 += (mul32x32_64(d9, r2 / 2) + mul32x32_64(d8, r3    ) + mul32x32_64(d7, r4    ) + mul32x32_64(d6, r5 * 2));
808
        m2 += (mul32x32_64(d9, r3    ) + mul32x32_64(d8, r4 * 2) + mul32x32_64(d7, r5 * 2) + mul32x32_64(d6, r6    ));
809
        m3 += (mul32x32_64(d9, r4    ) + mul32x32_64(d8, r5 * 2) + mul32x32_64(d7, r6    ));
810
        m4 += (mul32x32_64(d9, r5 * 2) + mul32x32_64(d8, r6 * 2) + mul32x32_64(d7, r7    ));
811
        m5 += (mul32x32_64(d9, r6    ) + mul32x32_64(d8, r7 * 2));
812
        m6 += (mul32x32_64(d9, r7 * 2) + mul32x32_64(d8, r8    ));
813
        m7 += (mul32x32_64(d9, r8    ));
814
        m8 += (mul32x32_64(d9, r9    ));
815

816
                                     r0 = (word32)m0 & reduce_mask_26; c = (m0 >> 26);
817
        m1 += c;                     r1 = (word32)m1 & reduce_mask_25; c = (m1 >> 25);
818
        m2 += c;                     r2 = (word32)m2 & reduce_mask_26; c = (m2 >> 26);
819
        m3 += c;                     r3 = (word32)m3 & reduce_mask_25; c = (m3 >> 25);
820
        m4 += c;                     r4 = (word32)m4 & reduce_mask_26; c = (m4 >> 26);
821
        m5 += c;                     r5 = (word32)m5 & reduce_mask_25; c = (m5 >> 25);
822
        m6 += c;                     r6 = (word32)m6 & reduce_mask_26; c = (m6 >> 26);
823
        m7 += c;                     r7 = (word32)m7 & reduce_mask_25; c = (m7 >> 25);
824
        m8 += c;                     r8 = (word32)m8 & reduce_mask_26; c = (m8 >> 26);
825
        m9 += c;                     r9 = (word32)m9 & reduce_mask_25; p = (word32)(m9 >> 25);
826
        m0 = r0 + mul32x32_64(p,19); r0 = (word32)m0 & reduce_mask_26; p = (word32)(m0 >> 26);
827
        r1 += p;
828
    } while (--count);
829

830
    out[0] = r0; out[1] = r1;
831
    out[2] = r2; out[3] = r3;
832
    out[4] = r4; out[5] = r5;
833
    out[6] = r6; out[7] = r7;
834
    out[8] = r8; out[9] = r9;
835
}
836

837
/* Take a little-endian, 32-byte number and expand it into polynomial form */
838
void
839
curve25519_expand(bignum25519 out, const byte in[32]) {
840
    word32 x0,x1,x2,x3,x4,x5,x6,x7;
841
    GetBlock<word32, LittleEndian> block(in);
842
    block(x0)(x1)(x2)(x3)(x4)(x5)(x6)(x7);
843

844
    out[0] = (                      x0       ) & 0x3ffffff;
845
    out[1] = ((((word64)x1 << 32) | x0) >> 26) & 0x1ffffff;
846
    out[2] = ((((word64)x2 << 32) | x1) >> 19) & 0x3ffffff;
847
    out[3] = ((((word64)x3 << 32) | x2) >> 13) & 0x1ffffff;
848
    out[4] = ((                     x3) >>  6) & 0x3ffffff;
849
    out[5] = (                      x4       ) & 0x1ffffff;
850
    out[6] = ((((word64)x5 << 32) | x4) >> 25) & 0x3ffffff;
851
    out[7] = ((((word64)x6 << 32) | x5) >> 19) & 0x1ffffff;
852
    out[8] = ((((word64)x7 << 32) | x6) >> 12) & 0x3ffffff;
853
    out[9] = ((                     x7) >>  6) & 0x1ffffff;
854
}
855

856
/* Take a fully reduced polynomial form number and contract it into a
857
 * little-endian, 32-byte array
858
 */
859
void
860
curve25519_contract(byte out[32], const bignum25519 in) {
861
    bignum25519 f;
862
    curve25519_copy(f, in);
863

864
    #define carry_pass() \
865
        f[1] += f[0] >> 26; f[0] &= reduce_mask_26; \
866
        f[2] += f[1] >> 25; f[1] &= reduce_mask_25; \
867
        f[3] += f[2] >> 26; f[2] &= reduce_mask_26; \
868
        f[4] += f[3] >> 25; f[3] &= reduce_mask_25; \
869
        f[5] += f[4] >> 26; f[4] &= reduce_mask_26; \
870
        f[6] += f[5] >> 25; f[5] &= reduce_mask_25; \
871
        f[7] += f[6] >> 26; f[6] &= reduce_mask_26; \
872
        f[8] += f[7] >> 25; f[7] &= reduce_mask_25; \
873
        f[9] += f[8] >> 26; f[8] &= reduce_mask_26;
874

875
    #define carry_pass_full() \
876
        carry_pass() \
877
        f[0] += 19 * (f[9] >> 25); f[9] &= reduce_mask_25;
878

879
    #define carry_pass_final() \
880
        carry_pass() \
881
        f[9] &= reduce_mask_25;
882

883
    carry_pass_full()
884
    carry_pass_full()
885

886
    /* now t is between 0 and 2^255-1, properly carried. */
887
    /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
888
    f[0] += 19;
889
    carry_pass_full()
890

891
    /* now between 19 and 2^255-1 in both cases, and offset by 19. */
892
    f[0] += (reduce_mask_26 + 1) - 19;
893
    f[1] += (reduce_mask_25 + 1) - 1;
894
    f[2] += (reduce_mask_26 + 1) - 1;
895
    f[3] += (reduce_mask_25 + 1) - 1;
896
    f[4] += (reduce_mask_26 + 1) - 1;
897
    f[5] += (reduce_mask_25 + 1) - 1;
898
    f[6] += (reduce_mask_26 + 1) - 1;
899
    f[7] += (reduce_mask_25 + 1) - 1;
900
    f[8] += (reduce_mask_26 + 1) - 1;
901
    f[9] += (reduce_mask_25 + 1) - 1;
902

903
    /* now between 2^255 and 2^256-20, and offset by 2^255. */
904
    carry_pass_final()
905

906
    #undef carry_pass
907
    #undef carry_full
908
    #undef carry_final
909

910
    f[1] <<= 2; f[2] <<= 3;
911
    f[3] <<= 5; f[4] <<= 6;
912
    f[6] <<= 1; f[7] <<= 3;
913
    f[8] <<= 4; f[9] <<= 6;
914

915
    #define F(i, s) \
916
        out[s+0] |= (byte)( f[i] & 0xff); \
917
        out[s+1]  = (byte)((f[i] >> 8) & 0xff); \
918
        out[s+2]  = (byte)((f[i] >> 16) & 0xff); \
919
        out[s+3]  = (byte)((f[i] >> 24) & 0xff);
920

921
    out[0] = out[16] = 0;
922
    F(0,0); F(1,3);
923
    F(2,6); F(3,9);
924
    F(4,12); F(5,16);
925
    F(6,19); F(7,22);
926
    F(8,25); F(9,28);
927
    #undef F
928
}
929

930
/* out = (flag) ? in : out */
931
inline void
932
curve25519_move_conditional_bytes(byte out[96], const byte in[96], word32 flag)
933
{
934
    // TODO: enable this code path once we can test and benchmark it.
935
    // It is about 48 insns shorter, it avoids punning which may be UB,
936
    // and it is guaranteed constant time.
937
#if defined(__GNUC__) && defined(__i686__) && 0
938
    const word32 iter = 96/sizeof(word32);
939
    word32* outl = reinterpret_cast<word32*>(out);
940
    const word32* inl = reinterpret_cast<const word32*>(in);
941
    word32 idx=0, val;
942

943
    __asm__ __volatile__ (
944
        ".att_syntax                         ;\n"
945
        "cmpl     $0, %[flag]                ;\n"  // compare, set ZERO flag
946
        "movl     %[iter], %%ecx             ;\n"  // load iteration count
947
        "1:                                  ;\n"
948
        "  movl     (%[idx],%[out]), %[val]  ;\n"  // val = out[idx]
949
        "  cmovnzl  (%[idx],%[in]), %[val]   ;\n"  // copy in[idx] to val if NZ
950
        "  movl     %[val], (%[idx],%[out])  ;\n"  // out[idx] = val
951
        "  leal     4(%[idx]), %[idx]        ;\n"  // increment index
952
        "  loopnz   1b                       ;\n"  // does not affect flags
953
        : [out] "+S" (outl), [in] "+D" (inl),
954
          [idx] "+b" (idx), [val] "=r" (val)
955
        : [flag] "g" (flag), [iter] "I" (iter)
956
        : "ecx", "memory", "cc"
957
    );
958
#else
959
    const word32 nb = flag - 1, b = ~nb;
960
    const word32 *inl = (const word32 *)in;
961
    word32 *outl = (word32 *)out;
962
    outl[0] = (outl[0] & nb) | (inl[0] & b);
963
    outl[1] = (outl[1] & nb) | (inl[1] & b);
964
    outl[2] = (outl[2] & nb) | (inl[2] & b);
965
    outl[3] = (outl[3] & nb) | (inl[3] & b);
966
    outl[4] = (outl[4] & nb) | (inl[4] & b);
967
    outl[5] = (outl[5] & nb) | (inl[5] & b);
968
    outl[6] = (outl[6] & nb) | (inl[6] & b);
969
    outl[7] = (outl[7] & nb) | (inl[7] & b);
970
    outl[8] = (outl[8] & nb) | (inl[8] & b);
971
    outl[9] = (outl[9] & nb) | (inl[9] & b);
972
    outl[10] = (outl[10] & nb) | (inl[10] & b);
973
    outl[11] = (outl[11] & nb) | (inl[11] & b);
974
    outl[12] = (outl[12] & nb) | (inl[12] & b);
975
    outl[13] = (outl[13] & nb) | (inl[13] & b);
976
    outl[14] = (outl[14] & nb) | (inl[14] & b);
977
    outl[15] = (outl[15] & nb) | (inl[15] & b);
978
    outl[16] = (outl[16] & nb) | (inl[16] & b);
979
    outl[17] = (outl[17] & nb) | (inl[17] & b);
980
    outl[18] = (outl[18] & nb) | (inl[18] & b);
981
    outl[19] = (outl[19] & nb) | (inl[19] & b);
982
    outl[20] = (outl[20] & nb) | (inl[20] & b);
983
    outl[21] = (outl[21] & nb) | (inl[21] & b);
984
    outl[22] = (outl[22] & nb) | (inl[22] & b);
985
    outl[23] = (outl[23] & nb) | (inl[23] & b);
986
#endif
987
}
988

989
/* if (iswap) swap(a, b) */
990
inline void
991
curve25519_swap_conditional(bignum25519 a, bignum25519 b, word32 iswap) {
992
    const word32 swap = (word32)(-(sword32)iswap);
993
    word32 x0,x1,x2,x3,x4,x5,x6,x7,x8,x9;
994

995
    x0 = swap & (a[0] ^ b[0]); a[0] ^= x0; b[0] ^= x0;
996
    x1 = swap & (a[1] ^ b[1]); a[1] ^= x1; b[1] ^= x1;
997
    x2 = swap & (a[2] ^ b[2]); a[2] ^= x2; b[2] ^= x2;
998
    x3 = swap & (a[3] ^ b[3]); a[3] ^= x3; b[3] ^= x3;
999
    x4 = swap & (a[4] ^ b[4]); a[4] ^= x4; b[4] ^= x4;
1000
    x5 = swap & (a[5] ^ b[5]); a[5] ^= x5; b[5] ^= x5;
1001
    x6 = swap & (a[6] ^ b[6]); a[6] ^= x6; b[6] ^= x6;
1002
    x7 = swap & (a[7] ^ b[7]); a[7] ^= x7; b[7] ^= x7;
1003
    x8 = swap & (a[8] ^ b[8]); a[8] ^= x8; b[8] ^= x8;
1004
    x9 = swap & (a[9] ^ b[9]); a[9] ^= x9; b[9] ^= x9;
1005
}
1006

1007
/*
1008
 * In:  b =   2^5 - 2^0
1009
 * Out: b = 2^250 - 2^0
1010
 */
1011
void
1012
curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) {
1013
    ALIGN(ALIGN_SPEC) bignum25519 t0,c;
1014

1015
    /* 2^5  - 2^0 */ /* b */
1016
    /* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5);
1017
    /* 2^10 - 2^0 */ curve25519_mul(b, t0, b);
1018
    /* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10);
1019
    /* 2^20 - 2^0 */ curve25519_mul(c, t0, b);
1020
    /* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20);
1021
    /* 2^40 - 2^0 */ curve25519_mul(t0, t0, c);
1022
    /* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10);
1023
    /* 2^50 - 2^0 */ curve25519_mul(b, t0, b);
1024
    /* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50);
1025
    /* 2^100 - 2^0 */ curve25519_mul(c, t0, b);
1026
    /* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100);
1027
    /* 2^200 - 2^0 */ curve25519_mul(t0, t0, c);
1028
    /* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50);
1029
    /* 2^250 - 2^0 */ curve25519_mul(b, t0, b);
1030
}
1031

1032
/*
1033
 * z^(p - 2) = z(2^255 - 21)
1034
 */
1035
void
1036
curve25519_recip(bignum25519 out, const bignum25519 z) {
1037
    ALIGN(ALIGN_SPEC) bignum25519 a,t0,b;
1038

1039
    /* 2 */ curve25519_square_times(a, z, 1); /* a = 2 */
1040
    /* 8 */ curve25519_square_times(t0, a, 2);
1041
    /* 9 */ curve25519_mul(b, t0, z); /* b = 9 */
1042
    /* 11 */ curve25519_mul(a, b, a); /* a = 11 */
1043
    /* 22 */ curve25519_square_times(t0, a, 1);
1044
    /* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b);
1045
    /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
1046
    /* 2^255 - 2^5 */ curve25519_square_times(b, b, 5);
1047
    /* 2^255 - 21 */ curve25519_mul(out, b, a);
1048
}
1049

1050
/*
1051
 * z^((p-5)/8) = z^(2^252 - 3)
1052
 */
1053
void
1054
curve25519_pow_two252m3(bignum25519 two252m3, const bignum25519 z) {
1055
    ALIGN(ALIGN_SPEC) bignum25519 b,c,t0;
1056

1057
    /* 2 */ curve25519_square_times(c, z, 1); /* c = 2 */
1058
    /* 8 */ curve25519_square_times(t0, c, 2); /* t0 = 8 */
1059
    /* 9 */ curve25519_mul(b, t0, z); /* b = 9 */
1060
    /* 11 */ curve25519_mul(c, b, c); /* c = 11 */
1061
    /* 22 */ curve25519_square_times(t0, c, 1);
1062
    /* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b);
1063
    /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);
1064
    /* 2^252 - 2^2 */ curve25519_square_times(b, b, 2);
1065
    /* 2^252 - 3 */ curve25519_mul(two252m3, b, z);
1066
}
1067

1068
inline void
1069
ed25519_hash(byte *hash, const byte *in, size_t inlen) {
1070
    SHA512().CalculateDigest(hash, in, inlen);
1071
}
1072

1073
inline void
1074
ed25519_extsk(hash_512bits extsk, const byte sk[32]) {
1075
    ed25519_hash(extsk, sk, 32);
1076
    extsk[0] &= 248;
1077
    extsk[31] &= 127;
1078
    extsk[31] |= 64;
1079
}
1080

1081
void
1082
UpdateFromStream(HashTransformation& hash, std::istream& stream)
1083
{
1084
    SecByteBlock block(4096);
1085
    while (stream.read((char*)block.begin(), block.size()))
1086
        hash.Update(block, block.size());
1087

1088
    std::streamsize rem = stream.gcount();
1089
    if (rem)
1090
        hash.Update(block, (size_t)rem);
1091

1092
    block.SetMark(0);
1093
}
1094

1095
void
1096
ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], const byte *m, size_t mlen) {
1097
    SHA512 hash;
1098
    hash.Update(RS, 32);
1099
    hash.Update(pk, 32);
1100
    hash.Update(m, mlen);
1101
    hash.Final(hram);
1102
}
1103

1104
void
1105
ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], std::istream& stream) {
1106
    SHA512 hash;
1107
    hash.Update(RS, 32);
1108
    hash.Update(pk, 32);
1109
    UpdateFromStream(hash, stream);
1110
    hash.Final(hram);
1111
}
1112

1113
/*
 * Returns a - b shifted right by 31, i.e. the top bit of the difference
 * for a 32-bit element type (NOTE(review): element width assumed to be
 * 32 bits -- confirm). reduce256_modm uses the result as a borrow flag.
 */
inline bignum256modm_element_t
lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
    const bignum256modm_element_t diff = a - b;
    return diff >> 31;
}
1117

1118
/* see HAC, Alg. 14.42 Step 4 */
1119
void
1120
reduce256_modm(bignum256modm r) {
1121
    bignum256modm t;
1122
    bignum256modm_element_t b = 0, pb, mask;
1123

1124
    /* t = r - m */
1125
    pb = 0;
1126
    pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 30)); pb = b;
1127
    pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 30)); pb = b;
1128
    pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 30)); pb = b;
1129
    pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 30)); pb = b;
1130
    pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 30)); pb = b;
1131
    pb += modm_m[5]; b = lt_modm(r[5], pb); t[5] = (r[5] - pb + (b << 30)); pb = b;
1132
    pb += modm_m[6]; b = lt_modm(r[6], pb); t[6] = (r[6] - pb + (b << 30)); pb = b;
1133
    pb += modm_m[7]; b = lt_modm(r[7], pb); t[7] = (r[7] - pb + (b << 30)); pb = b;
1134
    pb += modm_m[8]; b = lt_modm(r[8], pb); t[8] = (r[8] - pb + (b << 16));
1135

1136
    /* keep r if r was smaller than m */
1137
    mask = b - 1;
1138
    r[0] ^= mask & (r[0] ^ t[0]);
1139
    r[1] ^= mask & (r[1] ^ t[1]);
1140
    r[2] ^= mask & (r[2] ^ t[2]);
1141
    r[3] ^= mask & (r[3] ^ t[3]);
1142
    r[4] ^= mask & (r[4] ^ t[4]);
1143
    r[5] ^= mask & (r[5] ^ t[5]);
1144
    r[6] ^= mask & (r[6] ^ t[6]);
1145
    r[7] ^= mask & (r[7] ^ t[7]);
1146
    r[8] ^= mask & (r[8] ^ t[8]);
1147
}
1148

1149
/* Barrett reduction, see HAC, Alg. 14.42 */
1150
void
1151
barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
1152
    bignum256modm q3, r2;
1153
    word64 c;
1154
    bignum256modm_element_t f, b, pb;
1155

1156
    /* q1 = x >> 248 = 264 bits = 9 30 bit elements
1157
       q2 = mu * q1
1158
       q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264
1159
     */
1160
    c  = mul32x32_64(modm_mu[0], q1[7]) + mul32x32_64(modm_mu[1], q1[6]) + mul32x32_64(modm_mu[2], q1[5]) + mul32x32_64(modm_mu[3], q1[4]) + mul32x32_64(modm_mu[4], q1[3]) + mul32x32_64(modm_mu[5], q1[2]) + mul32x32_64(modm_mu[6], q1[1]) + mul32x32_64(modm_mu[7], q1[0]);
1161
    c >>= 30;
1162
    c += mul32x32_64(modm_mu[0], q1[8]) + mul32x32_64(modm_mu[1], q1[7]) + mul32x32_64(modm_mu[2], q1[6]) + mul32x32_64(modm_mu[3], q1[5]) + mul32x32_64(modm_mu[4], q1[4]) + mul32x32_64(modm_mu[5], q1[3]) + mul32x32_64(modm_mu[6], q1[2]) + mul32x32_64(modm_mu[7], q1[1]) + mul32x32_64(modm_mu[8], q1[0]);
1163
    f = (bignum256modm_element_t)c; q3[0] = (f >> 24) & 0x3f; c >>= 30;
1164
    c += mul32x32_64(modm_mu[1], q1[8]) + mul32x32_64(modm_mu[2], q1[7]) + mul32x32_64(modm_mu[3], q1[6]) + mul32x32_64(modm_mu[4], q1[5]) + mul32x32_64(modm_mu[5], q1[4]) + mul32x32_64(modm_mu[6], q1[3]) + mul32x32_64(modm_mu[7], q1[2]) + mul32x32_64(modm_mu[8], q1[1]);
1165
    f = (bignum256modm_element_t)c; q3[0] |= (f << 6) & 0x3fffffff; q3[1] = (f >> 24) & 0x3f; c >>= 30;
1166
    c += mul32x32_64(modm_mu[2], q1[8]) + mul32x32_64(modm_mu[3], q1[7]) + mul32x32_64(modm_mu[4], q1[6]) + mul32x32_64(modm_mu[5], q1[5]) + mul32x32_64(modm_mu[6], q1[4]) + mul32x32_64(modm_mu[7], q1[3]) + mul32x32_64(modm_mu[8], q1[2]);
1167
    f = (bignum256modm_element_t)c; q3[1] |= (f << 6) & 0x3fffffff; q3[2] = (f >> 24) & 0x3f; c >>= 30;
1168
    c += mul32x32_64(modm_mu[3], q1[8]) + mul32x32_64(modm_mu[4], q1[7]) + mul32x32_64(modm_mu[5], q1[6]) + mul32x32_64(modm_mu[6], q1[5]) + mul32x32_64(modm_mu[7], q1[4]) + mul32x32_64(modm_mu[8], q1[3]);
1169
    f = (bignum256modm_element_t)c; q3[2] |= (f << 6) & 0x3fffffff; q3[3] = (f >> 24) & 0x3f; c >>= 30;
1170
    c += mul32x32_64(modm_mu[4], q1[8]) + mul32x32_64(modm_mu[5], q1[7]) + mul32x32_64(modm_mu[6], q1[6]) + mul32x32_64(modm_mu[7], q1[5]) + mul32x32_64(modm_mu[8], q1[4]);
1171
    f = (bignum256modm_element_t)c; q3[3] |= (f << 6) & 0x3fffffff; q3[4] = (f >> 24) & 0x3f; c >>= 30;
1172
    c += mul32x32_64(modm_mu[5], q1[8]) + mul32x32_64(modm_mu[6], q1[7]) + mul32x32_64(modm_mu[7], q1[6]) + mul32x32_64(modm_mu[8], q1[5]);
1173
    f = (bignum256modm_element_t)c; q3[4] |= (f << 6) & 0x3fffffff; q3[5] = (f >> 24) & 0x3f; c >>= 30;
1174
    c += mul32x32_64(modm_mu[6], q1[8]) + mul32x32_64(modm_mu[7], q1[7]) + mul32x32_64(modm_mu[8], q1[6]);
1175
    f = (bignum256modm_element_t)c; q3[5] |= (f << 6) & 0x3fffffff; q3[6] = (f >> 24) & 0x3f; c >>= 30;
1176
    c += mul32x32_64(modm_mu[7], q1[8]) + mul32x32_64(modm_mu[8], q1[7]);
1177
    f = (bignum256modm_element_t)c; q3[6] |= (f << 6) & 0x3fffffff; q3[7] = (f >> 24) & 0x3f; c >>= 30;
1178
    c += mul32x32_64(modm_mu[8], q1[8]);
1179
    f = (bignum256modm_element_t)c; q3[7] |= (f << 6) & 0x3fffffff; q3[8] = (bignum256modm_element_t)(c >> 24);
1180

1181
    /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1)
1182
       r2 = (q3 * m) mod (256^(32+1)) = (q3 * m) & ((1 << 264) - 1)
1183
     */
1184
    c = mul32x32_64(modm_m[0], q3[0]);
1185
    r2[0] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1186
    c += mul32x32_64(modm_m[0], q3[1]) + mul32x32_64(modm_m[1], q3[0]);
1187
    r2[1] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1188
    c += mul32x32_64(modm_m[0], q3[2]) + mul32x32_64(modm_m[1], q3[1]) + mul32x32_64(modm_m[2], q3[0]);
1189
    r2[2] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1190
    c += mul32x32_64(modm_m[0], q3[3]) + mul32x32_64(modm_m[1], q3[2]) + mul32x32_64(modm_m[2], q3[1]) + mul32x32_64(modm_m[3], q3[0]);
1191
    r2[3] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1192
    c += mul32x32_64(modm_m[0], q3[4]) + mul32x32_64(modm_m[1], q3[3]) + mul32x32_64(modm_m[2], q3[2]) + mul32x32_64(modm_m[3], q3[1]) + mul32x32_64(modm_m[4], q3[0]);
1193
    r2[4] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1194
    c += mul32x32_64(modm_m[0], q3[5]) + mul32x32_64(modm_m[1], q3[4]) + mul32x32_64(modm_m[2], q3[3]) + mul32x32_64(modm_m[3], q3[2]) + mul32x32_64(modm_m[4], q3[1]) + mul32x32_64(modm_m[5], q3[0]);
1195
    r2[5] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1196
    c += mul32x32_64(modm_m[0], q3[6]) + mul32x32_64(modm_m[1], q3[5]) + mul32x32_64(modm_m[2], q3[4]) + mul32x32_64(modm_m[3], q3[3]) + mul32x32_64(modm_m[4], q3[2]) + mul32x32_64(modm_m[5], q3[1]) + mul32x32_64(modm_m[6], q3[0]);
1197
    r2[6] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1198
    c += mul32x32_64(modm_m[0], q3[7]) + mul32x32_64(modm_m[1], q3[6]) + mul32x32_64(modm_m[2], q3[5]) + mul32x32_64(modm_m[3], q3[4]) + mul32x32_64(modm_m[4], q3[3]) + mul32x32_64(modm_m[5], q3[2]) + mul32x32_64(modm_m[6], q3[1]) + mul32x32_64(modm_m[7], q3[0]);
1199
    r2[7] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
1200
    c += mul32x32_64(modm_m[0], q3[8]) + mul32x32_64(modm_m[1], q3[7]) + mul32x32_64(modm_m[2], q3[6]) + mul32x32_64(modm_m[3], q3[5]) + mul32x32_64(modm_m[4], q3[4]) + mul32x32_64(modm_m[5], q3[3]) + mul32x32_64(modm_m[6], q3[2]) + mul32x32_64(modm_m[7], q3[1]) + mul32x32_64(modm_m[8], q3[0]);
1201
    r2[8] = (bignum256modm_element_t)(c & 0xffffff);
1202

1203
    /* r = r1 - r2
1204
       if (r < 0) r += (1 << 264) */
1205
    pb = 0;
1206
    pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 30)); pb = b;
1207
    pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 30)); pb = b;
1208
    pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 30)); pb = b;
1209
    pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 30)); pb = b;
1210
    pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 30)); pb = b;
1211
    pb += r2[5]; b = lt_modm(r1[5], pb); r[5] = (r1[5] - pb + (b << 30)); pb = b;
1212
    pb += r2[6]; b = lt_modm(r1[6], pb); r[6] = (r1[6] - pb + (b << 30)); pb = b;
1213
    pb += r2[7]; b = lt_modm(r1[7], pb); r[7] = (r1[7] - pb + (b << 30)); pb = b;
1214
    pb += r2[8]; b = lt_modm(r1[8], pb); r[8] = (r1[8] - pb + (b << 24));
1215

1216
    reduce256_modm(r);
1217
    reduce256_modm(r);
1218
}
1219

1220
/* addition modulo m */
1221
void
1222
add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
1223
    bignum256modm_element_t c;
1224

1225
    c  = x[0] + y[0]; r[0] = c & 0x3fffffff; c >>= 30;
1226
    c += x[1] + y[1]; r[1] = c & 0x3fffffff; c >>= 30;
1227
    c += x[2] + y[2]; r[2] = c & 0x3fffffff; c >>= 30;
1228
    c += x[3] + y[3]; r[3] = c & 0x3fffffff; c >>= 30;
1229
    c += x[4] + y[4]; r[4] = c & 0x3fffffff; c >>= 30;
1230
    c += x[5] + y[5]; r[5] = c & 0x3fffffff; c >>= 30;
1231
    c += x[6] + y[6]; r[6] = c & 0x3fffffff; c >>= 30;
1232
    c += x[7] + y[7]; r[7] = c & 0x3fffffff; c >>= 30;
1233
    c += x[8] + y[8]; r[8] = c;
1234

1235
    reduce256_modm(r);
1236
}
1237

1238
/* multiplication modulo m */
1239
void
1240
mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
1241
    bignum256modm r1, q1;
1242
    word64 c;
1243
    bignum256modm_element_t f;
1244

1245
    c = mul32x32_64(x[0], y[0]);
1246
    f = (bignum256modm_element_t)c; r1[0] = (f & 0x3fffffff); c >>= 30;
1247
    c += mul32x32_64(x[0], y[1]) + mul32x32_64(x[1], y[0]);
1248
    f = (bignum256modm_element_t)c; r1[1] = (f & 0x3fffffff); c >>= 30;
1249
    c += mul32x32_64(x[0], y[2]) + mul32x32_64(x[1], y[1]) + mul32x32_64(x[2], y[0]);
1250
    f = (bignum256modm_element_t)c; r1[2] = (f & 0x3fffffff); c >>= 30;
1251
    c += mul32x32_64(x[0], y[3]) + mul32x32_64(x[1], y[2]) + mul32x32_64(x[2], y[1]) + mul32x32_64(x[3], y[0]);
1252
    f = (bignum256modm_element_t)c; r1[3] = (f & 0x3fffffff); c >>= 30;
1253
    c += mul32x32_64(x[0], y[4]) + mul32x32_64(x[1], y[3]) + mul32x32_64(x[2], y[2]) + mul32x32_64(x[3], y[1]) + mul32x32_64(x[4], y[0]);
1254
    f = (bignum256modm_element_t)c; r1[4] = (f & 0x3fffffff); c >>= 30;
1255
    c += mul32x32_64(x[0], y[5]) + mul32x32_64(x[1], y[4]) + mul32x32_64(x[2], y[3]) + mul32x32_64(x[3], y[2]) + mul32x32_64(x[4], y[1]) + mul32x32_64(x[5], y[0]);
1256
    f = (bignum256modm_element_t)c; r1[5] = (f & 0x3fffffff); c >>= 30;
1257
    c += mul32x32_64(x[0], y[6]) + mul32x32_64(x[1], y[5]) + mul32x32_64(x[2], y[4]) + mul32x32_64(x[3], y[3]) + mul32x32_64(x[4], y[2]) + mul32x32_64(x[5], y[1]) + mul32x32_64(x[6], y[0]);
1258
    f = (bignum256modm_element_t)c; r1[6] = (f & 0x3fffffff); c >>= 30;
1259
    c += mul32x32_64(x[0], y[7]) + mul32x32_64(x[1], y[6]) + mul32x32_64(x[2], y[5]) + mul32x32_64(x[3], y[4]) + mul32x32_64(x[4], y[3]) + mul32x32_64(x[5], y[2]) + mul32x32_64(x[6], y[1]) + mul32x32_64(x[7], y[0]);
1260
    f = (bignum256modm_element_t)c; r1[7] = (f & 0x3fffffff); c >>= 30;
1261
    c += mul32x32_64(x[0], y[8]) + mul32x32_64(x[1], y[7]) + mul32x32_64(x[2], y[6]) + mul32x32_64(x[3], y[5]) + mul32x32_64(x[4], y[4]) + mul32x32_64(x[5], y[3]) + mul32x32_64(x[6], y[2]) + mul32x32_64(x[7], y[1]) + mul32x32_64(x[8], y[0]);
1262
    f = (bignum256modm_element_t)c; r1[8] = (f & 0x00ffffff); q1[0] = (f >> 8) & 0x3fffff; c >>= 30;
1263
    c += mul32x32_64(x[1], y[8]) + mul32x32_64(x[2], y[7]) + mul32x32_64(x[3], y[6]) + mul32x32_64(x[4], y[5]) + mul32x32_64(x[5], y[4]) + mul32x32_64(x[6], y[3]) + mul32x32_64(x[7], y[2]) + mul32x32_64(x[8], y[1]);
1264
    f = (bignum256modm_element_t)c; q1[0] = (q1[0] | (f << 22)) & 0x3fffffff; q1[1] = (f >> 8) & 0x3fffff; c >>= 30;
1265
    c += mul32x32_64(x[2], y[8]) + mul32x32_64(x[3], y[7]) + mul32x32_64(x[4], y[6]) + mul32x32_64(x[5], y[5]) + mul32x32_64(x[6], y[4]) + mul32x32_64(x[7], y[3]) + mul32x32_64(x[8], y[2]);
1266
    f = (bignum256modm_element_t)c; q1[1] = (q1[1] | (f << 22)) & 0x3fffffff; q1[2] = (f >> 8) & 0x3fffff; c >>= 30;
1267
    c += mul32x32_64(x[3], y[8]) + mul32x32_64(x[4], y[7]) + mul32x32_64(x[5], y[6]) + mul32x32_64(x[6], y[5]) + mul32x32_64(x[7], y[4]) + mul32x32_64(x[8], y[3]);
1268
    f = (bignum256modm_element_t)c; q1[2] = (q1[2] | (f << 22)) & 0x3fffffff; q1[3] = (f >> 8) & 0x3fffff; c >>= 30;
1269
    c += mul32x32_64(x[4], y[8]) + mul32x32_64(x[5], y[7]) + mul32x32_64(x[6], y[6]) + mul32x32_64(x[7], y[5]) + mul32x32_64(x[8], y[4]);
1270
    f = (bignum256modm_element_t)c; q1[3] = (q1[3] | (f << 22)) & 0x3fffffff; q1[4] = (f >> 8) & 0x3fffff; c >>= 30;
1271
    c += mul32x32_64(x[5], y[8]) + mul32x32_64(x[6], y[7]) + mul32x32_64(x[7], y[6]) + mul32x32_64(x[8], y[5]);
1272
    f = (bignum256modm_element_t)c; q1[4] = (q1[4] | (f << 22)) & 0x3fffffff; q1[5] = (f >> 8) & 0x3fffff; c >>= 30;
1273
    c += mul32x32_64(x[6], y[8]) + mul32x32_64(x[7], y[7]) + mul32x32_64(x[8], y[6]);
1274
    f = (bignum256modm_element_t)c; q1[5] = (q1[5] | (f << 22)) & 0x3fffffff; q1[6] = (f >> 8) & 0x3fffff; c >>= 30;
1275
    c += mul32x32_64(x[7], y[8]) + mul32x32_64(x[8], y[7]);
1276
    f = (bignum256modm_element_t)c; q1[6] = (q1[6] | (f << 22)) & 0x3fffffff; q1[7] = (f >> 8) & 0x3fffff; c >>= 30;
1277
    c += mul32x32_64(x[8], y[8]);
1278
    f = (bignum256modm_element_t)c; q1[7] = (q1[7] | (f << 22)) & 0x3fffffff; q1[8] = (f >> 8) & 0x3fffff;
1279

1280
    barrett_reduce256_modm(r, q1, r1);
1281
}
1282

1283
void
1284
expand256_modm(bignum256modm out, const byte *in, size_t len) {
1285
    byte work[64] = {0};
1286
    bignum256modm_element_t x[16];
1287
    bignum256modm q1;
1288

1289
    std::memcpy(work, in, len);
1290
    x[0] = U8TO32_LE(work +  0);
1291
    x[1] = U8TO32_LE(work +  4);
1292
    x[2] = U8TO32_LE(work +  8);
1293
    x[3] = U8TO32_LE(work + 12);
1294
    x[4] = U8TO32_LE(work + 16);
1295
    x[5] = U8TO32_LE(work + 20);
1296
    x[6] = U8TO32_LE(work + 24);
1297
    x[7] = U8TO32_LE(work + 28);
1298
    x[8] = U8TO32_LE(work + 32);
1299
    x[9] = U8TO32_LE(work + 36);
1300
    x[10] = U8TO32_LE(work + 40);
1301
    x[11] = U8TO32_LE(work + 44);
1302
    x[12] = U8TO32_LE(work + 48);
1303
    x[13] = U8TO32_LE(work + 52);
1304
    x[14] = U8TO32_LE(work + 56);
1305
    x[15] = U8TO32_LE(work + 60);
1306

1307
    /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
1308
    out[0] = (                         x[0]) & 0x3fffffff;
1309
    out[1] = ((x[ 0] >> 30) | (x[ 1] <<  2)) & 0x3fffffff;
1310
    out[2] = ((x[ 1] >> 28) | (x[ 2] <<  4)) & 0x3fffffff;
1311
    out[3] = ((x[ 2] >> 26) | (x[ 3] <<  6)) & 0x3fffffff;
1312
    out[4] = ((x[ 3] >> 24) | (x[ 4] <<  8)) & 0x3fffffff;
1313
    out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
1314
    out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
1315
    out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
1316
    out[8] = ((x[ 7] >> 16) | (x[ 8] << 16)) & 0x00ffffff;
1317

1318
    /* 8*31 = 248 bits, no need to reduce */
1319
    if (len < 32)
1320
            return;
1321

1322
    /* q1 = x >> 248 = 264 bits = 9 30 bit elements */
1323
    q1[0] = ((x[ 7] >> 24) | (x[ 8] <<  8)) & 0x3fffffff;
1324
    q1[1] = ((x[ 8] >> 22) | (x[ 9] << 10)) & 0x3fffffff;
1325
    q1[2] = ((x[ 9] >> 20) | (x[10] << 12)) & 0x3fffffff;
1326
    q1[3] = ((x[10] >> 18) | (x[11] << 14)) & 0x3fffffff;
1327
    q1[4] = ((x[11] >> 16) | (x[12] << 16)) & 0x3fffffff;
1328
    q1[5] = ((x[12] >> 14) | (x[13] << 18)) & 0x3fffffff;
1329
    q1[6] = ((x[13] >> 12) | (x[14] << 20)) & 0x3fffffff;
1330
    q1[7] = ((x[14] >> 10) | (x[15] << 22)) & 0x3fffffff;
1331
    q1[8] = ((x[15] >>  8)                );
1332

1333
    barrett_reduce256_modm(out, q1, out);
1334
}
1335

1336
void
1337
expand_raw256_modm(bignum256modm out, const byte in[32]) {
1338
    bignum256modm_element_t x[8];
1339

1340
    x[0] = U8TO32_LE(in +  0);
1341
    x[1] = U8TO32_LE(in +  4);
1342
    x[2] = U8TO32_LE(in +  8);
1343
    x[3] = U8TO32_LE(in + 12);
1344
    x[4] = U8TO32_LE(in + 16);
1345
    x[5] = U8TO32_LE(in + 20);
1346
    x[6] = U8TO32_LE(in + 24);
1347
    x[7] = U8TO32_LE(in + 28);
1348

1349
    out[0] = (                         x[0]) & 0x3fffffff;
1350
    out[1] = ((x[ 0] >> 30) | (x[ 1] <<  2)) & 0x3fffffff;
1351
    out[2] = ((x[ 1] >> 28) | (x[ 2] <<  4)) & 0x3fffffff;
1352
    out[3] = ((x[ 2] >> 26) | (x[ 3] <<  6)) & 0x3fffffff;
1353
    out[4] = ((x[ 3] >> 24) | (x[ 4] <<  8)) & 0x3fffffff;
1354
    out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
1355
    out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
1356
    out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
1357
    out[8] = ((x[ 7] >> 16)                ) & 0x0000ffff;
1358
}
1359

1360
void
1361
contract256_modm(byte out[32], const bignum256modm in) {
1362
    U32TO8_LE(out +  0, (in[0]      ) | (in[1] << 30));
1363
    U32TO8_LE(out +  4, (in[1] >>  2) | (in[2] << 28));
1364
    U32TO8_LE(out +  8, (in[2] >>  4) | (in[3] << 26));
1365
    U32TO8_LE(out + 12, (in[3] >>  6) | (in[4] << 24));
1366
    U32TO8_LE(out + 16, (in[4] >>  8) | (in[5] << 22));
1367
    U32TO8_LE(out + 20, (in[5] >> 10) | (in[6] << 20));
1368
    U32TO8_LE(out + 24, (in[6] >> 12) | (in[7] << 18));
1369
    U32TO8_LE(out + 28, (in[7] >> 14) | (in[8] << 16));
1370
}
1371

1372
void
1373
contract256_window4_modm(signed char r[64], const bignum256modm in) {
1374
    char carry;
1375
    signed char *quads = r;
1376
    bignum256modm_element_t i, j, v;
1377

1378
    for (i = 0; i < 8; i += 2) {
1379
        v = in[i];
1380
        for (j = 0; j < 7; j++) {
1381
            *quads++ = (v & 15);
1382
            v >>= 4;
1383
        }
1384
        v |= (in[i+1] << 2);
1385
        for (j = 0; j < 8; j++) {
1386
            *quads++ = (v & 15);
1387
            v >>= 4;
1388
        }
1389
    }
1390

1391
    v = in[8];
1392
    *quads++ = (v & 15); v >>= 4;
1393
    *quads++ = (v & 15); v >>= 4;
1394
    *quads++ = (v & 15); v >>= 4;
1395
    *quads++ = (v & 15); v >>= 4;
1396

1397
    /* making it signed */
1398
    carry = 0;
1399
    for(i = 0; i < 63; i++) {
1400
        r[i] += carry;
1401
        r[i+1] += (r[i] >> 4);
1402
        r[i] &= 15;
1403
        carry = (r[i] >> 3);
1404
        r[i] -= (carry << 4);
1405
    }
1406
    r[63] += carry;
1407
}
1408

1409
void
1410
contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
1411
    int i,j,k,b;
1412
    int m = (1 << (windowsize - 1)) - 1, soplen = 256;
1413
    signed char *bits = r;
1414
    bignum256modm_element_t v;
1415

1416
    /* first put the binary expansion into r  */
1417
    for (i = 0; i < 8; i++) {
1418
        v = s[i];
1419
        for (j = 0; j < 30; j++, v >>= 1)
1420
            *bits++ = (v & 1);
1421
    }
1422
    v = s[8];
1423
    for (j = 0; j < 16; j++, v >>= 1)
1424
        *bits++ = (v & 1);
1425

1426
    /* Making it sliding window */
1427
    for (j = 0; j < soplen; j++) {
1428
        if (!r[j])
1429
            continue;
1430

1431
        for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
1432
            if ((r[j] + (r[j + b] << b)) <= m) {
1433
                r[j] += r[j + b] << b;
1434
                r[j + b] = 0;
1435
            } else if ((r[j] - (r[j + b] << b)) >= -m) {
1436
                r[j] -= r[j + b] << b;
1437
                for (k = j + b; k < soplen; k++) {
1438
                    if (!r[k]) {
1439
                        r[k] = 1;
1440
                        break;
1441
                    }
1442
                    r[k] = 0;
1443
                }
1444
            } else if (r[j + b]) {
1445
                break;
1446
            }
1447
        }
1448
    }
1449
}
1450

1451
inline void
1452
ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {
1453
    curve25519_mul(r->x, p->x, p->t);
1454
    curve25519_mul(r->y, p->y, p->z);
1455
    curve25519_mul(r->z, p->z, p->t);
1456
}
1457

1458
inline void
1459
ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {
1460
    curve25519_mul(r->x, p->x, p->t);
1461
    curve25519_mul(r->y, p->y, p->z);
1462
    curve25519_mul(r->z, p->z, p->t);
1463
    curve25519_mul(r->t, p->x, p->y);
1464
}
1465

1466
void
1467
ge25519_full_to_pniels(ge25519_pniels *p, const ge25519 *r) {
1468
    curve25519_sub(p->ysubx, r->y, r->x);
1469
    curve25519_add(p->xaddy, r->y, r->x);
1470
    curve25519_copy(p->z, r->z);
1471
    curve25519_mul(p->t2d, r->t, ge25519_ec2d);
1472
}
1473

1474
void
1475
ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519 *q) {
1476
    bignum25519 a,b,c,d,t,u;
1477

1478
    curve25519_sub(a, p->y, p->x);
1479
    curve25519_add(b, p->y, p->x);
1480
    curve25519_sub(t, q->y, q->x);
1481
    curve25519_add(u, q->y, q->x);
1482
    curve25519_mul(a, a, t);
1483
    curve25519_mul(b, b, u);
1484
    curve25519_mul(c, p->t, q->t);
1485
    curve25519_mul(c, c, ge25519_ec2d);
1486
    curve25519_mul(d, p->z, q->z);
1487
    curve25519_add(d, d, d);
1488
    curve25519_sub(r->x, b, a);
1489
    curve25519_add(r->y, b, a);
1490
    curve25519_add_after_basic(r->z, d, c);
1491
    curve25519_sub_after_basic(r->t, d, c);
1492
}
1493

1494
void
1495
ge25519_double_p1p1(ge25519_p1p1 *r, const ge25519 *p) {
1496
    bignum25519 a,b,c;
1497

1498
    curve25519_square(a, p->x);
1499
    curve25519_square(b, p->y);
1500
    curve25519_square(c, p->z);
1501
    curve25519_add_reduce(c, c, c);
1502
    curve25519_add(r->x, p->x, p->y);
1503
    curve25519_square(r->x, r->x);
1504
    curve25519_add(r->y, b, a);
1505
    curve25519_sub(r->z, b, a);
1506
    curve25519_sub_after_basic(r->x, r->x, r->y);
1507
    curve25519_sub_after_basic(r->t, c, r->z);
1508
}
1509

1510
void
1511
ge25519_nielsadd2_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_niels *q, byte signbit) {
1512
    const bignum25519 *qb = (const bignum25519 *)q;
1513
    bignum25519 *rb = (bignum25519 *)r;
1514
    bignum25519 a,b,c;
1515

1516
    curve25519_sub(a, p->y, p->x);
1517
    curve25519_add(b, p->y, p->x);
1518
    curve25519_mul(a, a, qb[signbit]); /* x for +, y for - */
1519
    curve25519_mul(r->x, b, qb[signbit^1]); /* y for +, x for - */
1520
    curve25519_add(r->y, r->x, a);
1521
    curve25519_sub(r->x, r->x, a);
1522
    curve25519_mul(c, p->t, q->t2d);
1523
    curve25519_add_reduce(r->t, p->z, p->z);
1524
    curve25519_copy(r->z, r->t);
1525
    curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */
1526
    curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */
1527
}
1528

1529
void
1530
ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_pniels *q, byte signbit) {
1531
    const bignum25519 *qb = (const bignum25519 *)q;
1532
    bignum25519 *rb = (bignum25519 *)r;
1533
    bignum25519 a,b,c;
1534

1535
    curve25519_sub(a, p->y, p->x);
1536
    curve25519_add(b, p->y, p->x);
1537
    curve25519_mul(a, a, qb[signbit]); /* ysubx for +, xaddy for - */
1538
    curve25519_mul(r->x, b, qb[signbit^1]); /* xaddy for +, ysubx for - */
1539
    curve25519_add(r->y, r->x, a);
1540
    curve25519_sub(r->x, r->x, a);
1541
    curve25519_mul(c, p->t, q->t2d);
1542
    curve25519_mul(r->t, p->z, q->z);
1543
    curve25519_add_reduce(r->t, r->t, r->t);
1544
    curve25519_copy(r->z, r->t);
1545
    curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */
1546
    curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */
1547
}
1548

1549
void
1550
ge25519_double_partial(ge25519 *r, const ge25519 *p) {
1551
    ge25519_p1p1 t;
1552
    ge25519_double_p1p1(&t, p);
1553
    ge25519_p1p1_to_partial(r, &t);
1554
}
1555

1556
void
1557
ge25519_double(ge25519 *r, const ge25519 *p) {
1558
    ge25519_p1p1 t;
1559
    ge25519_double_p1p1(&t, p);
1560
    ge25519_p1p1_to_full(r, &t);
1561
}
1562

1563
void
1564
ge25519_add(ge25519 *r, const ge25519 *p,  const ge25519 *q) {
1565
    ge25519_p1p1 t;
1566
    ge25519_add_p1p1(&t, p, q);
1567
    ge25519_p1p1_to_full(r, &t);
1568
}
1569

1570
void
1571
ge25519_nielsadd2(ge25519 *r, const ge25519_niels *q) {
1572
    bignum25519 a,b,c,e,f,g,h;
1573

1574
    curve25519_sub(a, r->y, r->x);
1575
    curve25519_add(b, r->y, r->x);
1576
    curve25519_mul(a, a, q->ysubx);
1577
    curve25519_mul(e, b, q->xaddy);
1578
    curve25519_add(h, e, a);
1579
    curve25519_sub(e, e, a);
1580
    curve25519_mul(c, r->t, q->t2d);
1581
    curve25519_add(f, r->z, r->z);
1582
    curve25519_add_after_basic(g, f, c);
1583
    curve25519_sub_after_basic(f, f, c);
1584
    curve25519_mul(r->x, e, f);
1585
    curve25519_mul(r->y, h, g);
1586
    curve25519_mul(r->z, g, f);
1587
    curve25519_mul(r->t, e, h);
1588
}
1589

1590
void
1591
ge25519_pnielsadd(ge25519_pniels *r, const ge25519 *p, const ge25519_pniels *q) {
1592
    bignum25519 a,b,c,x,y,z,t;
1593

1594
    curve25519_sub(a, p->y, p->x);
1595
    curve25519_add(b, p->y, p->x);
1596
    curve25519_mul(a, a, q->ysubx);
1597
    curve25519_mul(x, b, q->xaddy);
1598
    curve25519_add(y, x, a);
1599
    curve25519_sub(x, x, a);
1600
    curve25519_mul(c, p->t, q->t2d);
1601
    curve25519_mul(t, p->z, q->z);
1602
    curve25519_add(t, t, t);
1603
    curve25519_add_after_basic(z, t, c);
1604
    curve25519_sub_after_basic(t, t, c);
1605
    curve25519_mul(r->xaddy, x, t);
1606
    curve25519_mul(r->ysubx, y, z);
1607
    curve25519_mul(r->z, z, t);
1608
    curve25519_mul(r->t2d, x, y);
1609
    curve25519_copy(y, r->ysubx);
1610
    curve25519_sub(r->ysubx, r->ysubx, r->xaddy);
1611
    curve25519_add(r->xaddy, r->xaddy, y);
1612
    curve25519_mul(r->t2d, r->t2d, ge25519_ec2d);
1613
}
1614

1615
void
1616
ge25519_pack(byte r[32], const ge25519 *p) {
1617
    bignum25519 tx, ty, zi;
1618
    byte parity[32];
1619
    curve25519_recip(zi, p->z);
1620
    curve25519_mul(tx, p->x, zi);
1621
    curve25519_mul(ty, p->y, zi);
1622
    curve25519_contract(r, ty);
1623
    curve25519_contract(parity, tx);
1624
    r[31] ^= ((parity[0] & 1) << 7);
1625
}
1626

1627
int
1628
ed25519_verify(const byte *x, const byte *y, size_t len) {
1629
    size_t differentbits = 0;
1630
    while (len--)
1631
        differentbits |= (*x++ ^ *y++);
1632
    return (int) (1 & ((differentbits - 1) >> 8));
1633
}
1634

1635
int
1636
ge25519_unpack_negative_vartime(ge25519 *r, const byte p[32]) {
1637
    const byte zero[32] = {0};
1638
    const bignum25519 one = {1};
1639
    byte parity = p[31] >> 7;
1640
    byte check[32];
1641
    bignum25519 t, root, num, den, d3;
1642

1643
    curve25519_expand(r->y, p);
1644
    curve25519_copy(r->z, one);
1645
    curve25519_square(num, r->y); /* x = y^2 */
1646
    curve25519_mul(den, num, ge25519_ecd); /* den = dy^2 */
1647
    curve25519_sub_reduce(num, num, r->z); /* x = y^1 - 1 */
1648
    curve25519_add(den, den, r->z); /* den = dy^2 + 1 */
1649

1650
    /* Computation of sqrt(num/den) */
1651
    /* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */
1652
    curve25519_square(t, den);
1653
    curve25519_mul(d3, t, den);
1654
    curve25519_square(r->x, d3);
1655
    curve25519_mul(r->x, r->x, den);
1656
    curve25519_mul(r->x, r->x, num);
1657
    curve25519_pow_two252m3(r->x, r->x);
1658

1659
    /* 2. computation of r->x = num * den^3 * (num*den^7)^((p-5)/8) */
1660
    curve25519_mul(r->x, r->x, d3);
1661
    curve25519_mul(r->x, r->x, num);
1662

1663
    /* 3. Check if either of the roots works: */
1664
    curve25519_square(t, r->x);
1665
    curve25519_mul(t, t, den);
1666
    curve25519_sub_reduce(root, t, num);
1667
    curve25519_contract(check, root);
1668
    if (!ed25519_verify(check, zero, 32)) {
1669
        curve25519_add_reduce(t, t, num);
1670
        curve25519_contract(check, t);
1671
        if (!ed25519_verify(check, zero, 32))
1672
            return 0;
1673
        curve25519_mul(r->x, r->x, ge25519_sqrtneg1);
1674
    }
1675

1676
    curve25519_contract(check, r->x);
1677
    if ((check[0] & 1) == parity) {
1678
        curve25519_copy(t, r->x);
1679
        curve25519_neg(r->x, t);
1680
    }
1681
    curve25519_mul(r->t, r->x, r->y);
1682
    return 1;
1683
}
1684

1685
/* computes [s1]p1 + [s2]basepoint */
1686
void
1687
ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) {
1688
    signed char slide1[256], slide2[256];
1689
    ge25519_pniels pre1[S1_TABLE_SIZE];
1690
    ge25519 d1;
1691
    ge25519_p1p1 t;
1692
    sword32 i;
1693

1694
    contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE);
1695
    contract256_slidingwindow_modm(slide2, s2, S2_SWINDOWSIZE);
1696

1697
    ge25519_double(&d1, p1);
1698
    ge25519_full_to_pniels(pre1, p1);
1699
    for (i = 0; i < S1_TABLE_SIZE - 1; i++)
1700
        ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]);
1701

1702
    /* set neutral */
1703
    std::memset(r, 0, sizeof(ge25519));
1704
    r->y[0] = 1;
1705
    r->z[0] = 1;
1706

1707
    i = 255;
1708
    while ((i >= 0) && !(slide1[i] | slide2[i]))
1709
        i--;
1710

1711
    for (; i >= 0; i--) {
1712
        ge25519_double_p1p1(&t, r);
1713

1714
        if (slide1[i]) {
1715
            ge25519_p1p1_to_full(r, &t);
1716
            ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (byte)slide1[i] >> 7);
1717
        }
1718

1719
        if (slide2[i]) {
1720
            ge25519_p1p1_to_full(r, &t);
1721
            ge25519_nielsadd2_p1p1(&t, r, &ge25519_niels_sliding_multiples[abs(slide2[i]) / 2], (byte)slide2[i] >> 7);
1722
        }
1723

1724
        ge25519_p1p1_to_partial(r, &t);
1725
    }
1726
}
1727

1728
#if !defined(HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS)
1729

1730
/* Branch-free equality test: returns 1 when b == c. For differing inputs
 * the result is 0 as long as (b ^ c) - 1 has bit 31 clear, which holds for
 * the small window values compared in this file.
 */
word32
ge25519_windowb_equal(word32 b, word32 c) {
    const word32 delta = b ^ c;
    /* delta == 0 wraps to 0xffffffff, whose top bit is set */
    return (delta - 1) >> 31;
}
1734

1735
void
1736
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const byte table[256][96], word32 pos, signed char b) {
1737
    bignum25519 neg;
1738
    word32 sign = (word32)((byte)b >> 7);
1739
    word32 mask = ~(sign - 1);
1740
    word32 u = (b + mask) ^ mask;
1741
    word32 i;
1742

1743
    /* ysubx, xaddy, t2d in packed form. initialize to ysubx = 1, xaddy = 1, t2d = 0 */
1744
    byte packed[96] = {0};
1745
    packed[0] = 1;
1746
    packed[32] = 1;
1747

1748
    for (i = 0; i < 8; i++)
1749
        curve25519_move_conditional_bytes(packed, table[(pos * 8) + i], ge25519_windowb_equal(u, i + 1));
1750

1751
    /* expand in to t */
1752
    curve25519_expand(t->ysubx, packed +  0);
1753
    curve25519_expand(t->xaddy, packed + 32);
1754
    curve25519_expand(t->t2d  , packed + 64);
1755

1756
    /* adjust for sign */
1757
    curve25519_swap_conditional(t->ysubx, t->xaddy, sign);
1758
    curve25519_neg(neg, t->t2d);
1759
    curve25519_swap_conditional(t->t2d, neg, sign);
1760
}
1761

1762
#endif /* HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS */
1763

1764
/* computes [s]basepoint */
1765
void
1766
ge25519_scalarmult_base_niels(ge25519 *r, const byte basepoint_table[256][96], const bignum256modm s) {
1767
    signed char b[64];
1768
    word32 i;
1769
    ge25519_niels t;
1770

1771
    contract256_window4_modm(b, s);
1772

1773
    ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[1]);
1774
    curve25519_sub_reduce(r->x, t.xaddy, t.ysubx);
1775
    curve25519_add_reduce(r->y, t.xaddy, t.ysubx);
1776
    std::memset(r->z, 0, sizeof(bignum25519));
1777
    curve25519_copy(r->t, t.t2d);
1778
    r->z[0] = 2;
1779
    for (i = 3; i < 64; i += 2) {
1780
        ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);
1781
        ge25519_nielsadd2(r, &t);
1782
    }
1783
    ge25519_double_partial(r, r);
1784
    ge25519_double_partial(r, r);
1785
    ge25519_double_partial(r, r);
1786
    ge25519_double(r, r);
1787
    ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[0]);
1788
    curve25519_mul(t.t2d, t.t2d, ge25519_ecd);
1789
    ge25519_nielsadd2(r, &t);
1790
    for(i = 2; i < 64; i += 2) {
1791
        ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);
1792
        ge25519_nielsadd2(r, &t);
1793
    }
1794
}
1795

1796
ANONYMOUS_NAMESPACE_END
1797
NAMESPACE_END  // Ed25519
1798
NAMESPACE_END  // Donna
1799
NAMESPACE_END  // CryptoPP
1800

1801
//***************************** curve25519 *****************************//
1802

1803
NAMESPACE_BEGIN(CryptoPP)
1804
NAMESPACE_BEGIN(Donna)
1805

1806
// Portable C++ scalar multiplication: sharedKey = clamp(secretKey) * othersKey.
// The secret key is copied into a FixedSizeSecBlock and clamped there (low
// three bits cleared, bit 255 cleared, bit 254 set). Always returns 0.
int curve25519_mult_CXX(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32])
{
    using namespace CryptoPP::Donna::X25519;

    FixedSizeSecBlock<byte, 32> scalar;
    for (size_t n = 0; n < 32; ++n)
        scalar[n] = secretKey[n];
    scalar[0] &= 0xf8;
    scalar[31] &= 0x7f;
    scalar[31] |= 0x40;

    bignum25519 nqpqx = {1}, nqpqz = {0}, nqz = {1}, nqx;
    bignum25519 q, qx, qpqx, qqx, zzz, zinv;
    size_t cur, prev;

    curve25519_expand(q, othersKey);
    curve25519_copy(nqx, q);

    /* bit 255 is always 0, and bit 254 is always 1, so skip bit 255 and
       start pre-swapped on bit 254 */
    prev = 1;

    /* we are doing bits 254..3 in the loop, but are swapping in bits 253..2 */
    for (int pos = 253; pos >= 2; pos--) {
        /* combined differential-add and double step */
        curve25519_add(qx, nqx, nqz);
        curve25519_sub(nqz, nqx, nqz);
        curve25519_add(qpqx, nqpqx, nqpqz);
        curve25519_sub(nqpqz, nqpqx, nqpqz);
        curve25519_mul(nqpqx, qpqx, nqz);
        curve25519_mul(nqpqz, qx, nqpqz);
        curve25519_add(qqx, nqpqx, nqpqz);
        curve25519_sub(nqpqz, nqpqx, nqpqz);
        curve25519_square(nqpqz, nqpqz);
        curve25519_square(nqpqx, qqx);
        curve25519_mul(nqpqz, nqpqz, q);
        curve25519_square(qx, qx);
        curve25519_square(nqz, nqz);
        curve25519_mul(nqx, qx, nqz);
        curve25519_sub(nqz, qx, nqz);
        curve25519_scalar_product(zzz, nqz, 121665);
        curve25519_add(zzz, zzz, qx);
        curve25519_mul(nqz, nqz, zzz);

        /* swap only when this scalar bit differs from the previous one */
        cur = (scalar[pos/8] >> (pos & 7)) & 1;
        curve25519_swap_conditional(nqx, nqpqx, (word32)(cur ^ prev));
        curve25519_swap_conditional(nqz, nqpqz, (word32)(cur ^ prev));
        prev = cur;
    }

    /* the final 3 bits are always zero, so we only need to double */
    for (int round = 0; round < 3; round++) {
        curve25519_add(qx, nqx, nqz);
        curve25519_sub(nqz, nqx, nqz);
        curve25519_square(qx, qx);
        curve25519_square(nqz, nqz);
        curve25519_mul(nqx, qx, nqz);
        curve25519_sub(nqz, qx, nqz);
        curve25519_scalar_product(zzz, nqz, 121665);
        curve25519_add(zzz, zzz, qx);
        curve25519_mul(nqz, nqz, zzz);
    }

    /* result = nqx / nqz, written out as 32 bytes */
    curve25519_recip(zinv, nqz);
    curve25519_mul(nqz, nqx, zinv);
    curve25519_contract(sharedKey, nqz);

    return 0;
}
1872

1873
// Derives publicKey from secretKey by multiplying with basePoint.
// When CRYPTOPP_CURVE25519_SSE2 is enabled and HasSSE2() reports support at
// run time, the SSE2 routine is used; otherwise the portable C++ routine.
int curve25519_mult(byte publicKey[32], const byte secretKey[32])
{
    // presumably brings basePoint into scope -- its declaration is not in view here
    using namespace CryptoPP::Donna::X25519;

#if (CRYPTOPP_CURVE25519_SSE2)
    if (HasSSE2())
        return curve25519_mult_SSE2(publicKey, secretKey, basePoint);
#endif

    return curve25519_mult_CXX(publicKey, secretKey, basePoint);
}
1885

1886
// Computes the shared key from our secretKey and the peer's othersKey.
// The SSE2 code path is taken when it was compiled in and the CPU reports
// SSE2 at runtime; otherwise the portable C++ implementation is used.
// Returns the status code of whichever implementation ran.
int curve25519_mult(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32])
{
#if (CRYPTOPP_CURVE25519_SSE2)
    // Runtime dispatch to the vectorized implementation.
    if (HasSSE2())
    {
        return curve25519_mult_SSE2(sharedKey, secretKey, othersKey);
    }
#endif

    // Portable fallback.
    return curve25519_mult_CXX(sharedKey, secretKey, othersKey);
}
1896

1897
NAMESPACE_END  // Donna
1898
NAMESPACE_END  // CryptoPP
1899

1900
//******************************* ed25519 *******************************//
1901

1902
NAMESPACE_BEGIN(CryptoPP)
1903
NAMESPACE_BEGIN(Donna)
1904

1905
int
1906
ed25519_publickey_CXX(byte publicKey[32], const byte secretKey[32])
1907
{
1908
    using namespace CryptoPP::Donna::Ed25519;
1909

1910
    bignum256modm a;
1911
    ALIGN(ALIGN_SPEC) ge25519 A;
1912
    hash_512bits extsk;
1913

1914
    /* A = aB */
1915
    ed25519_extsk(extsk, secretKey);
1916
    expand256_modm(a, extsk, 32);
1917
    ge25519_scalarmult_base_niels(&A, ge25519_niels_base_multiples, a);
1918
    ge25519_pack(publicKey, &A);
1919

1920
    return 0;
1921
}
1922

1923
int
1924
ed25519_publickey(byte publicKey[32], const byte secretKey[32])
1925
{
1926
    return ed25519_publickey_CXX(publicKey, secretKey);
1927
}
1928

1929
int
1930
ed25519_sign_CXX(std::istream& stream, const byte sk[32], const byte pk[32], byte RS[64])
1931
{
1932
    using namespace CryptoPP::Donna::Ed25519;
1933

1934
    bignum256modm r, S, a;
1935
    ALIGN(ALIGN_SPEC) ge25519 R;
1936
    hash_512bits extsk, hashr, hram;
1937

1938
    // Unfortunately we need to read the stream twice. The first time calculates
1939
    // 'r = H(aExt[32..64], m)'. The second time calculates 'S = H(R,A,m)'. There
1940
    // is a data dependency due to hashing 'RS' with 'R = [r]B' that does not
1941
    // allow us to read the stream once.
1942
    std::streampos where = stream.tellg();
1943

1944
    ed25519_extsk(extsk, sk);
1945

1946
    /* r = H(aExt[32..64], m) */
1947
    SHA512 hash;
1948
    hash.Update(extsk + 32, 32);
1949
    UpdateFromStream(hash, stream);
1950
    hash.Final(hashr);
1951
    expand256_modm(r, hashr, 64);
1952

1953
    /* R = rB */
1954
    ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r);
1955
    ge25519_pack(RS, &R);
1956

1957
    // Reset stream for the second digest
1958
    stream.clear();
1959
    stream.seekg(where);
1960

1961
    /* S = H(R,A,m).. */
1962
    ed25519_hram(hram, RS, pk, stream);
1963
    expand256_modm(S, hram, 64);
1964

1965
    /* S = H(R,A,m)a */
1966
    expand256_modm(a, extsk, 32);
1967
    mul256_modm(S, S, a);
1968

1969
    /* S = (r + H(R,A,m)a) */
1970
    add256_modm(S, S, r);
1971

1972
    /* S = (r + H(R,A,m)a) mod L */
1973
    contract256_modm(RS + 32, S);
1974

1975
    return 0;
1976
}
1977

1978
int
1979
ed25519_sign_CXX(const byte *m, size_t mlen, const byte sk[32], const byte pk[32], byte RS[64])
1980
{
1981
    using namespace CryptoPP::Donna::Ed25519;
1982

1983
    bignum256modm r, S, a;
1984
    ALIGN(ALIGN_SPEC) ge25519 R;
1985
    hash_512bits extsk, hashr, hram;
1986

1987
    ed25519_extsk(extsk, sk);
1988

1989
    /* r = H(aExt[32..64], m) */
1990
    SHA512 hash;
1991
    hash.Update(extsk + 32, 32);
1992
    hash.Update(m, mlen);
1993
    hash.Final(hashr);
1994
    expand256_modm(r, hashr, 64);
1995

1996
    /* R = rB */
1997
    ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r);
1998
    ge25519_pack(RS, &R);
1999

2000
    /* S = H(R,A,m).. */
2001
    ed25519_hram(hram, RS, pk, m, mlen);
2002
    expand256_modm(S, hram, 64);
2003

2004
    /* S = H(R,A,m)a */
2005
    expand256_modm(a, extsk, 32);
2006
    mul256_modm(S, S, a);
2007

2008
    /* S = (r + H(R,A,m)a) */
2009
    add256_modm(S, S, r);
2010

2011
    /* S = (r + H(R,A,m)a) mod L */
2012
    contract256_modm(RS + 32, S);
2013

2014
    return 0;
2015
}
2016

2017
int
2018
ed25519_sign(std::istream& stream, const byte secretKey[32], const byte publicKey[32],
2019
             byte signature[64])
2020
{
2021
    return ed25519_sign_CXX(stream, secretKey, publicKey, signature);
2022
}
2023

2024
int
2025
ed25519_sign(const byte* message, size_t messageLength, const byte secretKey[32],
2026
             const byte publicKey[32], byte signature[64])
2027
{
2028
    return ed25519_sign_CXX(message, messageLength, secretKey, publicKey, signature);
2029
}
2030

2031
int
2032
ed25519_sign_open_CXX(std::istream& stream, const byte pk[32], const byte RS[64]) {
2033

2034
    using namespace CryptoPP::Donna::Ed25519;
2035

2036
    ALIGN(ALIGN_SPEC) ge25519 R, A;
2037
    hash_512bits hash;
2038
    bignum256modm hram, S;
2039
    byte checkR[32];
2040

2041
    if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk))
2042
        return -1;
2043

2044
    /* hram = H(R,A,m) */
2045
    ed25519_hram(hash, RS, pk, stream);
2046
    expand256_modm(hram, hash, 64);
2047

2048
    /* S */
2049
    expand256_modm(S, RS + 32, 32);
2050

2051
    /* SB - H(R,A,m)A */
2052
    ge25519_double_scalarmult_vartime(&R, &A, hram, S);
2053
    ge25519_pack(checkR, &R);
2054

2055
    /* check that R = SB - H(R,A,m)A */
2056
    return ed25519_verify(RS, checkR, 32) ? 0 : -1;
2057
}
2058

2059
int
2060
ed25519_sign_open_CXX(const byte *m, size_t mlen, const byte pk[32], const byte RS[64]) {
2061

2062
    using namespace CryptoPP::Donna::Ed25519;
2063

2064
    ALIGN(ALIGN_SPEC) ge25519 R, A;
2065
    hash_512bits hash;
2066
    bignum256modm hram, S;
2067
    byte checkR[32];
2068

2069
    if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk))
2070
        return -1;
2071

2072
    /* hram = H(R,A,m) */
2073
    ed25519_hram(hash, RS, pk, m, mlen);
2074
    expand256_modm(hram, hash, 64);
2075

2076
    /* S */
2077
    expand256_modm(S, RS + 32, 32);
2078

2079
    /* SB - H(R,A,m)A */
2080
    ge25519_double_scalarmult_vartime(&R, &A, hram, S);
2081
    ge25519_pack(checkR, &R);
2082

2083
    /* check that R = SB - H(R,A,m)A */
2084
    return ed25519_verify(RS, checkR, 32) ? 0 : -1;
2085
}
2086

2087
int
2088
ed25519_sign_open(const byte *message, size_t messageLength, const byte publicKey[32], const byte signature[64])
2089
{
2090
    return ed25519_sign_open_CXX(message, messageLength, publicKey, signature);
2091
}
2092

2093
int
2094
ed25519_sign_open(std::istream& stream, const byte publicKey[32], const byte signature[64])
2095
{
2096
    return ed25519_sign_open_CXX(stream, publicKey, signature);
2097
}
2098

2099
NAMESPACE_END  // Donna
2100
NAMESPACE_END  // CryptoPP
2101

2102
#endif  // CRYPTOPP_CURVE25519_32BIT
2103

2104
Product

Resources

Company