/*
 * Source: GitHub repository tpruvot/cpuminer-multi
 * Path: blob/linux/algo/scrypt.c
 */
/*
 * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file was originally written by Colin Percival as part of the Tarsnap
 * online backup system.
 */
29
30
#include "miner.h"

#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
35
36
/*
 * Constant tails of the SHA-256 message blocks used by the scalar HMAC /
 * PBKDF2 helpers below.  Each table is laid out like SHA-256 padding: a
 * 0x80000000 marker word, zero fill, and a final word holding a length in
 * bits.
 */

/* Appended after key words 16..19: 0x280 = 640 bits = 80 bytes. */
static const uint32_t keypad[12] = {
	0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000280
};

/* Appended after 4 salt words + 1 counter word: 0x4a0 = 1184 bits = 148 bytes. */
static const uint32_t innerpad[11] = {
	0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x000004a0
};

/* Appended after an 8-word digest: 0x300 = 768 bits = 96 bytes. */
static const uint32_t outerpad[8] = {
	0x80000000, 0, 0, 0, 0, 0, 0, 0x00000300
};

/* A whole block: the value 1, the marker, then 0x620 = 1568 bits = 196 bytes. */
static const uint32_t finalblk[16] = {
	0x00000001, 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000620
};
48
49
static inline void HMAC_SHA256_80_init(const uint32_t *key,
50
uint32_t *tstate, uint32_t *ostate)
51
{
52
uint32_t ihash[8];
53
uint32_t pad[16];
54
int i;
55
56
/* tstate is assumed to contain the midstate of key */
57
memcpy(pad, key + 16, 16);
58
memcpy(pad + 4, keypad, 48);
59
sha256_transform(tstate, pad, 0);
60
memcpy(ihash, tstate, 32);
61
62
sha256_init(ostate);
63
for (i = 0; i < 8; i++)
64
pad[i] = ihash[i] ^ 0x5c5c5c5c;
65
for (; i < 16; i++)
66
pad[i] = 0x5c5c5c5c;
67
sha256_transform(ostate, pad, 0);
68
69
sha256_init(tstate);
70
for (i = 0; i < 8; i++)
71
pad[i] = ihash[i] ^ 0x36363636;
72
for (; i < 16; i++)
73
pad[i] = 0x36363636;
74
sha256_transform(tstate, pad, 0);
75
}
76
77
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
78
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
79
{
80
uint32_t istate[8], ostate2[8];
81
uint32_t ibuf[16], obuf[16];
82
int i, j;
83
84
memcpy(istate, tstate, 32);
85
sha256_transform(istate, salt, 0);
86
87
memcpy(ibuf, salt + 16, 16);
88
memcpy(ibuf + 5, innerpad, 44);
89
memcpy(obuf + 8, outerpad, 32);
90
91
for (i = 0; i < 4; i++) {
92
memcpy(obuf, istate, 32);
93
ibuf[4] = i + 1;
94
sha256_transform(obuf, ibuf, 0);
95
96
memcpy(ostate2, ostate, 32);
97
sha256_transform(ostate2, obuf, 0);
98
for (j = 0; j < 8; j++)
99
output[8 * i + j] = swab32(ostate2[j]);
100
}
101
}
102
103
static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
104
const uint32_t *salt, uint32_t *output)
105
{
106
uint32_t buf[16];
107
int i;
108
109
sha256_transform(tstate, salt, 1);
110
sha256_transform(tstate, salt + 16, 1);
111
sha256_transform(tstate, finalblk, 0);
112
memcpy(buf, tstate, 32);
113
memcpy(buf + 8, outerpad, 32);
114
115
sha256_transform(ostate, buf, 0);
116
for (i = 0; i < 8; i++)
117
output[i] = swab32(ostate[i]);
118
}
119
120
121
#ifdef HAVE_SHA256_4WAY
122
123
/*
 * 4-lane versions of keypad / innerpad / outerpad: every word of the scalar
 * table is repeated four times in a row (word i of lane k is at 4 * i + k).
 * Only the first and last rows are non-zero; elements without an explicit
 * initialiser are zero.
 */
static const uint32_t keypad_4way[4 * 12] = {
	[4 * 0]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 11] = 0x00000280, 0x00000280, 0x00000280, 0x00000280
};
static const uint32_t innerpad_4way[4 * 11] = {
	[4 * 0]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 10] = 0x000004a0, 0x000004a0, 0x000004a0, 0x000004a0
};
static const uint32_t outerpad_4way[4 * 8] = {
	[4 * 0] = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 7] = 0x00000300, 0x00000300, 0x00000300, 0x00000300
};
160
/*
 * 4-lane version of finalblk: row 0 holds the value 1, row 1 the 0x80000000
 * marker, row 15 the bit length 0x620; every other row is zero (elements
 * without an explicit initialiser are zero).
 */
static const uint32_t _ALIGN(16) finalblk_4way[4 * 16] = {
	[4 * 0]  = 0x00000001, 0x00000001, 0x00000001, 0x00000001,
	[4 * 1]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 15] = 0x00000620, 0x00000620, 0x00000620, 0x00000620
};
178
179
static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
180
uint32_t *tstate, uint32_t *ostate)
181
{
182
uint32_t _ALIGN(16) ihash[4 * 8];
183
uint32_t _ALIGN(16) pad[4 * 16];
184
int i;
185
186
/* tstate is assumed to contain the midstate of key */
187
memcpy(pad, key + 4 * 16, 4 * 16);
188
memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
189
sha256_transform_4way(tstate, pad, 0);
190
memcpy(ihash, tstate, 4 * 32);
191
192
sha256_init_4way(ostate);
193
for (i = 0; i < 4 * 8; i++)
194
pad[i] = ihash[i] ^ 0x5c5c5c5c;
195
for (; i < 4 * 16; i++)
196
pad[i] = 0x5c5c5c5c;
197
sha256_transform_4way(ostate, pad, 0);
198
199
sha256_init_4way(tstate);
200
for (i = 0; i < 4 * 8; i++)
201
pad[i] = ihash[i] ^ 0x36363636;
202
for (; i < 4 * 16; i++)
203
pad[i] = 0x36363636;
204
sha256_transform_4way(tstate, pad, 0);
205
}
206
207
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
208
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
209
{
210
uint32_t _ALIGN(16) istate[4 * 8];
211
uint32_t _ALIGN(16) ostate2[4 * 8];
212
uint32_t _ALIGN(16) ibuf[4 * 16];
213
uint32_t _ALIGN(16) obuf[4 * 16];
214
int i, j;
215
216
memcpy(istate, tstate, 4 * 32);
217
sha256_transform_4way(istate, salt, 0);
218
219
memcpy(ibuf, salt + 4 * 16, 4 * 16);
220
memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
221
memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);
222
223
for (i = 0; i < 4; i++) {
224
memcpy(obuf, istate, 4 * 32);
225
ibuf[4 * 4 + 0] = i + 1;
226
ibuf[4 * 4 + 1] = i + 1;
227
ibuf[4 * 4 + 2] = i + 1;
228
ibuf[4 * 4 + 3] = i + 1;
229
sha256_transform_4way(obuf, ibuf, 0);
230
231
memcpy(ostate2, ostate, 4 * 32);
232
sha256_transform_4way(ostate2, obuf, 0);
233
for (j = 0; j < 4 * 8; j++)
234
output[4 * 8 * i + j] = swab32(ostate2[j]);
235
}
236
}
237
238
static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
239
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
240
{
241
uint32_t _ALIGN(16) buf[4 * 16];
242
int i;
243
244
sha256_transform_4way(tstate, salt, 1);
245
sha256_transform_4way(tstate, salt + 4 * 16, 1);
246
sha256_transform_4way(tstate, finalblk_4way, 0);
247
memcpy(buf, tstate, 4 * 32);
248
memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
249
250
sha256_transform_4way(ostate, buf, 0);
251
for (i = 0; i < 4 * 8; i++)
252
output[i] = swab32(ostate[i]);
253
}
254
255
#endif /* HAVE_SHA256_4WAY */
256
257
258
#ifdef HAVE_SHA256_8WAY
259
260
/*
 * 8-lane version of finalblk (word i of lane k at 8 * i + k): row 0 holds
 * the value 1, row 1 the 0x80000000 marker, row 15 the bit length 0x620;
 * every other row is zero (elements without an explicit initialiser are
 * zero).
 */
static const uint32_t _ALIGN(32) finalblk_8way[8 * 16] = {
	[8 * 0]  = 0x00000001, 0x00000001, 0x00000001, 0x00000001,
	           0x00000001, 0x00000001, 0x00000001, 0x00000001,
	[8 * 1]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	           0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[8 * 15] = 0x00000620, 0x00000620, 0x00000620, 0x00000620,
	           0x00000620, 0x00000620, 0x00000620, 0x00000620
};
278
279
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
280
uint32_t *tstate, uint32_t *ostate)
281
{
282
uint32_t _ALIGN(32) ihash[8 * 8];
283
uint32_t _ALIGN(32) pad[8 * 16];
284
int i;
285
286
/* tstate is assumed to contain the midstate of key */
287
memcpy(pad, key + 8 * 16, 8 * 16);
288
for (i = 0; i < 8; i++)
289
pad[8 * 4 + i] = 0x80000000;
290
memset(pad + 8 * 5, 0x00, 8 * 40);
291
for (i = 0; i < 8; i++)
292
pad[8 * 15 + i] = 0x00000280;
293
sha256_transform_8way(tstate, pad, 0);
294
memcpy(ihash, tstate, 8 * 32);
295
296
sha256_init_8way(ostate);
297
for (i = 0; i < 8 * 8; i++)
298
pad[i] = ihash[i] ^ 0x5c5c5c5c;
299
for (; i < 8 * 16; i++)
300
pad[i] = 0x5c5c5c5c;
301
sha256_transform_8way(ostate, pad, 0);
302
303
sha256_init_8way(tstate);
304
for (i = 0; i < 8 * 8; i++)
305
pad[i] = ihash[i] ^ 0x36363636;
306
for (; i < 8 * 16; i++)
307
pad[i] = 0x36363636;
308
sha256_transform_8way(tstate, pad, 0);
309
}
310
311
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
312
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
313
{
314
uint32_t _ALIGN(32) istate[8 * 8];
315
uint32_t _ALIGN(32) ostate2[8 * 8];
316
uint32_t _ALIGN(32) ibuf[8 * 16];
317
uint32_t _ALIGN(32) obuf[8 * 16];
318
int i, j;
319
320
memcpy(istate, tstate, 8 * 32);
321
sha256_transform_8way(istate, salt, 0);
322
323
memcpy(ibuf, salt + 8 * 16, 8 * 16);
324
for (i = 0; i < 8; i++)
325
ibuf[8 * 5 + i] = 0x80000000;
326
memset(ibuf + 8 * 6, 0x00, 8 * 36);
327
for (i = 0; i < 8; i++)
328
ibuf[8 * 15 + i] = 0x000004a0;
329
330
for (i = 0; i < 8; i++)
331
obuf[8 * 8 + i] = 0x80000000;
332
memset(obuf + 8 * 9, 0x00, 8 * 24);
333
for (i = 0; i < 8; i++)
334
obuf[8 * 15 + i] = 0x00000300;
335
336
for (i = 0; i < 4; i++) {
337
memcpy(obuf, istate, 8 * 32);
338
ibuf[8 * 4 + 0] = i + 1;
339
ibuf[8 * 4 + 1] = i + 1;
340
ibuf[8 * 4 + 2] = i + 1;
341
ibuf[8 * 4 + 3] = i + 1;
342
ibuf[8 * 4 + 4] = i + 1;
343
ibuf[8 * 4 + 5] = i + 1;
344
ibuf[8 * 4 + 6] = i + 1;
345
ibuf[8 * 4 + 7] = i + 1;
346
sha256_transform_8way(obuf, ibuf, 0);
347
348
memcpy(ostate2, ostate, 8 * 32);
349
sha256_transform_8way(ostate2, obuf, 0);
350
for (j = 0; j < 8 * 8; j++)
351
output[8 * 8 * i + j] = swab32(ostate2[j]);
352
}
353
}
354
355
static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
356
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
357
{
358
uint32_t _ALIGN(32) buf[8 * 16];
359
int i;
360
361
sha256_transform_8way(tstate, salt, 1);
362
sha256_transform_8way(tstate, salt + 8 * 16, 1);
363
sha256_transform_8way(tstate, finalblk_8way, 0);
364
365
memcpy(buf, tstate, 8 * 32);
366
for (i = 0; i < 8; i++)
367
buf[8 * 8 + i] = 0x80000000;
368
memset(buf + 8 * 9, 0x00, 8 * 24);
369
for (i = 0; i < 8; i++)
370
buf[8 * 15 + i] = 0x00000300;
371
sha256_transform_8way(ostate, buf, 0);
372
373
for (i = 0; i < 8 * 8; i++)
374
output[i] = swab32(ostate[i]);
375
}
376
377
#endif /* HAVE_SHA256_8WAY */
378
379
380
/*
 * Per-platform selection of the scrypt core.  Each branch declares the
 * core routines it provides (defined outside this file -- presumably in
 * assembly, given USE_ASM), sets SCRYPT_MAX_WAYS (the number of hashes the
 * scratch buffer and the data/hash arrays are sized for) and supplies
 * scrypt_best_throughput() either as a function or as a constant macro.
 */
#if defined(USE_ASM) && defined(__x86_64__)

#define SCRYPT_MAX_WAYS 12
#define HAVE_SCRYPT_3WAY 1
int scrypt_best_throughput(void);
void scrypt_core(uint32_t *X, uint32_t *V, int N);
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
#if defined(USE_AVX2)
#undef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 24
#define HAVE_SCRYPT_6WAY 1
void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
#endif

#elif defined(USE_ASM) && defined(__i386__)

#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V, int N);

#elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)

void scrypt_core(uint32_t *X, uint32_t *V, int N);
#if defined(__ARM_NEON__)
/* from here on the 4-way SHA-256 code paths below are compiled out */
#undef HAVE_SHA256_4WAY
#define SCRYPT_MAX_WAYS 3
#define HAVE_SCRYPT_3WAY 1
#define scrypt_best_throughput() 3
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
#endif
410
411
#else
412
413
/*
 * B ^= Bx, then mix B with four double-rounds (8 rounds) of the Salsa20
 * quarter-round pattern and add the mixed words back into B.
 *
 * B  - 16 words, updated in place
 * Bx - 16 words, read only
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t x[16];
	int i, round;

#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))
/* One quarter-round on working words a, b, c, d (constant indices). */
#define QUARTER(a, b, c, d) do { \
		x[b] ^= ROTL32(x[a] + x[d],  7); \
		x[c] ^= ROTL32(x[b] + x[a],  9); \
		x[d] ^= ROTL32(x[c] + x[b], 13); \
		x[a] ^= ROTL32(x[d] + x[c], 18); \
	} while (0)

	for (i = 0; i < 16; i++)
		x[i] = (B[i] ^= Bx[i]);

	/*
	 * The four quarter-rounds of each half touch disjoint words, so
	 * running them one after another gives the same result as
	 * interleaving their steps.
	 */
	for (round = 0; round < 8; round += 2) {
		/* Operate on columns. */
		QUARTER( 0,  4,  8, 12);
		QUARTER( 5,  9, 13,  1);
		QUARTER(10, 14,  2,  6);
		QUARTER(15,  3,  7, 11);

		/* Operate on rows. */
		QUARTER( 0,  1,  2,  3);
		QUARTER( 5,  6,  7,  4);
		QUARTER(10, 11,  8,  9);
		QUARTER(15, 12, 13, 14);
	}

	for (i = 0; i < 16; i++)
		B[i] += x[i];

#undef QUARTER
#undef ROTL32
}
480
481
/*
 * Portable scrypt core working on one 32-word block.
 *
 * X - 32 words, transformed in place
 * V - scratch area of at least N * 32 words
 * N - number of iterations of each phase; the second phase selects an
 *     entry with "X[16] & (N - 1)", so N must be a power of two for every
 *     stored entry to be reachable
 *
 * Phase 1 stores X into V[i] and mixes X; phase 2 XORs a data-dependent
 * entry of V into X and mixes again.  Offsets into V are computed in
 * size_t so they cannot overflow int / uint32_t arithmetic for large N.
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
{
	int i, k;

	for (i = 0; i < N; i++) {
		memcpy(V + (size_t)i * 32, X, 32 * sizeof(uint32_t));
		xor_salsa8(X, X + 16);
		xor_salsa8(X + 16, X);
	}
	for (i = 0; i < N; i++) {
		const uint32_t *Vj = V + (size_t)32 * (X[16] & (uint32_t)(N - 1));

		for (k = 0; k < 32; k++)
			X[k] ^= Vj[k];
		xor_salsa8(X, X + 16);
		xor_salsa8(X + 16, X);
	}
}
498
499
#endif
500
501
/* No multi-way core was selected above: process one hash per pass. */
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif
505
506
unsigned char *scrypt_buffer_alloc(int N)
507
{
508
return (uchar*) malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
509
}
510
511
/*
 * Compute one scrypt hash.
 *
 * input      - 20 input words (used both as HMAC key and as PBKDF2 salt)
 * output     - receives 8 result words
 * midstate   - SHA-256 state after the first 16 input words (8 words)
 * scratchpad - buffer from scrypt_buffer_alloc(); its address is rounded
 *              up to a 64-byte boundary before use
 * N          - iteration count passed to scrypt_core()
 */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
	uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[8], ostate[8];
	uint32_t X[32];
	uint32_t *V;

	/* round the scratchpad address up to the next multiple of 64 */
	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* expand the input to the 32-word working block */
	memcpy(tstate, midstate, sizeof(tstate));
	HMAC_SHA256_80_init(input, tstate, ostate);
	PBKDF2_SHA256_80_128(tstate, ostate, input, X);

	scrypt_core(X, V, N);

	/* compress the mixed block down to the 8-word result */
	PBKDF2_SHA256_128_32(tstate, ostate, X, output);
}
528
529
#ifdef HAVE_SHA256_4WAY
530
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
531
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
532
{
533
uint32_t _ALIGN(128) tstate[4 * 8];
534
uint32_t _ALIGN(128) ostate[4 * 8];
535
uint32_t _ALIGN(128) W[4 * 32];
536
uint32_t _ALIGN(128) X[4 * 32];
537
uint32_t *V;
538
int i, k;
539
540
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
541
542
for (i = 0; i < 20; i++)
543
for (k = 0; k < 4; k++)
544
W[4 * i + k] = input[k * 20 + i];
545
for (i = 0; i < 8; i++)
546
for (k = 0; k < 4; k++)
547
tstate[4 * i + k] = midstate[i];
548
HMAC_SHA256_80_init_4way(W, tstate, ostate);
549
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
550
for (i = 0; i < 32; i++)
551
for (k = 0; k < 4; k++)
552
X[k * 32 + i] = W[4 * i + k];
553
scrypt_core(X + 0 * 32, V, N);
554
scrypt_core(X + 1 * 32, V, N);
555
scrypt_core(X + 2 * 32, V, N);
556
scrypt_core(X + 3 * 32, V, N);
557
for (i = 0; i < 32; i++)
558
for (k = 0; k < 4; k++)
559
W[4 * i + k] = X[k * 32 + i];
560
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
561
for (i = 0; i < 8; i++)
562
for (k = 0; k < 4; k++)
563
output[k * 8 + i] = W[4 * i + k];
564
}
565
#endif /* HAVE_SHA256_4WAY */
566
567
#ifdef HAVE_SCRYPT_3WAY
568
569
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
570
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
571
{
572
uint32_t _ALIGN(64) tstate[3 * 8], ostate[3 * 8];
573
uint32_t _ALIGN(64) X[3 * 32];
574
uint32_t *V;
575
576
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
577
578
memcpy(tstate + 0, midstate, 32);
579
memcpy(tstate + 8, midstate, 32);
580
memcpy(tstate + 16, midstate, 32);
581
HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
582
HMAC_SHA256_80_init(input + 20, tstate + 8, ostate + 8);
583
HMAC_SHA256_80_init(input + 40, tstate + 16, ostate + 16);
584
PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
585
PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
586
PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
587
588
scrypt_core_3way(X, V, N);
589
590
PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
591
PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
592
PBKDF2_SHA256_128_32(tstate + 16, ostate + 16, X + 64, output + 16);
593
}
594
595
#ifdef HAVE_SHA256_4WAY
596
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
597
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
598
{
599
uint32_t _ALIGN(128) tstate[12 * 8];
600
uint32_t _ALIGN(128) ostate[12 * 8];
601
uint32_t _ALIGN(128) W[12 * 32];
602
uint32_t _ALIGN(128) X[12 * 32];
603
uint32_t *V;
604
int i, j, k;
605
606
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
607
608
for (j = 0; j < 3; j++)
609
for (i = 0; i < 20; i++)
610
for (k = 0; k < 4; k++)
611
W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
612
for (j = 0; j < 3; j++)
613
for (i = 0; i < 8; i++)
614
for (k = 0; k < 4; k++)
615
tstate[32 * j + 4 * i + k] = midstate[i];
616
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
617
HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
618
HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);
619
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
620
PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
621
PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);
622
for (j = 0; j < 3; j++)
623
for (i = 0; i < 32; i++)
624
for (k = 0; k < 4; k++)
625
X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
626
scrypt_core_3way(X + 0 * 96, V, N);
627
scrypt_core_3way(X + 1 * 96, V, N);
628
scrypt_core_3way(X + 2 * 96, V, N);
629
scrypt_core_3way(X + 3 * 96, V, N);
630
for (j = 0; j < 3; j++)
631
for (i = 0; i < 32; i++)
632
for (k = 0; k < 4; k++)
633
W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
634
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
635
PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
636
PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);
637
for (j = 0; j < 3; j++)
638
for (i = 0; i < 8; i++)
639
for (k = 0; k < 4; k++)
640
output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
641
}
642
#endif /* HAVE_SHA256_4WAY */
643
644
#endif /* HAVE_SCRYPT_3WAY */
645
646
#ifdef HAVE_SCRYPT_6WAY
647
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
648
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
649
{
650
uint32_t _ALIGN(128) tstate[24 * 8];
651
uint32_t _ALIGN(128) ostate[24 * 8];
652
uint32_t _ALIGN(128) W[24 * 32];
653
uint32_t _ALIGN(128) X[24 * 32];
654
uint32_t *V;
655
int i, j, k;
656
657
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
658
659
for (j = 0; j < 3; j++)
660
for (i = 0; i < 20; i++)
661
for (k = 0; k < 8; k++)
662
W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];
663
for (j = 0; j < 3; j++)
664
for (i = 0; i < 8; i++)
665
for (k = 0; k < 8; k++)
666
tstate[8 * 8 * j + 8 * i + k] = midstate[i];
667
HMAC_SHA256_80_init_8way(W + 0, tstate + 0, ostate + 0);
668
HMAC_SHA256_80_init_8way(W + 256, tstate + 64, ostate + 64);
669
HMAC_SHA256_80_init_8way(W + 512, tstate + 128, ostate + 128);
670
PBKDF2_SHA256_80_128_8way(tstate + 0, ostate + 0, W + 0, W + 0);
671
PBKDF2_SHA256_80_128_8way(tstate + 64, ostate + 64, W + 256, W + 256);
672
PBKDF2_SHA256_80_128_8way(tstate + 128, ostate + 128, W + 512, W + 512);
673
for (j = 0; j < 3; j++)
674
for (i = 0; i < 32; i++)
675
for (k = 0; k < 8; k++)
676
X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
677
scrypt_core_6way(X + 0 * 32, V, N);
678
scrypt_core_6way(X + 6 * 32, V, N);
679
scrypt_core_6way(X + 12 * 32, V, N);
680
scrypt_core_6way(X + 18 * 32, V, N);
681
for (j = 0; j < 3; j++)
682
for (i = 0; i < 32; i++)
683
for (k = 0; k < 8; k++)
684
W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];
685
PBKDF2_SHA256_128_32_8way(tstate + 0, ostate + 0, W + 0, W + 0);
686
PBKDF2_SHA256_128_32_8way(tstate + 64, ostate + 64, W + 256, W + 256);
687
PBKDF2_SHA256_128_32_8way(tstate + 128, ostate + 128, W + 512, W + 512);
688
for (j = 0; j < 3; j++)
689
for (i = 0; i < 8; i++)
690
for (k = 0; k < 8; k++)
691
output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];
692
}
693
#endif /* HAVE_SCRYPT_6WAY */
694
695
extern int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done,
696
unsigned char *scratchbuf, uint32_t N)
697
{
698
uint32_t *pdata = work->data;
699
uint32_t *ptarget = work->target;
700
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
701
uint32_t midstate[8];
702
uint32_t n = pdata[19] - 1;
703
const uint32_t Htarg = ptarget[7];
704
int throughput = scrypt_best_throughput();
705
int i;
706
707
#ifdef HAVE_SHA256_4WAY
708
if (sha256_use_4way())
709
throughput *= 4;
710
#endif
711
712
for (i = 0; i < throughput; i++)
713
memcpy(data + i * 20, pdata, 80);
714
715
sha256_init(midstate);
716
sha256_transform(midstate, data, 0);
717
718
do {
719
for (i = 0; i < throughput; i++)
720
data[i * 20 + 19] = ++n;
721
722
#if defined(HAVE_SHA256_4WAY)
723
if (throughput == 4)
724
scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf, N);
725
else
726
#endif
727
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
728
if (throughput == 12)
729
scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf, N);
730
else
731
#endif
732
#if defined(HAVE_SCRYPT_6WAY)
733
if (throughput == 24)
734
scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf, N);
735
else
736
#endif
737
#if defined(HAVE_SCRYPT_3WAY)
738
if (throughput == 3)
739
scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf, N);
740
else
741
#endif
742
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf, N);
743
744
for (i = 0; i < throughput; i++) {
745
if (unlikely(hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget))) {
746
work_set_target_ratio(work, hash + i * 8);
747
*hashes_done = n - pdata[19] + 1;
748
pdata[19] = data[i * 20 + 19];
749
return 1;
750
}
751
}
752
} while (likely(n < max_nonce && !work_restart[thr_id].restart));
753
754
*hashes_done = n - pdata[19] + 1;
755
pdata[19] = n;
756
return 0;
757
}
758
759
/*
 * simple cpu test (util.c)
 *
 * Compute a single scrypt hash with a temporary scratch buffer.
 *
 * output - receives 32 bytes; left all-zero if the scratch buffer cannot
 *          be allocated
 * input  - 80 bytes of input, accessed as 32-bit words
 *          (NOTE(review): assumes the caller passes suitably aligned
 *          buffers -- confirm at the call sites)
 * N      - scrypt iteration count
 */
void scrypthash(void *output, const void *input, uint32_t N)
{
	uint32_t midstate[8];
	unsigned char *scratchbuf = scrypt_buffer_alloc(N);

	memset(output, 0, 32);
	if (!scratchbuf)
		return;

	sha256_init(midstate);
	sha256_transform(midstate, input, 0);

	scrypt_1024_1_1_256((const uint32_t *)input, (uint32_t *)output,
		midstate, scratchbuf, N);

	free(scratchbuf);
}
776
777