Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
tpruvot
GitHub Repository: tpruvot/cpuminer-multi
Path: blob/linux/algo/neoscrypt.c
1201 views
1
/*
2
* Copyright (c) 2009 Colin Percival, 2011 ArtForz
3
* Copyright (c) 2012 Andrew Moon (floodyberry)
4
* Copyright (c) 2012 Samuel Neves <[email protected]>
5
* Copyright (c) 2014 John Doering <[email protected]>
6
* All rights reserved.
7
*
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
10
* are met:
11
* 1. Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
16
*
17
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
* SUCH DAMAGE.
28
*/
29
#include <stdlib.h>
30
#include <stdint.h>
31
#include <string.h>
32
33
#include "miner.h"
34
35
#define USE_CUSTOM_BLAKE2S
36
// TODO: try blake2sp
37
//#include "crypto/blake2s.h"
38
39
#define STACK_ALIGN 0x40
40
41
#ifdef _MSC_VER // todo: msvc
42
#define ASM 0
43
#elif defined(__arm__)
44
#define ASM 0
45
#endif
46
47
#ifdef __GNUC__
48
#if defined(NOASM) || defined(__arm__)
49
#define ASM 0
50
#else
51
#define ASM 1
52
#endif
53
#endif
54
55
#if (WINDOWS)
56
/* sizeof(unsigned long) = 4 for MinGW64 */
57
typedef unsigned long long ulong;
58
#else
59
typedef unsigned long ulong;
60
#endif
61
typedef unsigned int uint;
62
63
64
/* Classic min/max macros.  All operand positions are parenthesised for
 * macro hygiene; note the arguments are still evaluated twice, so avoid
 * side effects in them (e.g. MIN(i++, j)). */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
66
67
#define SCRYPT_BLOCK_SIZE 64U
68
#define SCRYPT_HASH_BLOCK_SIZE 64U
69
#define SCRYPT_HASH_DIGEST_SIZE 32U
70
71
/* 32-bit rotation macros; the rotate count must be in 1..31.
 * Every sub-expression is parenthesised: the original expanded the
 * count as `32 - b`, which miscompiles composite arguments such as
 * ROTL32(x, r + 1) (expands to `32 - r + 1`). */
#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - (b))))
73
74
/* Big-endian byte-array <-> word conversion helpers */
#define U8TO32_BE(p) \
  (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
   ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])))

#define U32TO8_BE(p, v) \
  (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
  (p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );

#define U64TO8_BE(p, v) \
  U32TO8_BE((p),     (uint32_t)((v) >> 32)); \
  U32TO8_BE((p) + 4, (uint32_t)((v)      ));
85
86
87
typedef uint8_t hash_digest[SCRYPT_HASH_DIGEST_SIZE];
88
89
90
/* SHA-256 */
91
92
/* SHA-256 round constants K[0..63]: first 32 bits of the fractional parts
 * of the cube roots of the first 64 primes (FIPS 180-4, sect. 4.2.2) */
static const uint32_t sha256_constants[64] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
102
103
/* SHA-256 logical functions (FIPS 180-4, sect. 4.1.2) */
#define Ch(x,y,z)  (z ^ (x & (y ^ z)))
#define Maj(x,y,z) (((x | y) & z) | (x & y))
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
#define G0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ (x >> 3))
#define G1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10))
/* Message schedule: big-endian load and expansion of w[16..63] */
#define W0(in,i) (U8TO32_BE(&in[i * 4]))
#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16])
/* One compression round; expects r[8], w[64], t0 and t1 in scope and
 * shifts the working variables r[7..0] down by one position */
#define STEP(i) \
	t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \
	t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha256_constants[i] + w[i]; \
	r[7] = r[6]; \
	r[6] = r[5]; \
	r[5] = r[4]; \
	r[4] = r[3] + t0; \
	r[3] = r[2]; \
	r[2] = r[1]; \
	r[1] = r[0]; \
	r[0] = t0 + t1;
122
123
124
/* Incremental SHA-256 hashing state */
typedef struct sha256_hash_state_t {
	uint32_t H[8];                          /* chaining value */
	uint64_t T;                             /* bits processed so far */
	uint32_t leftover;                      /* bytes buffered below (0..63) */
	uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; /* partial input block */
} sha256_hash_state;
130
131
132
/* Compress `blocks` consecutive 64-byte blocks from `in` into the state,
 * updating the chaining value H and the bit counter T. */
static void sha256_blocks(sha256_hash_state *S, const uint8_t *in, size_t blocks) {
	/* the names r, w, t0, t1 are referenced by the STEP/W0/W1 macros */
	uint32_t r[8], w[64], t0, t1;
	size_t i;

	/* load the current chaining value into the working variables */
	for(i = 0; i < 8; i++)
		r[i] = S->H[i];

	while(blocks--) {
		/* message schedule: 16 big-endian input words... */
		for(i = 0; i < 16; i++) {
			w[i] = W0(in, i);
		}
		/* ...expanded to 64 */
		for(i = 16; i < 64; i++) {
			w[i] = W1(i);
		}
		/* 64 compression rounds */
		for(i = 0; i < 64; i++) {
			STEP(i);
		}
		/* feed-forward into the chaining value */
		for(i = 0; i < 8; i++) {
			r[i] += S->H[i];
			S->H[i] = r[i];
		}
		S->T += SCRYPT_HASH_BLOCK_SIZE * 8;  /* count in bits */
		in += SCRYPT_HASH_BLOCK_SIZE;
	}
}
157
158
/* Reset the state to the standard SHA-256 initial chaining value
 * (FIPS 180-4 square-root constants) with no data buffered. */
static void neoscrypt_hash_init_sha256(sha256_hash_state *S) {
	static const uint32_t iv[8] = {
		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
		0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
	};
	uint32_t n;

	for(n = 0; n < 8; n++)
		S->H[n] = iv[n];
	S->T = 0;
	S->leftover = 0;
}
170
171
/* Absorb inlen bytes from in, buffering partial blocks in S->buffer and
 * compressing every complete 64-byte block. */
static void neoscrypt_hash_update_sha256(sha256_hash_state *S, const uint8_t *in, size_t inlen) {
	size_t blocks, want;

	/* handle the previous data: top up the partial block first */
	if(S->leftover) {
		want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
		want = (want < inlen) ? want : inlen;
		memcpy(S->buffer + S->leftover, in, want);
		S->leftover += (uint32_t)want;
		/* still short of a full block: nothing to compress yet */
		if(S->leftover < SCRYPT_HASH_BLOCK_SIZE)
			return;
		in += want;
		inlen -= want;
		sha256_blocks(S, S->buffer, 1);
	}

	/* handle the current data: compress all whole blocks in place */
	blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
	S->leftover = (uint32_t)(inlen - blocks);
	if(blocks) {
		sha256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
		in += blocks;
	}

	/* handle leftover data: stash the tail for the next call */
	if(S->leftover)
		memcpy(S->buffer, in, S->leftover);
}
199
200
/* Apply SHA-256 padding (0x80, zeros, 64-bit big-endian bit length),
 * compress the final block(s) and write the 32-byte digest to hash. */
static void neoscrypt_hash_finish_sha256(sha256_hash_state *S, uint8_t *hash) {
	/* total message length in bits, including the buffered tail */
	uint64_t t = S->T + (S->leftover * 8);

	S->buffer[S->leftover] = 0x80;
	if(S->leftover <= 55) {
		/* the 8-byte length field still fits into this block */
		memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover);
	} else {
		/* no room: pad out this block and start a fresh one */
		memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover);
		sha256_blocks(S, S->buffer, 1);
		memset(S->buffer, 0, 56);
	}

	U64TO8_BE(S->buffer + 56, t);
	sha256_blocks(S, S->buffer, 1);

	/* serialise the chaining value big-endian */
	U32TO8_BE(&hash[ 0], S->H[0]);
	U32TO8_BE(&hash[ 4], S->H[1]);
	U32TO8_BE(&hash[ 8], S->H[2]);
	U32TO8_BE(&hash[12], S->H[3]);
	U32TO8_BE(&hash[16], S->H[4]);
	U32TO8_BE(&hash[20], S->H[5]);
	U32TO8_BE(&hash[24], S->H[6]);
	U32TO8_BE(&hash[28], S->H[7]);
}
224
225
/* One-shot SHA-256: digest of m[0..mlen) written to hash (32 bytes). */
static void neoscrypt_hash_sha256(hash_digest hash, const uint8_t *m, size_t mlen) {
	sha256_hash_state state;

	neoscrypt_hash_init_sha256(&state);
	neoscrypt_hash_update_sha256(&state, m, mlen);
	neoscrypt_hash_finish_sha256(&state, hash);
}
231
232
233
/* HMAC for SHA-256 */
234
235
/* HMAC-SHA256 state: independent inner and outer hash contexts so the
 * keyed prefix can be cloned cheaply (used heavily by PBKDF2 below) */
typedef struct sha256_hmac_state_t {
	sha256_hash_state inner, outer;
} sha256_hmac_state;
238
239
/* Initialise HMAC-SHA256 with key[0..keylen): prepares the inner hash
 * over (K ^ ipad) and the outer hash over (K ^ opad).  Keys longer than
 * one block are first reduced with SHA-256 (RFC 2104). */
static void neoscrypt_hmac_init_sha256(sha256_hmac_state *st, const uint8_t *key, size_t keylen) {
	uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
	size_t n;

	neoscrypt_hash_init_sha256(&st->inner);
	neoscrypt_hash_init_sha256(&st->outer);

	if(keylen > SCRYPT_HASH_BLOCK_SIZE) {
		/* oversized keys are hashed down to digest size */
		neoscrypt_hash_sha256(pad, key, keylen);
	} else {
		/* short keys are used directly, zero padded */
		memcpy(pad, key, keylen);
	}

	/* inner = H((K ^ 0x36) || ...) */
	for(n = 0; n < SCRYPT_HASH_BLOCK_SIZE; n++)
		pad[n] ^= 0x36;
	neoscrypt_hash_update_sha256(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE);

	/* outer = H((K ^ 0x5c) || ...); flip the 0x36 into 0x5c in place */
	for(n = 0; n < SCRYPT_HASH_BLOCK_SIZE; n++)
		pad[n] ^= (0x5c ^ 0x36);
	neoscrypt_hash_update_sha256(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);
}
266
267
/* Absorb message bytes into the inner hash: H((K ^ ipad) || m...) */
static void neoscrypt_hmac_update_sha256(sha256_hmac_state *st, const uint8_t *m, size_t mlen) {
	neoscrypt_hash_update_sha256(&st->inner, m, mlen);
}
271
272
/* Finalise HMAC: mac = H((K ^ opad) || H((K ^ ipad) || m)) */
static void neoscrypt_hmac_finish_sha256(sha256_hmac_state *st, hash_digest mac) {
	hash_digest inner_digest;

	/* close the inner hash first */
	neoscrypt_hash_finish_sha256(&st->inner, inner_digest);

	/* then feed its digest through the outer hash */
	neoscrypt_hash_update_sha256(&st->outer, inner_digest, sizeof(inner_digest));
	neoscrypt_hash_finish_sha256(&st->outer, mac);
}
281
282
283
/* PBKDF2 for SHA-256 */
284
285
/* PBKDF2-HMAC-SHA256 (RFC 2898): derives output_len bytes of key material
 * with N iterations.  Callers in this file pass N = 1 or small values;
 * NOTE(review): N == 0 would wrap the `j < N - 1` bound — assumed N >= 1. */
static void neoscrypt_pbkdf2_sha256(const uint8_t *password, size_t password_len,
    const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *output, size_t output_len) {
	sha256_hmac_state hmac_pw, hmac_pw_salt, work;
	hash_digest ti, u;
	uint8_t be[4];
	uint32_t i, j, k, blocks;

	/* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */

	/* hmac(password, ...) — keyed prefix reused for every U */
	neoscrypt_hmac_init_sha256(&hmac_pw, password, password_len);

	/* hmac(password, salt...) — prefix for the per-block U1 */
	hmac_pw_salt = hmac_pw;
	neoscrypt_hmac_update_sha256(&hmac_pw_salt, salt, salt_len);

	blocks = ((uint32_t)output_len + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE;
	for(i = 1; i <= blocks; i++) {
		/* U1 = hmac(password, salt || be(i)) */
		U32TO8_BE(be, i);
		work = hmac_pw_salt;
		neoscrypt_hmac_update_sha256(&work, be, 4);
		neoscrypt_hmac_finish_sha256(&work, ti);
		memcpy(u, ti, sizeof(u));

		/* T[i] = U1 ^ U2 ^ U3... */
		for(j = 0; j < N - 1; j++) {
			/* UX = hmac(password, U{X-1}) */
			work = hmac_pw;
			neoscrypt_hmac_update_sha256(&work, u, SCRYPT_HASH_DIGEST_SIZE);
			neoscrypt_hmac_finish_sha256(&work, u);

			/* T[i] ^= UX */
			for(k = 0; k < sizeof(u); k++)
				ti[k] ^= u[k];
		}

		/* emit at most one digest worth per block; the final block may
		 * be short (output_len underflows after it, but the loop ends) */
		memcpy(output, ti, (output_len > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : output_len);
		output += SCRYPT_HASH_DIGEST_SIZE;
		output_len -= SCRYPT_HASH_DIGEST_SIZE;
	}
}
327
328
329
/* NeoScrypt */
330
331
#if (ASM)
332
333
extern void neoscrypt_salsa(uint *X, uint rounds);
334
extern void neoscrypt_salsa_tangle(uint *X, uint count);
335
extern void neoscrypt_chacha(uint *X, uint rounds);
336
337
extern void neoscrypt_blkcpy(void *dstp, const void *srcp, uint len);
338
extern void neoscrypt_blkswp(void *blkAp, void *blkBp, uint len);
339
extern void neoscrypt_blkxor(void *dstp, const void *srcp, uint len);
340
341
#else
342
343
/* Salsa20 core permutation over a 16-word state with feed-forward;
 * rounds must be a multiple of 2 */
static void neoscrypt_salsa(uint *X, uint rounds) {
	uint x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, t;

	/* load the state into locals */
	x0 = X[0];   x1 = X[1];   x2 = X[2];   x3 = X[3];
	x4 = X[4];   x5 = X[5];   x6 = X[6];   x7 = X[7];
	x8 = X[8];   x9 = X[9];   x10 = X[10]; x11 = X[11];
	x12 = X[12]; x13 = X[13]; x14 = X[14]; x15 = X[15];

/* Salsa20 quarter-round; uses the shared scratch variable t */
#define quarter(a, b, c, d) \
	t = a + d; t = ROTL32(t,  7); b ^= t; \
	t = b + a; t = ROTL32(t,  9); c ^= t; \
	t = c + b; t = ROTL32(t, 13); d ^= t; \
	t = d + c; t = ROTL32(t, 18); a ^= t;

	/* one column round plus one row round per iteration */
	for(; rounds; rounds -= 2) {
		quarter( x0,  x4,  x8, x12);
		quarter( x5,  x9, x13,  x1);
		quarter(x10, x14,  x2,  x6);
		quarter(x15,  x3,  x7, x11);
		quarter( x0,  x1,  x2,  x3);
		quarter( x5,  x6,  x7,  x4);
		quarter(x10, x11,  x8,  x9);
		quarter(x15, x12, x13, x14);
	}

	/* feed-forward: add the permuted state back into the input */
	X[0] += x0;   X[1] += x1;   X[2] += x2;   X[3] += x3;
	X[4] += x4;   X[5] += x5;   X[6] += x6;   X[7] += x7;
	X[8] += x8;   X[9] += x9;   X[10] += x10; X[11] += x11;
	X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15;

#undef quarter
}
376
377
/* ChaCha20 core permutation over a 16-word state with feed-forward;
 * rounds must be a multiple of 2 */
static void neoscrypt_chacha(uint *X, uint rounds) {
	uint x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, t;

	/* load the state into locals */
	x0 = X[0];   x1 = X[1];   x2 = X[2];   x3 = X[3];
	x4 = X[4];   x5 = X[5];   x6 = X[6];   x7 = X[7];
	x8 = X[8];   x9 = X[9];   x10 = X[10]; x11 = X[11];
	x12 = X[12]; x13 = X[13]; x14 = X[14]; x15 = X[15];

/* ChaCha quarter-round; uses the shared scratch variable t */
#define quarter(a,b,c,d) \
	a += b; t = d ^ a; d = ROTL32(t, 16); \
	c += d; t = b ^ c; b = ROTL32(t, 12); \
	a += b; t = d ^ a; d = ROTL32(t,  8); \
	c += d; t = b ^ c; b = ROTL32(t,  7);

	/* one column round plus one diagonal round per iteration */
	for(; rounds; rounds -= 2) {
		quarter( x0, x4,  x8, x12);
		quarter( x1, x5,  x9, x13);
		quarter( x2, x6, x10, x14);
		quarter( x3, x7, x11, x15);
		quarter( x0, x5, x10, x15);
		quarter( x1, x6, x11, x12);
		quarter( x2, x7,  x8, x13);
		quarter( x3, x4,  x9, x14);
	}

	/* feed-forward: add the permuted state back into the input */
	X[0] += x0;   X[1] += x1;   X[2] += x2;   X[3] += x3;
	X[4] += x4;   X[5] += x5;   X[6] += x6;   X[7] += x7;
	X[8] += x8;   X[9] += x9;   X[10] += x10; X[11] += x11;
	X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15;

#undef quarter
}
410
411
412
/* Fast 32-bit / 64-bit memcpy();
413
* len must be a multiple of 32 bytes */
414
static void neoscrypt_blkcpy(void *dstp, const void *srcp, uint len) {
415
ulong *dst = (ulong *) dstp;
416
ulong *src = (ulong *) srcp;
417
uint i;
418
419
for(i = 0; i < (len / sizeof(ulong)); i += 4) {
420
dst[i] = src[i];
421
dst[i + 1] = src[i + 1];
422
dst[i + 2] = src[i + 2];
423
dst[i + 3] = src[i + 3];
424
}
425
}
426
427
/* Fast 32-bit / 64-bit block swapper;
428
* len must be a multiple of 32 bytes */
429
static void neoscrypt_blkswp(void *blkAp, void *blkBp, uint len) {
430
ulong *blkA = (ulong *) blkAp;
431
ulong *blkB = (ulong *) blkBp;
432
register ulong t0, t1, t2, t3;
433
uint i;
434
435
for(i = 0; i < (len / sizeof(ulong)); i += 4) {
436
t0 = blkA[i];
437
t1 = blkA[i + 1];
438
t2 = blkA[i + 2];
439
t3 = blkA[i + 3];
440
blkA[i] = blkB[i];
441
blkA[i + 1] = blkB[i + 1];
442
blkA[i + 2] = blkB[i + 2];
443
blkA[i + 3] = blkB[i + 3];
444
blkB[i] = t0;
445
blkB[i + 1] = t1;
446
blkB[i + 2] = t2;
447
blkB[i + 3] = t3;
448
}
449
}
450
451
/* Fast 32-bit / 64-bit block XOR engine;
452
* len must be a multiple of 32 bytes */
453
static void neoscrypt_blkxor(void *dstp, const void *srcp, uint len) {
454
ulong *dst = (ulong *) dstp;
455
ulong *src = (ulong *) srcp;
456
uint i;
457
458
for(i = 0; i < (len / sizeof(ulong)); i += 4) {
459
dst[i] ^= src[i];
460
dst[i + 1] ^= src[i + 1];
461
dst[i + 2] ^= src[i + 2];
462
dst[i + 3] ^= src[i + 3];
463
}
464
}
465
466
#endif
467
468
/* 32-bit / 64-bit optimised memcpy() */
469
static void neoscrypt_copy(void *dstp, const void *srcp, uint len) {
470
ulong *dst = (ulong *) dstp;
471
ulong *src = (ulong *) srcp;
472
uint i, tail;
473
474
for(i = 0; i < (len / sizeof(ulong)); i++)
475
dst[i] = src[i];
476
477
tail = len & (sizeof(ulong) - 1);
478
if(tail) {
479
uchar *dstb = (uchar *) dstp;
480
uchar *srcb = (uchar *) srcp;
481
482
for(i = len - tail; i < len; i++)
483
dstb[i] = srcb[i];
484
}
485
}
486
487
/* 32-bit / 64-bit optimised memory erase aka memset() to zero */
488
static void neoscrypt_erase(void *dstp, uint len) {
489
const ulong null = 0;
490
ulong *dst = (ulong *) dstp;
491
uint i, tail;
492
493
for(i = 0; i < (len / sizeof(ulong)); i++)
494
dst[i] = null;
495
496
tail = len & (sizeof(ulong) - 1);
497
if(tail) {
498
uchar *dstb = (uchar *) dstp;
499
500
for(i = len - tail; i < len; i++)
501
dstb[i] = (uchar)null;
502
}
503
}
504
505
/* 32-bit / 64-bit optimised XOR engine */
506
static void neoscrypt_xor(void *dstp, const void *srcp, uint len) {
507
ulong *dst = (ulong *) dstp;
508
ulong *src = (ulong *) srcp;
509
uint i, tail;
510
511
for(i = 0; i < (len / sizeof(ulong)); i++)
512
dst[i] ^= src[i];
513
514
tail = len & (sizeof(ulong) - 1);
515
if(tail) {
516
uchar *dstb = (uchar *) dstp;
517
uchar *srcb = (uchar *) srcp;
518
519
for(i = len - tail; i < len; i++)
520
dstb[i] ^= srcb[i];
521
}
522
}
523
524
/* BLAKE2s */

#define BLAKE2S_BLOCK_SIZE 64U
#define BLAKE2S_OUT_SIZE 32U
#define BLAKE2S_KEY_SIZE 32U

/* BLAKE2s initialisation vector: same constants as the SHA-256 IV (RFC 7693) */
static const uint blake2s_IV[8] = {
	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
534
535
#ifdef USE_CUSTOM_BLAKE2S
536
537
/* BLAKE2s message word permutation schedule, one row per round (RFC 7693) */
static const uint8_t blake2s_sigma[10][16] = {
	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
	{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
	{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
	{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
	{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
	{ 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
	{ 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
	{  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
	{ 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 } ,
};
549
550
/* Parameter block of 32 bytes (sequential-mode layout, RFC 7693) */
typedef struct blake2s_param_t {
	uchar digest_length;   /* output size in bytes (1..32) */
	uchar key_length;      /* key size in bytes (0..32) */
	uchar fanout;          /* 1 in sequential mode */
	uchar depth;           /* 1 in sequential mode */
	uint leaf_length;      /* 0: unused in sequential mode */
	uchar node_offset[6];
	uchar node_depth;
	uchar inner_length;
	uchar salt[8];
	uchar personal[8];
} blake2s_param;
563
564
/* State block of 180 bytes */
typedef struct blake2s_state_t {
	uint h[8];                          /* chaining value */
	uint t[2];                          /* byte counter (low, high) */
	uint f[2];                          /* finalisation flags */
	uchar buf[2 * BLAKE2S_BLOCK_SIZE];  /* double input buffer */
	uint buflen;                        /* bytes currently buffered */
} blake2s_state;
572
573
/* BLAKE2s compression function: mixes one 64-byte message block into the
 * state (RFC 7693).  The counter t and finalisation flags f must already
 * have been set by the caller. */
static void blake2s_compress(blake2s_state *S, const void *buf) {
	uint i;
	uint m[16];  /* message words; referenced by name inside G */
	uint v[16];  /* work vector; referenced by name inside ROUND */

	neoscrypt_copy(m, buf, 64);
	neoscrypt_copy(v, S, 32);  /* v[0..7] = h[0..7] */

	/* v[8..15] = IV, with the counter and flags folded in */
	v[ 8] = blake2s_IV[0];
	v[ 9] = blake2s_IV[1];
	v[10] = blake2s_IV[2];
	v[11] = blake2s_IV[3];
	v[12] = S->t[0] ^ blake2s_IV[4];
	v[13] = S->t[1] ^ blake2s_IV[5];
	v[14] = S->f[0] ^ blake2s_IV[6];
	v[15] = S->f[1] ^ blake2s_IV[7];

/* The BLAKE2s mixing function G, applied to column/diagonal (a,b,c,d) */
#define G(r,i,a,b,c,d) \
	do { \
		a = a + b + m[blake2s_sigma[r][2*i+0]]; \
		d = ROTR32(d ^ a, 16); \
		c = c + d; \
		b = ROTR32(b ^ c, 12); \
		a = a + b + m[blake2s_sigma[r][2*i+1]]; \
		d = ROTR32(d ^ a, 8); \
		c = c + d; \
		b = ROTR32(b ^ c, 7); \
	} while(0)

/* One full round: four column mixes followed by four diagonal mixes */
#define ROUND(r) \
	do { \
		G(r, 0, v[ 0], v[ 4], v[ 8], v[12]); \
		G(r, 1, v[ 1], v[ 5], v[ 9], v[13]); \
		G(r, 2, v[ 2], v[ 6], v[10], v[14]); \
		G(r, 3, v[ 3], v[ 7], v[11], v[15]); \
		G(r, 4, v[ 0], v[ 5], v[10], v[15]); \
		G(r, 5, v[ 1], v[ 6], v[11], v[12]); \
		G(r, 6, v[ 2], v[ 7], v[ 8], v[13]); \
		G(r, 7, v[ 3], v[ 4], v[ 9], v[14]); \
	} while(0)

	ROUND(0);
	ROUND(1);
	ROUND(2);
	ROUND(3);
	ROUND(4);
	ROUND(5);
	ROUND(6);
	ROUND(7);
	ROUND(8);
	ROUND(9);

	/* fold both halves of the work vector back into the chaining value */
	for(i = 0; i < 8; i++)
		S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];

#undef G
#undef ROUND
}
628
629
/* Absorb input into the double-buffered state.  The 128-byte buffer is
 * compressed lazily, one 64-byte block at a time, so that at least one
 * block always remains buffered for the final-block flag at finish time. */
static void blake2s_update(blake2s_state *S, const uchar *input, uint input_size) {
	uint left, fill;

	while(input_size > 0) {
		left = S->buflen;
		fill = 2 * BLAKE2S_BLOCK_SIZE - left;
		if(input_size > fill) {
			/* Buffer fill */
			neoscrypt_copy(S->buf + left, input, fill);
			S->buflen += fill;
			/* Counter increment */
			S->t[0] += BLAKE2S_BLOCK_SIZE;
			/* Compress the first buffered block */
			blake2s_compress(S, (void *) S->buf);
			/* Shift buffer left */
			neoscrypt_copy(S->buf, S->buf + BLAKE2S_BLOCK_SIZE, BLAKE2S_BLOCK_SIZE);
			S->buflen -= BLAKE2S_BLOCK_SIZE;
			input += fill;
			input_size -= fill;
		} else {
			/* everything fits: just buffer it */
			neoscrypt_copy(S->buf + left, input, input_size);
			S->buflen += input_size;
			/* Do not compress */
			input += input_size;
			input_size = 0;
		}
	}
}
657
#endif
658
659
/* One-shot keyed BLAKE2s: hashes input_size bytes of input under a key of
 * key_size bytes (<= 32) into output_size bytes (<= 32) of output. */
static void neoscrypt_blake2s(const void *input, const uint input_size, const void *key, const uchar key_size,
    void *output, const uchar output_size) {
	uchar block[BLAKE2S_BLOCK_SIZE];
	blake2s_param P[1];
	blake2s_state S[1];

	/* Initialise the parameter block: sequential mode, no salt/personal */
	neoscrypt_erase(P, 32);
	P->digest_length = output_size;
	P->key_length = key_size;
	P->fanout = 1;
	P->depth = 1;

	/* h = IV ^ parameter block */
	neoscrypt_erase(S, 180);
	neoscrypt_copy(S, blake2s_IV, 32);
	neoscrypt_xor(S, P, 32);

	/* Keyed mode: the key is zero-padded to a full block and absorbed first */
	neoscrypt_erase(block, BLAKE2S_BLOCK_SIZE);
	neoscrypt_copy(block, key, key_size);
	blake2s_update(S, (uchar *) block, BLAKE2S_BLOCK_SIZE);

	/* Update */
	blake2s_update(S, (uchar *) input, input_size);

	/* Finish: flush a second buffered block if present, then compress the
	 * last (zero padded) block with the final-block flag set */
	if(S->buflen > BLAKE2S_BLOCK_SIZE) {
		S->t[0] += BLAKE2S_BLOCK_SIZE;
		blake2s_compress(S, (void *) S->buf);
		S->buflen -= BLAKE2S_BLOCK_SIZE;
		neoscrypt_copy(S->buf, S->buf + BLAKE2S_BLOCK_SIZE, S->buflen);
	}
	S->t[0] += S->buflen;
	S->f[0] = ~0U;  /* final-block flag */
	neoscrypt_erase(S->buf + S->buflen, 2 * BLAKE2S_BLOCK_SIZE - S->buflen);
	blake2s_compress(S, (void *) S->buf);

	/* Write back the leading output_size bytes of the chaining value */
	neoscrypt_copy(output, S, output_size);
}
698
699
700
#define FASTKDF_BUFFER_SIZE 256U
701
702
/* FastKDF, a fast buffered key derivation function:
 * FASTKDF_BUFFER_SIZE must be a power of 2;
 * password_len, salt_len and output_len should not exceed FASTKDF_BUFFER_SIZE;
 * prf_output_size must be <= prf_key_size; */
static void neoscrypt_fastkdf(const uchar *password, uint password_len, const uchar *salt, uint salt_len,
    uint N, uchar *output, uint output_len) {

#define kdf_buf_size FASTKDF_BUFFER_SIZE
#define prf_input_size BLAKE2S_BLOCK_SIZE
#define prf_key_size BLAKE2S_KEY_SIZE
#define prf_output_size BLAKE2S_OUT_SIZE

	uint bufptr, a, b, i, j;
	uchar *A, *B, *prf_input, *prf_key, *prf_output;

	/* Align and set up the buffers in stack;
	 * NOTE(review): STACK_ALIGN & ~(STACK_ALIGN - 1) evaluates to the
	 * constant 0x40, i.e. a fixed 64-byte offset, not alignment of the
	 * runtime stack address — confirm against upstream intent */
	uchar stack[2 * kdf_buf_size + prf_input_size + prf_key_size + prf_output_size + STACK_ALIGN];
	A = &stack[STACK_ALIGN & ~(STACK_ALIGN - 1)];
	B = &A[kdf_buf_size + prf_input_size];
	prf_output = &A[2 * kdf_buf_size + prf_input_size + prf_key_size];

	/* Initialise the password buffer: tile the password across the ring
	 * buffer A, plus a prf_input_size tail mirroring the buffer head */
	if(password_len > kdf_buf_size)
		password_len = kdf_buf_size;

	a = kdf_buf_size / password_len;
	for(i = 0; i < a; i++)
		neoscrypt_copy(&A[i * password_len], &password[0], password_len);
	b = kdf_buf_size - a * password_len;
	if(b)
		neoscrypt_copy(&A[a * password_len], &password[0], b);
	neoscrypt_copy(&A[kdf_buf_size], &password[0], prf_input_size);

	/* Initialise the salt buffer the same way into ring buffer B */
	if(salt_len > kdf_buf_size)
		salt_len = kdf_buf_size;

	a = kdf_buf_size / salt_len;
	for(i = 0; i < a; i++)
		neoscrypt_copy(&B[i * salt_len], &salt[0], salt_len);
	b = kdf_buf_size - a * salt_len;
	if(b)
		neoscrypt_copy(&B[a * salt_len], &salt[0], b);
	neoscrypt_copy(&B[kdf_buf_size], &salt[0], prf_key_size);

	/* The primary iteration */
	for(i = 0, bufptr = 0; i < N; i++) {

		/* Map the PRF input buffer */
		prf_input = &A[bufptr];

		/* Map the PRF key buffer */
		prf_key = &B[bufptr];

		/* PRF: BLAKE2s keyed by the current salt window */
		neoscrypt_blake2s(prf_input, prf_input_size, prf_key, prf_key_size, prf_output, prf_output_size);

		/* Calculate the next buffer pointer: byte sum of the PRF output,
		 * reduced modulo the (power of two) buffer size */
		for(j = 0, bufptr = 0; j < prf_output_size; j++)
			bufptr += prf_output[j];
		bufptr &= (kdf_buf_size - 1);

		/* Modify the salt buffer */
		neoscrypt_xor(&B[bufptr], &prf_output[0], prf_output_size);

		/* Head modified, tail updated (keep the mirror consistent) */
		if(bufptr < prf_key_size)
			neoscrypt_copy(&B[kdf_buf_size + bufptr], &B[bufptr], MIN(prf_output_size, prf_key_size - bufptr));

		/* Tail modified, head updated */
		if((kdf_buf_size - bufptr) < prf_output_size)
			neoscrypt_copy(&B[0], &B[kdf_buf_size], prf_output_size - (kdf_buf_size - bufptr));

	}

	/* Modify and copy into the output buffer, wrapping around the ring */
	if(output_len > kdf_buf_size)
		output_len = kdf_buf_size;

	a = kdf_buf_size - bufptr;
	if(a >= output_len) {
		neoscrypt_xor(&B[bufptr], &A[0], output_len);
		neoscrypt_copy(&output[0], &B[bufptr], output_len);
	} else {
		neoscrypt_xor(&B[bufptr], &A[0], a);
		neoscrypt_xor(&B[0], &A[a], output_len - a);
		neoscrypt_copy(&output[0], &B[bufptr], a);
		neoscrypt_copy(&output[a], &B[0], output_len - a);
	}

}
793
794
795
/* Configurable optimised block mixer:
 * mixmode low byte = number of mixer rounds,
 * mixmode bit 8 set = ChaCha, clear = Salsa;
 * r = 1 and r = 2 are unrolled fast paths, any other r takes the
 * reference path using Y as scratch. */
static void neoscrypt_blkmix(uint *X, uint *Y, uint r, uint mixmode) {
	uint i, mixer, rounds;

	mixer = mixmode >> 8;
	rounds = mixmode & 0xFF;

	/* NeoScrypt flow:                   Scrypt flow:
	     Xa ^= Xd;  M(Xa'); Ya = Xa";      Xa ^= Xb;  M(Xa'); Ya = Xa";
	     Xb ^= Xa"; M(Xb'); Yb = Xb";      Xb ^= Xa"; M(Xb'); Yb = Xb";
	     Xc ^= Xb"; M(Xc'); Yc = Xc";      Xa" = Ya;
	     Xd ^= Xc"; M(Xd'); Yd = Xd";      Xb" = Yb;
	     Xa" = Ya; Xb" = Yc;
	     Xc" = Yb; Xd" = Yd; */

	if(r == 1) {
		neoscrypt_blkxor(&X[0], &X[16], SCRYPT_BLOCK_SIZE);
		if(mixer)
			neoscrypt_chacha(&X[0], rounds);
		else
			neoscrypt_salsa(&X[0], rounds);
		neoscrypt_blkxor(&X[16], &X[0], SCRYPT_BLOCK_SIZE);
		if(mixer)
			neoscrypt_chacha(&X[16], rounds);
		else
			neoscrypt_salsa(&X[16], rounds);
		return;
	}

	if(r == 2) {
		neoscrypt_blkxor(&X[0], &X[48], SCRYPT_BLOCK_SIZE);
		if(mixer)
			neoscrypt_chacha(&X[0], rounds);
		else
			neoscrypt_salsa(&X[0], rounds);
		neoscrypt_blkxor(&X[16], &X[0], SCRYPT_BLOCK_SIZE);
		if(mixer)
			neoscrypt_chacha(&X[16], rounds);
		else
			neoscrypt_salsa(&X[16], rounds);
		neoscrypt_blkxor(&X[32], &X[16], SCRYPT_BLOCK_SIZE);
		if(mixer)
			neoscrypt_chacha(&X[32], rounds);
		else
			neoscrypt_salsa(&X[32], rounds);
		neoscrypt_blkxor(&X[48], &X[32], SCRYPT_BLOCK_SIZE);
		if(mixer)
			neoscrypt_chacha(&X[48], rounds);
		else
			neoscrypt_salsa(&X[48], rounds);
		/* final even/odd interleave reduces to one swap when r == 2 */
		neoscrypt_blkswp(&X[16], &X[32], SCRYPT_BLOCK_SIZE);
		return;
	}

	/* Reference code for any reasonable r */
	for(i = 0; i < 2 * r; i++) {
		if(i) neoscrypt_blkxor(&X[16 * i], &X[16 * (i - 1)], SCRYPT_BLOCK_SIZE);
		else  neoscrypt_blkxor(&X[0], &X[16 * (2 * r - 1)], SCRYPT_BLOCK_SIZE);
		if(mixer)
			neoscrypt_chacha(&X[16 * i], rounds);
		else
			neoscrypt_salsa(&X[16 * i], rounds);
		neoscrypt_blkcpy(&Y[16 * i], &X[16 * i], SCRYPT_BLOCK_SIZE);
	}
	/* de-interleave: even sub-blocks first, then odd sub-blocks */
	for(i = 0; i < r; i++)
		neoscrypt_blkcpy(&X[16 * i], &Y[16 * 2 * i], SCRYPT_BLOCK_SIZE);
	for(i = 0; i < r; i++)
		neoscrypt_blkcpy(&X[16 * (i + r)], &Y[16 * (2 * i + 1)], SCRYPT_BLOCK_SIZE);
}
864
865
/* NeoScrypt core engine:
 * p = 1, salt = password;
 * Basic customisation (required):
 *   profile bit 0:
 *     0 = NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20;
 *     1 = Scrypt(1024, 1, 1) with Salsa20/8;
 *   profile bits 4 to 1:
 *     0000 = FastKDF-BLAKE2s;
 *     0001 = PBKDF2-HMAC-SHA256;
 * Extended customisation (optional):
 *   profile bit 31:
 *     0 = extended customisation absent;
 *     1 = extended customisation present;
 *   profile bits 7 to 5 (rfactor):
 *     000 = r of 1;
 *     001 = r of 2;
 *     010 = r of 4;
 *     ...
 *     111 = r of 128;
 *   profile bits 12 to 8 (Nfactor):
 *     00000 = N of 2;
 *     00001 = N of 4;
 *     00010 = N of 8;
 *     .....
 *     00110 = N of 128;
 *     .....
 *     01001 = N of 1024;
 *     .....
 *     11110 = N of 2147483648;
 *   profile bits 30 to 13 are reserved */
void neoscrypt(uchar *output, const uchar *password, uint32_t profile)
{
	/* Defaults: NeoScrypt(128, 2, 1), dual mixing, 20 rounds (0x14) */
	uint N = 128, r = 2, dblmix = 1, mixmode = 0x14;
	uint kdf, i, j;
	uint *X, *Y, *Z, *V;

	if(profile & 0x1) {
		N = 1024;       /* N = (1 << (Nfactor + 1)); */
		r = 1;          /* r = (1 << rfactor); */
		dblmix = 0;     /* Salsa only */
		mixmode = 0x08; /* 8 rounds */
	}

	if(profile >> 31) {
		N = (1 << (((profile >> 8) & 0x1F) + 1));
		r = (1 << ((profile >> 5) & 0x7));
	}

	/* NOTE(review): malloc() result is not checked; an allocation failure
	 * would fault on first use below */
	uchar *stack = (uchar*) malloc((N + 3) * r * 2 * SCRYPT_BLOCK_SIZE + STACK_ALIGN);
	/* X = r * 2 * SCRYPT_BLOCK_SIZE;
	 * NOTE(review): STACK_ALIGN & ~(STACK_ALIGN - 1) is the constant 0x40,
	 * i.e. a fixed 64-byte offset rather than dynamic alignment of the
	 * malloc'd address — confirm against upstream intent */
	X = (uint *) &stack[STACK_ALIGN & ~(STACK_ALIGN - 1)];
	/* Z is a copy of X for ChaCha */
	Z = &X[32 * r];
	/* Y is an X sized temporal space */
	Y = &X[64 * r];
	/* V = N * r * 2 * SCRYPT_BLOCK_SIZE */
	V = &X[96 * r];

	/* X = KDF(password, salt) */
	kdf = (profile >> 1) & 0xF;

	switch(kdf) {

		default:
		case(0x0):
			neoscrypt_fastkdf(password, 80, password, 80, 32, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE);
			break;

		case(0x1):
			neoscrypt_pbkdf2_sha256(password, 80, password, 80, 1, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE);
			break;

	}

	/* Process ChaCha 1st, Salsa 2nd and XOR them into FastKDF; otherwise Salsa only */

	if(dblmix) {
		/* blkcpy(Z, X) */
		neoscrypt_blkcpy(&Z[0], &X[0], r * 2 * SCRYPT_BLOCK_SIZE);

		/* Z = SMix(Z) — sequential write pass filling V */
		for(i = 0; i < N; i++) {
			/* blkcpy(V, Z) */
			neoscrypt_blkcpy(&V[i * (32 * r)], &Z[0], r * 2 * SCRYPT_BLOCK_SIZE);
			/* blkmix(Z, Y) */
			neoscrypt_blkmix(&Z[0], &Y[0], r, (mixmode | 0x0100));
		}
		/* random-read pass over V */
		for(i = 0; i < N; i++) {
			/* integerify(Z) mod N */
			j = (32 * r) * (Z[16 * (2 * r - 1)] & (N - 1));
			/* blkxor(Z, V) */
			neoscrypt_blkxor(&Z[0], &V[j], r * 2 * SCRYPT_BLOCK_SIZE);
			/* blkmix(Z, Y) */
			neoscrypt_blkmix(&Z[0], &Y[0], r, (mixmode | 0x0100));
		}
	}

#if (ASM)
	/* Must be called before and after SSE2 Salsa */
	neoscrypt_salsa_tangle(&X[0], r * 2);
#endif

	/* X = SMix(X) — Salsa variant, same two-pass structure */
	for(i = 0; i < N; i++) {
		/* blkcpy(V, X) */
		neoscrypt_blkcpy(&V[i * (32 * r)], &X[0], r * 2 * SCRYPT_BLOCK_SIZE);
		/* blkmix(X, Y) */
		neoscrypt_blkmix(&X[0], &Y[0], r, mixmode);
	}
	for(i = 0; i < N; i++) {
		/* integerify(X) mod N */
		j = (32 * r) * (X[16 * (2 * r - 1)] & (N - 1));
		/* blkxor(X, V) */
		neoscrypt_blkxor(&X[0], &V[j], r * 2 * SCRYPT_BLOCK_SIZE);
		/* blkmix(X, Y) */
		neoscrypt_blkmix(&X[0], &Y[0], r, mixmode);
	}

#if (ASM)
	neoscrypt_salsa_tangle(&X[0], r * 2);
#endif

	if(dblmix)
		/* blkxor(X, Z) — combine the ChaCha and Salsa lanes */
		neoscrypt_blkxor(&X[0], &Z[0], r * 2 * SCRYPT_BLOCK_SIZE);

	/* output = KDF(password, X) */
	switch(kdf) {
		default:
		case(0x0):
			neoscrypt_fastkdf(password, 80, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE, 32, output, 32);
			break;

		case(0x1):
			neoscrypt_pbkdf2_sha256(password, 80, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE, 1, output, 32);
			break;
	}

	free(stack);
}
1005
1006
static bool fulltest_le(const uint *hash, const uint *target)
1007
{
1008
bool rc = false;
1009
1010
for (int i = 7; i >= 0; i--) {
1011
if (hash[i] > target[i]) {
1012
rc = false;
1013
break;
1014
}
1015
if(hash[i] < target[i]) {
1016
rc = true;
1017
break;
1018
}
1019
}
1020
1021
if (opt_debug) {
1022
uchar hash_str[65], target_str[65];
1023
1024
bin2hex(hash_str, (uint8_t *) hash, 32);
1025
bin2hex(target_str, (uint8_t *) target, 32);
1026
1027
applog(LOG_DEBUG, "DEBUG (little endian): %s\nHash: %sx0\nTarget: %sx0",
1028
rc ? "hash <= target" : "hash > target (false positive)",
1029
hash_str, target_str);
1030
}
1031
1032
return(rc);
1033
}
1034
1035
/* Nonce scanner: hashes work->data with successive nonces in pdata[19]
 * until a hash meets the target, max_nonce is reached or a restart is
 * requested.  Returns 1 when a share is found (the winning nonce is left
 * in pdata[19]), 0 otherwise; *hashes_done counts attempted nonces. */
int scanhash_neoscrypt(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done,
    uint32_t profile)
{
	uint32_t _ALIGN(128) hash[8];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;

	const uint32_t Htarg = ptarget[7];       /* most significant target word */
	const uint32_t first_nonce = pdata[19];  /* nonce lives at word 19 */

	while (pdata[19] < max_nonce && !work_restart[thr_id].restart)
	{
		neoscrypt((uint8_t *) hash, (uint8_t *) pdata, profile);

		/* Quick hash check on the top word before the full comparison */
		if (hash[7] <= Htarg && fulltest_le(hash, ptarget)) {
			work_set_target_ratio(work, hash);
			*hashes_done = pdata[19] - first_nonce + 1;
			return 1;
		}

		pdata[19]++;
	}

	*hashes_done = pdata[19] - first_nonce;
	return 0;
}
1062
1063