Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
tpruvot
GitHub Repository: tpruvot/cpuminer-multi
Path: blob/linux/algo/cryptonight.c
1201 views
1
// Copyright (c) 2012-2013 The Cryptonote developers
2
// Distributed under the MIT/X11 software license, see the accompanying
3
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5
// Modified for CPUminer by Lucas Jones
6
7
#include "miner.h"
8
9
#if defined(__arm__) || defined(_MSC_VER)
10
#ifndef NOASM
11
#define NOASM
12
#endif
13
#endif
14
15
#include "crypto/oaes_lib.h"
16
#include "crypto/c_keccak.h"
17
#include "crypto/c_groestl.h"
18
#include "crypto/c_blake256.h"
19
#include "crypto/c_jh.h"
20
#include "crypto/c_skein.h"
21
#include "crypto/int-util.h"
22
#include "crypto/hash-ops.h"
23
24
/* Optional 128-bit integer support for the 16-byte XOR helpers. */
#if USE_INT128

#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
/* gcc 4.4/4.5: __uint128_t not yet available, use the TI mode attribute. */
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
#elif defined (_MSC_VER)
/* only for mingw64 on windows */
#undef USE_INT128
#define USE_INT128 (0)
#else
typedef __uint128_t uint128_t;
#endif

#endif

/* Algorithm parameters: LITE selects cryptonight-light (half memory, half iterations). */
#define LITE 0
#if LITE /* cryptonight-light */
#define MEMORY (1 << 20)
#define ITER (1 << 19)
#else
#define MEMORY (1 << 21) /* 2 MiB */
#define ITER (1 << 20)
#endif

#define AES_BLOCK_SIZE 16
#define AES_KEY_SIZE 32 /*16*/
#define INIT_SIZE_BLK 8
/* 128 bytes: the scratchpad is filled/folded in 8-block chunks. */
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
51
52
#pragma pack(push, 1)
/* Overlay of the 200-byte Keccak state: the anonymous struct reinterprets
 * the first 64 bytes as AES key material (k) and the following 128 bytes
 * as the initial scratchpad text (init). Packed so offsets match exactly. */
union cn_slow_hash_state {
	union hash_state hs;
	struct {
		uint8_t k[64];                 /* two 32-byte AES keys */
		uint8_t init[INIT_SIZE_BYTE];  /* initial 128-byte text buffer */
	};
};
#pragma pack(pop)
61
62
/* Finalizer 0: BLAKE-256 over the full 200-byte state. */
static void do_blake_hash(const void* input, size_t len, char* output) {
	blake256_hash((uint8_t*)output, input, len);
}
65
66
/* Finalizer 1: Groestl-256; groestl() takes the length in bits. */
static void do_groestl_hash(const void* input, size_t len, char* output) {
	groestl(input, len * 8, (uint8_t*)output);
}
69
70
/* Finalizer 2: JH-256; bit lengths for both digest size and input. */
static void do_jh_hash(const void* input, size_t len, char* output) {
	int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
	assert(likely(SUCCESS == r));  /* NOTE: r is unchecked under NDEBUG */
}
74
75
/* Finalizer 3: Skein-256; bit lengths for both digest size and input. */
static void do_skein_hash(const void* input, size_t len, char* output) {
	int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
	assert(likely(SKEIN_SUCCESS == r));  /* NOTE: r is unchecked under NDEBUG */
}
79
80
/* Portable C AES primitives (one round / full 10-round pseudo round). */
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
#if !defined(_MSC_VER) && !defined(NOASM)
/* Assembly-accelerated versions, available when NASM/x86 asm is built in. */
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
#else
/* No asm: alias the fast names to the portable C implementations. */
#define fast_aesb_single_round aesb_single_round
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
#endif
89
90
#if defined(NOASM) || !defined(__x86_64__)
91
/* Portable 64x64 -> 128-bit multiply (schoolbook on 32-bit halves).
 * Returns the low 64 bits of the product; the high 64 bits are stored
 * through product_hi.
 *   multiplier   = a * 2^32 + b
 *   multiplicand = c * 2^32 + d
 *   ab * cd = a*c * 2^64 + (a*d + b*c) * 2^32 + b*d
 */
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
	const uint64_t a = multiplier >> 32;           /* hi dword */
	const uint64_t b = multiplier & 0xFFFFFFFF;    /* lo dword */
	const uint64_t c = multiplicand >> 32;
	const uint64_t d = multiplicand & 0xFFFFFFFF;

	const uint64_t ac = a * c;
	const uint64_t ad = a * d;
	const uint64_t bc = b * c;
	const uint64_t bd = b * d;

	/* The sum of the two middle terms can wrap; record the carry. */
	const uint64_t mid = ad + bc;
	const uint64_t mid_carry = (mid < ad) ? 1 : 0;

	/* product = product_hi * 2^64 + product_lo */
	const uint64_t product_lo = bd + (mid << 32);
	const uint64_t lo_carry = (product_lo < bd) ? 1 : 0;
	*product_hi = ac + (mid >> 32) + (mid_carry << 32) + lo_carry;
	assert(ac <= *product_hi);

	return product_lo;
}
116
#else
117
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
118
#endif
119
120
/* Final-hash dispatch table, indexed by the low 2 bits of the first
 * state byte: 0 = BLAKE-256, 1 = Groestl, 2 = JH, 3 = Skein. */
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
	do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
};
123
124
125
/* Map a 16-byte block to its scratchpad offset: read the first 32-bit
 * word and mask it to a 16-byte-aligned offset inside MEMORY.
 * Uses memcpy instead of a (uint32_t*) cast to avoid strict-aliasing /
 * misalignment undefined behavior; compilers emit the same single load. */
static inline size_t e2i(const uint8_t* a) {
	uint32_t v;
	memcpy(&v, a, sizeof(v));
#if !LITE
	return v & 0x1FFFF0;   /* 2 MiB scratchpad */
#else
	return v & 0xFFFF0;    /* 1 MiB scratchpad (cryptonight-light) */
#endif
}
132
133
/* CryptoNight "MUL + ADD + XOR" half-round.
 * Computes the 128-bit product of the low quadwords of a and dst, adds the
 * 128-bit value in c, stores old-dst XOR sum into c, then overwrites dst
 * with the sum. Variant 1 additionally XORs the per-hash tweak into the
 * low quadword written back to dst. Statement order matters: dst must be
 * read into c before it is overwritten. */
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst, int variant, const uint64_t tweak) {
	/* hi is written by mul128 through its out-parameter before use. */
	uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
	hi += ((uint64_t*) c)[0];
	((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
	((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
	((uint64_t*) dst)[0] = hi;
	((uint64_t*) dst)[1] = variant ? lo ^ tweak : lo;
}
141
142
/* XOR the 16-byte block at b into the 16-byte block at a, in place.
 * Both pointers must reference 16-byte-aligned AES blocks. */
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
#if USE_INT128
	*((uint128_t*) a) ^= *((uint128_t*) b);
#else
	uint64_t *dst64 = (uint64_t *) a;
	const uint64_t *src64 = (const uint64_t *) b;
	dst64[0] ^= src64[0];
	dst64[1] ^= src64[1];
#endif
}
150
151
/* Write (a XOR b) of two 16-byte blocks into dst; a and b are untouched.
 * All three pointers must reference 16-byte-aligned AES blocks. */
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
#if USE_INT128
	*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
#else
	const uint64_t *lhs = (const uint64_t *) a;
	const uint64_t *rhs = (const uint64_t *) b;
	uint64_t *out = (uint64_t *) dst;
	out[0] = lhs[0] ^ rhs[0];
	out[1] = lhs[1] ^ rhs[1];
#endif
}
159
160
/* Monero variant-1 tweak on a scratchpad store: for variant 1 only,
 * byte 11 of the 16-byte block is XORed with a value selected from the
 * 0x75310 nibble table by bits of the byte itself. No-op otherwise. */
static void cryptonight_store_variant(void* state, int variant) {
	if (variant != 1)
		return;
	uint8_t *bytes = (uint8_t *) state;
	const uint8_t tmp = bytes[11];
	const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1;
	bytes[11] = tmp ^ ((0x75310 >> index) & 0x30);
}
167
168
/* Per-thread working state, ~2 MiB; heap-allocated once per scan/hash call. */
struct cryptonight_ctx {
	uint8_t _ALIGN(16) long_state[MEMORY];    /* the memory-hard scratchpad */
	union cn_slow_hash_state state;           /* Keccak state / key / init overlay */
	uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];  /* 128-byte AES streaming buffer */
	uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];     /* main-loop register blocks */
	uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
	uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
	oaes_ctx* aes_ctx;                        /* OpenAES key schedule */
};
177
178
/* Portable (non-AES-NI) CryptoNight slow hash.
 * output  : receives the 32-byte hash.
 * input   : block blob of `len` bytes (variant 1 reads 8 bytes at offset 35,
 *           so callers must pass len >= 43 -- TODO confirm all callers do).
 * ctx     : caller-allocated scratchpad/state (~2 MiB).
 * variant : 0 = original CryptoNight, 1 = Monero v7 tweak. */
static void cryptonight_hash_ctx(void* output, const void* input, int len, struct cryptonight_ctx* ctx, int variant)
{
	size_t i, j;

	/* Keccak absorb of the input into the 200-byte state. */
	hash_process(&ctx->state.hs, (const uint8_t*) input, len);
	ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
	memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);

	/* Variant-1 per-hash tweak: nonce-area bytes XOR state word 24. */
	const uint64_t tweak = variant ? *((uint64_t*) (((uint8_t*)input) + 35)) ^ ctx->state.hs.w[24] : 0;

	/* Phase 1: fill the scratchpad. Key = first 32 state bytes; the
	 * 128-byte text buffer is repeatedly AES-encrypted and copied out. */
	oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
		aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
		memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
	}

	/* a and b start as XORs of the two 32-byte key halves. */
	xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
	xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);

	/* Phase 2: the memory-hard main loop, ITER iterations unrolled 4x. */
	for (i = 0; likely(i < ITER / 4); ++i) {
		/* Dependency chain: address -> read value ------+
		 * written value <-+ hard function (AES or MUL) <+
		 * next address <-+
		 */
		/* Iteration 1 */
		j = e2i(ctx->a);
		aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
		xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
		/* Iteration 2 */
		cryptonight_store_variant(&ctx->long_state[j], variant);
		mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)], variant, tweak);

		/* Iteration 3 */
		j = e2i(ctx->a);
		aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
		xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
		/* Iteration 4 */
		cryptonight_store_variant(&ctx->long_state[j], variant);
		mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)], variant, tweak);
	}

	/* Phase 3: fold the scratchpad back into the state with the second key. */
	memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
	oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
		xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
		aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
		aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
		aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
		aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
		aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
		aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
		aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
		aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
	}
	memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
	/* Final Keccak permutation, then one of four finalizers selected by
	 * the low 2 bits of the first state byte. */
	hash_permutation(&ctx->state.hs);
	/*memcpy(hash, &state, 32);*/
	extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
	oaes_free((OAES_CTX **) &ctx->aes_ctx);
}
252
253
void cryptonight_hash(void* output, const void* input) {
254
const int variant = 1;
255
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
256
cryptonight_hash_ctx(output, input, 76, ctx, variant);
257
free(ctx);
258
}
259
260
void cryptonight_hash_v1(void* output, const void* input) {
261
const int variant = 0;
262
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
263
cryptonight_hash_ctx(output, input, 76, ctx, variant);
264
free(ctx);
265
}
266
267
/* AES-NI accelerated CryptoNight slow hash; identical structure to
 * cryptonight_hash_ctx but uses the fast_aesb_* primitives (asm/AES-NI).
 * See cryptonight_hash_ctx for parameter semantics. */
static void cryptonight_hash_ctx_aes_ni(void* output, const void* input, int len, struct cryptonight_ctx* ctx, int variant)
{
	size_t i, j;

	/* Keccak absorb of the input into the 200-byte state. */
	hash_process(&ctx->state.hs, (const uint8_t*)input, len);
	ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
	memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);

	/* Variant-1 per-hash tweak: nonce-area bytes XOR state word 24
	 * (assumes len >= 43 -- TODO confirm callers always pass >= 43). */
	const uint64_t tweak = variant ? *((uint64_t*) (((uint8_t*)input) + 35)) ^ ctx->state.hs.w[24] : 0;

	/* Phase 1: fill the scratchpad with AES-encrypted text blocks. */
	oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
		fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
		memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
	}

	/* a and b start as XORs of the two 32-byte key halves. */
	xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
	xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);

	/* Phase 2: the memory-hard main loop, ITER iterations unrolled 4x. */
	for (i = 0; likely(i < ITER / 4); ++i) {
		/* Dependency chain: address -> read value ------+
		 * written value <-+ hard function (AES or MUL) <+
		 * next address <-+
		 */
		/* Iteration 1 */
		j = e2i(ctx->a);
		fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
		xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
		/* Iteration 2 */
		cryptonight_store_variant(&ctx->long_state[j], variant);
		mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)], variant, tweak);

		/* Iteration 3 */
		j = e2i(ctx->a);
		fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
		xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
		/* Iteration 4 */
		cryptonight_store_variant(&ctx->long_state[j], variant);
		mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)], variant, tweak);
	}

	/* Phase 3: fold the scratchpad back into the state with the second key. */
	memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
	oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
	for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
		xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
		fast_aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
		fast_aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
		fast_aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
		fast_aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
		fast_aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
		fast_aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
		fast_aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
		xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
		fast_aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
	}
	memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
	/* Final Keccak permutation, then one of four finalizers selected by
	 * the low 2 bits of the first state byte. */
	hash_permutation(&ctx->state.hs);
	/*memcpy(hash, &state, 32);*/
	extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
	oaes_free((OAES_CTX **) &ctx->aes_ctx);
}
341
342
int scanhash_cryptonight(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
343
{
344
uint32_t _ALIGN(128) hash[HASH_SIZE / 4];
345
uint32_t *pdata = work->data;
346
uint32_t *ptarget = work->target;
347
348
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
349
uint32_t n = *nonceptr - 1;
350
const uint32_t first_nonce = n + 1;
351
352
// todo: make it dynamic
353
const int variant = 1;
354
355
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
356
357
if (aes_ni_supported) {
358
do {
359
*nonceptr = ++n;
360
cryptonight_hash_ctx_aes_ni(hash, pdata, 76, ctx, variant);
361
if (unlikely(hash[7] < ptarget[7])) {
362
work_set_target_ratio(work, hash);
363
*hashes_done = n - first_nonce + 1;
364
free(ctx);
365
return 1;
366
}
367
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
368
} else {
369
do {
370
*nonceptr = ++n;
371
cryptonight_hash_ctx(hash, pdata, 76, ctx, variant);
372
if (unlikely(hash[7] < ptarget[7])) {
373
work_set_target_ratio(work, hash);
374
*hashes_done = n - first_nonce + 1;
375
free(ctx);
376
return 1;
377
}
378
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
379
}
380
381
free(ctx);
382
*hashes_done = n - first_nonce + 1;
383
return 0;
384
}
385
386