/* $Id: echo.c 227 2010-06-16 17:28:38Z tp $ */
/*
 * ECHO implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010 Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author Thomas Pornin <[email protected]>
 */

#include <stddef.h>
#include <string.h>
#include <limits.h>

#include "sph_echo.h"

#ifdef __cplusplus
extern "C"{
#endif

#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_ECHO
#define SPH_SMALL_FOOTPRINT_ECHO 1
#endif

/*
 * Some measures tend to show that the 64-bit implementation offers
 * better performance only on true 64-bit architectures, those which
 * have actual 64-bit registers.
 */
#if !defined SPH_ECHO_64 && SPH_64_TRUE
#define SPH_ECHO_64 1
#endif

/*
 * We can use a 64-bit implementation only if a 64-bit type is available.
 */
#if !SPH_64
#undef SPH_ECHO_64
#endif

#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif

#define T32 SPH_T32
#define C32 SPH_C32
#if SPH_64
#define C64 SPH_C64
#endif

#define AES_BIG_ENDIAN 0
#include "aes_helper.c"

#if SPH_ECHO_64

#define DECL_STATE_SMALL \
    sph_u64 W[16][2];

#define DECL_STATE_BIG \
    sph_u64 W[16][2];
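
/*
 * INPUT_BLOCK_* load the chaining value V and the message block into W.
 * The "small" variant (ECHO-224/256) keeps a 512-bit chaining value in
 * W[0..3] and reads a 1536-bit message block into W[4..15]; the "big"
 * variant (ECHO-384/512) keeps a 1024-bit chaining value in W[0..7] and
 * reads a 1024-bit block into W[8..15]. Message bytes are decoded
 * little-endian.
 */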

#define INPUT_BLOCK_SMALL(sc) do { \
    unsigned u; \
    memcpy(W, sc->u.Vb, 8 * sizeof(sph_u64)); \
    for (u = 0; u < 12; u ++) { \
        W[u + 4][0] = sph_dec64le_aligned( \
            sc->buf + 16 * u); \
        W[u + 4][1] = sph_dec64le_aligned( \
            sc->buf + 16 * u + 8); \
    } \
} while (0)

#define INPUT_BLOCK_BIG(sc) do { \
    unsigned u; \
    memcpy(W, sc->u.Vb, 16 * sizeof(sph_u64)); \
    for (u = 0; u < 8; u ++) { \
        W[u + 8][0] = sph_dec64le_aligned( \
            sc->buf + 16 * u); \
        W[u + 8][1] = sph_dec64le_aligned( \
            sc->buf + 16 * u + 8); \
    } \
} while (0)
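
/*
 * BIG_SUB_WORDS puts each of the 16 state words through two AES rounds:
 * the first keyed with the running 128-bit counter (K0..K3), which is
 * incremented by one per word processed, the second with an all-zero
 * key (AES_ROUND_NOKEY_LE). The small-footprint build loops in a helper
 * function; the default build unrolls all 16 words.
 */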

#if SPH_SMALL_FOOTPRINT_ECHO

static void
aes_2rounds_all(sph_u64 W[16][2],
    sph_u32 *pK0, sph_u32 *pK1, sph_u32 *pK2, sph_u32 *pK3)
{
    int n;
    sph_u32 K0 = *pK0;
    sph_u32 K1 = *pK1;
    sph_u32 K2 = *pK2;
    sph_u32 K3 = *pK3;

    for (n = 0; n < 16; n ++) {
        sph_u64 Wl = W[n][0];
        sph_u64 Wh = W[n][1];
        sph_u32 X0 = (sph_u32)Wl;
        sph_u32 X1 = (sph_u32)(Wl >> 32);
        sph_u32 X2 = (sph_u32)Wh;
        sph_u32 X3 = (sph_u32)(Wh >> 32);
        sph_u32 Y0, Y1, Y2, Y3;
        AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3);
        AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
        W[n][0] = (sph_u64)X0 | ((sph_u64)X1 << 32);
        W[n][1] = (sph_u64)X2 | ((sph_u64)X3 << 32);
        if ((K0 = T32(K0 + 1)) == 0) {
            if ((K1 = T32(K1 + 1)) == 0)
                if ((K2 = T32(K2 + 1)) == 0)
                    K3 = T32(K3 + 1);
        }
    }
    *pK0 = K0;
    *pK1 = K1;
    *pK2 = K2;
    *pK3 = K3;
}

#define BIG_SUB_WORDS do { \
    aes_2rounds_all(W, &K0, &K1, &K2, &K3); \
} while (0)

#else

#define AES_2ROUNDS(X) do { \
    sph_u32 X0 = (sph_u32)(X[0]); \
    sph_u32 X1 = (sph_u32)(X[0] >> 32); \
    sph_u32 X2 = (sph_u32)(X[1]); \
    sph_u32 X3 = (sph_u32)(X[1] >> 32); \
    sph_u32 Y0, Y1, Y2, Y3; \
    AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3); \
    AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X0, X1, X2, X3); \
    X[0] = (sph_u64)X0 | ((sph_u64)X1 << 32); \
    X[1] = (sph_u64)X2 | ((sph_u64)X3 << 32); \
    if ((K0 = T32(K0 + 1)) == 0) { \
        if ((K1 = T32(K1 + 1)) == 0) \
            if ((K2 = T32(K2 + 1)) == 0) \
                K3 = T32(K3 + 1); \
    } \
} while (0)

#define BIG_SUB_WORDS do { \
    AES_2ROUNDS(W[ 0]); \
    AES_2ROUNDS(W[ 1]); \
    AES_2ROUNDS(W[ 2]); \
    AES_2ROUNDS(W[ 3]); \
    AES_2ROUNDS(W[ 4]); \
    AES_2ROUNDS(W[ 5]); \
    AES_2ROUNDS(W[ 6]); \
    AES_2ROUNDS(W[ 7]); \
    AES_2ROUNDS(W[ 8]); \
    AES_2ROUNDS(W[ 9]); \
    AES_2ROUNDS(W[10]); \
    AES_2ROUNDS(W[11]); \
    AES_2ROUNDS(W[12]); \
    AES_2ROUNDS(W[13]); \
    AES_2ROUNDS(W[14]); \
    AES_2ROUNDS(W[15]); \
} while (0)

#endif

#define SHIFT_ROW1(a, b, c, d) do { \
    sph_u64 tmp; \
    tmp = W[a][0]; \
    W[a][0] = W[b][0]; \
    W[b][0] = W[c][0]; \
    W[c][0] = W[d][0]; \
    W[d][0] = tmp; \
    tmp = W[a][1]; \
    W[a][1] = W[b][1]; \
    W[b][1] = W[c][1]; \
    W[c][1] = W[d][1]; \
    W[d][1] = tmp; \
} while (0)

#define SHIFT_ROW2(a, b, c, d) do { \
    sph_u64 tmp; \
    tmp = W[a][0]; \
    W[a][0] = W[c][0]; \
    W[c][0] = tmp; \
    tmp = W[b][0]; \
    W[b][0] = W[d][0]; \
    W[d][0] = tmp; \
    tmp = W[a][1]; \
    W[a][1] = W[c][1]; \
    W[c][1] = tmp; \
    tmp = W[b][1]; \
    W[b][1] = W[d][1]; \
    W[d][1] = tmp; \
} while (0)

#define SHIFT_ROW3(a, b, c, d) SHIFT_ROW1(d, c, b, a)

#define BIG_SHIFT_ROWS do { \
    SHIFT_ROW1(1, 5, 9, 13); \
    SHIFT_ROW2(2, 6, 10, 14); \
    SHIFT_ROW3(3, 7, 11, 15); \
} while (0)
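
/*
 * MIX_COLUMN applies the AES MixColumns circulant (2, 3, 1, 1) over
 * GF(2^8) to corresponding bytes of four state words, several bytes at
 * a time within one register. Doubling of every packed byte b is
 * computed branch-free as ((b >> 7) * 27) ^ ((b & 0x7F) << 1), where
 * 27 = 0x1B is the reduction tail of the AES polynomial
 * x^8 + x^4 + x^3 + x + 1. For instance, the first output is
 * abx ^ bc ^ d = 2*(a^b) ^ (b^c) ^ d = 2*a ^ 3*b ^ c ^ d.
 */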

#if SPH_SMALL_FOOTPRINT_ECHO

static void
mix_column(sph_u64 W[16][2], int ia, int ib, int ic, int id)
{
    int n;

    for (n = 0; n < 2; n ++) {
        sph_u64 a = W[ia][n];
        sph_u64 b = W[ib][n];
        sph_u64 c = W[ic][n];
        sph_u64 d = W[id][n];
        sph_u64 ab = a ^ b;
        sph_u64 bc = b ^ c;
        sph_u64 cd = c ^ d;
        sph_u64 abx = ((ab & C64(0x8080808080808080)) >> 7) * 27U
            ^ ((ab & C64(0x7F7F7F7F7F7F7F7F)) << 1);
        sph_u64 bcx = ((bc & C64(0x8080808080808080)) >> 7) * 27U
            ^ ((bc & C64(0x7F7F7F7F7F7F7F7F)) << 1);
        sph_u64 cdx = ((cd & C64(0x8080808080808080)) >> 7) * 27U
            ^ ((cd & C64(0x7F7F7F7F7F7F7F7F)) << 1);
        W[ia][n] = abx ^ bc ^ d;
        W[ib][n] = bcx ^ a ^ cd;
        W[ic][n] = cdx ^ ab ^ d;
        W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c;
    }
}

#define MIX_COLUMN(a, b, c, d) mix_column(W, a, b, c, d)

#else

#define MIX_COLUMN1(ia, ib, ic, id, n) do { \
    sph_u64 a = W[ia][n]; \
    sph_u64 b = W[ib][n]; \
    sph_u64 c = W[ic][n]; \
    sph_u64 d = W[id][n]; \
    sph_u64 ab = a ^ b; \
    sph_u64 bc = b ^ c; \
    sph_u64 cd = c ^ d; \
    sph_u64 abx = ((ab & C64(0x8080808080808080)) >> 7) * 27U \
        ^ ((ab & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
    sph_u64 bcx = ((bc & C64(0x8080808080808080)) >> 7) * 27U \
        ^ ((bc & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
    sph_u64 cdx = ((cd & C64(0x8080808080808080)) >> 7) * 27U \
        ^ ((cd & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
    W[ia][n] = abx ^ bc ^ d; \
    W[ib][n] = bcx ^ a ^ cd; \
    W[ic][n] = cdx ^ ab ^ d; \
    W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c; \
} while (0)

#define MIX_COLUMN(a, b, c, d) do { \
    MIX_COLUMN1(a, b, c, d, 0); \
    MIX_COLUMN1(a, b, c, d, 1); \
} while (0)

#endif

#define BIG_MIX_COLUMNS do { \
    MIX_COLUMN(0, 1, 2, 3); \
    MIX_COLUMN(4, 5, 6, 7); \
    MIX_COLUMN(8, 9, 10, 11); \
    MIX_COLUMN(12, 13, 14, 15); \
} while (0)

#define BIG_ROUND do { \
    BIG_SUB_WORDS; \
    BIG_SHIFT_ROWS; \
    BIG_MIX_COLUMNS; \
} while (0)
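
/*
 * FINAL_* perform the feed-forward that produces the new chaining
 * value: V is XORed with the message block and with the final state W
 * folded down to the chaining-value width (512 bits for the small
 * variant, 1024 bits for the big one).
 */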

#define FINAL_SMALL do { \
    unsigned u; \
    sph_u64 *VV = &sc->u.Vb[0][0]; \
    sph_u64 *WW = &W[0][0]; \
    for (u = 0; u < 8; u ++) { \
        VV[u] ^= sph_dec64le_aligned(sc->buf + (u * 8)) \
            ^ sph_dec64le_aligned(sc->buf + (u * 8) + 64) \
            ^ sph_dec64le_aligned(sc->buf + (u * 8) + 128) \
            ^ WW[u] ^ WW[u + 8] \
            ^ WW[u + 16] ^ WW[u + 24]; \
    } \
} while (0)

#define FINAL_BIG do { \
    unsigned u; \
    sph_u64 *VV = &sc->u.Vb[0][0]; \
    sph_u64 *WW = &W[0][0]; \
    for (u = 0; u < 16; u ++) { \
        VV[u] ^= sph_dec64le_aligned(sc->buf + (u * 8)) \
            ^ WW[u] ^ WW[u + 16]; \
    } \
} while (0)

#define COMPRESS_SMALL(sc) do { \
    sph_u32 K0 = sc->C0; \
    sph_u32 K1 = sc->C1; \
    sph_u32 K2 = sc->C2; \
    sph_u32 K3 = sc->C3; \
    unsigned u; \
    INPUT_BLOCK_SMALL(sc); \
    for (u = 0; u < 8; u ++) { \
        BIG_ROUND; \
    } \
    FINAL_SMALL; \
} while (0)

#define COMPRESS_BIG(sc) do { \
    sph_u32 K0 = sc->C0; \
    sph_u32 K1 = sc->C1; \
    sph_u32 K2 = sc->C2; \
    sph_u32 K3 = sc->C3; \
    unsigned u; \
    INPUT_BLOCK_BIG(sc); \
    for (u = 0; u < 10; u ++) { \
        BIG_ROUND; \
    } \
    FINAL_BIG; \
} while (0)

#else
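
/*
 * 32-bit fallback: the same transforms as above, with each 128-bit
 * state word held as four sph_u32 values instead of two sph_u64
 * halves.
 */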

#define DECL_STATE_SMALL \
    sph_u32 W[16][4];

#define DECL_STATE_BIG \
    sph_u32 W[16][4];

#define INPUT_BLOCK_SMALL(sc) do { \
    unsigned u; \
    memcpy(W, sc->u.Vs, 16 * sizeof(sph_u32)); \
    for (u = 0; u < 12; u ++) { \
        W[u + 4][0] = sph_dec32le_aligned( \
            sc->buf + 16 * u); \
        W[u + 4][1] = sph_dec32le_aligned( \
            sc->buf + 16 * u + 4); \
        W[u + 4][2] = sph_dec32le_aligned( \
            sc->buf + 16 * u + 8); \
        W[u + 4][3] = sph_dec32le_aligned( \
            sc->buf + 16 * u + 12); \
    } \
} while (0)

#define INPUT_BLOCK_BIG(sc) do { \
    unsigned u; \
    memcpy(W, sc->u.Vs, 32 * sizeof(sph_u32)); \
    for (u = 0; u < 8; u ++) { \
        W[u + 8][0] = sph_dec32le_aligned( \
            sc->buf + 16 * u); \
        W[u + 8][1] = sph_dec32le_aligned( \
            sc->buf + 16 * u + 4); \
        W[u + 8][2] = sph_dec32le_aligned( \
            sc->buf + 16 * u + 8); \
        W[u + 8][3] = sph_dec32le_aligned( \
            sc->buf + 16 * u + 12); \
    } \
} while (0)

#if SPH_SMALL_FOOTPRINT_ECHO

static void
aes_2rounds_all(sph_u32 W[16][4],
    sph_u32 *pK0, sph_u32 *pK1, sph_u32 *pK2, sph_u32 *pK3)
{
    int n;
    sph_u32 K0 = *pK0;
    sph_u32 K1 = *pK1;
    sph_u32 K2 = *pK2;
    sph_u32 K3 = *pK3;

    for (n = 0; n < 16; n ++) {
        sph_u32 *X = W[n];
        sph_u32 Y0, Y1, Y2, Y3;
        AES_ROUND_LE(X[0], X[1], X[2], X[3],
            K0, K1, K2, K3, Y0, Y1, Y2, Y3);
        AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X[0], X[1], X[2], X[3]);
        if ((K0 = T32(K0 + 1)) == 0) {
            if ((K1 = T32(K1 + 1)) == 0)
                if ((K2 = T32(K2 + 1)) == 0)
                    K3 = T32(K3 + 1);
        }
    }
    *pK0 = K0;
    *pK1 = K1;
    *pK2 = K2;
    *pK3 = K3;
}

#define BIG_SUB_WORDS do { \
    aes_2rounds_all(W, &K0, &K1, &K2, &K3); \
} while (0)

#else

#define AES_2ROUNDS(X) do { \
    sph_u32 Y0, Y1, Y2, Y3; \
    AES_ROUND_LE(X[0], X[1], X[2], X[3], \
        K0, K1, K2, K3, Y0, Y1, Y2, Y3); \
    AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X[0], X[1], X[2], X[3]); \
    if ((K0 = T32(K0 + 1)) == 0) { \
        if ((K1 = T32(K1 + 1)) == 0) \
            if ((K2 = T32(K2 + 1)) == 0) \
                K3 = T32(K3 + 1); \
    } \
} while (0)

#define BIG_SUB_WORDS do { \
    AES_2ROUNDS(W[ 0]); \
    AES_2ROUNDS(W[ 1]); \
    AES_2ROUNDS(W[ 2]); \
    AES_2ROUNDS(W[ 3]); \
    AES_2ROUNDS(W[ 4]); \
    AES_2ROUNDS(W[ 5]); \
    AES_2ROUNDS(W[ 6]); \
    AES_2ROUNDS(W[ 7]); \
    AES_2ROUNDS(W[ 8]); \
    AES_2ROUNDS(W[ 9]); \
    AES_2ROUNDS(W[10]); \
    AES_2ROUNDS(W[11]); \
    AES_2ROUNDS(W[12]); \
    AES_2ROUNDS(W[13]); \
    AES_2ROUNDS(W[14]); \
    AES_2ROUNDS(W[15]); \
} while (0)

#endif

#define SHIFT_ROW1(a, b, c, d) do { \
    sph_u32 tmp; \
    tmp = W[a][0]; \
    W[a][0] = W[b][0]; \
    W[b][0] = W[c][0]; \
    W[c][0] = W[d][0]; \
    W[d][0] = tmp; \
    tmp = W[a][1]; \
    W[a][1] = W[b][1]; \
    W[b][1] = W[c][1]; \
    W[c][1] = W[d][1]; \
    W[d][1] = tmp; \
    tmp = W[a][2]; \
    W[a][2] = W[b][2]; \
    W[b][2] = W[c][2]; \
    W[c][2] = W[d][2]; \
    W[d][2] = tmp; \
    tmp = W[a][3]; \
    W[a][3] = W[b][3]; \
    W[b][3] = W[c][3]; \
    W[c][3] = W[d][3]; \
    W[d][3] = tmp; \
} while (0)

#define SHIFT_ROW2(a, b, c, d) do { \
    sph_u32 tmp; \
    tmp = W[a][0]; \
    W[a][0] = W[c][0]; \
    W[c][0] = tmp; \
    tmp = W[b][0]; \
    W[b][0] = W[d][0]; \
    W[d][0] = tmp; \
    tmp = W[a][1]; \
    W[a][1] = W[c][1]; \
    W[c][1] = tmp; \
    tmp = W[b][1]; \
    W[b][1] = W[d][1]; \
    W[d][1] = tmp; \
    tmp = W[a][2]; \
    W[a][2] = W[c][2]; \
    W[c][2] = tmp; \
    tmp = W[b][2]; \
    W[b][2] = W[d][2]; \
    W[d][2] = tmp; \
    tmp = W[a][3]; \
    W[a][3] = W[c][3]; \
    W[c][3] = tmp; \
    tmp = W[b][3]; \
    W[b][3] = W[d][3]; \
    W[d][3] = tmp; \
} while (0)

#define SHIFT_ROW3(a, b, c, d) SHIFT_ROW1(d, c, b, a)

#define BIG_SHIFT_ROWS do { \
    SHIFT_ROW1(1, 5, 9, 13); \
    SHIFT_ROW2(2, 6, 10, 14); \
    SHIFT_ROW3(3, 7, 11, 15); \
} while (0)

#if SPH_SMALL_FOOTPRINT_ECHO

static void
mix_column(sph_u32 W[16][4], int ia, int ib, int ic, int id)
{
    int n;

    for (n = 0; n < 4; n ++) {
        sph_u32 a = W[ia][n];
        sph_u32 b = W[ib][n];
        sph_u32 c = W[ic][n];
        sph_u32 d = W[id][n];
        sph_u32 ab = a ^ b;
        sph_u32 bc = b ^ c;
        sph_u32 cd = c ^ d;
        sph_u32 abx = ((ab & C32(0x80808080)) >> 7) * 27U
            ^ ((ab & C32(0x7F7F7F7F)) << 1);
        sph_u32 bcx = ((bc & C32(0x80808080)) >> 7) * 27U
            ^ ((bc & C32(0x7F7F7F7F)) << 1);
        sph_u32 cdx = ((cd & C32(0x80808080)) >> 7) * 27U
            ^ ((cd & C32(0x7F7F7F7F)) << 1);
        W[ia][n] = abx ^ bc ^ d;
        W[ib][n] = bcx ^ a ^ cd;
        W[ic][n] = cdx ^ ab ^ d;
        W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c;
    }
}

#define MIX_COLUMN(a, b, c, d) mix_column(W, a, b, c, d)

#else

#define MIX_COLUMN1(ia, ib, ic, id, n) do { \
    sph_u32 a = W[ia][n]; \
    sph_u32 b = W[ib][n]; \
    sph_u32 c = W[ic][n]; \
    sph_u32 d = W[id][n]; \
    sph_u32 ab = a ^ b; \
    sph_u32 bc = b ^ c; \
    sph_u32 cd = c ^ d; \
    sph_u32 abx = ((ab & C32(0x80808080)) >> 7) * 27U \
        ^ ((ab & C32(0x7F7F7F7F)) << 1); \
    sph_u32 bcx = ((bc & C32(0x80808080)) >> 7) * 27U \
        ^ ((bc & C32(0x7F7F7F7F)) << 1); \
    sph_u32 cdx = ((cd & C32(0x80808080)) >> 7) * 27U \
        ^ ((cd & C32(0x7F7F7F7F)) << 1); \
    W[ia][n] = abx ^ bc ^ d; \
    W[ib][n] = bcx ^ a ^ cd; \
    W[ic][n] = cdx ^ ab ^ d; \
    W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c; \
} while (0)

#define MIX_COLUMN(a, b, c, d) do { \
    MIX_COLUMN1(a, b, c, d, 0); \
    MIX_COLUMN1(a, b, c, d, 1); \
    MIX_COLUMN1(a, b, c, d, 2); \
    MIX_COLUMN1(a, b, c, d, 3); \
} while (0)

#endif

#define BIG_MIX_COLUMNS do { \
    MIX_COLUMN(0, 1, 2, 3); \
    MIX_COLUMN(4, 5, 6, 7); \
    MIX_COLUMN(8, 9, 10, 11); \
    MIX_COLUMN(12, 13, 14, 15); \
} while (0)

#define BIG_ROUND do { \
    BIG_SUB_WORDS; \
    BIG_SHIFT_ROWS; \
    BIG_MIX_COLUMNS; \
} while (0)

#define FINAL_SMALL do { \
    unsigned u; \
    sph_u32 *VV = &sc->u.Vs[0][0]; \
    sph_u32 *WW = &W[0][0]; \
    for (u = 0; u < 16; u ++) { \
        VV[u] ^= sph_dec32le_aligned(sc->buf + (u * 4)) \
            ^ sph_dec32le_aligned(sc->buf + (u * 4) + 64) \
            ^ sph_dec32le_aligned(sc->buf + (u * 4) + 128) \
            ^ WW[u] ^ WW[u + 16] \
            ^ WW[u + 32] ^ WW[u + 48]; \
    } \
} while (0)

#define FINAL_BIG do { \
    unsigned u; \
    sph_u32 *VV = &sc->u.Vs[0][0]; \
    sph_u32 *WW = &W[0][0]; \
    for (u = 0; u < 32; u ++) { \
        VV[u] ^= sph_dec32le_aligned(sc->buf + (u * 4)) \
            ^ WW[u] ^ WW[u + 32]; \
    } \
} while (0)

#define COMPRESS_SMALL(sc) do { \
    sph_u32 K0 = sc->C0; \
    sph_u32 K1 = sc->C1; \
    sph_u32 K2 = sc->C2; \
    sph_u32 K3 = sc->C3; \
    unsigned u; \
    INPUT_BLOCK_SMALL(sc); \
    for (u = 0; u < 8; u ++) { \
        BIG_ROUND; \
    } \
    FINAL_SMALL; \
} while (0)

#define COMPRESS_BIG(sc) do { \
    sph_u32 K0 = sc->C0; \
    sph_u32 K1 = sc->C1; \
    sph_u32 K2 = sc->C2; \
    sph_u32 K3 = sc->C3; \
    unsigned u; \
    INPUT_BLOCK_BIG(sc); \
    for (u = 0; u < 10; u ++) { \
        BIG_ROUND; \
    } \
    FINAL_BIG; \
} while (0)

#endif

#define INCR_COUNTER(sc, val) do { \
    sc->C0 = T32(sc->C0 + (sph_u32)(val)); \
    if (sc->C0 < (sph_u32)(val)) { \
        if ((sc->C1 = T32(sc->C1 + 1)) == 0) \
            if ((sc->C2 = T32(sc->C2 + 1)) == 0) \
                sc->C3 = T32(sc->C3 + 1); \
    } \
} while (0)
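
/*
 * C0..C3 form a 128-bit counter of processed message bits, C0 being
 * the least significant 32-bit word; the comparison against "val"
 * detects wrap-around and propagates the carry upwards.
 */

/*
 * Initialization: every 128-bit word of the initial chaining value
 * encodes the intended output length in bits, with all upper bits
 * set to zero.
 */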

static void
echo_small_init(sph_echo_small_context *sc, unsigned out_len)
{
#if SPH_ECHO_64
    sc->u.Vb[0][0] = (sph_u64)out_len;
    sc->u.Vb[0][1] = 0;
    sc->u.Vb[1][0] = (sph_u64)out_len;
    sc->u.Vb[1][1] = 0;
    sc->u.Vb[2][0] = (sph_u64)out_len;
    sc->u.Vb[2][1] = 0;
    sc->u.Vb[3][0] = (sph_u64)out_len;
    sc->u.Vb[3][1] = 0;
#else
    sc->u.Vs[0][0] = (sph_u32)out_len;
    sc->u.Vs[0][1] = sc->u.Vs[0][2] = sc->u.Vs[0][3] = 0;
    sc->u.Vs[1][0] = (sph_u32)out_len;
    sc->u.Vs[1][1] = sc->u.Vs[1][2] = sc->u.Vs[1][3] = 0;
    sc->u.Vs[2][0] = (sph_u32)out_len;
    sc->u.Vs[2][1] = sc->u.Vs[2][2] = sc->u.Vs[2][3] = 0;
    sc->u.Vs[3][0] = (sph_u32)out_len;
    sc->u.Vs[3][1] = sc->u.Vs[3][2] = sc->u.Vs[3][3] = 0;
#endif
    sc->ptr = 0;
    sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
}

static void
echo_big_init(sph_echo_big_context *sc, unsigned out_len)
{
#if SPH_ECHO_64
    sc->u.Vb[0][0] = (sph_u64)out_len;
    sc->u.Vb[0][1] = 0;
    sc->u.Vb[1][0] = (sph_u64)out_len;
    sc->u.Vb[1][1] = 0;
    sc->u.Vb[2][0] = (sph_u64)out_len;
    sc->u.Vb[2][1] = 0;
    sc->u.Vb[3][0] = (sph_u64)out_len;
    sc->u.Vb[3][1] = 0;
    sc->u.Vb[4][0] = (sph_u64)out_len;
    sc->u.Vb[4][1] = 0;
    sc->u.Vb[5][0] = (sph_u64)out_len;
    sc->u.Vb[5][1] = 0;
    sc->u.Vb[6][0] = (sph_u64)out_len;
    sc->u.Vb[6][1] = 0;
    sc->u.Vb[7][0] = (sph_u64)out_len;
    sc->u.Vb[7][1] = 0;
#else
    sc->u.Vs[0][0] = (sph_u32)out_len;
    sc->u.Vs[0][1] = sc->u.Vs[0][2] = sc->u.Vs[0][3] = 0;
    sc->u.Vs[1][0] = (sph_u32)out_len;
    sc->u.Vs[1][1] = sc->u.Vs[1][2] = sc->u.Vs[1][3] = 0;
    sc->u.Vs[2][0] = (sph_u32)out_len;
    sc->u.Vs[2][1] = sc->u.Vs[2][2] = sc->u.Vs[2][3] = 0;
    sc->u.Vs[3][0] = (sph_u32)out_len;
    sc->u.Vs[3][1] = sc->u.Vs[3][2] = sc->u.Vs[3][3] = 0;
    sc->u.Vs[4][0] = (sph_u32)out_len;
    sc->u.Vs[4][1] = sc->u.Vs[4][2] = sc->u.Vs[4][3] = 0;
    sc->u.Vs[5][0] = (sph_u32)out_len;
    sc->u.Vs[5][1] = sc->u.Vs[5][2] = sc->u.Vs[5][3] = 0;
    sc->u.Vs[6][0] = (sph_u32)out_len;
    sc->u.Vs[6][1] = sc->u.Vs[6][2] = sc->u.Vs[6][3] = 0;
    sc->u.Vs[7][0] = (sph_u32)out_len;
    sc->u.Vs[7][1] = sc->u.Vs[7][2] = sc->u.Vs[7][3] = 0;
#endif
    sc->ptr = 0;
    sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
}

static void
echo_small_compress(sph_echo_small_context *sc)
{
    DECL_STATE_SMALL

    COMPRESS_SMALL(sc);
}

static void
echo_big_compress(sph_echo_big_context *sc)
{
    DECL_STATE_BIG

    COMPRESS_BIG(sc);
}

static void
echo_small_core(sph_echo_small_context *sc,
    const unsigned char *data, size_t len)
{
    unsigned char *buf;
    size_t ptr;

    buf = sc->buf;
    ptr = sc->ptr;
    if (len < (sizeof sc->buf) - ptr) {
        memcpy(buf + ptr, data, len);
        ptr += len;
        sc->ptr = ptr;
        return;
    }

    while (len > 0) {
        size_t clen;

        clen = (sizeof sc->buf) - ptr;
        if (clen > len)
            clen = len;
        memcpy(buf + ptr, data, clen);
        ptr += clen;
        data += clen;
        len -= clen;
        if (ptr == sizeof sc->buf) {
            INCR_COUNTER(sc, 1536);
            echo_small_compress(sc);
            ptr = 0;
        }
    }
    sc->ptr = ptr;
}

static void
echo_big_core(sph_echo_big_context *sc,
    const unsigned char *data, size_t len)
{
    unsigned char *buf;
    size_t ptr;

    buf = sc->buf;
    ptr = sc->ptr;
    if (len < (sizeof sc->buf) - ptr) {
        memcpy(buf + ptr, data, len);
        ptr += len;
        sc->ptr = ptr;
        return;
    }

    while (len > 0) {
        size_t clen;

        clen = (sizeof sc->buf) - ptr;
        if (clen > len)
            clen = len;
        memcpy(buf + ptr, data, clen);
        ptr += clen;
        data += clen;
        len -= clen;
        if (ptr == sizeof sc->buf) {
            INCR_COUNTER(sc, 1024);
            echo_big_compress(sc);
            ptr = 0;
        }
    }
    sc->ptr = ptr;
}
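
/*
 * Padding and output extraction. A final "1" bit is appended after the
 * last message bit (merged with the extra input bits "ub" when n > 0),
 * the rest of the block is zeroed, and the last 18 bytes receive the
 * output size in bits as a 16-bit little-endian value followed by the
 * 16-byte little-endian snapshot of the bit counter taken before
 * padding. When fewer than 18 bytes are free, an additional block is
 * compressed first; the counter is zeroed for blocks that contain no
 * message bits.
 */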

static void
echo_small_close(sph_echo_small_context *sc, unsigned ub, unsigned n,
    void *dst, unsigned out_size_w32)
{
    unsigned char *buf;
    size_t ptr;
    unsigned z;
    unsigned elen;
    union {
        unsigned char tmp[32];
        sph_u32 dummy;
#if SPH_ECHO_64
        sph_u64 dummy2;
#endif
    } u;
#if SPH_ECHO_64
    sph_u64 *VV;
#else
    sph_u32 *VV;
#endif
    unsigned k;

    buf = sc->buf;
    ptr = sc->ptr;
    elen = ((unsigned)ptr << 3) + n;
    INCR_COUNTER(sc, elen);
    sph_enc32le_aligned(u.tmp, sc->C0);
    sph_enc32le_aligned(u.tmp + 4, sc->C1);
    sph_enc32le_aligned(u.tmp + 8, sc->C2);
    sph_enc32le_aligned(u.tmp + 12, sc->C3);
    /*
     * If elen is zero, then this block actually contains no message
     * bit, only the first padding bit.
     */
    if (elen == 0) {
        sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
    }
    z = 0x80 >> n;
    buf[ptr ++] = ((ub & -z) | z) & 0xFF;
    memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
    if (ptr > ((sizeof sc->buf) - 18)) {
        echo_small_compress(sc);
        sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
        memset(buf, 0, sizeof sc->buf);
    }
    sph_enc16le(buf + (sizeof sc->buf) - 18, out_size_w32 << 5);
    memcpy(buf + (sizeof sc->buf) - 16, u.tmp, 16);
    echo_small_compress(sc);
#if SPH_ECHO_64
    for (VV = &sc->u.Vb[0][0], k = 0; k < ((out_size_w32 + 1) >> 1); k ++)
        sph_enc64le_aligned(u.tmp + (k << 3), VV[k]);
#else
    for (VV = &sc->u.Vs[0][0], k = 0; k < out_size_w32; k ++)
        sph_enc32le_aligned(u.tmp + (k << 2), VV[k]);
#endif
    memcpy(dst, u.tmp, out_size_w32 << 2);
    echo_small_init(sc, out_size_w32 << 5);
}

static void
echo_big_close(sph_echo_big_context *sc, unsigned ub, unsigned n,
    void *dst, unsigned out_size_w32)
{
    unsigned char *buf;
    size_t ptr;
    unsigned z;
    unsigned elen;
    union {
        unsigned char tmp[64];
        sph_u32 dummy;
#if SPH_ECHO_64
        sph_u64 dummy2;
#endif
    } u;
#if SPH_ECHO_64
    sph_u64 *VV;
#else
    sph_u32 *VV;
#endif
    unsigned k;

    buf = sc->buf;
    ptr = sc->ptr;
    elen = ((unsigned)ptr << 3) + n;
    INCR_COUNTER(sc, elen);
    sph_enc32le_aligned(u.tmp, sc->C0);
    sph_enc32le_aligned(u.tmp + 4, sc->C1);
    sph_enc32le_aligned(u.tmp + 8, sc->C2);
    sph_enc32le_aligned(u.tmp + 12, sc->C3);
    /*
     * If elen is zero, then this block actually contains no message
     * bit, only the first padding bit.
     */
    if (elen == 0) {
        sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
    }
    z = 0x80 >> n;
    buf[ptr ++] = ((ub & -z) | z) & 0xFF;
    memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
    if (ptr > ((sizeof sc->buf) - 18)) {
        echo_big_compress(sc);
        sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
        memset(buf, 0, sizeof sc->buf);
    }
    sph_enc16le(buf + (sizeof sc->buf) - 18, out_size_w32 << 5);
    memcpy(buf + (sizeof sc->buf) - 16, u.tmp, 16);
    echo_big_compress(sc);
#if SPH_ECHO_64
    for (VV = &sc->u.Vb[0][0], k = 0; k < ((out_size_w32 + 1) >> 1); k ++)
        sph_enc64le_aligned(u.tmp + (k << 3), VV[k]);
#else
    for (VV = &sc->u.Vs[0][0], k = 0; k < out_size_w32; k ++)
        sph_enc32le_aligned(u.tmp + (k << 2), VV[k]);
#endif
    memcpy(dst, u.tmp, out_size_w32 << 2);
    echo_big_init(sc, out_size_w32 << 5);
}

/* see sph_echo.h */
void
sph_echo224_init(void *cc)
{
    echo_small_init(cc, 224);
}

/* see sph_echo.h */
void
sph_echo224(void *cc, const void *data, size_t len)
{
    echo_small_core(cc, data, len);
}

/* see sph_echo.h */
void
sph_echo224_close(void *cc, void *dst)
{
    echo_small_close(cc, 0, 0, dst, 7);
}

/* see sph_echo.h */
void
sph_echo224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    echo_small_close(cc, ub, n, dst, 7);
}

/* see sph_echo.h */
void
sph_echo256_init(void *cc)
{
    echo_small_init(cc, 256);
}

/* see sph_echo.h */
void
sph_echo256(void *cc, const void *data, size_t len)
{
    echo_small_core(cc, data, len);
}

/* see sph_echo.h */
void
sph_echo256_close(void *cc, void *dst)
{
    echo_small_close(cc, 0, 0, dst, 8);
}

/* see sph_echo.h */
void
sph_echo256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    echo_small_close(cc, ub, n, dst, 8);
}

/* see sph_echo.h */
void
sph_echo384_init(void *cc)
{
    echo_big_init(cc, 384);
}

/* see sph_echo.h */
void
sph_echo384(void *cc, const void *data, size_t len)
{
    echo_big_core(cc, data, len);
}

/* see sph_echo.h */
void
sph_echo384_close(void *cc, void *dst)
{
    echo_big_close(cc, 0, 0, dst, 12);
}

/* see sph_echo.h */
void
sph_echo384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    echo_big_close(cc, ub, n, dst, 12);
}

/* see sph_echo.h */
void
sph_echo512_init(void *cc)
{
    echo_big_init(cc, 512);
}

/* see sph_echo.h */
void
sph_echo512(void *cc, const void *data, size_t len)
{
    echo_big_core(cc, data, len);
}

/* see sph_echo.h */
void
sph_echo512_close(void *cc, void *dst)
{
    echo_big_close(cc, 0, 0, dst, 16);
}

/* see sph_echo.h */
void
sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    echo_big_close(cc, ub, n, dst, 16);
}

#ifdef __cplusplus
}
#endif
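
/*
 * Example usage (a minimal sketch): hashing a short message with
 * ECHO-512 through the streaming API declared in sph_echo.h. The
 * context may be reused after _close(), which re-initializes it.
 *
 *     #include <stdio.h>
 *     #include "sph_echo.h"
 *
 *     int main(void)
 *     {
 *         unsigned char digest[64];
 *         sph_echo512_context cc;
 *         int i;
 *
 *         sph_echo512_init(&cc);
 *         sph_echo512(&cc, "abc", 3);
 *         sph_echo512_close(&cc, digest);
 *         for (i = 0; i < 64; i ++)
 *             printf("%02x", digest[i]);
 *         printf("\n");
 *         return 0;
 *     }
 */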