/*
 * Source: GitHub repository tpruvot/cpuminer-multi,
 * path blob/linux/sha3/sph_hamsi.c.
 */
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
/*
 * Hamsi implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */

#include <stddef.h>
#include <string.h>

#include "sph_hamsi.h"

#ifdef __cplusplus
extern "C"{
#endif

/*
 * The library-wide small-footprint switch turns on the Hamsi-specific
 * one, unless SPH_SMALL_FOOTPRINT_HAMSI was already defined explicitly.
 */
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_SMALL_FOOTPRINT_HAMSI   1
#endif

/*
 * The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
 * table lookup during message expansion (1 to 8, inclusive). If we note
 * w the number of bits per message word (w=32 for Hamsi-224/256, w=64
 * for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
 * Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
 * then we will get t tables (where t=ceil(w/n)) of individual size
 * 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
 * n=5, there are 7 tables, but the last one uses only two bits on
 * input, not five).
 *
 * Also, we read t rows of r words from RAM. Words in a given row are
 * concatenated in RAM in that order, so most of the cost is about
 * reading the first row word; comparatively, cache misses are thus
 * less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
 *
 * When n=1, tables are "special" in that we omit the first entry of
 * each table (which always contains 0), so that total table size is
 * halved.
 *
 * We thus have the following (size1 is the cumulative table size of
 * Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
 * are for Hamsi-224/256 and Hamsi-384/512, respectively).
 *
 *   n      size1      size2    t1    t2
 * ---------------------------------------
 *   1       1024       4096    32    64
 *   2       2048       8192    16    32
 *   3       2688      10880    11    22
 *   4       4096      16384     8    16
 *   5       6272      25600     7    13
 *   6      10368      41984     6    11
 *   7      16896      73856     5    10
 *   8      32768     131072     4     8
 *
 * So there is a trade-off: a lower n makes the tables fit better in
 * L1 cache, but increases the number of memory accesses. The optimal
 * value depends on the amount of available L1 cache and the relative
 * impact of a cache miss.
 *
 * Experimentally, in ideal benchmark conditions (which are not necessarily
 * realistic with regard to L1 cache contention), it seems that n=8 is
 * the best value on "big" architectures (those with 32 kB or more of L1
 * cache), while n=4 is better on "small" architectures. This was tested
 * on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
 * (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
 * (8 kB L1 cache).
 *
 * Note: with n=1, the 32 tables (actually implemented as one big table)
 * are read entirely and sequentially, regardless of the input data,
 * thus avoiding any data-dependent table access pattern.
 */

/*
 * Default expansion level for Hamsi-224/256: 4 input bits per lookup
 * in small-footprint builds, 8 otherwise. Defining
 * SPH_HAMSI_EXPAND_SMALL beforehand overrides this choice.
 */
#if !defined SPH_HAMSI_EXPAND_SMALL
#if SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_HAMSI_EXPAND_SMALL  4
#else
#define SPH_HAMSI_EXPAND_SMALL  8
#endif
#endif

/*
 * Default expansion level for Hamsi-384/512 (8 input bits per lookup),
 * unless SPH_HAMSI_EXPAND_BIG was defined beforehand.
 */
#if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG    8
#endif

#ifdef _MSC_VER
/*
 * Silence MSVC warning C4146; this file applies unary minus to an
 * unsigned value on purpose (see the "-z" expressions in the close
 * functions).
 */
#pragma warning (disable: 4146)
#endif

/*
 * NOTE(review): INPUT_SMALL and INPUT_BIG are used by this file but not
 * defined in it; presumably they come from this helper -- confirm.
 */
#include "sph_hamsi_helper.c"

static const sph_u32 IV224[] = {
118
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
119
SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
120
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
121
};
122
123
/*
 * This version is the one used in the Hamsi submission package for
 * round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
 * shall soon be corrected in the official Hamsi specification.
 *
static const sph_u32 IV224[] = {
	SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
	SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
	SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
 */

static const sph_u32 IV256[] = {
136
SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
137
SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
138
SPH_C32(0x656d656e), SPH_C32(0x7420456c)
139
};
140
141
static const sph_u32 IV384[] = {
142
SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
143
SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
144
SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
145
SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
146
SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
147
SPH_C32(0x2c204b61)
148
};
149
150
static const sph_u32 IV512[] = {
151
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
152
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
153
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
154
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
155
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
156
SPH_C32(0x6769756d)
157
};
158
159
static const sph_u32 alpha_n[] = {
160
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
161
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
162
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
163
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
164
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
165
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
166
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
167
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
168
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
169
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
170
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
171
};
172
173
static const sph_u32 alpha_f[] = {
174
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
175
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
176
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
177
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
178
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
179
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
180
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
181
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
182
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
183
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
184
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
185
};
186
187
/*
 * Declare the eight local words c0..c7 that hold a working copy of the
 * Hamsi-224/256 chaining value. The expansion already ends with a
 * semicolon, so the macro is used without one.
 */
#define DECL_STATE_SMALL \
	sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;

/*
 * Load the eight chaining words sc->h[0..7] into the locals c0..c7
 * (declared with DECL_STATE_SMALL).
 */
#define READ_STATE_SMALL(sc)   do { \
		c0 = sc->h[0x0]; \
		c1 = sc->h[0x1]; \
		c2 = sc->h[0x2]; \
		c3 = sc->h[0x3]; \
		c4 = sc->h[0x4]; \
		c5 = sc->h[0x5]; \
		c6 = sc->h[0x6]; \
		c7 = sc->h[0x7]; \
	} while (0)

/*
 * Store the locals c0..c7 back into the chaining words sc->h[0..7].
 */
#define WRITE_STATE_SMALL(sc)   do { \
		sc->h[0x0] = c0; \
		sc->h[0x1] = c1; \
		sc->h[0x2] = c2; \
		sc->h[0x3] = c3; \
		sc->h[0x4] = c4; \
		sc->h[0x5] = c5; \
		sc->h[0x6] = c6; \
		sc->h[0x7] = c7; \
	} while (0)

/*
 * Names s0..sF for the sixteen words of the Hamsi-224/256 working
 * state. Each one is an alias for either an expanded message word
 * (m0..m7) or a chaining word (c0..c7), so no copy is needed to
 * assemble the state.
 */
#define s0   m0
#define s1   m1
#define s2   c0
#define s3   c1
#define s4   c2
#define s5   c3
#define s6   m2
#define s7   m3
#define s8   m4
#define s9   m5
#define sA   c4
#define sB   c5
#define sC   c6
#define sD   c7
#define sE   m6
#define sF   m7

/*
 * Substitution step on four 32-bit words, updated in place. Only
 * bitwise AND/OR/XOR/NOT are used, so each bit position of (a, b, c, d)
 * is transformed independently of the others. All four arguments must
 * be modifiable lvalues; they are evaluated several times.
 */
#define SBOX(a, b, c, d)   do { \
		sph_u32 t; \
		t = (a); \
		(a) &= (c); \
		(a) ^= (d); \
		(c) ^= (b); \
		(c) ^= (a); \
		(d) |= t; \
		(d) ^= (b); \
		t ^= (c); \
		(b) = (d); \
		(d) |= t; \
		(d) ^= (a); \
		(a) &= (b); \
		t ^= (a); \
		(b) ^= (d); \
		(b) ^= t; \
		(a) = (c); \
		(c) = (b); \
		(b) = (d); \
		(d) = SPH_T32(~t); \
	} while (0)

/*
 * Mixing step on four 32-bit words, updated in place, built only from
 * left rotations, left shifts and XORs. All four arguments must be
 * modifiable lvalues; they are evaluated several times.
 */
#define L(a, b, c, d)   do { \
		(a) = SPH_ROTL32(a, 13); \
		(c) = SPH_ROTL32(c, 3); \
		(b) ^= (a) ^ (c); \
		(d) ^= (c) ^ SPH_T32((a) << 3); \
		(b) = SPH_ROTL32(b, 1); \
		(d) = SPH_ROTL32(d, 7); \
		(a) ^= (b) ^ (d); \
		(c) ^= (d) ^ SPH_T32((b) << 7); \
		(a) = SPH_ROTL32(a, 5); \
		(c) = SPH_ROTL32(c, 22); \
	} while (0)

/*
 * One round of the Hamsi-224/256 permutation on the state s0..sF:
 *  - XOR the round constants from "alpha" into the state (only entries
 *    0x00-0x03, 0x08-0x0B, 0x10-0x13 and 0x18-0x1B are used), with the
 *    round counter "rc" additionally XORed into s1;
 *  - apply SBOX to the four word groups (s0,s4,s8,sC) .. (s3,s7,sB,sF);
 *  - apply L to the four word groups (s0,s5,sA,sF) .. (s3,s4,s9,sE).
 * "alpha" is alpha_n or alpha_f.
 */
#define ROUND_SMALL(rc, alpha)   do { \
		s0 ^= alpha[0x00]; \
		s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
		s2 ^= alpha[0x02]; \
		s3 ^= alpha[0x03]; \
		s4 ^= alpha[0x08]; \
		s5 ^= alpha[0x09]; \
		s6 ^= alpha[0x0A]; \
		s7 ^= alpha[0x0B]; \
		s8 ^= alpha[0x10]; \
		s9 ^= alpha[0x11]; \
		sA ^= alpha[0x12]; \
		sB ^= alpha[0x13]; \
		sC ^= alpha[0x18]; \
		sD ^= alpha[0x19]; \
		sE ^= alpha[0x1A]; \
		sF ^= alpha[0x1B]; \
		SBOX(s0, s4, s8, sC); \
		SBOX(s1, s5, s9, sD); \
		SBOX(s2, s6, sA, sE); \
		SBOX(s3, s7, sB, sF); \
		L(s0, s5, sA, sF); \
		L(s1, s6, sB, sC); \
		L(s2, s7, s8, sD); \
		L(s3, s4, s9, sE); \
	} while (0)

/*
 * Non-final Hamsi-224/256 permutation: three rounds (counters 0, 1, 2)
 * using the alpha_n constants.
 */
#define P_SMALL   do { \
		ROUND_SMALL(0, alpha_n); \
		ROUND_SMALL(1, alpha_n); \
		ROUND_SMALL(2, alpha_n); \
	} while (0)

/*
 * Final Hamsi-224/256 permutation: six rounds (counters 0 to 5) using
 * the alpha_f constants.
 */
#define PF_SMALL   do { \
		ROUND_SMALL(0, alpha_f); \
		ROUND_SMALL(1, alpha_f); \
		ROUND_SMALL(2, alpha_f); \
		ROUND_SMALL(3, alpha_f); \
		ROUND_SMALL(4, alpha_f); \
		ROUND_SMALL(5, alpha_f); \
	} while (0)

/*
 * Feed-forward for Hamsi-224/256: XOR eight words of the permuted
 * state (s0..s3 and s8..sB) into sc->h[0..7] and reload c0..c7 with
 * the result.
 *
 * The assignments run from c7 down to c0 because some sources alias
 * chaining words that are themselves destinations (sB is c5, sA is c4,
 * s3 is c1, s2 is c0): each of those must be read before it is
 * overwritten.
 */
#define T_SMALL   do { \
		/* order is important */ \
		c7 = (sc->h[7] ^= sB); \
		c6 = (sc->h[6] ^= sA); \
		c5 = (sc->h[5] ^= s9); \
		c4 = (sc->h[4] ^= s8); \
		c3 = (sc->h[3] ^= s3); \
		c2 = (sc->h[2] ^= s2); \
		c1 = (sc->h[1] ^= s1); \
		c0 = (sc->h[0] ^= s0); \
	} while (0)

static void
320
hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
321
{
322
DECL_STATE_SMALL
323
#if !SPH_64
324
sph_u32 tmp;
325
#endif
326
327
#if SPH_64
328
sc->count += (sph_u64)num << 5;
329
#else
330
tmp = SPH_T32((sph_u32)num << 5);
331
sc->count_low = SPH_T32(sc->count_low + tmp);
332
sc->count_high += (sph_u32)((num >> 13) >> 14);
333
if (sc->count_low < tmp)
334
sc->count_high ++;
335
#endif
336
READ_STATE_SMALL(sc);
337
while (num -- > 0) {
338
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
339
340
INPUT_SMALL;
341
P_SMALL;
342
T_SMALL;
343
buf += 4;
344
}
345
WRITE_STATE_SMALL(sc);
346
}
347
348
static void
349
hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
350
{
351
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
352
DECL_STATE_SMALL
353
354
READ_STATE_SMALL(sc);
355
INPUT_SMALL;
356
PF_SMALL;
357
T_SMALL;
358
WRITE_STATE_SMALL(sc);
359
}
360
361
static void
362
hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
363
{
364
sc->partial_len = 0;
365
memcpy(sc->h, iv, sizeof sc->h);
366
#if SPH_64
367
sc->count = 0;
368
#else
369
sc->count_high = sc->count_low = 0;
370
#endif
371
}
372
373
static void
374
hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
375
{
376
if (sc->partial_len != 0) {
377
size_t mlen;
378
379
mlen = 4 - sc->partial_len;
380
if (len < mlen) {
381
memcpy(sc->partial + sc->partial_len, data, len);
382
sc->partial_len += len;
383
return;
384
} else {
385
memcpy(sc->partial + sc->partial_len, data, mlen);
386
len -= mlen;
387
data = (const unsigned char *)data + mlen;
388
hamsi_small(sc, sc->partial, 1);
389
sc->partial_len = 0;
390
}
391
}
392
393
hamsi_small(sc, data, (len >> 2));
394
data = (const unsigned char *)data + (len & ~(size_t)3);
395
len &= (size_t)3;
396
memcpy(sc->partial, data, len);
397
sc->partial_len = len;
398
}
399
400
static void
401
hamsi_small_close(sph_hamsi_small_context *sc,
402
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
403
{
404
unsigned char pad[12];
405
size_t ptr, u;
406
unsigned z;
407
unsigned char *out;
408
409
ptr = sc->partial_len;
410
memcpy(pad, sc->partial, ptr);
411
#if SPH_64
412
sph_enc64be(pad + 4, sc->count + (ptr << 3) + n);
413
#else
414
sph_enc32be(pad + 4, sc->count_high);
415
sph_enc32be(pad + 8, sc->count_low + (ptr << 3) + n);
416
#endif
417
z = 0x80 >> n;
418
pad[ptr ++] = ((ub & -z) | z) & 0xFF;
419
while (ptr < 4)
420
pad[ptr ++] = 0;
421
hamsi_small(sc, pad, 2);
422
hamsi_small_final(sc, pad + 8);
423
out = dst;
424
for (u = 0; u < out_size_w32; u ++)
425
sph_enc32be(out + (u << 2), sc->h[u]);
426
}
427
428
/*
 * Declare the sixteen local words c0..cF that hold a working copy of
 * the Hamsi-384/512 chaining value. The expansion already ends with a
 * semicolon, so the macro is used without one.
 */
#define DECL_STATE_BIG \
	sph_u32 c0, c1, c2, c3, c4, c5, c6, c7; \
	sph_u32 c8, c9, cA, cB, cC, cD, cE, cF;

/*
 * Load the sixteen chaining words sc->h[0..15] into the locals c0..cF
 * (declared with DECL_STATE_BIG).
 */
#define READ_STATE_BIG(sc)   do { \
		c0 = sc->h[0x0]; \
		c1 = sc->h[0x1]; \
		c2 = sc->h[0x2]; \
		c3 = sc->h[0x3]; \
		c4 = sc->h[0x4]; \
		c5 = sc->h[0x5]; \
		c6 = sc->h[0x6]; \
		c7 = sc->h[0x7]; \
		c8 = sc->h[0x8]; \
		c9 = sc->h[0x9]; \
		cA = sc->h[0xA]; \
		cB = sc->h[0xB]; \
		cC = sc->h[0xC]; \
		cD = sc->h[0xD]; \
		cE = sc->h[0xE]; \
		cF = sc->h[0xF]; \
	} while (0)

/*
 * Store the locals c0..cF back into the chaining words sc->h[0..15].
 */
#define WRITE_STATE_BIG(sc)   do { \
		sc->h[0x0] = c0; \
		sc->h[0x1] = c1; \
		sc->h[0x2] = c2; \
		sc->h[0x3] = c3; \
		sc->h[0x4] = c4; \
		sc->h[0x5] = c5; \
		sc->h[0x6] = c6; \
		sc->h[0x7] = c7; \
		sc->h[0x8] = c8; \
		sc->h[0x9] = c9; \
		sc->h[0xA] = cA; \
		sc->h[0xB] = cB; \
		sc->h[0xC] = cC; \
		sc->h[0xD] = cD; \
		sc->h[0xE] = cE; \
		sc->h[0xF] = cF; \
	} while (0)

/*
 * Names s00..s1F for the thirty-two words of the Hamsi-384/512 working
 * state. Each one is an alias for either an expanded message word
 * (m0..mF) or a chaining word (c0..cF), so no copy is needed to
 * assemble the state.
 */
#define s00   m0
#define s01   m1
#define s02   c0
#define s03   c1
#define s04   m2
#define s05   m3
#define s06   c2
#define s07   c3
#define s08   c4
#define s09   c5
#define s0A   m4
#define s0B   m5
#define s0C   c6
#define s0D   c7
#define s0E   m6
#define s0F   m7
#define s10   m8
#define s11   m9
#define s12   c8
#define s13   c9
#define s14   mA
#define s15   mB
#define s16   cA
#define s17   cB
#define s18   cC
#define s19   cD
#define s1A   mC
#define s1B   mD
#define s1C   cE
#define s1D   cF
#define s1E   mE
#define s1F   mF

/*
 * One round of the Hamsi-384/512 permutation on the state s00..s1F:
 *  - XOR all 32 round constants from "alpha" into the state, with the
 *    round counter "rc" additionally XORed into s01;
 *  - apply SBOX to the eight word groups (s00,s08,s10,s18) ..
 *    (s07,s0F,s17,s1F);
 *  - apply L to the eight word groups (s00,s09,s12,s1B) ..
 *    (s07,s08,s11,s1A), then to four more groups.
 * "alpha" is alpha_n or alpha_f.
 */
#define ROUND_BIG(rc, alpha)   do { \
		s00 ^= alpha[0x00]; \
		s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
		s02 ^= alpha[0x02]; \
		s03 ^= alpha[0x03]; \
		s04 ^= alpha[0x04]; \
		s05 ^= alpha[0x05]; \
		s06 ^= alpha[0x06]; \
		s07 ^= alpha[0x07]; \
		s08 ^= alpha[0x08]; \
		s09 ^= alpha[0x09]; \
		s0A ^= alpha[0x0A]; \
		s0B ^= alpha[0x0B]; \
		s0C ^= alpha[0x0C]; \
		s0D ^= alpha[0x0D]; \
		s0E ^= alpha[0x0E]; \
		s0F ^= alpha[0x0F]; \
		s10 ^= alpha[0x10]; \
		s11 ^= alpha[0x11]; \
		s12 ^= alpha[0x12]; \
		s13 ^= alpha[0x13]; \
		s14 ^= alpha[0x14]; \
		s15 ^= alpha[0x15]; \
		s16 ^= alpha[0x16]; \
		s17 ^= alpha[0x17]; \
		s18 ^= alpha[0x18]; \
		s19 ^= alpha[0x19]; \
		s1A ^= alpha[0x1A]; \
		s1B ^= alpha[0x1B]; \
		s1C ^= alpha[0x1C]; \
		s1D ^= alpha[0x1D]; \
		s1E ^= alpha[0x1E]; \
		s1F ^= alpha[0x1F]; \
		SBOX(s00, s08, s10, s18); \
		SBOX(s01, s09, s11, s19); \
		SBOX(s02, s0A, s12, s1A); \
		SBOX(s03, s0B, s13, s1B); \
		SBOX(s04, s0C, s14, s1C); \
		SBOX(s05, s0D, s15, s1D); \
		SBOX(s06, s0E, s16, s1E); \
		SBOX(s07, s0F, s17, s1F); \
		L(s00, s09, s12, s1B); \
		L(s01, s0A, s13, s1C); \
		L(s02, s0B, s14, s1D); \
		L(s03, s0C, s15, s1E); \
		L(s04, s0D, s16, s1F); \
		L(s05, s0E, s17, s18); \
		L(s06, s0F, s10, s19); \
		L(s07, s08, s11, s1A); \
		L(s00, s02, s05, s07); \
		L(s10, s13, s15, s16); \
		L(s09, s0B, s0C, s0E); \
		L(s19, s1A, s1C, s1F); \
	} while (0)

/*
 * Hamsi-384/512 permutations:
 *  - P_BIG:  non-final, 6 rounds (counters 0..5) with alpha_n;
 *  - PF_BIG: final, 12 rounds (counters 0..11) with alpha_f.
 * Small-footprint builds run the rounds from a loop; other builds
 * unroll them.
 */
#if SPH_SMALL_FOOTPRINT_HAMSI

#define P_BIG   do { \
		unsigned r; \
		for (r = 0; r < 6; r ++) \
			ROUND_BIG(r, alpha_n); \
	} while (0)

#define PF_BIG   do { \
		unsigned r; \
		for (r = 0; r < 12; r ++) \
			ROUND_BIG(r, alpha_f); \
	} while (0)

#else

#define P_BIG   do { \
		ROUND_BIG(0, alpha_n); \
		ROUND_BIG(1, alpha_n); \
		ROUND_BIG(2, alpha_n); \
		ROUND_BIG(3, alpha_n); \
		ROUND_BIG(4, alpha_n); \
		ROUND_BIG(5, alpha_n); \
	} while (0)

#define PF_BIG   do { \
		ROUND_BIG(0, alpha_f); \
		ROUND_BIG(1, alpha_f); \
		ROUND_BIG(2, alpha_f); \
		ROUND_BIG(3, alpha_f); \
		ROUND_BIG(4, alpha_f); \
		ROUND_BIG(5, alpha_f); \
		ROUND_BIG(6, alpha_f); \
		ROUND_BIG(7, alpha_f); \
		ROUND_BIG(8, alpha_f); \
		ROUND_BIG(9, alpha_f); \
		ROUND_BIG(10, alpha_f); \
		ROUND_BIG(11, alpha_f); \
	} while (0)

#endif

/*
 * Feed-forward for Hamsi-384/512: XOR sixteen words of the permuted
 * state (s00..s07 and s10..s17) into sc->h[0..15] and reload c0..cF
 * with the result.
 *
 * The assignments run from cF down to c0 because some sources alias
 * chaining words that are themselves destinations (s17/s16 are cB/cA,
 * s13/s12 are c9/c8, s07/s06 are c3/c2, s03/s02 are c1/c0): each of
 * those must be read before it is overwritten.
 */
#define T_BIG   do { \
		/* order is important */ \
		cF = (sc->h[0xF] ^= s17); \
		cE = (sc->h[0xE] ^= s16); \
		cD = (sc->h[0xD] ^= s15); \
		cC = (sc->h[0xC] ^= s14); \
		cB = (sc->h[0xB] ^= s13); \
		cA = (sc->h[0xA] ^= s12); \
		c9 = (sc->h[0x9] ^= s11); \
		c8 = (sc->h[0x8] ^= s10); \
		c7 = (sc->h[0x7] ^= s07); \
		c6 = (sc->h[0x6] ^= s06); \
		c5 = (sc->h[0x5] ^= s05); \
		c4 = (sc->h[0x4] ^= s04); \
		c3 = (sc->h[0x3] ^= s03); \
		c2 = (sc->h[0x2] ^= s02); \
		c1 = (sc->h[0x1] ^= s01); \
		c0 = (sc->h[0x0] ^= s00); \
	} while (0)

static void
621
hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
622
{
623
DECL_STATE_BIG
624
#if !SPH_64
625
sph_u32 tmp;
626
#endif
627
628
#if SPH_64
629
sc->count += (sph_u64)num << 6;
630
#else
631
tmp = SPH_T32((sph_u32)num << 6);
632
sc->count_low = SPH_T32(sc->count_low + tmp);
633
sc->count_high += (sph_u32)((num >> 13) >> 13);
634
if (sc->count_low < tmp)
635
sc->count_high ++;
636
#endif
637
READ_STATE_BIG(sc);
638
while (num -- > 0) {
639
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
640
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
641
642
INPUT_BIG;
643
P_BIG;
644
T_BIG;
645
buf += 8;
646
}
647
WRITE_STATE_BIG(sc);
648
}
649
650
static void
651
hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
652
{
653
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
654
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
655
DECL_STATE_BIG
656
657
READ_STATE_BIG(sc);
658
INPUT_BIG;
659
PF_BIG;
660
T_BIG;
661
WRITE_STATE_BIG(sc);
662
}
663
664
static void
665
hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
666
{
667
sc->partial_len = 0;
668
memcpy(sc->h, iv, sizeof sc->h);
669
#if SPH_64
670
sc->count = 0;
671
#else
672
sc->count_high = sc->count_low = 0;
673
#endif
674
}
675
676
static void
677
hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
678
{
679
if (sc->partial_len != 0) {
680
size_t mlen;
681
682
mlen = 8 - sc->partial_len;
683
if (len < mlen) {
684
memcpy(sc->partial + sc->partial_len, data, len);
685
sc->partial_len += len;
686
return;
687
} else {
688
memcpy(sc->partial + sc->partial_len, data, mlen);
689
len -= mlen;
690
data = (const unsigned char *)data + mlen;
691
hamsi_big(sc, sc->partial, 1);
692
sc->partial_len = 0;
693
}
694
}
695
696
hamsi_big(sc, data, (len >> 3));
697
data = (const unsigned char *)data + (len & ~(size_t)7);
698
len &= (size_t)7;
699
memcpy(sc->partial, data, len);
700
sc->partial_len = len;
701
}
702
703
static void
704
hamsi_big_close(sph_hamsi_big_context *sc,
705
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
706
{
707
unsigned char pad[8];
708
size_t ptr, u;
709
unsigned z;
710
unsigned char *out;
711
712
ptr = sc->partial_len;
713
#if SPH_64
714
sph_enc64be(pad, sc->count + (ptr << 3) + n);
715
#else
716
sph_enc32be(pad, sc->count_high);
717
sph_enc32be(pad + 4, sc->count_low + (ptr << 3) + n);
718
#endif
719
z = 0x80 >> n;
720
sc->partial[ptr ++] = ((ub & -z) | z) & 0xFF;
721
while (ptr < 8)
722
sc->partial[ptr ++] = 0;
723
hamsi_big(sc, sc->partial, 1);
724
hamsi_big_final(sc, pad);
725
out = dst;
726
if (out_size_w32 == 12) {
727
sph_enc32be(out + 0, sc->h[ 0]);
728
sph_enc32be(out + 4, sc->h[ 1]);
729
sph_enc32be(out + 8, sc->h[ 3]);
730
sph_enc32be(out + 12, sc->h[ 4]);
731
sph_enc32be(out + 16, sc->h[ 5]);
732
sph_enc32be(out + 20, sc->h[ 6]);
733
sph_enc32be(out + 24, sc->h[ 8]);
734
sph_enc32be(out + 28, sc->h[ 9]);
735
sph_enc32be(out + 32, sc->h[10]);
736
sph_enc32be(out + 36, sc->h[12]);
737
sph_enc32be(out + 40, sc->h[13]);
738
sph_enc32be(out + 44, sc->h[15]);
739
} else {
740
for (u = 0; u < 16; u ++)
741
sph_enc32be(out + (u << 2), sc->h[u]);
742
}
743
}
744
745
/* see sph_hamsi.h */
746
void
747
sph_hamsi224_init(void *cc)
748
{
749
hamsi_small_init(cc, IV224);
750
}
751
752
/*
 * Feed "len" bytes from "data" into the Hamsi-224 computation held in
 * "cc". See sph_hamsi.h.
 */
void
sph_hamsi224(void *cc, const void *data, size_t len)
{
	hamsi_small_core(cc, data, len);
}

/* see sph_hamsi.h */
760
void
761
sph_hamsi224_close(void *cc, void *dst)
762
{
763
hamsi_small_close(cc, 0, 0, dst, 7);
764
hamsi_small_init(cc, IV224);
765
}
766
767
/* see sph_hamsi.h */
768
void
769
sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
770
{
771
hamsi_small_close(cc, ub, n, dst, 7);
772
hamsi_small_init(cc, IV224);
773
}
774
775
/* see sph_hamsi.h */
776
void
777
sph_hamsi256_init(void *cc)
778
{
779
hamsi_small_init(cc, IV256);
780
}
781
782
/*
 * Feed "len" bytes from "data" into the Hamsi-256 computation held in
 * "cc". See sph_hamsi.h.
 */
void
sph_hamsi256(void *cc, const void *data, size_t len)
{
	hamsi_small_core(cc, data, len);
}

/* see sph_hamsi.h */
790
void
791
sph_hamsi256_close(void *cc, void *dst)
792
{
793
hamsi_small_close(cc, 0, 0, dst, 8);
794
hamsi_small_init(cc, IV256);
795
}
796
797
/* see sph_hamsi.h */
798
void
799
sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
800
{
801
hamsi_small_close(cc, ub, n, dst, 8);
802
hamsi_small_init(cc, IV256);
803
}
804
805
/* see sph_hamsi.h */
806
void
807
sph_hamsi384_init(void *cc)
808
{
809
hamsi_big_init(cc, IV384);
810
}
811
812
/*
 * Feed "len" bytes from "data" into the Hamsi-384 computation held in
 * "cc". See sph_hamsi.h.
 */
void
sph_hamsi384(void *cc, const void *data, size_t len)
{
	hamsi_big_core(cc, data, len);
}

/* see sph_hamsi.h */
820
void
821
sph_hamsi384_close(void *cc, void *dst)
822
{
823
hamsi_big_close(cc, 0, 0, dst, 12);
824
hamsi_big_init(cc, IV384);
825
}
826
827
/* see sph_hamsi.h */
828
void
829
sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
830
{
831
hamsi_big_close(cc, ub, n, dst, 12);
832
hamsi_big_init(cc, IV384);
833
}
834
835
/* see sph_hamsi.h */
836
void
837
sph_hamsi512_init(void *cc)
838
{
839
hamsi_big_init(cc, IV512);
840
}
841
842
/*
 * Feed "len" bytes from "data" into the Hamsi-512 computation held in
 * "cc". See sph_hamsi.h.
 */
void
sph_hamsi512(void *cc, const void *data, size_t len)
{
	hamsi_big_core(cc, data, len);
}

/* see sph_hamsi.h */
850
void
851
sph_hamsi512_close(void *cc, void *dst)
852
{
853
hamsi_big_close(cc, 0, 0, dst, 16);
854
hamsi_big_init(cc, IV512);
855
}
856
857
/* see sph_hamsi.h */
858
void
859
sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
860
{
861
hamsi_big_close(cc, ub, n, dst, 16);
862
hamsi_big_init(cc, IV512);
863
}
864
865
/* Close the extern "C" block opened near the top of the file. */
#ifdef __cplusplus
}
#endif