Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
tpruvot
GitHub Repository: tpruvot/cpuminer-multi
Path: blob/linux/sha3/sph_radiogatun.c
1201 views
1
/* $Id: radiogatun.c 226 2010-06-16 17:28:08Z tp $ */
2
/*
3
* RadioGatun implementation.
4
*
5
* ==========================(LICENSE BEGIN)============================
6
*
7
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
8
*
9
* Permission is hereby granted, free of charge, to any person obtaining
10
* a copy of this software and associated documentation files (the
11
* "Software"), to deal in the Software without restriction, including
12
* without limitation the rights to use, copy, modify, merge, publish,
13
* distribute, sublicense, and/or sell copies of the Software, and to
14
* permit persons to whom the Software is furnished to do so, subject to
15
* the following conditions:
16
*
17
* The above copyright notice and this permission notice shall be
18
* included in all copies or substantial portions of the Software.
19
*
20
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
*
28
* ===========================(LICENSE END)=============================
29
*
30
* @author Thomas Pornin <[email protected]>
31
*/
32
33
#include <stddef.h>
34
#include <string.h>
35
36
#include "sph_radiogatun.h"
37
38
/*
 * Unless the caller already chose a value, the global small-footprint
 * setting also selects the small-footprint RadioGatun code below.
 */
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_RADIOGATUN
#define SPH_SMALL_FOOTPRINT_RADIOGATUN   1
#endif
41
42
/* ======================================================================= */
43
/*
44
* The core macros. We want to unroll 13 successive rounds so that the
45
* belt rotation becomes pure routing, solved at compilation time, with
46
* no unnecessary copying. We also wish all state variables to be
47
* independent local variables, so that the C compiler becomes free to
48
* map these on registers as it sees fit. This requires some heavy
49
* preprocessor trickeries, including a full addition macro modulo 13.
50
*
51
* These macros are size-independent. Some macros must be defined before
52
* use:
53
* WT evaluates to the type for a word (32-bit or 64-bit)
54
* T truncates a value to the proper word size
55
* ROR(x, n) right rotation of a word x, with explicit modular
56
* reduction of the rotation count n by the word size
57
* INW(i, j) input word j (0, 1, or 2) of block i (0 to 12)
58
*
59
* For INW, the input buffer is pointed to by "buf" which has type
60
* "const unsigned char *".
61
*/
62
63
/*
 * Invoke "action" once for each of the 19 indices 0 to 18, in
 * increasing order. The index is passed as a literal decimal token so
 * that "action" may paste it into identifiers.
 */
#define MUL19(action)   do { \
		action(0); \
		action(1); \
		action(2); \
		action(3); \
		action(4); \
		action(5); \
		action(6); \
		action(7); \
		action(8); \
		action(9); \
		action(10); \
		action(11); \
		action(12); \
		action(13); \
		action(14); \
		action(15); \
		action(16); \
		action(17); \
		action(18); \
	} while (0)
84
85
/*
 * Expand to the comma-separated list of the 19 identifiers b0 ... b18,
 * suitable as the declarator list of a declaration ("WT DECL19(a);").
 */
#define DECL19(b)   b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \
                    b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \
                    b ## 12, b ## 13, b ## 14, b ## 15, b ## 16, \
                    b ## 17, b ## 18
89
90
/*
 * M19_T7(i) expands to the literal value of (7 * i) mod 19, for i in
 * 0 to 18. The two-level definition forces the argument to be expanded
 * before it is pasted onto the table entry name.
 */
#define M19_T7(i)    M19_T7_(i)
#define M19_T7_(i)   M19_T7_ ## i
#define M19_T7_0     0
#define M19_T7_1     7
#define M19_T7_2     14
#define M19_T7_3     2
#define M19_T7_4     9
#define M19_T7_5     16
#define M19_T7_6     4
#define M19_T7_7     11
#define M19_T7_8     18
#define M19_T7_9     6
#define M19_T7_10    13
#define M19_T7_11    1
#define M19_T7_12    8
#define M19_T7_13    15
#define M19_T7_14    3
#define M19_T7_15    10
#define M19_T7_16    17
#define M19_T7_17    5
#define M19_T7_18    12
111
112
/*
 * M19_A1(i) expands to the literal value of (i + 1) mod 19, for i in
 * 0 to 18.
 */
#define M19_A1(i)    M19_A1_(i)
#define M19_A1_(i)   M19_A1_ ## i
#define M19_A1_0     1
#define M19_A1_1     2
#define M19_A1_2     3
#define M19_A1_3     4
#define M19_A1_4     5
#define M19_A1_5     6
#define M19_A1_6     7
#define M19_A1_7     8
#define M19_A1_8     9
#define M19_A1_9     10
#define M19_A1_10    11
#define M19_A1_11    12
#define M19_A1_12    13
#define M19_A1_13    14
#define M19_A1_14    15
#define M19_A1_15    16
#define M19_A1_16    17
#define M19_A1_17    18
#define M19_A1_18    0
133
134
/*
 * M19_A2(i) expands to the literal value of (i + 2) mod 19, for i in
 * 0 to 18.
 */
#define M19_A2(i)    M19_A2_(i)
#define M19_A2_(i)   M19_A2_ ## i
#define M19_A2_0     2
#define M19_A2_1     3
#define M19_A2_2     4
#define M19_A2_3     5
#define M19_A2_4     6
#define M19_A2_5     7
#define M19_A2_6     8
#define M19_A2_7     9
#define M19_A2_8     10
#define M19_A2_9     11
#define M19_A2_10    12
#define M19_A2_11    13
#define M19_A2_12    14
#define M19_A2_13    15
#define M19_A2_14    16
#define M19_A2_15    17
#define M19_A2_16    18
#define M19_A2_17    0
#define M19_A2_18    1
155
156
/*
 * M19_A4(i) expands to the literal value of (i + 4) mod 19, for i in
 * 0 to 18.
 */
#define M19_A4(i)    M19_A4_(i)
#define M19_A4_(i)   M19_A4_ ## i
#define M19_A4_0     4
#define M19_A4_1     5
#define M19_A4_2     6
#define M19_A4_3     7
#define M19_A4_4     8
#define M19_A4_5     9
#define M19_A4_6     10
#define M19_A4_7     11
#define M19_A4_8     12
#define M19_A4_9     13
#define M19_A4_10    14
#define M19_A4_11    15
#define M19_A4_12    16
#define M19_A4_13    17
#define M19_A4_14    18
#define M19_A4_15    0
#define M19_A4_16    1
#define M19_A4_17    2
#define M19_A4_18    3
177
178
/*
 * Accessors for the local variables a<i> and atmp<i>. The extra macro
 * level makes the index argument (typically the result of one of the
 * M19_* tables) fully expanded before it is pasted onto the name.
 */
#define ACC_a(i)       ACC_a_(i)
#define ACC_a_(i)      a ## i
#define ACC_atmp(i)    ACC_atmp_(i)
#define ACC_atmp_(i)   atmp ## i
182
183
/*
 * The four per-word steps of the mill update; each is applied to all
 * 19 indices (through MUL19) before the next one starts.
 *
 *   MILL1(i)  atmp[i] = a[i] ^ T(a[i + 1] | ~a[i + 2])
 *   MILL2(i)  a[i] = atmp[7 * i] rotated right by i * (i + 1) / 2
 *   MILL3(i)  atmp[i] = a[i] ^ a[i + 1] ^ a[i + 4]
 *   MILL4(i)  a[i] = atmp[i], with 1 XORed into word 0 only
 *
 * All indices are taken modulo 19 through the M19_* tables.
 */
#define MILL1(i)   (atmp ## i = a ## i ^ T(ACC_a(M19_A1(i)) \
                   | ~ACC_a(M19_A2(i))))
#define MILL2(i)   (a ## i = ROR(ACC_atmp(M19_T7(i)), ((i * (i + 1)) >> 1)))
#define MILL3(i)   (atmp ## i = a ## i ^ ACC_a(M19_A1(i)) ^ ACC_a(M19_A4(i)))
#define MILL4(i)   (a ## i = atmp ## i ^ (i == 0))
188
189
/*
 * Apply the complete mill update to the local variables a0 ... a18.
 * The temporaries atmp0 ... atmp18 are declared in the macro's own
 * block, so they do not leak into the enclosing scope.
 */
#define MILL   do { \
		WT DECL19(atmp); \
		MUL19(MILL1); \
		MUL19(MILL2); \
		MUL19(MILL3); \
		MUL19(MILL4); \
	} while (0)
196
197
/*
 * Expand to the comma-separated list of the 39 identifiers
 * b<i>_<k> (i from 0 to 12, k from 0 to 2), suitable as the declarator
 * list of a declaration ("WT DECL13(b);").
 */
#define DECL13(b)   b ## 0 ## _0, b ## 0 ## _1, b ## 0 ## _2, \
                    b ## 1 ## _0, b ## 1 ## _1, b ## 1 ## _2, \
                    b ## 2 ## _0, b ## 2 ## _1, b ## 2 ## _2, \
                    b ## 3 ## _0, b ## 3 ## _1, b ## 3 ## _2, \
                    b ## 4 ## _0, b ## 4 ## _1, b ## 4 ## _2, \
                    b ## 5 ## _0, b ## 5 ## _1, b ## 5 ## _2, \
                    b ## 6 ## _0, b ## 6 ## _1, b ## 6 ## _2, \
                    b ## 7 ## _0, b ## 7 ## _1, b ## 7 ## _2, \
                    b ## 8 ## _0, b ## 8 ## _1, b ## 8 ## _2, \
                    b ## 9 ## _0, b ## 9 ## _1, b ## 9 ## _2, \
                    b ## 10 ## _0, b ## 10 ## _1, b ## 10 ## _2, \
                    b ## 11 ## _0, b ## 11 ## _1, b ## 11 ## _2, \
                    b ## 12 ## _0, b ## 12 ## _1, b ## 12 ## _2
210
211
/*
 * M13_A(i, j) expands to the literal value of (i + j) mod 13, for i and
 * j in 0 to 12. Both arguments are macro-expanded before being pasted
 * onto the table entry name.
 */
#define M13_A(i, j)    M13_A_(i, j)
#define M13_A_(i, j)   M13_A_ ## i ## _ ## j
#define M13_A_0_0      0
#define M13_A_0_1      1
#define M13_A_0_2      2
#define M13_A_0_3      3
#define M13_A_0_4      4
#define M13_A_0_5      5
#define M13_A_0_6      6
#define M13_A_0_7      7
#define M13_A_0_8      8
#define M13_A_0_9      9
#define M13_A_0_10     10
#define M13_A_0_11     11
#define M13_A_0_12     12
#define M13_A_1_0      1
#define M13_A_1_1      2
#define M13_A_1_2      3
#define M13_A_1_3      4
#define M13_A_1_4      5
#define M13_A_1_5      6
#define M13_A_1_6      7
#define M13_A_1_7      8
#define M13_A_1_8      9
#define M13_A_1_9      10
#define M13_A_1_10     11
#define M13_A_1_11     12
#define M13_A_1_12     0
#define M13_A_2_0      2
#define M13_A_2_1      3
#define M13_A_2_2      4
#define M13_A_2_3      5
#define M13_A_2_4      6
#define M13_A_2_5      7
#define M13_A_2_6      8
#define M13_A_2_7      9
#define M13_A_2_8      10
#define M13_A_2_9      11
#define M13_A_2_10     12
#define M13_A_2_11     0
#define M13_A_2_12     1
#define M13_A_3_0      3
#define M13_A_3_1      4
#define M13_A_3_2      5
#define M13_A_3_3      6
#define M13_A_3_4      7
#define M13_A_3_5      8
#define M13_A_3_6      9
#define M13_A_3_7      10
#define M13_A_3_8      11
#define M13_A_3_9      12
#define M13_A_3_10     0
#define M13_A_3_11     1
#define M13_A_3_12     2
#define M13_A_4_0      4
#define M13_A_4_1      5
#define M13_A_4_2      6
#define M13_A_4_3      7
#define M13_A_4_4      8
#define M13_A_4_5      9
#define M13_A_4_6      10
#define M13_A_4_7      11
#define M13_A_4_8      12
#define M13_A_4_9      0
#define M13_A_4_10     1
#define M13_A_4_11     2
#define M13_A_4_12     3
#define M13_A_5_0      5
#define M13_A_5_1      6
#define M13_A_5_2      7
#define M13_A_5_3      8
#define M13_A_5_4      9
#define M13_A_5_5      10
#define M13_A_5_6      11
#define M13_A_5_7      12
#define M13_A_5_8      0
#define M13_A_5_9      1
#define M13_A_5_10     2
#define M13_A_5_11     3
#define M13_A_5_12     4
#define M13_A_6_0      6
#define M13_A_6_1      7
#define M13_A_6_2      8
#define M13_A_6_3      9
#define M13_A_6_4      10
#define M13_A_6_5      11
#define M13_A_6_6      12
#define M13_A_6_7      0
#define M13_A_6_8      1
#define M13_A_6_9      2
#define M13_A_6_10     3
#define M13_A_6_11     4
#define M13_A_6_12     5
#define M13_A_7_0      7
#define M13_A_7_1      8
#define M13_A_7_2      9
#define M13_A_7_3      10
#define M13_A_7_4      11
#define M13_A_7_5      12
#define M13_A_7_6      0
#define M13_A_7_7      1
#define M13_A_7_8      2
#define M13_A_7_9      3
#define M13_A_7_10     4
#define M13_A_7_11     5
#define M13_A_7_12     6
#define M13_A_8_0      8
#define M13_A_8_1      9
#define M13_A_8_2      10
#define M13_A_8_3      11
#define M13_A_8_4      12
#define M13_A_8_5      0
#define M13_A_8_6      1
#define M13_A_8_7      2
#define M13_A_8_8      3
#define M13_A_8_9      4
#define M13_A_8_10     5
#define M13_A_8_11     6
#define M13_A_8_12     7
#define M13_A_9_0      9
#define M13_A_9_1      10
#define M13_A_9_2      11
#define M13_A_9_3      12
#define M13_A_9_4      0
#define M13_A_9_5      1
#define M13_A_9_6      2
#define M13_A_9_7      3
#define M13_A_9_8      4
#define M13_A_9_9      5
#define M13_A_9_10     6
#define M13_A_9_11     7
#define M13_A_9_12     8
#define M13_A_10_0     10
#define M13_A_10_1     11
#define M13_A_10_2     12
#define M13_A_10_3     0
#define M13_A_10_4     1
#define M13_A_10_5     2
#define M13_A_10_6     3
#define M13_A_10_7     4
#define M13_A_10_8     5
#define M13_A_10_9     6
#define M13_A_10_10    7
#define M13_A_10_11    8
#define M13_A_10_12    9
#define M13_A_11_0     11
#define M13_A_11_1     12
#define M13_A_11_2     0
#define M13_A_11_3     1
#define M13_A_11_4     2
#define M13_A_11_5     3
#define M13_A_11_6     4
#define M13_A_11_7     5
#define M13_A_11_8     6
#define M13_A_11_9     7
#define M13_A_11_10    8
#define M13_A_11_11    9
#define M13_A_11_12    10
#define M13_A_12_0     12
#define M13_A_12_1     0
#define M13_A_12_2     1
#define M13_A_12_3     2
#define M13_A_12_4     3
#define M13_A_12_5     4
#define M13_A_12_6     5
#define M13_A_12_7     6
#define M13_A_12_8     7
#define M13_A_12_9     8
#define M13_A_12_10    9
#define M13_A_12_11    10
#define M13_A_12_12    11
382
383
/*
 * M13_N(i) expands to the literal value of 12 - i, for i in 0 to 12
 * (used to walk the belt base index downwards, 12, 11, ... 0).
 */
#define M13_N(i)    M13_N_(i)
#define M13_N_(i)   M13_N_ ## i
#define M13_N_0     12
#define M13_N_1     11
#define M13_N_2     10
#define M13_N_3     9
#define M13_N_4     8
#define M13_N_5     7
#define M13_N_6     6
#define M13_N_7     5
#define M13_N_8     4
#define M13_N_9     3
#define M13_N_10    2
#define M13_N_11    1
#define M13_N_12    0
398
399
/*
 * Accessor for the local belt variable b<i>_<k>. The extra macro level
 * makes both arguments fully expanded before the name is pasted.
 */
#define ACC_b(i, k)    ACC_b_(i, k)
#define ACC_b_(i, k)   b ## i ## _ ## k
401
402
/*
 * Small-footprint helper: advance the running belt offset "bj" by one
 * 3-word block (wrapping from 39 back to 0), then XOR mill word a<k>
 * into word "s" of that block in sc->b[]. Expects "bj" and "sc" to be
 * in scope.
 */
#define ROUND_ELT(k, s)   do { \
		if ((bj += 3) == 39) \
			bj = 0; \
		sc->b[bj + s] ^= a ## k; \
	} while (0)
407
408
/*
 * Small-footprint round on belt base j (a block index, 0 to 12), with
 * the belt kept in the sc->b[] array:
 *  - mill words a1 ... a12 are XORed into the twelve belt blocks that
 *    follow block j (wrapping around), using word 0, 1, 2, 0, 1, 2...
 *    of the successive blocks;
 *  - the mill update is applied;
 *  - the three words of belt block j are XORed into a13, a14 and a15.
 */
#define ROUND_SF(j)   do { \
		size_t bj = (j) * 3; \
		ROUND_ELT(1, 0); \
		ROUND_ELT(2, 1); \
		ROUND_ELT(3, 2); \
		ROUND_ELT(4, 0); \
		ROUND_ELT(5, 1); \
		ROUND_ELT(6, 2); \
		ROUND_ELT(7, 0); \
		ROUND_ELT(8, 1); \
		ROUND_ELT(9, 2); \
		ROUND_ELT(10, 0); \
		ROUND_ELT(11, 1); \
		ROUND_ELT(12, 2); \
		MILL; \
		bj = (j) * 3; \
		a ## 13 ^= sc->b[bj + 0]; \
		a ## 14 ^= sc->b[bj + 1]; \
		a ## 15 ^= sc->b[bj + 2]; \
	} while (0)
428
429
/*
 * Small-footprint input injection on belt base j: the three input
 * words p0, p1, p2 are XORed into the belt block that follows block j
 * (block 0 when j is 12), and into mill words a16, a17 and a18.
 * Expects "sc" and the mill variables to be in scope.
 */
#define INPUT_SF(j, p0, p1, p2)   do { \
		size_t bj = ((j) + 1) * 3; \
		if (bj == 39) \
			bj = 0; \
		sc->b[bj + 0] ^= (p0); \
		sc->b[bj + 1] ^= (p1); \
		sc->b[bj + 2] ^= (p2); \
		a16 ^= (p0); \
		a17 ^= (p1); \
		a18 ^= (p2); \
	} while (0)
440
441
442
#if SPH_SMALL_FOOTPRINT_RADIOGATUN

/* Small footprint: the belt stays in sc->b[] and is indexed at runtime. */
#define ROUND   ROUND_SF
#define INPUT   INPUT_SF

#else

/*
 * Round function R, on base j. The value j is such that B[0] is actually
 * b[j] after the initial rotation. On the 13-round macro, j has the
 * successive values 12, 11, 10... 1, 0.
 *
 * The belt is held in the local variables b<i>_<k>; all index
 * computations are resolved by the preprocessor through M13_A.
 */
#define ROUND(j)   do { \
		ACC_b(M13_A(1, j), 0) ^= a ## 1; \
		ACC_b(M13_A(2, j), 1) ^= a ## 2; \
		ACC_b(M13_A(3, j), 2) ^= a ## 3; \
		ACC_b(M13_A(4, j), 0) ^= a ## 4; \
		ACC_b(M13_A(5, j), 1) ^= a ## 5; \
		ACC_b(M13_A(6, j), 2) ^= a ## 6; \
		ACC_b(M13_A(7, j), 0) ^= a ## 7; \
		ACC_b(M13_A(8, j), 1) ^= a ## 8; \
		ACC_b(M13_A(9, j), 2) ^= a ## 9; \
		ACC_b(M13_A(10, j), 0) ^= a ## 10; \
		ACC_b(M13_A(11, j), 1) ^= a ## 11; \
		ACC_b(M13_A(12, j), 2) ^= a ## 12; \
		MILL; \
		a ## 13 ^= ACC_b(j, 0); \
		a ## 14 ^= ACC_b(j, 1); \
		a ## 15 ^= ACC_b(j, 2); \
	} while (0)

/*
 * Input injection on base j: XOR the input words p0, p1, p2 into belt
 * block (j + 1) mod 13 and into mill words a16, a17, a18.
 */
#define INPUT(j, p0, p1, p2)   do { \
		ACC_b(M13_A(1, j), 0) ^= (p0); \
		ACC_b(M13_A(1, j), 1) ^= (p1); \
		ACC_b(M13_A(1, j), 2) ^= (p2); \
		a16 ^= (p0); \
		a17 ^= (p1); \
		a18 ^= (p2); \
	} while (0)

#endif
483
484
/*
 * Invoke "action" once for each of the 13 indices 0 to 12, in
 * increasing order, passing the index as a literal decimal token.
 */
#define MUL13(action)   do { \
		action(0); \
		action(1); \
		action(2); \
		action(3); \
		action(4); \
		action(5); \
		action(6); \
		action(7); \
		action(8); \
		action(9); \
		action(10); \
		action(11); \
		action(12); \
	} while (0)
499
500
/* Load mill word i from the context array sc->a[] into local a<i>. */
#define MILL_READ_ELT(i)   do { \
		a ## i = sc->a[i]; \
	} while (0)

/* Store local a<i> back into mill word i of the context array sc->a[]. */
#define MILL_WRITE_ELT(i)   do { \
		sc->a[i] = a ## i; \
	} while (0)
507
508
/*
 * Small-footprint state load: only the 19 mill words are copied into
 * local variables; the belt is accessed in place in sc->b[].
 */
#define STATE_READ_SF   do { \
		MUL19(MILL_READ_ELT); \
	} while (0)

/* Small-footprint state store: write the 19 mill words back to sc->a[]. */
#define STATE_WRITE_SF   do { \
		MUL19(MILL_WRITE_ELT); \
	} while (0)
515
516
/*
 * Small-footprint body of the push13 functions. Expects "sc", "data"
 * and "len" to be in scope. For as long as at least one full chunk
 * (sizeof sc->data bytes, i.e. 13 input blocks) remains, each 3-word
 * block is decoded, injected and followed by one round, with the belt
 * base index going 12, 11, ... 0. Returns (from the enclosing
 * function) the number of unprocessed trailing bytes.
 */
#define PUSH13_SF   do { \
		WT DECL19(a); \
		const unsigned char *buf; \
 \
		buf = data; \
		STATE_READ_SF; \
		while (len >= sizeof sc->data) { \
			size_t mk; \
			for (mk = 13; mk > 0; mk --) { \
				WT p0 = INW(0, 0); \
				WT p1 = INW(0, 1); \
				WT p2 = INW(0, 2); \
				INPUT_SF(mk - 1, p0, p1, p2); \
				ROUND_SF(mk - 1); \
				buf += (sizeof sc->data) / 13; \
				len -= (sizeof sc->data) / 13; \
			} \
		} \
		STATE_WRITE_SF; \
		return len; \
	} while (0)
537
538
#if SPH_SMALL_FOOTPRINT_RADIOGATUN

#define STATE_READ    STATE_READ_SF
#define STATE_WRITE   STATE_WRITE_SF
#define PUSH13        PUSH13_SF

#else

/* Load belt block i (three words) from sc->b[] into b<i>_0 ... b<i>_2. */
#define BELT_READ_ELT(i)   do { \
		b ## i ## _0 = sc->b[3 * i + 0]; \
		b ## i ## _1 = sc->b[3 * i + 1]; \
		b ## i ## _2 = sc->b[3 * i + 2]; \
	} while (0)

/* Store b<i>_0 ... b<i>_2 back into belt block i of sc->b[]. */
#define BELT_WRITE_ELT(i)   do { \
		sc->b[3 * i + 0] = b ## i ## _0; \
		sc->b[3 * i + 1] = b ## i ## _1; \
		sc->b[3 * i + 2] = b ## i ## _2; \
	} while (0)

/* Load the whole state (13 belt blocks, 19 mill words) into locals. */
#define STATE_READ   do { \
		MUL13(BELT_READ_ELT); \
		MUL19(MILL_READ_ELT); \
	} while (0)

/* Store the whole state (13 belt blocks, 19 mill words) back to *sc. */
#define STATE_WRITE   do { \
		MUL13(BELT_WRITE_ELT); \
		MUL19(MILL_WRITE_ELT); \
	} while (0)

/*
 * Input data by chunks of 13 blocks of 3 words each. This is the body
 * of the radiogatun32_push13() and radiogatun64_push13() functions; it
 * expects "sc", "data" and "len" to be in scope, and returns (from the
 * enclosing function) the number of unprocessed trailing bytes.
 */
#define PUSH13   do { \
		WT DECL19(a), DECL13(b); \
		const unsigned char *buf; \
 \
		buf = data; \
		STATE_READ; \
		while (len >= sizeof sc->data) { \
			WT p0, p1, p2; \
			MUL13(PUSH13_ELT); \
			buf += sizeof sc->data; \
			len -= sizeof sc->data; \
		} \
		STATE_WRITE; \
		return len; \
	} while (0)

/*
 * Process input block k (0 to 12) of the current chunk: decode its
 * three words, inject them and run one round, on belt base 12 - k.
 */
#define PUSH13_ELT(k)   do { \
		p0 = INW(k, 0); \
		p1 = INW(k, 1); \
		p2 = INW(k, 2); \
		INPUT(M13_N(k), p0, p1, p2); \
		ROUND(M13_N(k)); \
	} while (0)

#endif
597
598
/*
 * Small-footprint version of 13 blank rounds (rounds with no input
 * injection), on belt bases 12 down to 0.
 */
#define BLANK13_SF   do { \
		size_t mk = 13; \
		while (mk -- > 0) \
			ROUND_SF(mk); \
	} while (0)
603
604
/*
 * Small-footprint single blank round: run one round on base 12, then
 * physically rotate the belt in sc->b[] by one block (block 12 moves
 * to position 0, every other block moves up by one). memmove() is
 * used because source and destination overlap.
 */
#define BLANK1_SF   do { \
		WT tmp0, tmp1, tmp2; \
		ROUND_SF(12); \
		tmp0 = sc->b[36]; \
		tmp1 = sc->b[37]; \
		tmp2 = sc->b[38]; \
		memmove(sc->b + 3, sc->b, 36 * sizeof sc->b[0]); \
		sc->b[0] = tmp0; \
		sc->b[1] = tmp1; \
		sc->b[2] = tmp2; \
	} while (0)
615
616
#if SPH_SMALL_FOOTPRINT_RADIOGATUN

#define BLANK13   BLANK13_SF
#define BLANK1    BLANK1_SF

#else

/*
 * Run 13 blank rounds. This macro expects the "a" and "b" state variables
 * to be already declared.
 */
#define BLANK13   MUL13(BLANK13_ELT)

#define BLANK13_ELT(k)   ROUND(M13_N(k))

/* Invoke "action" once for each of the 12 indices 0 to 11, in order. */
#define MUL12(action)   do { \
		action(0); \
		action(1); \
		action(2); \
		action(3); \
		action(4); \
		action(5); \
		action(6); \
		action(7); \
		action(8); \
		action(9); \
		action(10); \
		action(11); \
	} while (0)

/*
 * Run a single blank round, and physically rotate the belt. This is used
 * for the last blank rounds, and the output rounds. This macro expects the
 * "a" and "b" state variables to be already declared.
 *
 * Block 0 is saved first because the first copy step (12 -> 0)
 * overwrites it; it is written into block 1 once the chain
 * 12 -> 0, 11 -> 12, ..., 1 -> 2 has completed.
 */
#define BLANK1   do { \
		WT tmp0, tmp1, tmp2; \
		ROUND(12); \
		tmp0 = b0_0; \
		tmp1 = b0_1; \
		tmp2 = b0_2; \
		MUL12(BLANK1_ELT); \
		b1_0 = tmp0; \
		b1_1 = tmp1; \
		b1_2 = tmp2; \
	} while (0)

/* Copy belt block 12 - i into block (12 - i + 1) mod 13. */
#define BLANK1_ELT(i)   do { \
		ACC_b(M13_A(M13_N(i), 1), 0) = ACC_b(M13_N(i), 0); \
		ACC_b(M13_A(M13_N(i), 1), 1) = ACC_b(M13_N(i), 1); \
		ACC_b(M13_A(M13_N(i), 1), 2) = ACC_b(M13_N(i), 2); \
	} while (0)

#endif
670
671
/* Expands to nothing; CLOSE_SF passes it to CLOSE_GEN in place of a belt declaration. */
#define NO_TOKEN
672
673
/*
 * Perform padding, then blank rounds, then output some words. This is
 * the body of sph_radiogatun32_close() and sph_radiogatun64_close().
 *
 * CLOSE_SF is the small-footprint instantiation (no local belt
 * variables are declared); the regular CLOSE additionally declares the
 * 39 local belt variables and uses the unrolled round macros.
 */
#define CLOSE_SF(width)   CLOSE_GEN(width, \
	NO_TOKEN, STATE_READ_SF, BLANK1_SF, BLANK13_SF)

#if SPH_SMALL_FOOTPRINT_RADIOGATUN
#define CLOSE       CLOSE_SF
#else
#define CLOSE(width)   CLOSE_GEN(width, \
	WT DECL13(b);, STATE_READ, BLANK1, BLANK13)
#endif
686
687
/*
 * Generic body of the close functions. Expects "sc" (context) and
 * "dst" (output buffer) to be in scope.
 *
 *   width       word size in bits (32 or 64)
 *   WTb13       extra declaration tokens (belt locals), or nothing
 *   state_read  macro loading the state into local variables
 *   blank1      macro running one blank round and rotating the belt
 *   blank13     macro running 13 blank rounds
 *
 * Steps:
 *  1. Pad the buffered data with a 0x01 byte, then zeros up to a full
 *     chunk, and process that chunk with the push13 function.
 *  2. Compute the number of blank rounds still to run: start from 17
 *     and deduct one for every whole block that lies after the block
 *     holding the padding byte (NOTE(review): presumably because
 *     push13 already ran one round on each of these all-zero blocks).
 *  3. Run these blank rounds (13 at once when possible).
 *  4. Emit mill words a1 and a2 (little-endian, through OUTW), with a
 *     blank round between successive pairs, until 32 bytes have been
 *     written to dst.
 *  5. Reset the context with INIT so that it can be reused.
 */
#define CLOSE_GEN(width, WTb13, state_read, blank1, blank13)   do { \
		unsigned ptr, num; \
		unsigned char *out; \
		WT DECL19(a); \
		WTb13 \
 \
		ptr = sc->data_ptr; \
		sc->data[ptr ++] = 0x01; \
		memset(sc->data + ptr, 0, (sizeof sc->data) - ptr); \
		radiogatun ## width ## _push13(sc, sc->data, sizeof sc->data); \
 \
		num = 17; \
		for (;;) { \
			ptr += 3 * (width >> 3); \
			if (ptr > sizeof sc->data) \
				break; \
			num --; \
		} \
 \
		state_read; \
		if (num >= 13) { \
			blank13; \
			num -= 13; \
		} \
		while (num -- > 0) \
			blank1; \
 \
		num = 0; \
		out = dst; \
		for (;;) { \
			OUTW(out, a1); \
			out += width >> 3; \
			OUTW(out, a2); \
			out += width >> 3; \
			num += 2 * (width >> 3); \
			if (num >= 32) \
				break; \
			blank1; \
		} \
		INIT; \
	} while (0)
728
729
/*
 * Initialize context structure: all 19 mill words and all 39 belt
 * words are set to zero, and the input buffer is marked empty.
 * Expects "sc" to point to the context.
 *
 * When the byte order is known (SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN),
 * the arrays are cleared with memset(); otherwise each word is
 * assigned zero individually.
 */
#if SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN

#define INIT   do { \
		memset(sc->a, 0, sizeof sc->a); \
		memset(sc->b, 0, sizeof sc->b); \
		sc->data_ptr = 0; \
	} while (0)

#else

#define INIT   do { \
		size_t u; \
		for (u = 0; u < 19; u ++) \
			sc->a[u] = 0; \
		for (u = 0; u < 39; u ++) \
			sc->b[u] = 0; \
		sc->data_ptr = 0; \
	} while (0)

#endif
752
753
/* ======================================================================= */
754
/*
755
* RadioGatun[32].
756
*/
757
758
#if !SPH_NO_RG32
759
760
/*
 * Word-size parameters for RadioGatun[32]: 32-bit words, little-endian
 * input decoding and output encoding. The rotation count of ROR is
 * reduced modulo 32 on both shifts, so that a count of 0 (or a
 * multiple of 32) never yields a shift by the full word width.
 */
#undef WT
#define WT           sph_u32
#undef T
#define T            SPH_T32
#undef ROR
#define ROR(x, n)    SPH_T32(((x) << ((32 - (n)) & 31)) | ((x) >> ((n) & 31)))
#undef INW
#define INW(i, j)    sph_dec32le_aligned(buf + (4 * (3 * (i) + (j))))
#undef OUTW
#define OUTW(b, v)   sph_enc32le(b, v)
770
771
/*
772
* Insert data by big chunks of 13*12 = 156 bytes. Returned value is the
773
* number of remaining bytes (between 0 and 155). This method assumes that
774
* the input data is suitably aligned.
775
*/
776
static size_t
777
radiogatun32_push13(sph_radiogatun32_context *sc, const void *data, size_t len)
778
{
779
PUSH13;
780
}
781
782
/* see sph_radiogatun.h */
783
void
784
sph_radiogatun32_init(void *cc)
785
{
786
sph_radiogatun32_context *sc;
787
788
sc = cc;
789
INIT;
790
}
791
792
#ifdef SPH_UPTR
793
static void
794
radiogatun32_short(void *cc, const void *data, size_t len)
795
#else
796
/* see sph_radiogatun.h */
797
void
798
sph_radiogatun32(void *cc, const void *data, size_t len)
799
#endif
800
{
801
sph_radiogatun32_context *sc;
802
unsigned ptr;
803
804
sc = cc;
805
ptr = sc->data_ptr;
806
while (len > 0) {
807
size_t clen;
808
809
clen = (sizeof sc->data) - ptr;
810
if (clen > len)
811
clen = len;
812
memcpy(sc->data + ptr, data, clen);
813
data = (const unsigned char *)data + clen;
814
len -= clen;
815
ptr += clen;
816
if (ptr == sizeof sc->data) {
817
radiogatun32_push13(sc, sc->data, sizeof sc->data);
818
ptr = 0;
819
}
820
}
821
sc->data_ptr = ptr;
822
}
823
824
#ifdef SPH_UPTR
/*
 * see sph_radiogatun.h
 *
 * Public input function when SPH_UPTR is available. Short inputs
 * (less than two chunks) go through the buffered helper. Otherwise
 * the partially filled buffer is first completed and processed, then
 * the remaining data is hashed directly from the caller's memory by
 * whole chunks, and the leftover tail is stored in the buffer.
 */
void
sph_radiogatun32(void *cc, const void *data, size_t len)
{
	sph_radiogatun32_context *sc;
	unsigned ptr;
	size_t rlen;

	if (len < (2 * sizeof sc->data)) {
		radiogatun32_short(cc, data, len);
		return;
	}
	sc = cc;
	ptr = sc->data_ptr;
	if (ptr > 0) {
		unsigned t;

		/* Fill the buffer up; the helper processes it and resets it. */
		t = (sizeof sc->data) - ptr;
		radiogatun32_short(sc, data, t);
		data = (const unsigned char *)data + t;
		len -= t;
	}
#if !SPH_UNALIGNED
	/* Direct decoding needs 4-byte alignment; else use the buffer. */
	if (((SPH_UPTR)data & 3) != 0) {
		radiogatun32_short(sc, data, len);
		return;
	}
#endif
	rlen = radiogatun32_push13(sc, data, len);
	memcpy(sc->data, (const unsigned char *)data + len - rlen, rlen);
	sc->data_ptr = rlen;
}
#endif
858
859
/* see sph_radiogatun.h */
860
void
861
sph_radiogatun32_close(void *cc, void *dst)
862
{
863
sph_radiogatun32_context *sc;
864
865
sc = cc;
866
CLOSE(32);
867
}
868
869
#endif
870
871
/* ======================================================================= */
872
/*
873
* RadioGatun[64]. Compiled only if a 64-bit or more type is available.
874
*/
875
876
#if SPH_64
877
878
#if !SPH_NO_RG64
879
880
/*
 * Word-size parameters for RadioGatun[64]: 64-bit words, little-endian
 * input decoding and output encoding. The rotation count of ROR is
 * reduced modulo 64 on both shifts, so that a count of 0 (or a
 * multiple of 64) never yields a shift by the full word width.
 */
#undef WT
#define WT           sph_u64
#undef T
#define T            SPH_T64
#undef ROR
#define ROR(x, n)    SPH_T64(((x) << ((64 - (n)) & 63)) | ((x) >> ((n) & 63)))
#undef INW
#define INW(i, j)    sph_dec64le_aligned(buf + (8 * (3 * (i) + (j))))
#undef OUTW
#define OUTW(b, v)   sph_enc64le(b, v)
890
891
/*
 * On 32-bit x86, register pressure is such that using the small
 * footprint version is a net gain (x2 speed), because that variant
 * uses fewer local variables.
 *
 * This override only affects the code that follows, i.e. the
 * RadioGatun[64] functions.
 */
#if SPH_I386_MSVC || SPH_I386_GCC || defined __i386__
#undef PUSH13
#define PUSH13   PUSH13_SF
#undef CLOSE
#define CLOSE    CLOSE_SF
#endif
902
903
/*
904
* Insert data by big chunks of 13*24 = 312 bytes. Returned value is the
905
* number of remaining bytes (between 0 and 311). This method assumes that
906
* the input data is suitably aligned.
907
*/
908
static size_t
909
radiogatun64_push13(sph_radiogatun64_context *sc, const void *data, size_t len)
910
{
911
PUSH13;
912
}
913
914
/* see sph_radiogatun.h */
915
void
916
sph_radiogatun64_init(void *cc)
917
{
918
sph_radiogatun64_context *sc;
919
920
sc = cc;
921
INIT;
922
}
923
924
#ifdef SPH_UPTR
925
static void
926
radiogatun64_short(void *cc, const void *data, size_t len)
927
#else
928
/* see sph_radiogatun.h */
929
void
930
sph_radiogatun64(void *cc, const void *data, size_t len)
931
#endif
932
{
933
sph_radiogatun64_context *sc;
934
unsigned ptr;
935
936
sc = cc;
937
ptr = sc->data_ptr;
938
while (len > 0) {
939
size_t clen;
940
941
clen = (sizeof sc->data) - ptr;
942
if (clen > len)
943
clen = len;
944
memcpy(sc->data + ptr, data, clen);
945
data = (const unsigned char *)data + clen;
946
len -= clen;
947
ptr += clen;
948
if (ptr == sizeof sc->data) {
949
radiogatun64_push13(sc, sc->data, sizeof sc->data);
950
ptr = 0;
951
}
952
}
953
sc->data_ptr = ptr;
954
}
955
956
#ifdef SPH_UPTR
/*
 * see sph_radiogatun.h
 *
 * Public input function when SPH_UPTR is available. Short inputs
 * (less than two chunks) go through the buffered helper. Otherwise
 * the partially filled buffer is first completed and processed, then
 * the remaining data is hashed directly from the caller's memory by
 * whole chunks, and the leftover tail is stored in the buffer.
 */
void
sph_radiogatun64(void *cc, const void *data, size_t len)
{
	sph_radiogatun64_context *sc;
	unsigned ptr;
	size_t rlen;

	if (len < (2 * sizeof sc->data)) {
		radiogatun64_short(cc, data, len);
		return;
	}
	sc = cc;
	ptr = sc->data_ptr;
	if (ptr > 0) {
		unsigned t;

		/* Fill the buffer up; the helper processes it and resets it. */
		t = (sizeof sc->data) - ptr;
		radiogatun64_short(sc, data, t);
		data = (const unsigned char *)data + t;
		len -= t;
	}
#if !SPH_UNALIGNED
	/* Direct decoding needs 8-byte alignment; else use the buffer. */
	if (((SPH_UPTR)data & 7) != 0) {
		radiogatun64_short(sc, data, len);
		return;
	}
#endif
	rlen = radiogatun64_push13(sc, data, len);
	memcpy(sc->data, (const unsigned char *)data + len - rlen, rlen);
	sc->data_ptr = rlen;
}
#endif
990
991
/* see sph_radiogatun.h */
992
void
993
sph_radiogatun64_close(void *cc, void *dst)
994
{
995
sph_radiogatun64_context *sc;
996
997
sc = cc;
998
CLOSE(64);
999
}
1000
1001
#endif
1002
1003
#endif
1004
1005