GitHub Repository: tpruvot/cpuminer-multi
Path: blob/linux/sha3/sph_keccak.c
/* $Id: keccak.c 259 2011-07-19 22:11:27Z tp $ */
/*
 * Keccak implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010 Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author Thomas Pornin <[email protected]>
 */

#include <stddef.h>
#include <string.h>

#include "sph_keccak.h"

#ifdef __cplusplus
extern "C"{
#endif

/*
 * Parameters:
 *
 *   SPH_KECCAK_64          use a 64-bit type
 *   SPH_KECCAK_UNROLL      number of loops to unroll (0/undef for full unroll)
 *   SPH_KECCAK_INTERLEAVE  use bit-interleaving (32-bit type only)
 *   SPH_KECCAK_NOCOPY      do not copy the state into local variables
 *
 * If there is no usable 64-bit type, the code automatically switches
 * back to the 32-bit implementation.
 *
 * Some tests on an Intel Core2 Q6600 (both 64-bit and 32-bit, 32 kB L1
 * code cache), a PowerPC (G3, 32 kB L1 code cache), an ARM920T core
 * (16 kB L1 code cache), and a small MIPS-compatible CPU (Broadcom BCM3302,
 * 8 kB L1 code cache), seem to show that the following are optimal:
 *
 * -- x86, 64-bit: use the 64-bit implementation, unroll 8 rounds,
 * do not copy the state; unrolling 2, 6 or all rounds also provides
 * near-optimal performance.
 * -- x86, 32-bit: use the 32-bit implementation, unroll 6 rounds,
 * interleave, do not copy the state. Unrolling 1, 2, 4 or 8 rounds
 * also provides near-optimal performance.
 * -- PowerPC: use the 64-bit implementation, unroll 8 rounds,
 * copy the state. Unrolling 4 or 6 rounds is near-optimal.
 * -- ARM: use the 64-bit implementation, unroll 2 or 4 rounds,
 * copy the state.
 * -- MIPS: use the 64-bit implementation, unroll 2 rounds, copy
 * the state. Unrolling only 1 round is also near-optimal.
 *
 * Also, interleaving does not always yield actual improvements when
 * using a 32-bit implementation; in particular when the architecture
 * does not offer a native rotation opcode (interleaving replaces one
 * 64-bit rotation with two 32-bit rotations, which is a gain only if
 * there is a native 32-bit rotation opcode and not a native 64-bit
 * rotation opcode; also, interleaving implies a small overhead when
 * processing input words).
 *
 * To sum up:
 * -- when possible, use the 64-bit code
 * -- exception: on 32-bit x86, use 32-bit code
 * -- when using 32-bit code, use interleaving
 * -- copy the state, except on x86
 * -- unroll 8 rounds on "big" machines, 2 rounds on "small" machines
 */
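
/*
 * For example, to force the 32-bit interleaved implementation with a
 * 4-round unroll, the tuning macros can be set when compiling this
 * file (a sketch; the exact invocation depends on the build system):
 *
 *    cc -DSPH_KECCAK_64=0 -DSPH_KECCAK_INTERLEAVE=1 \
 *       -DSPH_KECCAK_UNROLL=4 -c sph_keccak.c
 */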

#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_KECCAK
#define SPH_SMALL_FOOTPRINT_KECCAK 1
#endif

/*
 * By default, we select the 64-bit implementation if a 64-bit type
 * is available, unless a 32-bit x86 is detected.
 */
#if !defined SPH_KECCAK_64 && SPH_64 \
    && !(defined __i386__ || SPH_I386_GCC || SPH_I386_MSVC)
#define SPH_KECCAK_64 1
#endif

/*
 * If using a 32-bit implementation, we prefer to interleave.
 */
#if !SPH_KECCAK_64 && !defined SPH_KECCAK_INTERLEAVE
#define SPH_KECCAK_INTERLEAVE 1
#endif

/*
 * Unroll 8 rounds on big systems, 2 rounds on small systems.
 */
#ifndef SPH_KECCAK_UNROLL
#if SPH_SMALL_FOOTPRINT_KECCAK
#define SPH_KECCAK_UNROLL 2
#else
#define SPH_KECCAK_UNROLL 8
#endif
#endif

/*
 * We do not want to copy the state to local variables on x86 (32-bit
 * and 64-bit alike).
 */
#ifndef SPH_KECCAK_NOCOPY
#if defined __i386__ || defined __x86_64 || SPH_I386_MSVC || SPH_I386_GCC
#define SPH_KECCAK_NOCOPY 1
#else
#define SPH_KECCAK_NOCOPY 0
#endif
#endif

#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif

#if SPH_KECCAK_64

static const sph_u64 RC[] = {
    SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
    SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
    SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
    SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
    SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
    SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
    SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
    SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
    SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
    SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
    SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
    SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
};
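
/*
 * These are the 24 round constants of Keccak-f[1600]; each one is
 * XORed into lane (0,0) by the iota step at the end of a round. The
 * specification defines them as outputs of a small binary LFSR, but
 * hard-coding the table is simpler and faster.
 */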

#if SPH_KECCAK_NOCOPY

#define a00 (kc->u.wide[ 0])
#define a10 (kc->u.wide[ 1])
#define a20 (kc->u.wide[ 2])
#define a30 (kc->u.wide[ 3])
#define a40 (kc->u.wide[ 4])
#define a01 (kc->u.wide[ 5])
#define a11 (kc->u.wide[ 6])
#define a21 (kc->u.wide[ 7])
#define a31 (kc->u.wide[ 8])
#define a41 (kc->u.wide[ 9])
#define a02 (kc->u.wide[10])
#define a12 (kc->u.wide[11])
#define a22 (kc->u.wide[12])
#define a32 (kc->u.wide[13])
#define a42 (kc->u.wide[14])
#define a03 (kc->u.wide[15])
#define a13 (kc->u.wide[16])
#define a23 (kc->u.wide[17])
#define a33 (kc->u.wide[18])
#define a43 (kc->u.wide[19])
#define a04 (kc->u.wide[20])
#define a14 (kc->u.wide[21])
#define a24 (kc->u.wide[22])
#define a34 (kc->u.wide[23])
#define a44 (kc->u.wide[24])

#define DECL_STATE
#define READ_STATE(sc)
#define WRITE_STATE(sc)

#define INPUT_BUF(size) do { \
        size_t j; \
        for (j = 0; j < (size); j += 8) { \
            kc->u.wide[j >> 3] ^= sph_dec64le_aligned(buf + j); \
        } \
    } while (0)

#define INPUT_BUF144 INPUT_BUF(144)
#define INPUT_BUF136 INPUT_BUF(136)
#define INPUT_BUF104 INPUT_BUF(104)
#define INPUT_BUF72 INPUT_BUF(72)

#else

#define DECL_STATE \
    sph_u64 a00, a01, a02, a03, a04; \
    sph_u64 a10, a11, a12, a13, a14; \
    sph_u64 a20, a21, a22, a23, a24; \
    sph_u64 a30, a31, a32, a33, a34; \
    sph_u64 a40, a41, a42, a43, a44;

#define READ_STATE(state) do { \
        a00 = (state)->u.wide[ 0]; \
        a10 = (state)->u.wide[ 1]; \
        a20 = (state)->u.wide[ 2]; \
        a30 = (state)->u.wide[ 3]; \
        a40 = (state)->u.wide[ 4]; \
        a01 = (state)->u.wide[ 5]; \
        a11 = (state)->u.wide[ 6]; \
        a21 = (state)->u.wide[ 7]; \
        a31 = (state)->u.wide[ 8]; \
        a41 = (state)->u.wide[ 9]; \
        a02 = (state)->u.wide[10]; \
        a12 = (state)->u.wide[11]; \
        a22 = (state)->u.wide[12]; \
        a32 = (state)->u.wide[13]; \
        a42 = (state)->u.wide[14]; \
        a03 = (state)->u.wide[15]; \
        a13 = (state)->u.wide[16]; \
        a23 = (state)->u.wide[17]; \
        a33 = (state)->u.wide[18]; \
        a43 = (state)->u.wide[19]; \
        a04 = (state)->u.wide[20]; \
        a14 = (state)->u.wide[21]; \
        a24 = (state)->u.wide[22]; \
        a34 = (state)->u.wide[23]; \
        a44 = (state)->u.wide[24]; \
    } while (0)

#define WRITE_STATE(state) do { \
        (state)->u.wide[ 0] = a00; \
        (state)->u.wide[ 1] = a10; \
        (state)->u.wide[ 2] = a20; \
        (state)->u.wide[ 3] = a30; \
        (state)->u.wide[ 4] = a40; \
        (state)->u.wide[ 5] = a01; \
        (state)->u.wide[ 6] = a11; \
        (state)->u.wide[ 7] = a21; \
        (state)->u.wide[ 8] = a31; \
        (state)->u.wide[ 9] = a41; \
        (state)->u.wide[10] = a02; \
        (state)->u.wide[11] = a12; \
        (state)->u.wide[12] = a22; \
        (state)->u.wide[13] = a32; \
        (state)->u.wide[14] = a42; \
        (state)->u.wide[15] = a03; \
        (state)->u.wide[16] = a13; \
        (state)->u.wide[17] = a23; \
        (state)->u.wide[18] = a33; \
        (state)->u.wide[19] = a43; \
        (state)->u.wide[20] = a04; \
        (state)->u.wide[21] = a14; \
        (state)->u.wide[22] = a24; \
        (state)->u.wide[23] = a34; \
        (state)->u.wide[24] = a44; \
    } while (0)

#define INPUT_BUF144 do { \
        a00 ^= sph_dec64le_aligned(buf + 0); \
        a10 ^= sph_dec64le_aligned(buf + 8); \
        a20 ^= sph_dec64le_aligned(buf + 16); \
        a30 ^= sph_dec64le_aligned(buf + 24); \
        a40 ^= sph_dec64le_aligned(buf + 32); \
        a01 ^= sph_dec64le_aligned(buf + 40); \
        a11 ^= sph_dec64le_aligned(buf + 48); \
        a21 ^= sph_dec64le_aligned(buf + 56); \
        a31 ^= sph_dec64le_aligned(buf + 64); \
        a41 ^= sph_dec64le_aligned(buf + 72); \
        a02 ^= sph_dec64le_aligned(buf + 80); \
        a12 ^= sph_dec64le_aligned(buf + 88); \
        a22 ^= sph_dec64le_aligned(buf + 96); \
        a32 ^= sph_dec64le_aligned(buf + 104); \
        a42 ^= sph_dec64le_aligned(buf + 112); \
        a03 ^= sph_dec64le_aligned(buf + 120); \
        a13 ^= sph_dec64le_aligned(buf + 128); \
        a23 ^= sph_dec64le_aligned(buf + 136); \
    } while (0)

#define INPUT_BUF136 do { \
        a00 ^= sph_dec64le_aligned(buf + 0); \
        a10 ^= sph_dec64le_aligned(buf + 8); \
        a20 ^= sph_dec64le_aligned(buf + 16); \
        a30 ^= sph_dec64le_aligned(buf + 24); \
        a40 ^= sph_dec64le_aligned(buf + 32); \
        a01 ^= sph_dec64le_aligned(buf + 40); \
        a11 ^= sph_dec64le_aligned(buf + 48); \
        a21 ^= sph_dec64le_aligned(buf + 56); \
        a31 ^= sph_dec64le_aligned(buf + 64); \
        a41 ^= sph_dec64le_aligned(buf + 72); \
        a02 ^= sph_dec64le_aligned(buf + 80); \
        a12 ^= sph_dec64le_aligned(buf + 88); \
        a22 ^= sph_dec64le_aligned(buf + 96); \
        a32 ^= sph_dec64le_aligned(buf + 104); \
        a42 ^= sph_dec64le_aligned(buf + 112); \
        a03 ^= sph_dec64le_aligned(buf + 120); \
        a13 ^= sph_dec64le_aligned(buf + 128); \
    } while (0)

#define INPUT_BUF104 do { \
        a00 ^= sph_dec64le_aligned(buf + 0); \
        a10 ^= sph_dec64le_aligned(buf + 8); \
        a20 ^= sph_dec64le_aligned(buf + 16); \
        a30 ^= sph_dec64le_aligned(buf + 24); \
        a40 ^= sph_dec64le_aligned(buf + 32); \
        a01 ^= sph_dec64le_aligned(buf + 40); \
        a11 ^= sph_dec64le_aligned(buf + 48); \
        a21 ^= sph_dec64le_aligned(buf + 56); \
        a31 ^= sph_dec64le_aligned(buf + 64); \
        a41 ^= sph_dec64le_aligned(buf + 72); \
        a02 ^= sph_dec64le_aligned(buf + 80); \
        a12 ^= sph_dec64le_aligned(buf + 88); \
        a22 ^= sph_dec64le_aligned(buf + 96); \
    } while (0)

#define INPUT_BUF72 do { \
        a00 ^= sph_dec64le_aligned(buf + 0); \
        a10 ^= sph_dec64le_aligned(buf + 8); \
        a20 ^= sph_dec64le_aligned(buf + 16); \
        a30 ^= sph_dec64le_aligned(buf + 24); \
        a40 ^= sph_dec64le_aligned(buf + 32); \
        a01 ^= sph_dec64le_aligned(buf + 40); \
        a11 ^= sph_dec64le_aligned(buf + 48); \
        a21 ^= sph_dec64le_aligned(buf + 56); \
        a31 ^= sph_dec64le_aligned(buf + 64); \
    } while (0)

#define INPUT_BUF(lim) do { \
        a00 ^= sph_dec64le_aligned(buf + 0); \
        a10 ^= sph_dec64le_aligned(buf + 8); \
        a20 ^= sph_dec64le_aligned(buf + 16); \
        a30 ^= sph_dec64le_aligned(buf + 24); \
        a40 ^= sph_dec64le_aligned(buf + 32); \
        a01 ^= sph_dec64le_aligned(buf + 40); \
        a11 ^= sph_dec64le_aligned(buf + 48); \
        a21 ^= sph_dec64le_aligned(buf + 56); \
        a31 ^= sph_dec64le_aligned(buf + 64); \
        if ((lim) == 72) \
            break; \
        a41 ^= sph_dec64le_aligned(buf + 72); \
        a02 ^= sph_dec64le_aligned(buf + 80); \
        a12 ^= sph_dec64le_aligned(buf + 88); \
        a22 ^= sph_dec64le_aligned(buf + 96); \
        if ((lim) == 104) \
            break; \
        a32 ^= sph_dec64le_aligned(buf + 104); \
        a42 ^= sph_dec64le_aligned(buf + 112); \
        a03 ^= sph_dec64le_aligned(buf + 120); \
        a13 ^= sph_dec64le_aligned(buf + 128); \
        if ((lim) == 136) \
            break; \
        a23 ^= sph_dec64le_aligned(buf + 136); \
    } while (0)
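
/*
 * Absorbing a block XORs the message into the first lim/8 state words;
 * lanes are decoded as little-endian 64-bit values, as the Keccak
 * specification requires. The fixed-size INPUT_BUF144/136/104/72
 * variants let the compiler drop the limit tests when the rate is
 * known statically.
 */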

#endif

#define DECL64(x) sph_u64 x
#define MOV64(d, s) (d = s)
#define XOR64(d, a, b) (d = a ^ b)
#define AND64(d, a, b) (d = a & b)
#define OR64(d, a, b) (d = a | b)
#define NOT64(d, s) (d = SPH_T64(~s))
#define ROL64(d, v, n) (d = SPH_ROTL64(v, n))
#define XOR64_IOTA XOR64

#else

static const struct {
    sph_u32 high, low;
} RC[] = {
#if SPH_KECCAK_INTERLEAVE
    { SPH_C32(0x00000000), SPH_C32(0x00000001) },
    { SPH_C32(0x00000089), SPH_C32(0x00000000) },
    { SPH_C32(0x8000008B), SPH_C32(0x00000000) },
    { SPH_C32(0x80008080), SPH_C32(0x00000000) },
    { SPH_C32(0x0000008B), SPH_C32(0x00000001) },
    { SPH_C32(0x00008000), SPH_C32(0x00000001) },
    { SPH_C32(0x80008088), SPH_C32(0x00000001) },
    { SPH_C32(0x80000082), SPH_C32(0x00000001) },
    { SPH_C32(0x0000000B), SPH_C32(0x00000000) },
    { SPH_C32(0x0000000A), SPH_C32(0x00000000) },
    { SPH_C32(0x00008082), SPH_C32(0x00000001) },
    { SPH_C32(0x00008003), SPH_C32(0x00000000) },
    { SPH_C32(0x0000808B), SPH_C32(0x00000001) },
    { SPH_C32(0x8000000B), SPH_C32(0x00000001) },
    { SPH_C32(0x8000008A), SPH_C32(0x00000001) },
    { SPH_C32(0x80000081), SPH_C32(0x00000001) },
    { SPH_C32(0x80000081), SPH_C32(0x00000000) },
    { SPH_C32(0x80000008), SPH_C32(0x00000000) },
    { SPH_C32(0x00000083), SPH_C32(0x00000000) },
    { SPH_C32(0x80008003), SPH_C32(0x00000000) },
    { SPH_C32(0x80008088), SPH_C32(0x00000001) },
    { SPH_C32(0x80000088), SPH_C32(0x00000000) },
    { SPH_C32(0x00008000), SPH_C32(0x00000001) },
    { SPH_C32(0x80008082), SPH_C32(0x00000000) }
#else
    { SPH_C32(0x00000000), SPH_C32(0x00000001) },
    { SPH_C32(0x00000000), SPH_C32(0x00008082) },
    { SPH_C32(0x80000000), SPH_C32(0x0000808A) },
    { SPH_C32(0x80000000), SPH_C32(0x80008000) },
    { SPH_C32(0x00000000), SPH_C32(0x0000808B) },
    { SPH_C32(0x00000000), SPH_C32(0x80000001) },
    { SPH_C32(0x80000000), SPH_C32(0x80008081) },
    { SPH_C32(0x80000000), SPH_C32(0x00008009) },
    { SPH_C32(0x00000000), SPH_C32(0x0000008A) },
    { SPH_C32(0x00000000), SPH_C32(0x00000088) },
    { SPH_C32(0x00000000), SPH_C32(0x80008009) },
    { SPH_C32(0x00000000), SPH_C32(0x8000000A) },
    { SPH_C32(0x00000000), SPH_C32(0x8000808B) },
    { SPH_C32(0x80000000), SPH_C32(0x0000008B) },
    { SPH_C32(0x80000000), SPH_C32(0x00008089) },
    { SPH_C32(0x80000000), SPH_C32(0x00008003) },
    { SPH_C32(0x80000000), SPH_C32(0x00008002) },
    { SPH_C32(0x80000000), SPH_C32(0x00000080) },
    { SPH_C32(0x00000000), SPH_C32(0x0000800A) },
    { SPH_C32(0x80000000), SPH_C32(0x8000000A) },
    { SPH_C32(0x80000000), SPH_C32(0x80008081) },
    { SPH_C32(0x80000000), SPH_C32(0x00008080) },
    { SPH_C32(0x00000000), SPH_C32(0x80000001) },
    { SPH_C32(0x80000000), SPH_C32(0x80008008) }
#endif
};

#if SPH_KECCAK_INTERLEAVE

#define INTERLEAVE(xl, xh) do { \
        sph_u32 l, h, t; \
        l = (xl); h = (xh); \
        t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \
        t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \
        t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
        t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
        t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \
        t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \
        t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \
        t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \
        t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \
        l ^= t; h ^= t >> 16; \
        (xl) = l; (xh) = h; \
    } while (0)

#define UNINTERLEAVE(xl, xh) do { \
        sph_u32 l, h, t; \
        l = (xl); h = (xh); \
        t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \
        l ^= t; h ^= t >> 16; \
        t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \
        t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \
        t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \
        t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \
        t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
        t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
        t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \
        t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \
        (xl) = l; (xh) = h; \
    } while (0)
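
/*
 * With bit-interleaving, the even-indexed bits of a 64-bit lane live
 * in one 32-bit word and the odd-indexed bits in the other. A 64-bit
 * rotation by an even amount 2*k then splits into two independent
 * 32-bit rotations by k, and a rotation by an odd amount 2*k+1 into
 * two 32-bit rotations (by k+1 and by k) plus a swap of the two
 * words; the ROL64_even / ROL64_odd macros further below implement
 * exactly this decomposition.
 */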

#else

#define INTERLEAVE(l, h)
#define UNINTERLEAVE(l, h)

#endif

#if SPH_KECCAK_NOCOPY

#define a00l (kc->u.narrow[2 * 0 + 0])
#define a00h (kc->u.narrow[2 * 0 + 1])
#define a10l (kc->u.narrow[2 * 1 + 0])
#define a10h (kc->u.narrow[2 * 1 + 1])
#define a20l (kc->u.narrow[2 * 2 + 0])
#define a20h (kc->u.narrow[2 * 2 + 1])
#define a30l (kc->u.narrow[2 * 3 + 0])
#define a30h (kc->u.narrow[2 * 3 + 1])
#define a40l (kc->u.narrow[2 * 4 + 0])
#define a40h (kc->u.narrow[2 * 4 + 1])
#define a01l (kc->u.narrow[2 * 5 + 0])
#define a01h (kc->u.narrow[2 * 5 + 1])
#define a11l (kc->u.narrow[2 * 6 + 0])
#define a11h (kc->u.narrow[2 * 6 + 1])
#define a21l (kc->u.narrow[2 * 7 + 0])
#define a21h (kc->u.narrow[2 * 7 + 1])
#define a31l (kc->u.narrow[2 * 8 + 0])
#define a31h (kc->u.narrow[2 * 8 + 1])
#define a41l (kc->u.narrow[2 * 9 + 0])
#define a41h (kc->u.narrow[2 * 9 + 1])
#define a02l (kc->u.narrow[2 * 10 + 0])
#define a02h (kc->u.narrow[2 * 10 + 1])
#define a12l (kc->u.narrow[2 * 11 + 0])
#define a12h (kc->u.narrow[2 * 11 + 1])
#define a22l (kc->u.narrow[2 * 12 + 0])
#define a22h (kc->u.narrow[2 * 12 + 1])
#define a32l (kc->u.narrow[2 * 13 + 0])
#define a32h (kc->u.narrow[2 * 13 + 1])
#define a42l (kc->u.narrow[2 * 14 + 0])
#define a42h (kc->u.narrow[2 * 14 + 1])
#define a03l (kc->u.narrow[2 * 15 + 0])
#define a03h (kc->u.narrow[2 * 15 + 1])
#define a13l (kc->u.narrow[2 * 16 + 0])
#define a13h (kc->u.narrow[2 * 16 + 1])
#define a23l (kc->u.narrow[2 * 17 + 0])
#define a23h (kc->u.narrow[2 * 17 + 1])
#define a33l (kc->u.narrow[2 * 18 + 0])
#define a33h (kc->u.narrow[2 * 18 + 1])
#define a43l (kc->u.narrow[2 * 19 + 0])
#define a43h (kc->u.narrow[2 * 19 + 1])
#define a04l (kc->u.narrow[2 * 20 + 0])
#define a04h (kc->u.narrow[2 * 20 + 1])
#define a14l (kc->u.narrow[2 * 21 + 0])
#define a14h (kc->u.narrow[2 * 21 + 1])
#define a24l (kc->u.narrow[2 * 22 + 0])
#define a24h (kc->u.narrow[2 * 22 + 1])
#define a34l (kc->u.narrow[2 * 23 + 0])
#define a34h (kc->u.narrow[2 * 23 + 1])
#define a44l (kc->u.narrow[2 * 24 + 0])
#define a44h (kc->u.narrow[2 * 24 + 1])

#define DECL_STATE
#define READ_STATE(state)
#define WRITE_STATE(state)

#define INPUT_BUF(size) do { \
        size_t j; \
        for (j = 0; j < (size); j += 8) { \
            sph_u32 tl, th; \
            tl = sph_dec32le_aligned(buf + j + 0); \
            th = sph_dec32le_aligned(buf + j + 4); \
            INTERLEAVE(tl, th); \
            kc->u.narrow[(j >> 2) + 0] ^= tl; \
            kc->u.narrow[(j >> 2) + 1] ^= th; \
        } \
    } while (0)

#define INPUT_BUF144 INPUT_BUF(144)
#define INPUT_BUF136 INPUT_BUF(136)
#define INPUT_BUF104 INPUT_BUF(104)
#define INPUT_BUF72 INPUT_BUF(72)

#else

#define DECL_STATE \
    sph_u32 a00l, a00h, a01l, a01h, a02l, a02h, a03l, a03h, a04l, a04h; \
    sph_u32 a10l, a10h, a11l, a11h, a12l, a12h, a13l, a13h, a14l, a14h; \
    sph_u32 a20l, a20h, a21l, a21h, a22l, a22h, a23l, a23h, a24l, a24h; \
    sph_u32 a30l, a30h, a31l, a31h, a32l, a32h, a33l, a33h, a34l, a34h; \
    sph_u32 a40l, a40h, a41l, a41h, a42l, a42h, a43l, a43h, a44l, a44h;

#define READ_STATE(state) do { \
        a00l = (state)->u.narrow[2 * 0 + 0]; \
        a00h = (state)->u.narrow[2 * 0 + 1]; \
        a10l = (state)->u.narrow[2 * 1 + 0]; \
        a10h = (state)->u.narrow[2 * 1 + 1]; \
        a20l = (state)->u.narrow[2 * 2 + 0]; \
        a20h = (state)->u.narrow[2 * 2 + 1]; \
        a30l = (state)->u.narrow[2 * 3 + 0]; \
        a30h = (state)->u.narrow[2 * 3 + 1]; \
        a40l = (state)->u.narrow[2 * 4 + 0]; \
        a40h = (state)->u.narrow[2 * 4 + 1]; \
        a01l = (state)->u.narrow[2 * 5 + 0]; \
        a01h = (state)->u.narrow[2 * 5 + 1]; \
        a11l = (state)->u.narrow[2 * 6 + 0]; \
        a11h = (state)->u.narrow[2 * 6 + 1]; \
        a21l = (state)->u.narrow[2 * 7 + 0]; \
        a21h = (state)->u.narrow[2 * 7 + 1]; \
        a31l = (state)->u.narrow[2 * 8 + 0]; \
        a31h = (state)->u.narrow[2 * 8 + 1]; \
        a41l = (state)->u.narrow[2 * 9 + 0]; \
        a41h = (state)->u.narrow[2 * 9 + 1]; \
        a02l = (state)->u.narrow[2 * 10 + 0]; \
        a02h = (state)->u.narrow[2 * 10 + 1]; \
        a12l = (state)->u.narrow[2 * 11 + 0]; \
        a12h = (state)->u.narrow[2 * 11 + 1]; \
        a22l = (state)->u.narrow[2 * 12 + 0]; \
        a22h = (state)->u.narrow[2 * 12 + 1]; \
        a32l = (state)->u.narrow[2 * 13 + 0]; \
        a32h = (state)->u.narrow[2 * 13 + 1]; \
        a42l = (state)->u.narrow[2 * 14 + 0]; \
        a42h = (state)->u.narrow[2 * 14 + 1]; \
        a03l = (state)->u.narrow[2 * 15 + 0]; \
        a03h = (state)->u.narrow[2 * 15 + 1]; \
        a13l = (state)->u.narrow[2 * 16 + 0]; \
        a13h = (state)->u.narrow[2 * 16 + 1]; \
        a23l = (state)->u.narrow[2 * 17 + 0]; \
        a23h = (state)->u.narrow[2 * 17 + 1]; \
        a33l = (state)->u.narrow[2 * 18 + 0]; \
        a33h = (state)->u.narrow[2 * 18 + 1]; \
        a43l = (state)->u.narrow[2 * 19 + 0]; \
        a43h = (state)->u.narrow[2 * 19 + 1]; \
        a04l = (state)->u.narrow[2 * 20 + 0]; \
        a04h = (state)->u.narrow[2 * 20 + 1]; \
        a14l = (state)->u.narrow[2 * 21 + 0]; \
        a14h = (state)->u.narrow[2 * 21 + 1]; \
        a24l = (state)->u.narrow[2 * 22 + 0]; \
        a24h = (state)->u.narrow[2 * 22 + 1]; \
        a34l = (state)->u.narrow[2 * 23 + 0]; \
        a34h = (state)->u.narrow[2 * 23 + 1]; \
        a44l = (state)->u.narrow[2 * 24 + 0]; \
        a44h = (state)->u.narrow[2 * 24 + 1]; \
    } while (0)

#define WRITE_STATE(state) do { \
        (state)->u.narrow[2 * 0 + 0] = a00l; \
        (state)->u.narrow[2 * 0 + 1] = a00h; \
        (state)->u.narrow[2 * 1 + 0] = a10l; \
        (state)->u.narrow[2 * 1 + 1] = a10h; \
        (state)->u.narrow[2 * 2 + 0] = a20l; \
        (state)->u.narrow[2 * 2 + 1] = a20h; \
        (state)->u.narrow[2 * 3 + 0] = a30l; \
        (state)->u.narrow[2 * 3 + 1] = a30h; \
        (state)->u.narrow[2 * 4 + 0] = a40l; \
        (state)->u.narrow[2 * 4 + 1] = a40h; \
        (state)->u.narrow[2 * 5 + 0] = a01l; \
        (state)->u.narrow[2 * 5 + 1] = a01h; \
        (state)->u.narrow[2 * 6 + 0] = a11l; \
        (state)->u.narrow[2 * 6 + 1] = a11h; \
        (state)->u.narrow[2 * 7 + 0] = a21l; \
        (state)->u.narrow[2 * 7 + 1] = a21h; \
        (state)->u.narrow[2 * 8 + 0] = a31l; \
        (state)->u.narrow[2 * 8 + 1] = a31h; \
        (state)->u.narrow[2 * 9 + 0] = a41l; \
        (state)->u.narrow[2 * 9 + 1] = a41h; \
        (state)->u.narrow[2 * 10 + 0] = a02l; \
        (state)->u.narrow[2 * 10 + 1] = a02h; \
        (state)->u.narrow[2 * 11 + 0] = a12l; \
        (state)->u.narrow[2 * 11 + 1] = a12h; \
        (state)->u.narrow[2 * 12 + 0] = a22l; \
        (state)->u.narrow[2 * 12 + 1] = a22h; \
        (state)->u.narrow[2 * 13 + 0] = a32l; \
        (state)->u.narrow[2 * 13 + 1] = a32h; \
        (state)->u.narrow[2 * 14 + 0] = a42l; \
        (state)->u.narrow[2 * 14 + 1] = a42h; \
        (state)->u.narrow[2 * 15 + 0] = a03l; \
        (state)->u.narrow[2 * 15 + 1] = a03h; \
        (state)->u.narrow[2 * 16 + 0] = a13l; \
        (state)->u.narrow[2 * 16 + 1] = a13h; \
        (state)->u.narrow[2 * 17 + 0] = a23l; \
        (state)->u.narrow[2 * 17 + 1] = a23h; \
        (state)->u.narrow[2 * 18 + 0] = a33l; \
        (state)->u.narrow[2 * 18 + 1] = a33h; \
        (state)->u.narrow[2 * 19 + 0] = a43l; \
        (state)->u.narrow[2 * 19 + 1] = a43h; \
        (state)->u.narrow[2 * 20 + 0] = a04l; \
        (state)->u.narrow[2 * 20 + 1] = a04h; \
        (state)->u.narrow[2 * 21 + 0] = a14l; \
        (state)->u.narrow[2 * 21 + 1] = a14h; \
        (state)->u.narrow[2 * 22 + 0] = a24l; \
        (state)->u.narrow[2 * 22 + 1] = a24h; \
        (state)->u.narrow[2 * 23 + 0] = a34l; \
        (state)->u.narrow[2 * 23 + 1] = a34h; \
        (state)->u.narrow[2 * 24 + 0] = a44l; \
        (state)->u.narrow[2 * 24 + 1] = a44h; \
    } while (0)

#define READ64(d, off) do { \
        sph_u32 tl, th; \
        tl = sph_dec32le_aligned(buf + (off)); \
        th = sph_dec32le_aligned(buf + (off) + 4); \
        INTERLEAVE(tl, th); \
        d ## l ^= tl; \
        d ## h ^= th; \
    } while (0)

#define INPUT_BUF144 do { \
        READ64(a00, 0); \
        READ64(a10, 8); \
        READ64(a20, 16); \
        READ64(a30, 24); \
        READ64(a40, 32); \
        READ64(a01, 40); \
        READ64(a11, 48); \
        READ64(a21, 56); \
        READ64(a31, 64); \
        READ64(a41, 72); \
        READ64(a02, 80); \
        READ64(a12, 88); \
        READ64(a22, 96); \
        READ64(a32, 104); \
        READ64(a42, 112); \
        READ64(a03, 120); \
        READ64(a13, 128); \
        READ64(a23, 136); \
    } while (0)

#define INPUT_BUF136 do { \
        READ64(a00, 0); \
        READ64(a10, 8); \
        READ64(a20, 16); \
        READ64(a30, 24); \
        READ64(a40, 32); \
        READ64(a01, 40); \
        READ64(a11, 48); \
        READ64(a21, 56); \
        READ64(a31, 64); \
        READ64(a41, 72); \
        READ64(a02, 80); \
        READ64(a12, 88); \
        READ64(a22, 96); \
        READ64(a32, 104); \
        READ64(a42, 112); \
        READ64(a03, 120); \
        READ64(a13, 128); \
    } while (0)

#define INPUT_BUF104 do { \
        READ64(a00, 0); \
        READ64(a10, 8); \
        READ64(a20, 16); \
        READ64(a30, 24); \
        READ64(a40, 32); \
        READ64(a01, 40); \
        READ64(a11, 48); \
        READ64(a21, 56); \
        READ64(a31, 64); \
        READ64(a41, 72); \
        READ64(a02, 80); \
        READ64(a12, 88); \
        READ64(a22, 96); \
    } while (0)

#define INPUT_BUF72 do { \
        READ64(a00, 0); \
        READ64(a10, 8); \
        READ64(a20, 16); \
        READ64(a30, 24); \
        READ64(a40, 32); \
        READ64(a01, 40); \
        READ64(a11, 48); \
        READ64(a21, 56); \
        READ64(a31, 64); \
    } while (0)

#define INPUT_BUF(lim) do { \
        READ64(a00, 0); \
        READ64(a10, 8); \
        READ64(a20, 16); \
        READ64(a30, 24); \
        READ64(a40, 32); \
        READ64(a01, 40); \
        READ64(a11, 48); \
        READ64(a21, 56); \
        READ64(a31, 64); \
        if ((lim) == 72) \
            break; \
        READ64(a41, 72); \
        READ64(a02, 80); \
        READ64(a12, 88); \
        READ64(a22, 96); \
        if ((lim) == 104) \
            break; \
        READ64(a32, 104); \
        READ64(a42, 112); \
        READ64(a03, 120); \
        READ64(a13, 128); \
        if ((lim) == 136) \
            break; \
        READ64(a23, 136); \
    } while (0)

#endif

#define DECL64(x) sph_u64 x ## l, x ## h
#define MOV64(d, s) (d ## l = s ## l, d ## h = s ## h)
#define XOR64(d, a, b) (d ## l = a ## l ^ b ## l, d ## h = a ## h ^ b ## h)
#define AND64(d, a, b) (d ## l = a ## l & b ## l, d ## h = a ## h & b ## h)
#define OR64(d, a, b) (d ## l = a ## l | b ## l, d ## h = a ## h | b ## h)
#define NOT64(d, s) (d ## l = SPH_T32(~s ## l), d ## h = SPH_T32(~s ## h))
#define ROL64(d, v, n) ROL64_ ## n(d, v)

#if SPH_KECCAK_INTERLEAVE

#define ROL64_odd1(d, v) do { \
        sph_u32 tmp; \
        tmp = v ## l; \
        d ## l = SPH_T32(v ## h << 1) | (v ## h >> 31); \
        d ## h = tmp; \
    } while (0)

#define ROL64_odd63(d, v) do { \
        sph_u32 tmp; \
        tmp = SPH_T32(v ## l << 31) | (v ## l >> 1); \
        d ## l = v ## h; \
        d ## h = tmp; \
    } while (0)

#define ROL64_odd(d, v, n) do { \
        sph_u32 tmp; \
        tmp = SPH_T32(v ## l << (n - 1)) | (v ## l >> (33 - n)); \
        d ## l = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \
        d ## h = tmp; \
    } while (0)

#define ROL64_even(d, v, n) do { \
        d ## l = SPH_T32(v ## l << n) | (v ## l >> (32 - n)); \
        d ## h = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \
    } while (0)

#define ROL64_0(d, v)
#define ROL64_1(d, v) ROL64_odd1(d, v)
#define ROL64_2(d, v) ROL64_even(d, v, 1)
#define ROL64_3(d, v) ROL64_odd( d, v, 2)
#define ROL64_4(d, v) ROL64_even(d, v, 2)
#define ROL64_5(d, v) ROL64_odd( d, v, 3)
#define ROL64_6(d, v) ROL64_even(d, v, 3)
#define ROL64_7(d, v) ROL64_odd( d, v, 4)
#define ROL64_8(d, v) ROL64_even(d, v, 4)
#define ROL64_9(d, v) ROL64_odd( d, v, 5)
#define ROL64_10(d, v) ROL64_even(d, v, 5)
#define ROL64_11(d, v) ROL64_odd( d, v, 6)
#define ROL64_12(d, v) ROL64_even(d, v, 6)
#define ROL64_13(d, v) ROL64_odd( d, v, 7)
#define ROL64_14(d, v) ROL64_even(d, v, 7)
#define ROL64_15(d, v) ROL64_odd( d, v, 8)
#define ROL64_16(d, v) ROL64_even(d, v, 8)
#define ROL64_17(d, v) ROL64_odd( d, v, 9)
#define ROL64_18(d, v) ROL64_even(d, v, 9)
#define ROL64_19(d, v) ROL64_odd( d, v, 10)
#define ROL64_20(d, v) ROL64_even(d, v, 10)
#define ROL64_21(d, v) ROL64_odd( d, v, 11)
#define ROL64_22(d, v) ROL64_even(d, v, 11)
#define ROL64_23(d, v) ROL64_odd( d, v, 12)
#define ROL64_24(d, v) ROL64_even(d, v, 12)
#define ROL64_25(d, v) ROL64_odd( d, v, 13)
#define ROL64_26(d, v) ROL64_even(d, v, 13)
#define ROL64_27(d, v) ROL64_odd( d, v, 14)
#define ROL64_28(d, v) ROL64_even(d, v, 14)
#define ROL64_29(d, v) ROL64_odd( d, v, 15)
#define ROL64_30(d, v) ROL64_even(d, v, 15)
#define ROL64_31(d, v) ROL64_odd( d, v, 16)
#define ROL64_32(d, v) ROL64_even(d, v, 16)
#define ROL64_33(d, v) ROL64_odd( d, v, 17)
#define ROL64_34(d, v) ROL64_even(d, v, 17)
#define ROL64_35(d, v) ROL64_odd( d, v, 18)
#define ROL64_36(d, v) ROL64_even(d, v, 18)
#define ROL64_37(d, v) ROL64_odd( d, v, 19)
#define ROL64_38(d, v) ROL64_even(d, v, 19)
#define ROL64_39(d, v) ROL64_odd( d, v, 20)
#define ROL64_40(d, v) ROL64_even(d, v, 20)
#define ROL64_41(d, v) ROL64_odd( d, v, 21)
#define ROL64_42(d, v) ROL64_even(d, v, 21)
#define ROL64_43(d, v) ROL64_odd( d, v, 22)
#define ROL64_44(d, v) ROL64_even(d, v, 22)
#define ROL64_45(d, v) ROL64_odd( d, v, 23)
#define ROL64_46(d, v) ROL64_even(d, v, 23)
#define ROL64_47(d, v) ROL64_odd( d, v, 24)
#define ROL64_48(d, v) ROL64_even(d, v, 24)
#define ROL64_49(d, v) ROL64_odd( d, v, 25)
#define ROL64_50(d, v) ROL64_even(d, v, 25)
#define ROL64_51(d, v) ROL64_odd( d, v, 26)
#define ROL64_52(d, v) ROL64_even(d, v, 26)
#define ROL64_53(d, v) ROL64_odd( d, v, 27)
#define ROL64_54(d, v) ROL64_even(d, v, 27)
#define ROL64_55(d, v) ROL64_odd( d, v, 28)
#define ROL64_56(d, v) ROL64_even(d, v, 28)
#define ROL64_57(d, v) ROL64_odd( d, v, 29)
#define ROL64_58(d, v) ROL64_even(d, v, 29)
#define ROL64_59(d, v) ROL64_odd( d, v, 30)
#define ROL64_60(d, v) ROL64_even(d, v, 30)
#define ROL64_61(d, v) ROL64_odd( d, v, 31)
#define ROL64_62(d, v) ROL64_even(d, v, 31)
#define ROL64_63(d, v) ROL64_odd63(d, v)

#else

#define ROL64_small(d, v, n) do { \
        sph_u32 tmp; \
        tmp = SPH_T32(v ## l << n) | (v ## h >> (32 - n)); \
        d ## h = SPH_T32(v ## h << n) | (v ## l >> (32 - n)); \
        d ## l = tmp; \
    } while (0)

#define ROL64_0(d, v) 0
#define ROL64_1(d, v) ROL64_small(d, v, 1)
#define ROL64_2(d, v) ROL64_small(d, v, 2)
#define ROL64_3(d, v) ROL64_small(d, v, 3)
#define ROL64_4(d, v) ROL64_small(d, v, 4)
#define ROL64_5(d, v) ROL64_small(d, v, 5)
#define ROL64_6(d, v) ROL64_small(d, v, 6)
#define ROL64_7(d, v) ROL64_small(d, v, 7)
#define ROL64_8(d, v) ROL64_small(d, v, 8)
#define ROL64_9(d, v) ROL64_small(d, v, 9)
#define ROL64_10(d, v) ROL64_small(d, v, 10)
#define ROL64_11(d, v) ROL64_small(d, v, 11)
#define ROL64_12(d, v) ROL64_small(d, v, 12)
#define ROL64_13(d, v) ROL64_small(d, v, 13)
#define ROL64_14(d, v) ROL64_small(d, v, 14)
#define ROL64_15(d, v) ROL64_small(d, v, 15)
#define ROL64_16(d, v) ROL64_small(d, v, 16)
#define ROL64_17(d, v) ROL64_small(d, v, 17)
#define ROL64_18(d, v) ROL64_small(d, v, 18)
#define ROL64_19(d, v) ROL64_small(d, v, 19)
#define ROL64_20(d, v) ROL64_small(d, v, 20)
#define ROL64_21(d, v) ROL64_small(d, v, 21)
#define ROL64_22(d, v) ROL64_small(d, v, 22)
#define ROL64_23(d, v) ROL64_small(d, v, 23)
#define ROL64_24(d, v) ROL64_small(d, v, 24)
#define ROL64_25(d, v) ROL64_small(d, v, 25)
#define ROL64_26(d, v) ROL64_small(d, v, 26)
#define ROL64_27(d, v) ROL64_small(d, v, 27)
#define ROL64_28(d, v) ROL64_small(d, v, 28)
#define ROL64_29(d, v) ROL64_small(d, v, 29)
#define ROL64_30(d, v) ROL64_small(d, v, 30)
#define ROL64_31(d, v) ROL64_small(d, v, 31)

#define ROL64_32(d, v) do { \
        sph_u32 tmp; \
        tmp = v ## l; \
        d ## l = v ## h; \
        d ## h = tmp; \
    } while (0)

#define ROL64_big(d, v, n) do { \
        sph_u32 trl, trh; \
        ROL64_small(tr, v, n); \
        d ## h = trl; \
        d ## l = trh; \
    } while (0)

#define ROL64_33(d, v) ROL64_big(d, v, 1)
#define ROL64_34(d, v) ROL64_big(d, v, 2)
#define ROL64_35(d, v) ROL64_big(d, v, 3)
#define ROL64_36(d, v) ROL64_big(d, v, 4)
#define ROL64_37(d, v) ROL64_big(d, v, 5)
#define ROL64_38(d, v) ROL64_big(d, v, 6)
#define ROL64_39(d, v) ROL64_big(d, v, 7)
#define ROL64_40(d, v) ROL64_big(d, v, 8)
#define ROL64_41(d, v) ROL64_big(d, v, 9)
#define ROL64_42(d, v) ROL64_big(d, v, 10)
#define ROL64_43(d, v) ROL64_big(d, v, 11)
#define ROL64_44(d, v) ROL64_big(d, v, 12)
#define ROL64_45(d, v) ROL64_big(d, v, 13)
#define ROL64_46(d, v) ROL64_big(d, v, 14)
#define ROL64_47(d, v) ROL64_big(d, v, 15)
#define ROL64_48(d, v) ROL64_big(d, v, 16)
#define ROL64_49(d, v) ROL64_big(d, v, 17)
#define ROL64_50(d, v) ROL64_big(d, v, 18)
#define ROL64_51(d, v) ROL64_big(d, v, 19)
#define ROL64_52(d, v) ROL64_big(d, v, 20)
#define ROL64_53(d, v) ROL64_big(d, v, 21)
#define ROL64_54(d, v) ROL64_big(d, v, 22)
#define ROL64_55(d, v) ROL64_big(d, v, 23)
#define ROL64_56(d, v) ROL64_big(d, v, 24)
#define ROL64_57(d, v) ROL64_big(d, v, 25)
#define ROL64_58(d, v) ROL64_big(d, v, 26)
#define ROL64_59(d, v) ROL64_big(d, v, 27)
#define ROL64_60(d, v) ROL64_big(d, v, 28)
#define ROL64_61(d, v) ROL64_big(d, v, 29)
#define ROL64_62(d, v) ROL64_big(d, v, 30)
#define ROL64_63(d, v) ROL64_big(d, v, 31)

#endif

#define XOR64_IOTA(d, s, k) \
    (d ## l = s ## l ^ k.low, d ## h = s ## h ^ k.high)

#endif

#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
        DECL64(tt0); \
        DECL64(tt1); \
        DECL64(tt2); \
        DECL64(tt3); \
        XOR64(tt0, d0, d1); \
        XOR64(tt1, d2, d3); \
        XOR64(tt0, tt0, d4); \
        XOR64(tt0, tt0, tt1); \
        ROL64(tt0, tt0, 1); \
        XOR64(tt2, c0, c1); \
        XOR64(tt3, c2, c3); \
        XOR64(tt0, tt0, c4); \
        XOR64(tt2, tt2, tt3); \
        XOR64(t, tt0, tt2); \
    } while (0)
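
/*
 * TH_ELT computes one theta column value: the parity of column c
 * XORed with the parity of column d rotated left by one bit, i.e.
 * t = (c0^c1^c2^c3^c4) ^ ROL64(d0^d1^d2^d3^d4, 1).
 */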

#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
        b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
        b40, b41, b42, b43, b44) \
    do { \
        DECL64(t0); \
        DECL64(t1); \
        DECL64(t2); \
        DECL64(t3); \
        DECL64(t4); \
        TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
        TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
        TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
        TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
        TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
        XOR64(b00, b00, t0); \
        XOR64(b01, b01, t0); \
        XOR64(b02, b02, t0); \
        XOR64(b03, b03, t0); \
        XOR64(b04, b04, t0); \
        XOR64(b10, b10, t1); \
        XOR64(b11, b11, t1); \
        XOR64(b12, b12, t1); \
        XOR64(b13, b13, t1); \
        XOR64(b14, b14, t1); \
        XOR64(b20, b20, t2); \
        XOR64(b21, b21, t2); \
        XOR64(b22, b22, t2); \
        XOR64(b23, b23, t2); \
        XOR64(b24, b24, t2); \
        XOR64(b30, b30, t3); \
        XOR64(b31, b31, t3); \
        XOR64(b32, b32, t3); \
        XOR64(b33, b33, t3); \
        XOR64(b34, b34, t3); \
        XOR64(b40, b40, t4); \
        XOR64(b41, b41, t4); \
        XOR64(b42, b42, t4); \
        XOR64(b43, b43, t4); \
        XOR64(b44, b44, t4); \
    } while (0)

#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
        b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
        b40, b41, b42, b43, b44) \
    do { \
        /* ROL64(b00, b00, 0); */ \
        ROL64(b01, b01, 36); \
        ROL64(b02, b02, 3); \
        ROL64(b03, b03, 41); \
        ROL64(b04, b04, 18); \
        ROL64(b10, b10, 1); \
        ROL64(b11, b11, 44); \
        ROL64(b12, b12, 10); \
        ROL64(b13, b13, 45); \
        ROL64(b14, b14, 2); \
        ROL64(b20, b20, 62); \
        ROL64(b21, b21, 6); \
        ROL64(b22, b22, 43); \
        ROL64(b23, b23, 15); \
        ROL64(b24, b24, 61); \
        ROL64(b30, b30, 28); \
        ROL64(b31, b31, 55); \
        ROL64(b32, b32, 25); \
        ROL64(b33, b33, 21); \
        ROL64(b34, b34, 56); \
        ROL64(b40, b40, 27); \
        ROL64(b41, b41, 20); \
        ROL64(b42, b42, 39); \
        ROL64(b43, b43, 8); \
        ROL64(b44, b44, 14); \
    } while (0)

/*
 * The KHI macro integrates the "lane complement" optimization. On input,
 * some words are complemented:
 *    a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
 * On output, the following words are complemented:
 *    a04 a10 a20 a22 a23 a31
 *
 * The (implicit) permutation and the theta expansion will bring back
 * the input mask for the next round.
 */
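
/*
 * The identity behind the optimization (a sketch): the chi step
 * computes d = a ^ (~b & c). When the complement b' = ~b is already
 * stored, this becomes d = a ^ (b' & c), a KHI_XA with no NOT at all;
 * when instead c' = ~c is stored, then ~b & c = ~(b | c'), so
 * d = ~(a ^ (b | c')), a KHI_XO whose output is complemented.
 * Tracking which lanes carry complemented values thus removes most
 * NOT operations from the round function.
 */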

#define KHI_XO(d, a, b, c) do { \
        DECL64(kt); \
        OR64(kt, b, c); \
        XOR64(d, a, kt); \
    } while (0)

#define KHI_XA(d, a, b, c) do { \
        DECL64(kt); \
        AND64(kt, b, c); \
        XOR64(d, a, kt); \
    } while (0)

#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
        b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
        b40, b41, b42, b43, b44) \
    do { \
        DECL64(c0); \
        DECL64(c1); \
        DECL64(c2); \
        DECL64(c3); \
        DECL64(c4); \
        DECL64(bnn); \
        NOT64(bnn, b20); \
        KHI_XO(c0, b00, b10, b20); \
        KHI_XO(c1, b10, bnn, b30); \
        KHI_XA(c2, b20, b30, b40); \
        KHI_XO(c3, b30, b40, b00); \
        KHI_XA(c4, b40, b00, b10); \
        MOV64(b00, c0); \
        MOV64(b10, c1); \
        MOV64(b20, c2); \
        MOV64(b30, c3); \
        MOV64(b40, c4); \
        NOT64(bnn, b41); \
        KHI_XO(c0, b01, b11, b21); \
        KHI_XA(c1, b11, b21, b31); \
        KHI_XO(c2, b21, b31, bnn); \
        KHI_XO(c3, b31, b41, b01); \
        KHI_XA(c4, b41, b01, b11); \
        MOV64(b01, c0); \
        MOV64(b11, c1); \
        MOV64(b21, c2); \
        MOV64(b31, c3); \
        MOV64(b41, c4); \
        NOT64(bnn, b32); \
        KHI_XO(c0, b02, b12, b22); \
        KHI_XA(c1, b12, b22, b32); \
        KHI_XA(c2, b22, bnn, b42); \
        KHI_XO(c3, bnn, b42, b02); \
        KHI_XA(c4, b42, b02, b12); \
        MOV64(b02, c0); \
        MOV64(b12, c1); \
        MOV64(b22, c2); \
        MOV64(b32, c3); \
        MOV64(b42, c4); \
        NOT64(bnn, b33); \
        KHI_XA(c0, b03, b13, b23); \
        KHI_XO(c1, b13, b23, b33); \
        KHI_XO(c2, b23, bnn, b43); \
        KHI_XA(c3, bnn, b43, b03); \
        KHI_XO(c4, b43, b03, b13); \
        MOV64(b03, c0); \
        MOV64(b13, c1); \
        MOV64(b23, c2); \
        MOV64(b33, c3); \
        MOV64(b43, c4); \
        NOT64(bnn, b14); \
        KHI_XA(c0, b04, bnn, b24); \
        KHI_XO(c1, bnn, b24, b34); \
        KHI_XA(c2, b24, b34, b44); \
        KHI_XO(c3, b34, b44, b04); \
        KHI_XA(c4, b44, b04, b14); \
        MOV64(b04, c0); \
        MOV64(b14, c1); \
        MOV64(b24, c2); \
        MOV64(b34, c3); \
        MOV64(b44, c4); \
    } while (0)

#define IOTA(r) XOR64_IOTA(a00, a00, r)

#define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
    a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
#define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
    a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
#define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
    a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
#define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
    a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
#define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
    a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
#define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
    a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
#define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
    a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
#define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
    a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
#define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
    a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
#define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
    a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
#define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
    a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
#define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
    a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
#define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
    a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
#define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
    a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
#define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
    a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
#define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
    a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
#define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
    a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
#define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
    a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
#define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
    a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
#define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
    a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
#define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
    a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
#define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
    a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
#define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
    a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
#define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
    a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
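
/*
 * P0..P23 list the 25 lanes in the order produced by applying the pi
 * permutation 0 to 23 times to the base ordering P0; since pi has
 * order 24, P24 would be P0 again. Writing the round macros against
 * P ## r applies pi implicitly, with no data movement at all. After r
 * unrolled rounds, the matching Pr_TO_P0 macro below moves the words
 * back into the P0 naming so the loop can repeat.
 */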

#define P1_TO_P0 do { \
        DECL64(t); \
        MOV64(t, a01); \
        MOV64(a01, a30); \
        MOV64(a30, a33); \
        MOV64(a33, a23); \
        MOV64(a23, a12); \
        MOV64(a12, a21); \
        MOV64(a21, a02); \
        MOV64(a02, a10); \
        MOV64(a10, a11); \
        MOV64(a11, a41); \
        MOV64(a41, a24); \
        MOV64(a24, a42); \
        MOV64(a42, a04); \
        MOV64(a04, a20); \
        MOV64(a20, a22); \
        MOV64(a22, a32); \
        MOV64(a32, a43); \
        MOV64(a43, a34); \
        MOV64(a34, a03); \
        MOV64(a03, a40); \
        MOV64(a40, a44); \
        MOV64(a44, a14); \
        MOV64(a14, a31); \
        MOV64(a31, a13); \
        MOV64(a13, t); \
    } while (0)

#define P2_TO_P0 do { \
        DECL64(t); \
        MOV64(t, a01); \
        MOV64(a01, a33); \
        MOV64(a33, a12); \
        MOV64(a12, a02); \
        MOV64(a02, a11); \
        MOV64(a11, a24); \
        MOV64(a24, a04); \
        MOV64(a04, a22); \
        MOV64(a22, a43); \
        MOV64(a43, a03); \
        MOV64(a03, a44); \
        MOV64(a44, a31); \
        MOV64(a31, t); \
        MOV64(t, a10); \
        MOV64(a10, a41); \
        MOV64(a41, a42); \
        MOV64(a42, a20); \
        MOV64(a20, a32); \
        MOV64(a32, a34); \
        MOV64(a34, a40); \
        MOV64(a40, a14); \
        MOV64(a14, a13); \
        MOV64(a13, a30); \
        MOV64(a30, a23); \
        MOV64(a23, a21); \
        MOV64(a21, t); \
    } while (0)

#define P4_TO_P0 do { \
        DECL64(t); \
        MOV64(t, a01); \
        MOV64(a01, a12); \
        MOV64(a12, a11); \
        MOV64(a11, a04); \
        MOV64(a04, a43); \
        MOV64(a43, a44); \
        MOV64(a44, t); \
        MOV64(t, a02); \
        MOV64(a02, a24); \
        MOV64(a24, a22); \
        MOV64(a22, a03); \
        MOV64(a03, a31); \
        MOV64(a31, a33); \
        MOV64(a33, t); \
        MOV64(t, a10); \
        MOV64(a10, a42); \
        MOV64(a42, a32); \
        MOV64(a32, a40); \
        MOV64(a40, a13); \
        MOV64(a13, a23); \
        MOV64(a23, t); \
        MOV64(t, a14); \
        MOV64(a14, a30); \
        MOV64(a30, a21); \
        MOV64(a21, a41); \
        MOV64(a41, a20); \
        MOV64(a20, a34); \
        MOV64(a34, t); \
    } while (0)

#define P6_TO_P0 do { \
        DECL64(t); \
        MOV64(t, a01); \
        MOV64(a01, a02); \
        MOV64(a02, a04); \
        MOV64(a04, a03); \
        MOV64(a03, t); \
        MOV64(t, a10); \
        MOV64(a10, a20); \
        MOV64(a20, a40); \
        MOV64(a40, a30); \
        MOV64(a30, t); \
        MOV64(t, a11); \
        MOV64(a11, a22); \
        MOV64(a22, a44); \
        MOV64(a44, a33); \
        MOV64(a33, t); \
        MOV64(t, a12); \
        MOV64(a12, a24); \
        MOV64(a24, a43); \
        MOV64(a43, a31); \
        MOV64(a31, t); \
        MOV64(t, a13); \
        MOV64(a13, a21); \
        MOV64(a21, a42); \
        MOV64(a42, a34); \
        MOV64(a34, t); \
        MOV64(t, a14); \
        MOV64(a14, a23); \
        MOV64(a23, a41); \
        MOV64(a41, a32); \
        MOV64(a32, t); \
    } while (0)

#define P8_TO_P0 do { \
        DECL64(t); \
        MOV64(t, a01); \
        MOV64(a01, a11); \
        MOV64(a11, a43); \
        MOV64(a43, t); \
        MOV64(t, a02); \
        MOV64(a02, a22); \
        MOV64(a22, a31); \
        MOV64(a31, t); \
        MOV64(t, a03); \
        MOV64(a03, a33); \
        MOV64(a33, a24); \
        MOV64(a24, t); \
        MOV64(t, a04); \
        MOV64(a04, a44); \
        MOV64(a44, a12); \
        MOV64(a12, t); \
        MOV64(t, a10); \
        MOV64(a10, a32); \
        MOV64(a32, a13); \
        MOV64(a13, t); \
        MOV64(t, a14); \
        MOV64(a14, a21); \
        MOV64(a21, a20); \
        MOV64(a20, t); \
        MOV64(t, a23); \
        MOV64(a23, a42); \
        MOV64(a42, a40); \
        MOV64(a40, t); \
        MOV64(t, a30); \
        MOV64(a30, a41); \
        MOV64(a41, a34); \
        MOV64(a34, t); \
    } while (0)

#define P12_TO_P0 do { \
        DECL64(t); \
        MOV64(t, a01); \
        MOV64(a01, a04); \
        MOV64(a04, t); \
        MOV64(t, a02); \
        MOV64(a02, a03); \
        MOV64(a03, t); \
        MOV64(t, a10); \
        MOV64(a10, a40); \
        MOV64(a40, t); \
        MOV64(t, a11); \
        MOV64(a11, a44); \
        MOV64(a44, t); \
        MOV64(t, a12); \
        MOV64(a12, a43); \
        MOV64(a43, t); \
        MOV64(t, a13); \
        MOV64(a13, a42); \
        MOV64(a42, t); \
        MOV64(t, a14); \
        MOV64(a14, a41); \
        MOV64(a41, t); \
        MOV64(t, a20); \
        MOV64(a20, a30); \
        MOV64(a30, t); \
        MOV64(t, a21); \
        MOV64(a21, a34); \
        MOV64(a34, t); \
        MOV64(t, a22); \
        MOV64(a22, a33); \
        MOV64(a33, t); \
        MOV64(t, a23); \
        MOV64(a23, a32); \
        MOV64(a32, t); \
        MOV64(t, a24); \
        MOV64(a24, a31); \
        MOV64(a31, t); \
    } while (0)

#define LPAR (
#define RPAR )

#define KF_ELT(r, s, k) do { \
        THETA LPAR P ## r RPAR; \
        RHO LPAR P ## r RPAR; \
        KHI LPAR P ## s RPAR; \
        IOTA(k); \
    } while (0)
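
/*
 * One round of Keccak-f: theta and rho on the lanes in ordering
 * P ## r, then chi (KHI) on the next ordering P ## s (which performs
 * the pi step implicitly), then iota. The LPAR/RPAR trick makes the
 * parentheses appear only after P ## r has been expanded, so that
 * THETA, RHO and KHI each receive 25 separate arguments rather than
 * a single token.
 */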

#define DO(x) x

#define KECCAK_F_1600 DO(KECCAK_F_1600_)

#if SPH_KECCAK_UNROLL == 1

#define KECCAK_F_1600_ do { \
        int j; \
        for (j = 0; j < 24; j ++) { \
            KF_ELT( 0, 1, RC[j + 0]); \
            P1_TO_P0; \
        } \
    } while (0)

#elif SPH_KECCAK_UNROLL == 2

#define KECCAK_F_1600_ do { \
        int j; \
        for (j = 0; j < 24; j += 2) { \
            KF_ELT( 0, 1, RC[j + 0]); \
            KF_ELT( 1, 2, RC[j + 1]); \
            P2_TO_P0; \
        } \
    } while (0)

#elif SPH_KECCAK_UNROLL == 4

#define KECCAK_F_1600_ do { \
        int j; \
        for (j = 0; j < 24; j += 4) { \
            KF_ELT( 0, 1, RC[j + 0]); \
            KF_ELT( 1, 2, RC[j + 1]); \
            KF_ELT( 2, 3, RC[j + 2]); \
            KF_ELT( 3, 4, RC[j + 3]); \
            P4_TO_P0; \
        } \
    } while (0)

#elif SPH_KECCAK_UNROLL == 6

#define KECCAK_F_1600_ do { \
        int j; \
        for (j = 0; j < 24; j += 6) { \
            KF_ELT( 0, 1, RC[j + 0]); \
            KF_ELT( 1, 2, RC[j + 1]); \
            KF_ELT( 2, 3, RC[j + 2]); \
            KF_ELT( 3, 4, RC[j + 3]); \
            KF_ELT( 4, 5, RC[j + 4]); \
            KF_ELT( 5, 6, RC[j + 5]); \
            P6_TO_P0; \
        } \
    } while (0)

#elif SPH_KECCAK_UNROLL == 8

#define KECCAK_F_1600_ do { \
        int j; \
        for (j = 0; j < 24; j += 8) { \
            KF_ELT( 0, 1, RC[j + 0]); \
            KF_ELT( 1, 2, RC[j + 1]); \
            KF_ELT( 2, 3, RC[j + 2]); \
            KF_ELT( 3, 4, RC[j + 3]); \
            KF_ELT( 4, 5, RC[j + 4]); \
            KF_ELT( 5, 6, RC[j + 5]); \
            KF_ELT( 6, 7, RC[j + 6]); \
            KF_ELT( 7, 8, RC[j + 7]); \
            P8_TO_P0; \
        } \
    } while (0)

#elif SPH_KECCAK_UNROLL == 12

#define KECCAK_F_1600_ do { \
        int j; \
        for (j = 0; j < 24; j += 12) { \
            KF_ELT( 0, 1, RC[j + 0]); \
            KF_ELT( 1, 2, RC[j + 1]); \
            KF_ELT( 2, 3, RC[j + 2]); \
            KF_ELT( 3, 4, RC[j + 3]); \
            KF_ELT( 4, 5, RC[j + 4]); \
            KF_ELT( 5, 6, RC[j + 5]); \
            KF_ELT( 6, 7, RC[j + 6]); \
            KF_ELT( 7, 8, RC[j + 7]); \
            KF_ELT( 8, 9, RC[j + 8]); \
            KF_ELT( 9, 10, RC[j + 9]); \
            KF_ELT(10, 11, RC[j + 10]); \
            KF_ELT(11, 12, RC[j + 11]); \
            P12_TO_P0; \
        } \
    } while (0)

#elif SPH_KECCAK_UNROLL == 0

#define KECCAK_F_1600_ do { \
        KF_ELT( 0, 1, RC[ 0]); \
        KF_ELT( 1, 2, RC[ 1]); \
        KF_ELT( 2, 3, RC[ 2]); \
        KF_ELT( 3, 4, RC[ 3]); \
        KF_ELT( 4, 5, RC[ 4]); \
        KF_ELT( 5, 6, RC[ 5]); \
        KF_ELT( 6, 7, RC[ 6]); \
        KF_ELT( 7, 8, RC[ 7]); \
        KF_ELT( 8, 9, RC[ 8]); \
        KF_ELT( 9, 10, RC[ 9]); \
        KF_ELT(10, 11, RC[10]); \
        KF_ELT(11, 12, RC[11]); \
        KF_ELT(12, 13, RC[12]); \
        KF_ELT(13, 14, RC[13]); \
        KF_ELT(14, 15, RC[14]); \
        KF_ELT(15, 16, RC[15]); \
        KF_ELT(16, 17, RC[16]); \
        KF_ELT(17, 18, RC[17]); \
        KF_ELT(18, 19, RC[18]); \
        KF_ELT(19, 20, RC[19]); \
        KF_ELT(20, 21, RC[20]); \
        KF_ELT(21, 22, RC[21]); \
        KF_ELT(22, 23, RC[22]); \
        KF_ELT(23, 0, RC[23]); \
    } while (0)

#else

#error Unimplemented unroll count for Keccak.

#endif

static void
keccak_init(sph_keccak_context *kc, unsigned out_size)
{
    int i;

#if SPH_KECCAK_64
    for (i = 0; i < 25; i ++)
        kc->u.wide[i] = 0;
    /*
     * Initialization for the "lane complement".
     */
    kc->u.wide[ 1] = SPH_C64(0xFFFFFFFFFFFFFFFF);
    kc->u.wide[ 2] = SPH_C64(0xFFFFFFFFFFFFFFFF);
    kc->u.wide[ 8] = SPH_C64(0xFFFFFFFFFFFFFFFF);
    kc->u.wide[12] = SPH_C64(0xFFFFFFFFFFFFFFFF);
    kc->u.wide[17] = SPH_C64(0xFFFFFFFFFFFFFFFF);
    kc->u.wide[20] = SPH_C64(0xFFFFFFFFFFFFFFFF);
#else

    for (i = 0; i < 50; i ++)
        kc->u.narrow[i] = 0;
    /*
     * Initialization for the "lane complement".
     * Note: since we set to all-one full 64-bit words,
     * interleaving (if applicable) is a no-op.
     */
    kc->u.narrow[ 2] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[ 3] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[ 4] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[ 5] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[16] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[17] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[24] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[25] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[34] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[35] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[40] = SPH_C32(0xFFFFFFFF);
    kc->u.narrow[41] = SPH_C32(0xFFFFFFFF);
#endif
    kc->ptr = 0;
    kc->lim = 200 - (out_size >> 2);
}
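
/*
 * The block size ("rate") follows from the sponge parameters: the
 * capacity is twice the output size, so in bytes
 * lim = 200 - 2 * (out_size / 8) = 200 - out_size / 4, which is what
 * the shift above computes. For example, out_size = 256 gives
 * lim = 136, and out_size = 512 gives lim = 72.
 */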

static void
keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
{
    unsigned char *buf;
    size_t ptr;
    DECL_STATE

    buf = kc->buf;
    ptr = kc->ptr;

    if (len < (lim - ptr)) {
        memcpy(buf + ptr, data, len);
        kc->ptr = ptr + len;
        return;
    }

    READ_STATE(kc);
    while (len > 0) {
        size_t clen;

        clen = (lim - ptr);
        if (clen > len)
            clen = len;
        memcpy(buf + ptr, data, clen);
        ptr += clen;
        data = (const unsigned char *)data + clen;
        len -= clen;
        if (ptr == lim) {
            INPUT_BUF(lim);
            KECCAK_F_1600;
            ptr = 0;
        }
    }
    WRITE_STATE(kc);
    kc->ptr = ptr;
}

#if SPH_KECCAK_64

#define DEFCLOSE(d, lim) \
static void keccak_close ## d( \
    sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \
{ \
    unsigned eb; \
    union { \
        unsigned char tmp[lim + 1]; \
        sph_u64 dummy;   /* for alignment */ \
    } u; \
    size_t j; \
 \
    eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
    if (kc->ptr == (lim - 1)) { \
        if (n == 7) { \
            u.tmp[0] = eb; \
            memset(u.tmp + 1, 0, lim - 1); \
            u.tmp[lim] = 0x80; \
            j = 1 + lim; \
        } else { \
            u.tmp[0] = eb | 0x80; \
            j = 1; \
        } \
    } else { \
        j = lim - kc->ptr; \
        u.tmp[0] = eb; \
        memset(u.tmp + 1, 0, j - 2); \
        u.tmp[j - 1] = 0x80; \
    } \
    keccak_core(kc, u.tmp, j, lim); \
    /* Finalize the "lane complement" */ \
    kc->u.wide[ 1] = ~kc->u.wide[ 1]; \
    kc->u.wide[ 2] = ~kc->u.wide[ 2]; \
    kc->u.wide[ 8] = ~kc->u.wide[ 8]; \
    kc->u.wide[12] = ~kc->u.wide[12]; \
    kc->u.wide[17] = ~kc->u.wide[17]; \
    kc->u.wide[20] = ~kc->u.wide[20]; \
    for (j = 0; j < d; j += 8) \
        sph_enc64le_aligned(u.tmp + j, kc->u.wide[j >> 3]); \
    memcpy(dst, u.tmp, d); \
    keccak_init(kc, (unsigned)d << 3); \
} \

#else

#define DEFCLOSE(d, lim) \
static void keccak_close ## d( \
    sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \
{ \
    unsigned eb; \
    union { \
        unsigned char tmp[lim + 1]; \
        sph_u64 dummy;   /* for alignment */ \
    } u; \
    size_t j; \
 \
    eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
    if (kc->ptr == (lim - 1)) { \
        if (n == 7) { \
            u.tmp[0] = eb; \
            memset(u.tmp + 1, 0, lim - 1); \
            u.tmp[lim] = 0x80; \
            j = 1 + lim; \
        } else { \
            u.tmp[0] = eb | 0x80; \
            j = 1; \
        } \
    } else { \
        j = lim - kc->ptr; \
        u.tmp[0] = eb; \
        memset(u.tmp + 1, 0, j - 2); \
        u.tmp[j - 1] = 0x80; \
    } \
    keccak_core(kc, u.tmp, j, lim); \
    /* Finalize the "lane complement" */ \
    kc->u.narrow[ 2] = ~kc->u.narrow[ 2]; \
    kc->u.narrow[ 3] = ~kc->u.narrow[ 3]; \
    kc->u.narrow[ 4] = ~kc->u.narrow[ 4]; \
    kc->u.narrow[ 5] = ~kc->u.narrow[ 5]; \
    kc->u.narrow[16] = ~kc->u.narrow[16]; \
    kc->u.narrow[17] = ~kc->u.narrow[17]; \
    kc->u.narrow[24] = ~kc->u.narrow[24]; \
    kc->u.narrow[25] = ~kc->u.narrow[25]; \
    kc->u.narrow[34] = ~kc->u.narrow[34]; \
    kc->u.narrow[35] = ~kc->u.narrow[35]; \
    kc->u.narrow[40] = ~kc->u.narrow[40]; \
    kc->u.narrow[41] = ~kc->u.narrow[41]; \
    /* un-interleave */ \
    for (j = 0; j < 50; j += 2) \
        UNINTERLEAVE(kc->u.narrow[j], kc->u.narrow[j + 1]); \
    for (j = 0; j < d; j += 4) \
        sph_enc32le_aligned(u.tmp + j, kc->u.narrow[j >> 2]); \
    memcpy(dst, u.tmp, d); \
    keccak_init(kc, (unsigned)d << 3); \
} \

#endif
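
/*
 * The closing functions implement the original Keccak pad10*1 rule:
 * eb packs the n extra input bits (if any) together with the first
 * padding "1" bit, and the final 0x80 byte carries the last "1" bit
 * of the pad. The special case above handles a pad that must span
 * two blocks, which happens when only one byte of room is left and
 * n == 7.
 */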

DEFCLOSE(28, 144)
DEFCLOSE(32, 136)
DEFCLOSE(48, 104)
DEFCLOSE(64, 72)

/* see sph_keccak.h */
void
sph_keccak224_init(void *cc)
{
    keccak_init(cc, 224);
}

/* see sph_keccak.h */
void
sph_keccak224(void *cc, const void *data, size_t len)
{
    keccak_core(cc, data, len, 144);
}

/* see sph_keccak.h */
void
sph_keccak224_close(void *cc, void *dst)
{
    sph_keccak224_addbits_and_close(cc, 0, 0, dst);
}

/* see sph_keccak.h */
void
sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    keccak_close28(cc, ub, n, dst);
}

/* see sph_keccak.h */
void
sph_keccak256_init(void *cc)
{
    keccak_init(cc, 256);
}

/* see sph_keccak.h */
void
sph_keccak256(void *cc, const void *data, size_t len)
{
    keccak_core(cc, data, len, 136);
}

/* see sph_keccak.h */
void
sph_keccak256_close(void *cc, void *dst)
{
    sph_keccak256_addbits_and_close(cc, 0, 0, dst);
}

/* see sph_keccak.h */
void
sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    keccak_close32(cc, ub, n, dst);
}

/* see sph_keccak.h */
void
sph_keccak384_init(void *cc)
{
    keccak_init(cc, 384);
}

/* see sph_keccak.h */
void
sph_keccak384(void *cc, const void *data, size_t len)
{
    keccak_core(cc, data, len, 104);
}

/* see sph_keccak.h */
void
sph_keccak384_close(void *cc, void *dst)
{
    sph_keccak384_addbits_and_close(cc, 0, 0, dst);
}

/* see sph_keccak.h */
void
sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    keccak_close48(cc, ub, n, dst);
}

/* see sph_keccak.h */
void
sph_keccak512_init(void *cc)
{
    keccak_init(cc, 512);
}

/* see sph_keccak.h */
void
sph_keccak512(void *cc, const void *data, size_t len)
{
    keccak_core(cc, data, len, 72);
}

/* see sph_keccak.h */
void
sph_keccak512_close(void *cc, void *dst)
{
    sph_keccak512_addbits_and_close(cc, 0, 0, dst);
}

/* see sph_keccak.h */
void
sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    keccak_close64(cc, ub, n, dst);
}
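
/*
 * Example usage (a sketch, assuming the sph_keccak256_context typedef
 * declared in sph_keccak.h): computing the Keccak-256 digest of a
 * message in one pass.
 *
 *    sph_keccak256_context ctx;
 *    unsigned char digest[32];
 *
 *    sph_keccak256_init(&ctx);
 *    sph_keccak256(&ctx, message, message_len);
 *    sph_keccak256_close(&ctx, digest);
 */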

#ifdef __cplusplus
}
#endif