/* $Id: shavite.c 227 2010-06-16 17:28:38Z tp $ */
/*
 * SHAvite-3 implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010 Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author Thomas Pornin <[email protected]>
 */

#include <stddef.h>
#include <string.h>

#include "sph_shavite.h"

#ifdef __cplusplus
extern "C"{
#endif

#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHAVITE
#define SPH_SMALL_FOOTPRINT_SHAVITE 1
#endif

#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif

#define C32 SPH_C32

/*
 * As of round 2 of the SHA-3 competition, the published reference
 * implementation and test vectors are wrong, because they use
 * big-endian AES tables while the internal decoding uses little-endian.
 * The code below follows the specification. To turn it into a code
 * which follows the reference implementation (the one called "BugFix"
 * on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
 * the code below (from the '#define AES_BIG_ENDIAN...' to the definition
 * of the AES_ROUND_NOKEY macro) and replace it with the version which
 * is commented out afterwards.
 */

#define AES_BIG_ENDIAN 0
#include "aes_helper.c"

static const sph_u32 IV224[] = {
    C32(0x6774F31C), C32(0x990AE210), C32(0xC87D4274), C32(0xC9546371),
    C32(0x62B2AEA8), C32(0x4B5801D8), C32(0x1B702860), C32(0x842F3017)
};

static const sph_u32 IV256[] = {
    C32(0x49BB3E47), C32(0x2674860D), C32(0xA8B392AC), C32(0x021AC4E6),
    C32(0x409283CF), C32(0x620E5D86), C32(0x6D929DCB), C32(0x96CC2A8B)
};

static const sph_u32 IV384[] = {
    C32(0x83DF1545), C32(0xF9AAEC13), C32(0xF4803CB0), C32(0x11FE1F47),
    C32(0xDA6CD269), C32(0x4F53FCD7), C32(0x950529A2), C32(0x97908147),
    C32(0xB0A4D7AF), C32(0x2B9132BF), C32(0x226E607D), C32(0x3C0F8D7C),
    C32(0x487B3F0F), C32(0x04363E22), C32(0x0155C99C), C32(0xEC2E20D3)
};

static const sph_u32 IV512[] = {
    C32(0x72FCCDD8), C32(0x79CA4727), C32(0x128A077B), C32(0x40D55AEC),
    C32(0xD1901A06), C32(0x430AE307), C32(0xB29F5CD1), C32(0xDF07FBFC),
    C32(0x8E45D73D), C32(0x681AB538), C32(0xBDE86578), C32(0xDD577E47),
    C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
};

#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
    sph_u32 t0 = (x0); \
    sph_u32 t1 = (x1); \
    sph_u32 t2 = (x2); \
    sph_u32 t3 = (x3); \
    AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \
} while (0)
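
/*
 * AES_ROUND_NOKEY applies one AES round, with an all-zero subkey, to the
 * 128-bit value held in (x0, x1, x2, x3), using the little-endian tables
 * pulled in from aes_helper.c. The temporaries decouple the inputs from
 * the outputs, so the macro stays correct even though the same variables
 * appear on both sides.
 */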

/*
 * This is the code needed to match the "reference implementation" as
 * published on Nov 23rd, 2009, instead of the published specification.
 *

#define AES_BIG_ENDIAN 1
#include "aes_helper.c"

static const sph_u32 IV224[] = {
    C32(0xC4C67795), C32(0xC0B1817F), C32(0xEAD88924), C32(0x1ABB1BB0),
    C32(0xE0C29152), C32(0xBDE046BA), C32(0xAEEECF99), C32(0x58D509D8)
};

static const sph_u32 IV256[] = {
    C32(0x3EECF551), C32(0xBF10819B), C32(0xE6DC8559), C32(0xF3E23FD5),
    C32(0x431AEC73), C32(0x79E3F731), C32(0x98325F05), C32(0xA92A31F1)
};

static const sph_u32 IV384[] = {
    C32(0x71F48510), C32(0xA903A8AC), C32(0xFE3216DD), C32(0x0B2D2AD4),
    C32(0x6672900A), C32(0x41032819), C32(0x15A7D780), C32(0xB3CAB8D9),
    C32(0x34EF4711), C32(0xDE019FE8), C32(0x4D674DC4), C32(0xE056D96B),
    C32(0xA35C016B), C32(0xDD903BA7), C32(0x8C1B09B4), C32(0x2C3E9F25)
};

static const sph_u32 IV512[] = {
    C32(0xD5652B63), C32(0x25F1E6EA), C32(0xB18F48FA), C32(0xA1EE3A47),
    C32(0xC8B67B07), C32(0xBDCE48D3), C32(0xE3937B78), C32(0x05DB5186),
    C32(0x613BE326), C32(0xA11FA303), C32(0x90C833D4), C32(0x79CEE316),
    C32(0x1E1AF00F), C32(0x2829B165), C32(0x23B25F80), C32(0x21E11499)
};

#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
    sph_u32 t0 = (x0); \
    sph_u32 t1 = (x1); \
    sph_u32 t2 = (x2); \
    sph_u32 t3 = (x3); \
    AES_ROUND_NOKEY_BE(t0, t1, t2, t3, x0, x1, x2, x3); \
} while (0)

 */

#define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
    sph_u32 kt; \
    AES_ROUND_NOKEY(k1, k2, k3, k0); \
    kt = (k0); \
    (k0) = (k1); \
    (k1) = (k2); \
    (k2) = (k3); \
    (k3) = kt; \
} while (0)
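
/*
 * KEY_EXPAND_ELT is the non-linear step of the SHAvite-3 message
 * expansion: an AES round over the four key words taken in rotated
 * order (k1, k2, k3, k0), followed by a one-word rotation of the
 * result.
 */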

#if SPH_SMALL_FOOTPRINT_SHAVITE

/*
 * This function assumes that "msg" is aligned for 32-bit access.
 */
static void
c256(sph_shavite_small_context *sc, const void *msg)
{
    sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
    sph_u32 rk[144];
    size_t u;
    int r, s;

#if SPH_LITTLE_ENDIAN
    memcpy(rk, msg, 64);
#else
    for (u = 0; u < 16; u += 4) {
        rk[u + 0] = sph_dec32le_aligned(
            (const unsigned char *)msg + (u << 2) + 0);
        rk[u + 1] = sph_dec32le_aligned(
            (const unsigned char *)msg + (u << 2) + 4);
        rk[u + 2] = sph_dec32le_aligned(
            (const unsigned char *)msg + (u << 2) + 8);
        rk[u + 3] = sph_dec32le_aligned(
            (const unsigned char *)msg + (u << 2) + 12);
    }
#endif
    u = 16;
    for (r = 0; r < 4; r ++) {
        for (s = 0; s < 2; s ++) {
            sph_u32 x0, x1, x2, x3;

            x0 = rk[u - 15];
            x1 = rk[u - 14];
            x2 = rk[u - 13];
            x3 = rk[u - 16];
            AES_ROUND_NOKEY(x0, x1, x2, x3);
            rk[u + 0] = x0 ^ rk[u - 4];
            rk[u + 1] = x1 ^ rk[u - 3];
            rk[u + 2] = x2 ^ rk[u - 2];
            rk[u + 3] = x3 ^ rk[u - 1];
            if (u == 16) {
                rk[ 16] ^= sc->count0;
                rk[ 17] ^= SPH_T32(~sc->count1);
            } else if (u == 56) {
                rk[ 57] ^= sc->count1;
                rk[ 58] ^= SPH_T32(~sc->count0);
            }
            u += 4;

            x0 = rk[u - 15];
            x1 = rk[u - 14];
            x2 = rk[u - 13];
            x3 = rk[u - 16];
            AES_ROUND_NOKEY(x0, x1, x2, x3);
            rk[u + 0] = x0 ^ rk[u - 4];
            rk[u + 1] = x1 ^ rk[u - 3];
            rk[u + 2] = x2 ^ rk[u - 2];
            rk[u + 3] = x3 ^ rk[u - 1];
            if (u == 84) {
                rk[ 86] ^= sc->count1;
                rk[ 87] ^= SPH_T32(~sc->count0);
            } else if (u == 124) {
                rk[124] ^= sc->count0;
                rk[127] ^= SPH_T32(~sc->count1);
            }
            u += 4;
        }
        for (s = 0; s < 4; s ++) {
            rk[u + 0] = rk[u - 16] ^ rk[u - 3];
            rk[u + 1] = rk[u - 15] ^ rk[u - 2];
            rk[u + 2] = rk[u - 14] ^ rk[u - 1];
            rk[u + 3] = rk[u - 13] ^ rk[u - 0];
            u += 4;
        }
    }

    p0 = sc->h[0x0];
    p1 = sc->h[0x1];
    p2 = sc->h[0x2];
    p3 = sc->h[0x3];
    p4 = sc->h[0x4];
    p5 = sc->h[0x5];
    p6 = sc->h[0x6];
    p7 = sc->h[0x7];
    u = 0;
    for (r = 0; r < 6; r ++) {
        sph_u32 x0, x1, x2, x3;

        x0 = p4 ^ rk[u ++];
        x1 = p5 ^ rk[u ++];
        x2 = p6 ^ rk[u ++];
        x3 = p7 ^ rk[u ++];
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        x0 ^= rk[u ++];
        x1 ^= rk[u ++];
        x2 ^= rk[u ++];
        x3 ^= rk[u ++];
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        x0 ^= rk[u ++];
        x1 ^= rk[u ++];
        x2 ^= rk[u ++];
        x3 ^= rk[u ++];
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        p0 ^= x0;
        p1 ^= x1;
        p2 ^= x2;
        p3 ^= x3;

        x0 = p0 ^ rk[u ++];
        x1 = p1 ^ rk[u ++];
        x2 = p2 ^ rk[u ++];
        x3 = p3 ^ rk[u ++];
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        x0 ^= rk[u ++];
        x1 ^= rk[u ++];
        x2 ^= rk[u ++];
        x3 ^= rk[u ++];
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        x0 ^= rk[u ++];
        x1 ^= rk[u ++];
        x2 ^= rk[u ++];
        x3 ^= rk[u ++];
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        p4 ^= x0;
        p5 ^= x1;
        p6 ^= x2;
        p7 ^= x3;
    }
    sc->h[0x0] ^= p0;
    sc->h[0x1] ^= p1;
    sc->h[0x2] ^= p2;
    sc->h[0x3] ^= p3;
    sc->h[0x4] ^= p4;
    sc->h[0x5] ^= p5;
    sc->h[0x6] ^= p6;
    sc->h[0x7] ^= p7;
}
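
/*
 * The small-footprint c256 above and the unrolled c256 below compute the
 * same compression function: the version above first expands the full
 * 144-word schedule into rk[], then runs the twelve 3-round Feistel
 * steps, while the version below keeps the schedule in the sixteen
 * registers rk0..rkF and interleaves expansion with the rounds.
 */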

#else

/*
 * This function assumes that "msg" is aligned for 32-bit access.
 */
static void
c256(sph_shavite_small_context *sc, const void *msg)
{
    sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
    sph_u32 x0, x1, x2, x3;
    sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7;
    sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF;

    p0 = sc->h[0x0];
    p1 = sc->h[0x1];
    p2 = sc->h[0x2];
    p3 = sc->h[0x3];
    p4 = sc->h[0x4];
    p5 = sc->h[0x5];
    p6 = sc->h[0x6];
    p7 = sc->h[0x7];
    /* round 0 */
    rk0 = sph_dec32le_aligned((const unsigned char *)msg + 0);
    x0 = p4 ^ rk0;
    rk1 = sph_dec32le_aligned((const unsigned char *)msg + 4);
    x1 = p5 ^ rk1;
    rk2 = sph_dec32le_aligned((const unsigned char *)msg + 8);
    x2 = p6 ^ rk2;
    rk3 = sph_dec32le_aligned((const unsigned char *)msg + 12);
    x3 = p7 ^ rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk4 = sph_dec32le_aligned((const unsigned char *)msg + 16);
    x0 ^= rk4;
    rk5 = sph_dec32le_aligned((const unsigned char *)msg + 20);
    x1 ^= rk5;
    rk6 = sph_dec32le_aligned((const unsigned char *)msg + 24);
    x2 ^= rk6;
    rk7 = sph_dec32le_aligned((const unsigned char *)msg + 28);
    x3 ^= rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk8 = sph_dec32le_aligned((const unsigned char *)msg + 32);
    x0 ^= rk8;
    rk9 = sph_dec32le_aligned((const unsigned char *)msg + 36);
    x1 ^= rk9;
    rkA = sph_dec32le_aligned((const unsigned char *)msg + 40);
    x2 ^= rkA;
    rkB = sph_dec32le_aligned((const unsigned char *)msg + 44);
    x3 ^= rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p0 ^= x0;
    p1 ^= x1;
    p2 ^= x2;
    p3 ^= x3;
    /* round 1 */
    rkC = sph_dec32le_aligned((const unsigned char *)msg + 48);
    x0 = p0 ^ rkC;
    rkD = sph_dec32le_aligned((const unsigned char *)msg + 52);
    x1 = p1 ^ rkD;
    rkE = sph_dec32le_aligned((const unsigned char *)msg + 56);
    x2 = p2 ^ rkE;
    rkF = sph_dec32le_aligned((const unsigned char *)msg + 60);
    x3 = p3 ^ rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);
    rk0 ^= rkC ^ sc->count0;
    rk1 ^= rkD ^ SPH_T32(~sc->count1);
    rk2 ^= rkE;
    rk3 ^= rkF;
    x0 ^= rk0;
    x1 ^= rk1;
    x2 ^= rk2;
    x3 ^= rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);
    rk4 ^= rk0;
    rk5 ^= rk1;
    rk6 ^= rk2;
    rk7 ^= rk3;
    x0 ^= rk4;
    x1 ^= rk5;
    x2 ^= rk6;
    x3 ^= rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p4 ^= x0;
    p5 ^= x1;
    p6 ^= x2;
    p7 ^= x3;
    /* round 2 */
    KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);
    rk8 ^= rk4;
    rk9 ^= rk5;
    rkA ^= rk6;
    rkB ^= rk7;
    x0 = p4 ^ rk8;
    x1 = p5 ^ rk9;
    x2 = p6 ^ rkA;
    x3 = p7 ^ rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);
    rkC ^= rk8;
    rkD ^= rk9;
    rkE ^= rkA;
    rkF ^= rkB;
    x0 ^= rkC;
    x1 ^= rkD;
    x2 ^= rkE;
    x3 ^= rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk0 ^= rkD;
    x0 ^= rk0;
    rk1 ^= rkE;
    x1 ^= rk1;
    rk2 ^= rkF;
    x2 ^= rk2;
    rk3 ^= rk0;
    x3 ^= rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p0 ^= x0;
    p1 ^= x1;
    p2 ^= x2;
    p3 ^= x3;
    /* round 3 */
    rk4 ^= rk1;
    x0 = p0 ^ rk4;
    rk5 ^= rk2;
    x1 = p1 ^ rk5;
    rk6 ^= rk3;
    x2 = p2 ^ rk6;
    rk7 ^= rk4;
    x3 = p3 ^ rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk8 ^= rk5;
    x0 ^= rk8;
    rk9 ^= rk6;
    x1 ^= rk9;
    rkA ^= rk7;
    x2 ^= rkA;
    rkB ^= rk8;
    x3 ^= rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rkC ^= rk9;
    x0 ^= rkC;
    rkD ^= rkA;
    x1 ^= rkD;
    rkE ^= rkB;
    x2 ^= rkE;
    rkF ^= rkC;
    x3 ^= rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p4 ^= x0;
    p5 ^= x1;
    p6 ^= x2;
    p7 ^= x3;
    /* round 4 */
    KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);
    rk0 ^= rkC;
    rk1 ^= rkD;
    rk2 ^= rkE;
    rk3 ^= rkF;
    x0 = p4 ^ rk0;
    x1 = p5 ^ rk1;
    x2 = p6 ^ rk2;
    x3 = p7 ^ rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);
    rk4 ^= rk0;
    rk5 ^= rk1;
    rk6 ^= rk2;
    rk7 ^= rk3;
    x0 ^= rk4;
    x1 ^= rk5;
    x2 ^= rk6;
    x3 ^= rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);
    rk8 ^= rk4;
    rk9 ^= rk5 ^ sc->count1;
    rkA ^= rk6 ^ SPH_T32(~sc->count0);
    rkB ^= rk7;
    x0 ^= rk8;
    x1 ^= rk9;
    x2 ^= rkA;
    x3 ^= rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p0 ^= x0;
    p1 ^= x1;
    p2 ^= x2;
    p3 ^= x3;
    /* round 5 */
    KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);
    rkC ^= rk8;
    rkD ^= rk9;
    rkE ^= rkA;
    rkF ^= rkB;
    x0 = p0 ^ rkC;
    x1 = p1 ^ rkD;
    x2 = p2 ^ rkE;
    x3 = p3 ^ rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk0 ^= rkD;
    x0 ^= rk0;
    rk1 ^= rkE;
    x1 ^= rk1;
    rk2 ^= rkF;
    x2 ^= rk2;
    rk3 ^= rk0;
    x3 ^= rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk4 ^= rk1;
    x0 ^= rk4;
    rk5 ^= rk2;
    x1 ^= rk5;
    rk6 ^= rk3;
    x2 ^= rk6;
    rk7 ^= rk4;
    x3 ^= rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p4 ^= x0;
    p5 ^= x1;
    p6 ^= x2;
    p7 ^= x3;
    /* round 6 */
    rk8 ^= rk5;
    x0 = p4 ^ rk8;
    rk9 ^= rk6;
    x1 = p5 ^ rk9;
    rkA ^= rk7;
    x2 = p6 ^ rkA;
    rkB ^= rk8;
    x3 = p7 ^ rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rkC ^= rk9;
    x0 ^= rkC;
    rkD ^= rkA;
    x1 ^= rkD;
    rkE ^= rkB;
    x2 ^= rkE;
    rkF ^= rkC;
    x3 ^= rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);
    rk0 ^= rkC;
    rk1 ^= rkD;
    rk2 ^= rkE;
    rk3 ^= rkF;
    x0 ^= rk0;
    x1 ^= rk1;
    x2 ^= rk2;
    x3 ^= rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p0 ^= x0;
    p1 ^= x1;
    p2 ^= x2;
    p3 ^= x3;
    /* round 7 */
    KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);
    rk4 ^= rk0;
    rk5 ^= rk1;
    rk6 ^= rk2 ^ sc->count1;
    rk7 ^= rk3 ^ SPH_T32(~sc->count0);
    x0 = p0 ^ rk4;
    x1 = p1 ^ rk5;
    x2 = p2 ^ rk6;
    x3 = p3 ^ rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);
    rk8 ^= rk4;
    rk9 ^= rk5;
    rkA ^= rk6;
    rkB ^= rk7;
    x0 ^= rk8;
    x1 ^= rk9;
    x2 ^= rkA;
    x3 ^= rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);
    rkC ^= rk8;
    rkD ^= rk9;
    rkE ^= rkA;
    rkF ^= rkB;
    x0 ^= rkC;
    x1 ^= rkD;
    x2 ^= rkE;
    x3 ^= rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p4 ^= x0;
    p5 ^= x1;
    p6 ^= x2;
    p7 ^= x3;
    /* round 8 */
    rk0 ^= rkD;
    x0 = p4 ^ rk0;
    rk1 ^= rkE;
    x1 = p5 ^ rk1;
    rk2 ^= rkF;
    x2 = p6 ^ rk2;
    rk3 ^= rk0;
    x3 = p7 ^ rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk4 ^= rk1;
    x0 ^= rk4;
    rk5 ^= rk2;
    x1 ^= rk5;
    rk6 ^= rk3;
    x2 ^= rk6;
    rk7 ^= rk4;
    x3 ^= rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk8 ^= rk5;
    x0 ^= rk8;
    rk9 ^= rk6;
    x1 ^= rk9;
    rkA ^= rk7;
    x2 ^= rkA;
    rkB ^= rk8;
    x3 ^= rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p0 ^= x0;
    p1 ^= x1;
    p2 ^= x2;
    p3 ^= x3;
    /* round 9 */
    rkC ^= rk9;
    x0 = p0 ^ rkC;
    rkD ^= rkA;
    x1 = p1 ^ rkD;
    rkE ^= rkB;
    x2 = p2 ^ rkE;
    rkF ^= rkC;
    x3 = p3 ^ rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);
    rk0 ^= rkC;
    rk1 ^= rkD;
    rk2 ^= rkE;
    rk3 ^= rkF;
    x0 ^= rk0;
    x1 ^= rk1;
    x2 ^= rk2;
    x3 ^= rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);
    rk4 ^= rk0;
    rk5 ^= rk1;
    rk6 ^= rk2;
    rk7 ^= rk3;
    x0 ^= rk4;
    x1 ^= rk5;
    x2 ^= rk6;
    x3 ^= rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p4 ^= x0;
    p5 ^= x1;
    p6 ^= x2;
    p7 ^= x3;
    /* round 10 */
    KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);
    rk8 ^= rk4;
    rk9 ^= rk5;
    rkA ^= rk6;
    rkB ^= rk7;
    x0 = p4 ^ rk8;
    x1 = p5 ^ rk9;
    x2 = p6 ^ rkA;
    x3 = p7 ^ rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);
    rkC ^= rk8 ^ sc->count0;
    rkD ^= rk9;
    rkE ^= rkA;
    rkF ^= rkB ^ SPH_T32(~sc->count1);
    x0 ^= rkC;
    x1 ^= rkD;
    x2 ^= rkE;
    x3 ^= rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk0 ^= rkD;
    x0 ^= rk0;
    rk1 ^= rkE;
    x1 ^= rk1;
    rk2 ^= rkF;
    x2 ^= rk2;
    rk3 ^= rk0;
    x3 ^= rk3;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p0 ^= x0;
    p1 ^= x1;
    p2 ^= x2;
    p3 ^= x3;
    /* round 11 */
    rk4 ^= rk1;
    x0 = p0 ^ rk4;
    rk5 ^= rk2;
    x1 = p1 ^ rk5;
    rk6 ^= rk3;
    x2 = p2 ^ rk6;
    rk7 ^= rk4;
    x3 = p3 ^ rk7;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk8 ^= rk5;
    x0 ^= rk8;
    rk9 ^= rk6;
    x1 ^= rk9;
    rkA ^= rk7;
    x2 ^= rkA;
    rkB ^= rk8;
    x3 ^= rkB;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rkC ^= rk9;
    x0 ^= rkC;
    rkD ^= rkA;
    x1 ^= rkD;
    rkE ^= rkB;
    x2 ^= rkE;
    rkF ^= rkC;
    x3 ^= rkF;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p4 ^= x0;
    p5 ^= x1;
    p6 ^= x2;
    p7 ^= x3;
    sc->h[0x0] ^= p0;
    sc->h[0x1] ^= p1;
    sc->h[0x2] ^= p2;
    sc->h[0x3] ^= p3;
    sc->h[0x4] ^= p4;
    sc->h[0x5] ^= p5;
    sc->h[0x6] ^= p6;
    sc->h[0x7] ^= p7;
}

#endif

#if SPH_SMALL_FOOTPRINT_SHAVITE

/*
 * This function assumes that "msg" is aligned for 32-bit access.
 */
static void
c512(sph_shavite_big_context *sc, const void *msg)
{
    sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
    sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;
    sph_u32 rk[448];
    size_t u;
    int r, s;

#if SPH_LITTLE_ENDIAN
    memcpy(rk, msg, 128);
#else
    for (u = 0; u < 32; u += 4) {
        rk[u + 0] = sph_dec32le_aligned(
            (const unsigned char *)msg + (u << 2) + 0);
        rk[u + 1] = sph_dec32le_aligned(
            (const unsigned char *)msg + (u << 2) + 4);
        rk[u + 2] = sph_dec32le_aligned(
            (const unsigned char *)msg + (u << 2) + 8);
        rk[u + 3] = sph_dec32le_aligned(
            (const unsigned char *)msg + (u << 2) + 12);
    }
#endif
    u = 32;
    for (;;) {
        for (s = 0; s < 4; s ++) {
            sph_u32 x0, x1, x2, x3;

            x0 = rk[u - 31];
            x1 = rk[u - 30];
            x2 = rk[u - 29];
            x3 = rk[u - 32];
            AES_ROUND_NOKEY(x0, x1, x2, x3);
            rk[u + 0] = x0 ^ rk[u - 4];
            rk[u + 1] = x1 ^ rk[u - 3];
            rk[u + 2] = x2 ^ rk[u - 2];
            rk[u + 3] = x3 ^ rk[u - 1];
            if (u == 32) {
                rk[ 32] ^= sc->count0;
                rk[ 33] ^= sc->count1;
                rk[ 34] ^= sc->count2;
                rk[ 35] ^= SPH_T32(~sc->count3);
            } else if (u == 440) {
                rk[440] ^= sc->count1;
                rk[441] ^= sc->count0;
                rk[442] ^= sc->count3;
                rk[443] ^= SPH_T32(~sc->count2);
            }
            u += 4;

            x0 = rk[u - 31];
            x1 = rk[u - 30];
            x2 = rk[u - 29];
            x3 = rk[u - 32];
            AES_ROUND_NOKEY(x0, x1, x2, x3);
            rk[u + 0] = x0 ^ rk[u - 4];
            rk[u + 1] = x1 ^ rk[u - 3];
            rk[u + 2] = x2 ^ rk[u - 2];
            rk[u + 3] = x3 ^ rk[u - 1];
            if (u == 164) {
                rk[164] ^= sc->count3;
                rk[165] ^= sc->count2;
                rk[166] ^= sc->count1;
                rk[167] ^= SPH_T32(~sc->count0);
            } else if (u == 316) {
                rk[316] ^= sc->count2;
                rk[317] ^= sc->count3;
                rk[318] ^= sc->count0;
                rk[319] ^= SPH_T32(~sc->count1);
            }
            u += 4;
        }
        if (u == 448)
            break;
        for (s = 0; s < 8; s ++) {
            rk[u + 0] = rk[u - 32] ^ rk[u - 7];
            rk[u + 1] = rk[u - 31] ^ rk[u - 6];
            rk[u + 2] = rk[u - 30] ^ rk[u - 5];
            rk[u + 3] = rk[u - 29] ^ rk[u - 4];
            u += 4;
        }
    }

    p0 = sc->h[0x0];
    p1 = sc->h[0x1];
    p2 = sc->h[0x2];
    p3 = sc->h[0x3];
    p4 = sc->h[0x4];
    p5 = sc->h[0x5];
    p6 = sc->h[0x6];
    p7 = sc->h[0x7];
    p8 = sc->h[0x8];
    p9 = sc->h[0x9];
    pA = sc->h[0xA];
    pB = sc->h[0xB];
    pC = sc->h[0xC];
    pD = sc->h[0xD];
    pE = sc->h[0xE];
    pF = sc->h[0xF];
    u = 0;
    for (r = 0; r < 14; r ++) {
#define C512_ELT(l0, l1, l2, l3, r0, r1, r2, r3) do { \
    sph_u32 x0, x1, x2, x3; \
    x0 = r0 ^ rk[u ++]; \
    x1 = r1 ^ rk[u ++]; \
    x2 = r2 ^ rk[u ++]; \
    x3 = r3 ^ rk[u ++]; \
    AES_ROUND_NOKEY(x0, x1, x2, x3); \
    x0 ^= rk[u ++]; \
    x1 ^= rk[u ++]; \
    x2 ^= rk[u ++]; \
    x3 ^= rk[u ++]; \
    AES_ROUND_NOKEY(x0, x1, x2, x3); \
    x0 ^= rk[u ++]; \
    x1 ^= rk[u ++]; \
    x2 ^= rk[u ++]; \
    x3 ^= rk[u ++]; \
    AES_ROUND_NOKEY(x0, x1, x2, x3); \
    x0 ^= rk[u ++]; \
    x1 ^= rk[u ++]; \
    x2 ^= rk[u ++]; \
    x3 ^= rk[u ++]; \
    AES_ROUND_NOKEY(x0, x1, x2, x3); \
    l0 ^= x0; \
    l1 ^= x1; \
    l2 ^= x2; \
    l3 ^= x3; \
} while (0)

#define WROT(a, b, c, d) do { \
    sph_u32 t = d; \
    d = c; \
    c = b; \
    b = a; \
    a = t; \
} while (0)
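
/*
 * WROT rotates the four 128-bit quarters of the state by one position
 * (a receives d, while b, c and d receive a, b and c); applied to each
 * word column below, it is the mixing permutation between rounds.
 */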

        C512_ELT(p0, p1, p2, p3, p4, p5, p6, p7);
        C512_ELT(p8, p9, pA, pB, pC, pD, pE, pF);

        WROT(p0, p4, p8, pC);
        WROT(p1, p5, p9, pD);
        WROT(p2, p6, pA, pE);
        WROT(p3, p7, pB, pF);

#undef C512_ELT
#undef WROT
    }
    sc->h[0x0] ^= p0;
    sc->h[0x1] ^= p1;
    sc->h[0x2] ^= p2;
    sc->h[0x3] ^= p3;
    sc->h[0x4] ^= p4;
    sc->h[0x5] ^= p5;
    sc->h[0x6] ^= p6;
    sc->h[0x7] ^= p7;
    sc->h[0x8] ^= p8;
    sc->h[0x9] ^= p9;
    sc->h[0xA] ^= pA;
    sc->h[0xB] ^= pB;
    sc->h[0xC] ^= pC;
    sc->h[0xD] ^= pD;
    sc->h[0xE] ^= pE;
    sc->h[0xF] ^= pF;
}

#else

/*
 * This function assumes that "msg" is aligned for 32-bit access.
 */
static void
c512(sph_shavite_big_context *sc, const void *msg)
{
    sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
    sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;
    sph_u32 x0, x1, x2, x3;
    sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07;
    sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
    sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
    sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
    int r;

    p0 = sc->h[0x0];
    p1 = sc->h[0x1];
    p2 = sc->h[0x2];
    p3 = sc->h[0x3];
    p4 = sc->h[0x4];
    p5 = sc->h[0x5];
    p6 = sc->h[0x6];
    p7 = sc->h[0x7];
    p8 = sc->h[0x8];
    p9 = sc->h[0x9];
    pA = sc->h[0xA];
    pB = sc->h[0xB];
    pC = sc->h[0xC];
    pD = sc->h[0xD];
    pE = sc->h[0xE];
    pF = sc->h[0xF];
    /* round 0 */
    rk00 = sph_dec32le_aligned((const unsigned char *)msg + 0);
    x0 = p4 ^ rk00;
    rk01 = sph_dec32le_aligned((const unsigned char *)msg + 4);
    x1 = p5 ^ rk01;
    rk02 = sph_dec32le_aligned((const unsigned char *)msg + 8);
    x2 = p6 ^ rk02;
    rk03 = sph_dec32le_aligned((const unsigned char *)msg + 12);
    x3 = p7 ^ rk03;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk04 = sph_dec32le_aligned((const unsigned char *)msg + 16);
    x0 ^= rk04;
    rk05 = sph_dec32le_aligned((const unsigned char *)msg + 20);
    x1 ^= rk05;
    rk06 = sph_dec32le_aligned((const unsigned char *)msg + 24);
    x2 ^= rk06;
    rk07 = sph_dec32le_aligned((const unsigned char *)msg + 28);
    x3 ^= rk07;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk08 = sph_dec32le_aligned((const unsigned char *)msg + 32);
    x0 ^= rk08;
    rk09 = sph_dec32le_aligned((const unsigned char *)msg + 36);
    x1 ^= rk09;
    rk0A = sph_dec32le_aligned((const unsigned char *)msg + 40);
    x2 ^= rk0A;
    rk0B = sph_dec32le_aligned((const unsigned char *)msg + 44);
    x3 ^= rk0B;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk0C = sph_dec32le_aligned((const unsigned char *)msg + 48);
    x0 ^= rk0C;
    rk0D = sph_dec32le_aligned((const unsigned char *)msg + 52);
    x1 ^= rk0D;
    rk0E = sph_dec32le_aligned((const unsigned char *)msg + 56);
    x2 ^= rk0E;
    rk0F = sph_dec32le_aligned((const unsigned char *)msg + 60);
    x3 ^= rk0F;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p0 ^= x0;
    p1 ^= x1;
    p2 ^= x2;
    p3 ^= x3;
    rk10 = sph_dec32le_aligned((const unsigned char *)msg + 64);
    x0 = pC ^ rk10;
    rk11 = sph_dec32le_aligned((const unsigned char *)msg + 68);
    x1 = pD ^ rk11;
    rk12 = sph_dec32le_aligned((const unsigned char *)msg + 72);
    x2 = pE ^ rk12;
    rk13 = sph_dec32le_aligned((const unsigned char *)msg + 76);
    x3 = pF ^ rk13;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk14 = sph_dec32le_aligned((const unsigned char *)msg + 80);
    x0 ^= rk14;
    rk15 = sph_dec32le_aligned((const unsigned char *)msg + 84);
    x1 ^= rk15;
    rk16 = sph_dec32le_aligned((const unsigned char *)msg + 88);
    x2 ^= rk16;
    rk17 = sph_dec32le_aligned((const unsigned char *)msg + 92);
    x3 ^= rk17;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk18 = sph_dec32le_aligned((const unsigned char *)msg + 96);
    x0 ^= rk18;
    rk19 = sph_dec32le_aligned((const unsigned char *)msg + 100);
    x1 ^= rk19;
    rk1A = sph_dec32le_aligned((const unsigned char *)msg + 104);
    x2 ^= rk1A;
    rk1B = sph_dec32le_aligned((const unsigned char *)msg + 108);
    x3 ^= rk1B;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    rk1C = sph_dec32le_aligned((const unsigned char *)msg + 112);
    x0 ^= rk1C;
    rk1D = sph_dec32le_aligned((const unsigned char *)msg + 116);
    x1 ^= rk1D;
    rk1E = sph_dec32le_aligned((const unsigned char *)msg + 120);
    x2 ^= rk1E;
    rk1F = sph_dec32le_aligned((const unsigned char *)msg + 124);
    x3 ^= rk1F;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p8 ^= x0;
    p9 ^= x1;
    pA ^= x2;
    pB ^= x3;

    for (r = 0; r < 3; r ++) {
        /* round 1, 5, 9 */
        KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
        rk00 ^= rk1C;
        rk01 ^= rk1D;
        rk02 ^= rk1E;
        rk03 ^= rk1F;
        if (r == 0) {
            rk00 ^= sc->count0;
            rk01 ^= sc->count1;
            rk02 ^= sc->count2;
            rk03 ^= SPH_T32(~sc->count3);
        }
        x0 = p0 ^ rk00;
        x1 = p1 ^ rk01;
        x2 = p2 ^ rk02;
        x3 = p3 ^ rk03;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
        rk04 ^= rk00;
        rk05 ^= rk01;
        rk06 ^= rk02;
        rk07 ^= rk03;
        if (r == 1) {
            rk04 ^= sc->count3;
            rk05 ^= sc->count2;
            rk06 ^= sc->count1;
            rk07 ^= SPH_T32(~sc->count0);
        }
        x0 ^= rk04;
        x1 ^= rk05;
        x2 ^= rk06;
        x3 ^= rk07;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
        rk08 ^= rk04;
        rk09 ^= rk05;
        rk0A ^= rk06;
        rk0B ^= rk07;
        x0 ^= rk08;
        x1 ^= rk09;
        x2 ^= rk0A;
        x3 ^= rk0B;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
        rk0C ^= rk08;
        rk0D ^= rk09;
        rk0E ^= rk0A;
        rk0F ^= rk0B;
        x0 ^= rk0C;
        x1 ^= rk0D;
        x2 ^= rk0E;
        x3 ^= rk0F;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        pC ^= x0;
        pD ^= x1;
        pE ^= x2;
        pF ^= x3;
        KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
        rk10 ^= rk0C;
        rk11 ^= rk0D;
        rk12 ^= rk0E;
        rk13 ^= rk0F;
        x0 = p8 ^ rk10;
        x1 = p9 ^ rk11;
        x2 = pA ^ rk12;
        x3 = pB ^ rk13;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
        rk14 ^= rk10;
        rk15 ^= rk11;
        rk16 ^= rk12;
        rk17 ^= rk13;
        x0 ^= rk14;
        x1 ^= rk15;
        x2 ^= rk16;
        x3 ^= rk17;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
        rk18 ^= rk14;
        rk19 ^= rk15;
        rk1A ^= rk16;
        rk1B ^= rk17;
        x0 ^= rk18;
        x1 ^= rk19;
        x2 ^= rk1A;
        x3 ^= rk1B;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
        rk1C ^= rk18;
        rk1D ^= rk19;
        rk1E ^= rk1A;
        rk1F ^= rk1B;
        if (r == 2) {
            rk1C ^= sc->count2;
            rk1D ^= sc->count3;
            rk1E ^= sc->count0;
            rk1F ^= SPH_T32(~sc->count1);
        }
        x0 ^= rk1C;
        x1 ^= rk1D;
        x2 ^= rk1E;
        x3 ^= rk1F;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        p4 ^= x0;
        p5 ^= x1;
        p6 ^= x2;
        p7 ^= x3;
        /* round 2, 6, 10 */
        rk00 ^= rk19;
        x0 = pC ^ rk00;
        rk01 ^= rk1A;
        x1 = pD ^ rk01;
        rk02 ^= rk1B;
        x2 = pE ^ rk02;
        rk03 ^= rk1C;
        x3 = pF ^ rk03;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk04 ^= rk1D;
        x0 ^= rk04;
        rk05 ^= rk1E;
        x1 ^= rk05;
        rk06 ^= rk1F;
        x2 ^= rk06;
        rk07 ^= rk00;
        x3 ^= rk07;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk08 ^= rk01;
        x0 ^= rk08;
        rk09 ^= rk02;
        x1 ^= rk09;
        rk0A ^= rk03;
        x2 ^= rk0A;
        rk0B ^= rk04;
        x3 ^= rk0B;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk0C ^= rk05;
        x0 ^= rk0C;
        rk0D ^= rk06;
        x1 ^= rk0D;
        rk0E ^= rk07;
        x2 ^= rk0E;
        rk0F ^= rk08;
        x3 ^= rk0F;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        p8 ^= x0;
        p9 ^= x1;
        pA ^= x2;
        pB ^= x3;
        rk10 ^= rk09;
        x0 = p4 ^ rk10;
        rk11 ^= rk0A;
        x1 = p5 ^ rk11;
        rk12 ^= rk0B;
        x2 = p6 ^ rk12;
        rk13 ^= rk0C;
        x3 = p7 ^ rk13;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk14 ^= rk0D;
        x0 ^= rk14;
        rk15 ^= rk0E;
        x1 ^= rk15;
        rk16 ^= rk0F;
        x2 ^= rk16;
        rk17 ^= rk10;
        x3 ^= rk17;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk18 ^= rk11;
        x0 ^= rk18;
        rk19 ^= rk12;
        x1 ^= rk19;
        rk1A ^= rk13;
        x2 ^= rk1A;
        rk1B ^= rk14;
        x3 ^= rk1B;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk1C ^= rk15;
        x0 ^= rk1C;
        rk1D ^= rk16;
        x1 ^= rk1D;
        rk1E ^= rk17;
        x2 ^= rk1E;
        rk1F ^= rk18;
        x3 ^= rk1F;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        p0 ^= x0;
        p1 ^= x1;
        p2 ^= x2;
        p3 ^= x3;
        /* round 3, 7, 11 */
        KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
        rk00 ^= rk1C;
        rk01 ^= rk1D;
        rk02 ^= rk1E;
        rk03 ^= rk1F;
        x0 = p8 ^ rk00;
        x1 = p9 ^ rk01;
        x2 = pA ^ rk02;
        x3 = pB ^ rk03;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
        rk04 ^= rk00;
        rk05 ^= rk01;
        rk06 ^= rk02;
        rk07 ^= rk03;
        x0 ^= rk04;
        x1 ^= rk05;
        x2 ^= rk06;
        x3 ^= rk07;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
        rk08 ^= rk04;
        rk09 ^= rk05;
        rk0A ^= rk06;
        rk0B ^= rk07;
        x0 ^= rk08;
        x1 ^= rk09;
        x2 ^= rk0A;
        x3 ^= rk0B;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
        rk0C ^= rk08;
        rk0D ^= rk09;
        rk0E ^= rk0A;
        rk0F ^= rk0B;
        x0 ^= rk0C;
        x1 ^= rk0D;
        x2 ^= rk0E;
        x3 ^= rk0F;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        p4 ^= x0;
        p5 ^= x1;
        p6 ^= x2;
        p7 ^= x3;
        KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
        rk10 ^= rk0C;
        rk11 ^= rk0D;
        rk12 ^= rk0E;
        rk13 ^= rk0F;
        x0 = p0 ^ rk10;
        x1 = p1 ^ rk11;
        x2 = p2 ^ rk12;
        x3 = p3 ^ rk13;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
        rk14 ^= rk10;
        rk15 ^= rk11;
        rk16 ^= rk12;
        rk17 ^= rk13;
        x0 ^= rk14;
        x1 ^= rk15;
        x2 ^= rk16;
        x3 ^= rk17;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
        rk18 ^= rk14;
        rk19 ^= rk15;
        rk1A ^= rk16;
        rk1B ^= rk17;
        x0 ^= rk18;
        x1 ^= rk19;
        x2 ^= rk1A;
        x3 ^= rk1B;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
        rk1C ^= rk18;
        rk1D ^= rk19;
        rk1E ^= rk1A;
        rk1F ^= rk1B;
        x0 ^= rk1C;
        x1 ^= rk1D;
        x2 ^= rk1E;
        x3 ^= rk1F;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        pC ^= x0;
        pD ^= x1;
        pE ^= x2;
        pF ^= x3;
        /* round 4, 8, 12 */
        rk00 ^= rk19;
        x0 = p4 ^ rk00;
        rk01 ^= rk1A;
        x1 = p5 ^ rk01;
        rk02 ^= rk1B;
        x2 = p6 ^ rk02;
        rk03 ^= rk1C;
        x3 = p7 ^ rk03;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk04 ^= rk1D;
        x0 ^= rk04;
        rk05 ^= rk1E;
        x1 ^= rk05;
        rk06 ^= rk1F;
        x2 ^= rk06;
        rk07 ^= rk00;
        x3 ^= rk07;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk08 ^= rk01;
        x0 ^= rk08;
        rk09 ^= rk02;
        x1 ^= rk09;
        rk0A ^= rk03;
        x2 ^= rk0A;
        rk0B ^= rk04;
        x3 ^= rk0B;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk0C ^= rk05;
        x0 ^= rk0C;
        rk0D ^= rk06;
        x1 ^= rk0D;
        rk0E ^= rk07;
        x2 ^= rk0E;
        rk0F ^= rk08;
        x3 ^= rk0F;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        p0 ^= x0;
        p1 ^= x1;
        p2 ^= x2;
        p3 ^= x3;
        rk10 ^= rk09;
        x0 = pC ^ rk10;
        rk11 ^= rk0A;
        x1 = pD ^ rk11;
        rk12 ^= rk0B;
        x2 = pE ^ rk12;
        rk13 ^= rk0C;
        x3 = pF ^ rk13;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk14 ^= rk0D;
        x0 ^= rk14;
        rk15 ^= rk0E;
        x1 ^= rk15;
        rk16 ^= rk0F;
        x2 ^= rk16;
        rk17 ^= rk10;
        x3 ^= rk17;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk18 ^= rk11;
        x0 ^= rk18;
        rk19 ^= rk12;
        x1 ^= rk19;
        rk1A ^= rk13;
        x2 ^= rk1A;
        rk1B ^= rk14;
        x3 ^= rk1B;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        rk1C ^= rk15;
        x0 ^= rk1C;
        rk1D ^= rk16;
        x1 ^= rk1D;
        rk1E ^= rk17;
        x2 ^= rk1E;
        rk1F ^= rk18;
        x3 ^= rk1F;
        AES_ROUND_NOKEY(x0, x1, x2, x3);
        p8 ^= x0;
        p9 ^= x1;
        pA ^= x2;
        pB ^= x3;
    }
    /* round 13 */
    KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
    rk00 ^= rk1C;
    rk01 ^= rk1D;
    rk02 ^= rk1E;
    rk03 ^= rk1F;
    x0 = p0 ^ rk00;
    x1 = p1 ^ rk01;
    x2 = p2 ^ rk02;
    x3 = p3 ^ rk03;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
    rk04 ^= rk00;
    rk05 ^= rk01;
    rk06 ^= rk02;
    rk07 ^= rk03;
    x0 ^= rk04;
    x1 ^= rk05;
    x2 ^= rk06;
    x3 ^= rk07;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
    rk08 ^= rk04;
    rk09 ^= rk05;
    rk0A ^= rk06;
    rk0B ^= rk07;
    x0 ^= rk08;
    x1 ^= rk09;
    x2 ^= rk0A;
    x3 ^= rk0B;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
    rk0C ^= rk08;
    rk0D ^= rk09;
    rk0E ^= rk0A;
    rk0F ^= rk0B;
    x0 ^= rk0C;
    x1 ^= rk0D;
    x2 ^= rk0E;
    x3 ^= rk0F;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    pC ^= x0;
    pD ^= x1;
    pE ^= x2;
    pF ^= x3;
    KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
    rk10 ^= rk0C;
    rk11 ^= rk0D;
    rk12 ^= rk0E;
    rk13 ^= rk0F;
    x0 = p8 ^ rk10;
    x1 = p9 ^ rk11;
    x2 = pA ^ rk12;
    x3 = pB ^ rk13;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
    rk14 ^= rk10;
    rk15 ^= rk11;
    rk16 ^= rk12;
    rk17 ^= rk13;
    x0 ^= rk14;
    x1 ^= rk15;
    x2 ^= rk16;
    x3 ^= rk17;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
    rk18 ^= rk14 ^ sc->count1;
    rk19 ^= rk15 ^ sc->count0;
    rk1A ^= rk16 ^ sc->count3;
    rk1B ^= rk17 ^ SPH_T32(~sc->count2);
    x0 ^= rk18;
    x1 ^= rk19;
    x2 ^= rk1A;
    x3 ^= rk1B;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
    rk1C ^= rk18;
    rk1D ^= rk19;
    rk1E ^= rk1A;
    rk1F ^= rk1B;
    x0 ^= rk1C;
    x1 ^= rk1D;
    x2 ^= rk1E;
    x3 ^= rk1F;
    AES_ROUND_NOKEY(x0, x1, x2, x3);
    p4 ^= x0;
    p5 ^= x1;
    p6 ^= x2;
    p7 ^= x3;
    sc->h[0x0] ^= p8;
    sc->h[0x1] ^= p9;
    sc->h[0x2] ^= pA;
    sc->h[0x3] ^= pB;
    sc->h[0x4] ^= pC;
    sc->h[0x5] ^= pD;
    sc->h[0x6] ^= pE;
    sc->h[0x7] ^= pF;
    sc->h[0x8] ^= p0;
    sc->h[0x9] ^= p1;
    sc->h[0xA] ^= p2;
    sc->h[0xB] ^= p3;
    sc->h[0xC] ^= p4;
    sc->h[0xD] ^= p5;
    sc->h[0xE] ^= p6;
    sc->h[0xF] ^= p7;
}

#endif

static void
shavite_small_init(sph_shavite_small_context *sc, const sph_u32 *iv)
{
    memcpy(sc->h, iv, sizeof sc->h);
    sc->ptr = 0;
    sc->count0 = 0;
    sc->count1 = 0;
}

static void
shavite_small_core(sph_shavite_small_context *sc, const void *data, size_t len)
{
    unsigned char *buf;
    size_t ptr;

    buf = sc->buf;
    ptr = sc->ptr;
    while (len > 0) {
        size_t clen;

        clen = (sizeof sc->buf) - ptr;
        if (clen > len)
            clen = len;
        memcpy(buf + ptr, data, clen);
        data = (const unsigned char *)data + clen;
        ptr += clen;
        len -= clen;
        if (ptr == sizeof sc->buf) {
            if ((sc->count0 = SPH_T32(sc->count0 + 512)) == 0)
                sc->count1 = SPH_T32(sc->count1 + 1);
            c256(sc, buf);
            ptr = 0;
        }
    }
    sc->ptr = ptr;
}

static void
shavite_small_close(sph_shavite_small_context *sc,
    unsigned ub, unsigned n, void *dst, size_t out_size_w32)
{
    unsigned char *buf;
    size_t ptr, u;
    unsigned z;
    sph_u32 count0, count1;

    buf = sc->buf;
    ptr = sc->ptr;
    count0 = (sc->count0 += SPH_T32(ptr << 3) + n);
    count1 = sc->count1;
    z = 0x80 >> n;
    z = ((ub & -z) | z) & 0xFF;
    if (ptr == 0 && n == 0) {
        buf[0] = 0x80;
        memset(buf + 1, 0, 53);
        sc->count0 = sc->count1 = 0;
    } else if (ptr < 54) {
        buf[ptr ++] = z;
        memset(buf + ptr, 0, 54 - ptr);
    } else {
        buf[ptr ++] = z;
        memset(buf + ptr, 0, 64 - ptr);
        c256(sc, buf);
        memset(buf, 0, 54);
        sc->count0 = sc->count1 = 0;
    }
    sph_enc32le(buf + 54, count0);
    sph_enc32le(buf + 58, count1);
    buf[62] = (unsigned char) (out_size_w32 << 5);
    buf[63] = (unsigned char) (out_size_w32 >> 3);
    c256(sc, buf);
    for (u = 0; u < out_size_w32; u ++)
        sph_enc32le((unsigned char *)dst + (u << 2), sc->h[u]);
}
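
/*
 * The closing sequence above pads the last block in the SHAvite-224/256
 * layout: a final marker byte (0x80 for byte-aligned input), zeroes up
 * to offset 54, the 64-bit message bit length in little-endian order at
 * offsets 54..61, then the digest length in bits as a 16-bit
 * little-endian value at offsets 62..63.
 */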

static void
shavite_big_init(sph_shavite_big_context *sc, const sph_u32 *iv)
{
    memcpy(sc->h, iv, sizeof sc->h);
    sc->ptr = 0;
    sc->count0 = 0;
    sc->count1 = 0;
    sc->count2 = 0;
    sc->count3 = 0;
}

static void
shavite_big_core(sph_shavite_big_context *sc, const void *data, size_t len)
{
    unsigned char *buf;
    size_t ptr;

    buf = sc->buf;
    ptr = sc->ptr;
    while (len > 0) {
        size_t clen;

        clen = (sizeof sc->buf) - ptr;
        if (clen > len)
            clen = len;
        memcpy(buf + ptr, data, clen);
        data = (const unsigned char *)data + clen;
        ptr += clen;
        len -= clen;
        if (ptr == sizeof sc->buf) {
            if ((sc->count0 = SPH_T32(sc->count0 + 1024)) == 0) {
                sc->count1 = SPH_T32(sc->count1 + 1);
                if (sc->count1 == 0) {
                    sc->count2 = SPH_T32(sc->count2 + 1);
                    if (sc->count2 == 0) {
                        sc->count3 = SPH_T32(
                            sc->count3 + 1);
                    }
                }
            }
            c512(sc, buf);
            ptr = 0;
        }
    }
    sc->ptr = ptr;
}
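
/*
 * The four count words form a single 128-bit bit counter: count0 is
 * incremented by 1024 bits per processed block, and every wrap-around
 * carries into count1, count2 and count3 in turn.
 */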

static void
shavite_big_close(sph_shavite_big_context *sc,
    unsigned ub, unsigned n, void *dst, size_t out_size_w32)
{
    unsigned char *buf;
    size_t ptr, u;
    unsigned z;
    sph_u32 count0, count1, count2, count3;

    buf = sc->buf;
    ptr = sc->ptr;
    count0 = (sc->count0 += SPH_T32(ptr << 3) + n);
    count1 = sc->count1;
    count2 = sc->count2;
    count3 = sc->count3;
    z = 0x80 >> n;
    z = ((ub & -z) | z) & 0xFF;
    if (ptr == 0 && n == 0) {
        buf[0] = 0x80;
        memset(buf + 1, 0, 109);
        sc->count0 = sc->count1 = sc->count2 = sc->count3 = 0;
    } else if (ptr < 110) {
        buf[ptr ++] = z;
        memset(buf + ptr, 0, 110 - ptr);
    } else {
        buf[ptr ++] = z;
        memset(buf + ptr, 0, 128 - ptr);
        c512(sc, buf);
        memset(buf, 0, 110);
        sc->count0 = sc->count1 = sc->count2 = sc->count3 = 0;
    }
    sph_enc32le(buf + 110, count0);
    sph_enc32le(buf + 114, count1);
    sph_enc32le(buf + 118, count2);
    sph_enc32le(buf + 122, count3);
    buf[126] = (unsigned char) (out_size_w32 << 5);
    buf[127] = (unsigned char) (out_size_w32 >> 3);
    c512(sc, buf);
    for (u = 0; u < out_size_w32; u ++)
        sph_enc32le((unsigned char *)dst + (u << 2), sc->h[u]);
}

/* see sph_shavite.h */
void
sph_shavite224_init(void *cc)
{
    shavite_small_init(cc, IV224);
}

/* see sph_shavite.h */
void
sph_shavite224(void *cc, const void *data, size_t len)
{
    shavite_small_core(cc, data, len);
}

/* see sph_shavite.h */
void
sph_shavite224_close(void *cc, void *dst)
{
    shavite_small_close(cc, 0, 0, dst, 7);
    shavite_small_init(cc, IV224);
}

/* see sph_shavite.h */
void
sph_shavite224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    shavite_small_close(cc, ub, n, dst, 7);
    shavite_small_init(cc, IV224);
}

/* see sph_shavite.h */
void
sph_shavite256_init(void *cc)
{
    shavite_small_init(cc, IV256);
}

/* see sph_shavite.h */
void
sph_shavite256(void *cc, const void *data, size_t len)
{
    shavite_small_core(cc, data, len);
}

/* see sph_shavite.h */
void
sph_shavite256_close(void *cc, void *dst)
{
    shavite_small_close(cc, 0, 0, dst, 8);
    shavite_small_init(cc, IV256);
}

/* see sph_shavite.h */
void
sph_shavite256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    shavite_small_close(cc, ub, n, dst, 8);
    shavite_small_init(cc, IV256);
}

/* see sph_shavite.h */
void
sph_shavite384_init(void *cc)
{
    shavite_big_init(cc, IV384);
}

/* see sph_shavite.h */
void
sph_shavite384(void *cc, const void *data, size_t len)
{
    shavite_big_core(cc, data, len);
}

/* see sph_shavite.h */
void
sph_shavite384_close(void *cc, void *dst)
{
    shavite_big_close(cc, 0, 0, dst, 12);
    shavite_big_init(cc, IV384);
}

/* see sph_shavite.h */
void
sph_shavite384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    shavite_big_close(cc, ub, n, dst, 12);
    shavite_big_init(cc, IV384);
}

/* see sph_shavite.h */
void
sph_shavite512_init(void *cc)
{
    shavite_big_init(cc, IV512);
}

/* see sph_shavite.h */
void
sph_shavite512(void *cc, const void *data, size_t len)
{
    shavite_big_core(cc, data, len);
}

/* see sph_shavite.h */
void
sph_shavite512_close(void *cc, void *dst)
{
    shavite_big_close(cc, 0, 0, dst, 16);
    shavite_big_init(cc, IV512);
}

/* see sph_shavite.h */
void
sph_shavite512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    shavite_big_close(cc, ub, n, dst, 16);
    shavite_big_init(cc, IV512);
}
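
/*
 * Usage sketch: hashing a message with SHAvite-512 through the
 * streaming API above. The sph_shavite512_context typedef is assumed
 * to be provided by sph_shavite.h, following the usual sphlib naming.
 *
 *     unsigned char digest[64];
 *     sph_shavite512_context cc;
 *
 *     sph_shavite512_init(&cc);
 *     sph_shavite512(&cc, data, data_len);
 *     sph_shavite512_close(&cc, digest);
 *
 * Since the close functions re-initialize the context, the same
 * context may be reused for the next message.
 */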

#ifdef __cplusplus
}
#endif