/* Source: GitHub repository tpruvot/cpuminer-multi, path blob/linux/sha3/sph_jh.c
 * (web-viewer navigation text removed). */
1
/* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */
2
/*
3
* JH implementation.
4
*
5
* ==========================(LICENSE BEGIN)============================
6
*
7
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
8
*
9
* Permission is hereby granted, free of charge, to any person obtaining
10
* a copy of this software and associated documentation files (the
11
* "Software"), to deal in the Software without restriction, including
12
* without limitation the rights to use, copy, modify, merge, publish,
13
* distribute, sublicense, and/or sell copies of the Software, and to
14
* permit persons to whom the Software is furnished to do so, subject to
15
* the following conditions:
16
*
17
* The above copyright notice and this permission notice shall be
18
* included in all copies or substantial portions of the Software.
19
*
20
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
*
28
* ===========================(LICENSE END)=============================
29
*
30
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
31
*/
32
33
#include <stddef.h>
34
#include <string.h>
35
36
#include "sph_jh.h"
37
38
#ifdef __cplusplus
39
extern "C"{
40
#endif
41
42
43
/*
 * Build-time configuration.
 *
 *  - SPH_SMALL_FOOTPRINT_JH is set to 1 when the generic
 *    SPH_SMALL_FOOTPRINT switch is on and no JH-specific value was given.
 *  - SPH_JH_64 is set to 1 when SPH_64_TRUE is on and no explicit value
 *    was given; it is removed again whenever SPH_64 is off, so the 64-bit
 *    code path is never selected without 64-bit support.
 *  - Under MSVC, warning 4146 is disabled for this file.
 *    NOTE(review): presumably because of the `-z` applied to an unsigned
 *    value in jh_close() -- confirm against the MSVC documentation.
 */
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_JH
#define SPH_SMALL_FOOTPRINT_JH   1
#endif

#if !defined SPH_JH_64 && SPH_64_TRUE
#define SPH_JH_64   1
#endif

#if !SPH_64
#undef SPH_JH_64
#endif

#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
58
59
/*
 * The internal bitslice representation may use either big-endian or
 * little-endian (true bitslice operations do not care about the bit
 * ordering, and the bit-swapping linear operations in JH happen to
 * be invariant through endianness-swapping). The constants must be
 * defined according to the chosen endianness; we use some
 * byte-swapping macros for that.
 */
67
68
/*
 * Endianness glue.
 *
 * C32e(x) / C64e(x) wrap a constant written in big-endian digit order:
 * when SPH_LITTLE_ENDIAN is set the bytes of the constant are reversed,
 * otherwise the constant is used unchanged. dec32e_aligned / enc32e (and
 * the 64-bit counterparts, available only when SPH_64 is set) are aliases
 * for the sph_dec/sph_enc helpers of the matching byte order.
 *
 * The macro argument is expanded several times in the little-endian
 * variants, so it must be a plain literal.
 */
#if SPH_LITTLE_ENDIAN

#define C32e(x)     ((SPH_C32(x) >> 24) \
                    | ((SPH_C32(x) >>  8) & SPH_C32(0x0000FF00)) \
                    | ((SPH_C32(x) <<  8) & SPH_C32(0x00FF0000)) \
                    | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
#define dec32e_aligned   sph_dec32le_aligned
#define enc32e           sph_enc32le

#if SPH_64
#define C64e(x)     ((SPH_C64(x) >> 56) \
                    | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
                    | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
                    | ((SPH_C64(x) >>  8) & SPH_C64(0x00000000FF000000)) \
                    | ((SPH_C64(x) <<  8) & SPH_C64(0x000000FF00000000)) \
                    | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
                    | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
                    | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
#define dec64e_aligned   sph_dec64le_aligned
#define enc64e           sph_enc64le
#endif

#else

#define C32e(x)     SPH_C32(x)
#define dec32e_aligned   sph_dec32be_aligned
#define enc32e           sph_enc32be
#if SPH_64
#define C64e(x)     SPH_C64(x)
#define dec64e_aligned   sph_dec64be_aligned
#define enc64e           sph_enc64be
#endif

#endif
102
103
/*
 * Sb: bitsliced substitution step on four state words.
 *
 * x0..x3 are updated in place using bitwise operators only, so every bit
 * position is transformed independently of the others. `c` is a constant
 * word mixed into the formula at two places; the bit of `c` at a given
 * position changes which 4-bit mapping is applied at that position.
 *
 * Usage constraints:
 *  - the arguments are expanded several times, so they must be plain
 *    lvalues (and `c` a side-effect-free expression);
 *  - a scratch variable named `tmp`, of the same word type, must be in
 *    scope at the point of use (DECL_STATE declares it).
 */
#define Sb(x0, x1, x2, x3, c)   do { \
		x3 = ~x3; \
		x0 ^= (c) & ~x2; \
		tmp = (c) ^ (x0 & x1); \
		x0 ^= x2 & x3; \
		x3 ^= ~x1 & x2; \
		x1 ^= x0 & x2; \
		x2 ^= x0 & ~x3; \
		x0 ^= x1 | x3; \
		x3 ^= x1 & x2; \
		x1 ^= tmp & x0; \
		x2 ^= tmp; \
	} while (0)
116
117
/*
 * Lb: linear mixing step on eight state words.
 *
 * Only XOR is used. The second group (x4..x7) is first updated from the
 * first group (x0..x3); the first group is then updated from the freshly
 * modified second group. All eight operands are modified in place.
 *
 * The arguments are expanded more than once and must therefore be plain
 * lvalues without side effects.
 */
#define Lb(x0, x1, x2, x3, x4, x5, x6, x7)   do { \
		x4 ^= x1; \
		x5 ^= x2; \
		x6 ^= x3 ^ x0; \
		x7 ^= x0; \
		x0 ^= x5; \
		x1 ^= x6; \
		x2 ^= x7 ^ x4; \
		x3 ^= x4; \
	} while (0)
127
128
#if SPH_JH_64
129
130
static const sph_u64 C[] = {
131
C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
132
C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
133
C64e(0xea983ae05c45fa9c), C64e(0x03c5d29966b2999a),
134
C64e(0x660296b4f2bb538a), C64e(0xb556141a88dba231),
135
C64e(0x03a35a5c9a190edb), C64e(0x403fb20a87c14410),
136
C64e(0x1c051980849e951d), C64e(0x6f33ebad5ee7cddc),
137
C64e(0x10ba139202bf6b41), C64e(0xdc786515f7bb27d0),
138
C64e(0x0a2c813937aa7850), C64e(0x3f1abfd2410091d3),
139
C64e(0x422d5a0df6cc7e90), C64e(0xdd629f9c92c097ce),
140
C64e(0x185ca70bc72b44ac), C64e(0xd1df65d663c6fc23),
141
C64e(0x976e6c039ee0b81a), C64e(0x2105457e446ceca8),
142
C64e(0xeef103bb5d8e61fa), C64e(0xfd9697b294838197),
143
C64e(0x4a8e8537db03302f), C64e(0x2a678d2dfb9f6a95),
144
C64e(0x8afe7381f8b8696c), C64e(0x8ac77246c07f4214),
145
C64e(0xc5f4158fbdc75ec4), C64e(0x75446fa78f11bb80),
146
C64e(0x52de75b7aee488bc), C64e(0x82b8001e98a6a3f4),
147
C64e(0x8ef48f33a9a36315), C64e(0xaa5f5624d5b7f989),
148
C64e(0xb6f1ed207c5ae0fd), C64e(0x36cae95a06422c36),
149
C64e(0xce2935434efe983d), C64e(0x533af974739a4ba7),
150
C64e(0xd0f51f596f4e8186), C64e(0x0e9dad81afd85a9f),
151
C64e(0xa7050667ee34626a), C64e(0x8b0b28be6eb91727),
152
C64e(0x47740726c680103f), C64e(0xe0a07e6fc67e487b),
153
C64e(0x0d550aa54af8a4c0), C64e(0x91e3e79f978ef19e),
154
C64e(0x8676728150608dd4), C64e(0x7e9e5a41f3e5b062),
155
C64e(0xfc9f1fec4054207a), C64e(0xe3e41a00cef4c984),
156
C64e(0x4fd794f59dfa95d8), C64e(0x552e7e1124c354a5),
157
C64e(0x5bdf7228bdfe6e28), C64e(0x78f57fe20fa5c4b2),
158
C64e(0x05897cefee49d32e), C64e(0x447e9385eb28597f),
159
C64e(0x705f6937b324314a), C64e(0x5e8628f11dd6e465),
160
C64e(0xc71b770451b920e7), C64e(0x74fe43e823d4878a),
161
C64e(0x7d29e8a3927694f2), C64e(0xddcb7a099b30d9c1),
162
C64e(0x1d1b30fb5bdc1be0), C64e(0xda24494ff29c82bf),
163
C64e(0xa4e7ba31b470bfff), C64e(0x0d324405def8bc48),
164
C64e(0x3baefc3253bbd339), C64e(0x459fc3c1e0298ba0),
165
C64e(0xe5c905fdf7ae090f), C64e(0x947034124290f134),
166
C64e(0xa271b701e344ed95), C64e(0xe93b8e364f2f984a),
167
C64e(0x88401d63a06cf615), C64e(0x47c1444b8752afff),
168
C64e(0x7ebb4af1e20ac630), C64e(0x4670b6c5cc6e8ce6),
169
C64e(0xa4d5a456bd4fca00), C64e(0xda9d844bc83e18ae),
170
C64e(0x7357ce453064d1ad), C64e(0xe8a6ce68145c2567),
171
C64e(0xa3da8cf2cb0ee116), C64e(0x33e906589a94999a),
172
C64e(0x1f60b220c26f847b), C64e(0xd1ceac7fa0d18518),
173
C64e(0x32595ba18ddd19d3), C64e(0x509a1cc0aaa5b446),
174
C64e(0x9f3d6367e4046bba), C64e(0xf6ca19ab0b56ee7e),
175
C64e(0x1fb179eaa9282174), C64e(0xe9bdf7353b3651ee),
176
C64e(0x1d57ac5a7550d376), C64e(0x3a46c2fea37d7001),
177
C64e(0xf735c1af98a4d842), C64e(0x78edec209e6b6779),
178
C64e(0x41836315ea3adba8), C64e(0xfac33b4d32832c83),
179
C64e(0xa7403b1f1c2747f3), C64e(0x5940f034b72d769a),
180
C64e(0xe73e4e6cd2214ffd), C64e(0xb8fd8d39dc5759ef),
181
C64e(0x8d9b0c492b49ebda), C64e(0x5ba2d74968f3700d),
182
C64e(0x7d3baed07a8d5584), C64e(0xf5a5e9f0e4f88e65),
183
C64e(0xa0b8a2f436103b53), C64e(0x0ca8079e753eec5a),
184
C64e(0x9168949256e8884f), C64e(0x5bb05c55f8babc4c),
185
C64e(0xe3bb3b99f387947b), C64e(0x75daf4d6726b1c5d),
186
C64e(0x64aeac28dc34b36d), C64e(0x6c34a550b828db71),
187
C64e(0xf861e2f2108d512a), C64e(0xe3db643359dd75fc),
188
C64e(0x1cacbcf143ce3fa2), C64e(0x67bbd13c02e843b0),
189
C64e(0x330a5bca8829a175), C64e(0x7f34194db416535c),
190
C64e(0x923b94c30e794d1e), C64e(0x797475d7b6eeaf3f),
191
C64e(0xeaa8d4f7be1a3921), C64e(0x5cf47e094c232751),
192
C64e(0x26a32453ba323cd2), C64e(0x44a3174a6da6d5ad),
193
C64e(0xb51d3ea6aff2c908), C64e(0x83593d98916b3c56),
194
C64e(0x4cf87ca17286604d), C64e(0x46e23ecc086ec7f6),
195
C64e(0x2f9833b3b1bc765e), C64e(0x2bd666a5efc4e62a),
196
C64e(0x06f4b6e8bec1d436), C64e(0x74ee8215bcef2163),
197
C64e(0xfdc14e0df453c969), C64e(0xa77d5ac406585826),
198
C64e(0x7ec1141606e0fa16), C64e(0x7e90af3d28639d3f),
199
C64e(0xd2c9f2e3009bd20c), C64e(0x5faace30b7d40c30),
200
C64e(0x742a5116f2e03298), C64e(0x0deb30d8e3cef89a),
201
C64e(0x4bc59e7bb5f17992), C64e(0xff51e66e048668d3),
202
C64e(0x9b234d57e6966731), C64e(0xcce6a6f3170a7505),
203
C64e(0xb17681d913326cce), C64e(0x3c175284f805a262),
204
C64e(0xf42bcbb378471547), C64e(0xff46548223936a48),
205
C64e(0x38df58074e5e6565), C64e(0xf2fc7c89fc86508e),
206
C64e(0x31702e44d00bca86), C64e(0xf04009a23078474e),
207
C64e(0x65a0ee39d1f73883), C64e(0xf75ee937e42c3abd),
208
C64e(0x2197b2260113f86f), C64e(0xa344edd1ef9fdee7),
209
C64e(0x8ba0df15762592d9), C64e(0x3c85f7f612dc42be),
210
C64e(0xd8a7ec7cab27b07e), C64e(0x538d7ddaaa3ea8de),
211
C64e(0xaa25ce93bd0269d8), C64e(0x5af643fd1a7308f9),
212
C64e(0xc05fefda174a19a5), C64e(0x974d66334cfd216a),
213
C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
214
C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
215
};
216
217
/*
 * Round-constant accessors (64-bit variant).
 *
 * The table C holds four 64-bit words per round r, in this order:
 * "even" high, "even" low, "odd" high, "odd" low.
 */
#define Ceven_hi(r)   (C[((r) << 2) + 0])
#define Ceven_lo(r)   (C[((r) << 2) + 1])
#define Codd_hi(r)    (C[((r) << 2) + 2])
#define Codd_lo(r)    (C[((r) << 2) + 3])

/*
 * S: apply Sb to both 64-bit halves of four state variables.
 *
 * x0..x3 are name prefixes: the suffixes `h` and `l` are pasted on to
 * reach the actual variables. `cb` is a constant-accessor prefix
 * (`Ceven_` or `Codd_`) completed with `hi` / `lo`, and r is the round
 * index handed to that accessor.
 */
#define S(x0, x1, x2, x3, cb, r)   do { \
		Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \
		Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \
	} while (0)

/*
 * L: apply Lb to both 64-bit halves of eight state variables
 * (x0..x7 are name prefixes completed with `h` and `l`).
 */
#define L(x0, x1, x2, x3, x4, x5, x6, x7)   do { \
		Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \
			x4 ## h, x5 ## h, x6 ## h, x7 ## h); \
		Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \
			x4 ## l, x5 ## l, x6 ## l, x7 ## l); \
	} while (0)
233
234
/*
 * Wz: swap adjacent groups of n bits inside both halves (x##h, x##l) of a
 * state variable. `c` is the mask selecting the low group of every pair;
 * the selected bits move up by n while the bits n positions above move
 * down into their place.
 */
#define Wz(x, c, n)   do { \
		sph_u64 t = (x ## h & (c)) << (n); \
		x ## h = ((x ## h >> (n)) & (c)) | t; \
		t = (x ## l & (c)) << (n); \
		x ## l = ((x ## l >> (n)) & (c)) | t; \
	} while (0)

/*
 * W0..W5 swap groups of 1, 2, 4, 8, 16 and 32 bits within each 64-bit
 * half; W6 exchanges the two halves themselves.
 */
#define W0(x)   Wz(x, SPH_C64(0x5555555555555555),  1)
#define W1(x)   Wz(x, SPH_C64(0x3333333333333333),  2)
#define W2(x)   Wz(x, SPH_C64(0x0F0F0F0F0F0F0F0F),  4)
#define W3(x)   Wz(x, SPH_C64(0x00FF00FF00FF00FF),  8)
#define W4(x)   Wz(x, SPH_C64(0x0000FFFF0000FFFF), 16)
#define W5(x)   Wz(x, SPH_C64(0x00000000FFFFFFFF), 32)
#define W6(x)   do { \
		sph_u64 t = x ## h; \
		x ## h = x ## l; \
		x ## l = t; \
	} while (0)
252
253
/*
 * DECL_STATE: declare the local working copy of the hash state for the
 * 64-bit variant: eight variables h0..h7, each split into a high (`h`) and
 * a low (`l`) 64-bit half, plus the scratch word `tmp` used by Sb.
 * The macro already contains the trailing semicolon.
 */
#define DECL_STATE \
	sph_u64 h0h, h1h, h2h, h3h, h4h, h5h, h6h, h7h; \
	sph_u64 h0l, h1l, h2l, h3l, h4l, h5l, h6l, h7l; \
	sph_u64 tmp;

/*
 * READ_STATE: load the working variables from (state)->H.wide[0..15].
 * Words are stored pairwise: index 2*i is hi-half of h<i>, index 2*i+1
 * is the lo-half.
 */
#define READ_STATE(state)   do { \
		h0h = (state)->H.wide[ 0]; \
		h0l = (state)->H.wide[ 1]; \
		h1h = (state)->H.wide[ 2]; \
		h1l = (state)->H.wide[ 3]; \
		h2h = (state)->H.wide[ 4]; \
		h2l = (state)->H.wide[ 5]; \
		h3h = (state)->H.wide[ 6]; \
		h3l = (state)->H.wide[ 7]; \
		h4h = (state)->H.wide[ 8]; \
		h4l = (state)->H.wide[ 9]; \
		h5h = (state)->H.wide[10]; \
		h5l = (state)->H.wide[11]; \
		h6h = (state)->H.wide[12]; \
		h6l = (state)->H.wide[13]; \
		h7h = (state)->H.wide[14]; \
		h7l = (state)->H.wide[15]; \
	} while (0)

/*
 * WRITE_STATE: store the working variables back into
 * (state)->H.wide[0..15], using the same layout as READ_STATE.
 */
#define WRITE_STATE(state)   do { \
		(state)->H.wide[ 0] = h0h; \
		(state)->H.wide[ 1] = h0l; \
		(state)->H.wide[ 2] = h1h; \
		(state)->H.wide[ 3] = h1l; \
		(state)->H.wide[ 4] = h2h; \
		(state)->H.wide[ 5] = h2l; \
		(state)->H.wide[ 6] = h3h; \
		(state)->H.wide[ 7] = h3l; \
		(state)->H.wide[ 8] = h4h; \
		(state)->H.wide[ 9] = h4l; \
		(state)->H.wide[10] = h5h; \
		(state)->H.wide[11] = h5l; \
		(state)->H.wide[12] = h6h; \
		(state)->H.wide[13] = h6l; \
		(state)->H.wide[14] = h7h; \
		(state)->H.wide[15] = h7l; \
	} while (0)
295
296
/*
 * INPUT_BUF1: decode the 64-byte block found at `buf` into eight 64-bit
 * words (m0h .. m3l, declared here) and XOR them into the first half of
 * the working state (h0..h3).
 *
 * The macro declares variables, so it must be expanded at a point where
 * declarations are allowed, and INPUT_BUF2 must be expanded in the same
 * scope afterwards. It expects `buf` and the DECL_STATE variables to be
 * visible. The final statement already carries its semicolon.
 */
#define INPUT_BUF1 \
	sph_u64 m0h = dec64e_aligned(buf +  0); \
	sph_u64 m0l = dec64e_aligned(buf +  8); \
	sph_u64 m1h = dec64e_aligned(buf + 16); \
	sph_u64 m1l = dec64e_aligned(buf + 24); \
	sph_u64 m2h = dec64e_aligned(buf + 32); \
	sph_u64 m2l = dec64e_aligned(buf + 40); \
	sph_u64 m3h = dec64e_aligned(buf + 48); \
	sph_u64 m3l = dec64e_aligned(buf + 56); \
	h0h ^= m0h; \
	h0l ^= m0l; \
	h1h ^= m1h; \
	h1l ^= m1l; \
	h2h ^= m2h; \
	h2l ^= m2l; \
	h3h ^= m3h; \
	h3l ^= m3l;

/*
 * INPUT_BUF2: XOR the message words decoded by INPUT_BUF1 into the second
 * half of the working state (h4..h7).
 */
#define INPUT_BUF2 \
	h4h ^= m0h; \
	h4l ^= m0l; \
	h5h ^= m1h; \
	h5l ^= m1l; \
	h6h ^= m2h; \
	h6l ^= m2l; \
	h7h ^= m3h; \
	h7l ^= m3l;
323
324
static const sph_u64 IV224[] = {
325
C64e(0x2dfedd62f99a98ac), C64e(0xae7cacd619d634e7),
326
C64e(0xa4831005bc301216), C64e(0xb86038c6c9661494),
327
C64e(0x66d9899f2580706f), C64e(0xce9ea31b1d9b1adc),
328
C64e(0x11e8325f7b366e10), C64e(0xf994857f02fa06c1),
329
C64e(0x1b4f1b5cd8c840b3), C64e(0x97f6a17f6e738099),
330
C64e(0xdcdf93a5adeaa3d3), C64e(0xa431e8dec9539a68),
331
C64e(0x22b4a98aec86a1e4), C64e(0xd574ac959ce56cf0),
332
C64e(0x15960deab5ab2bbf), C64e(0x9611dcf0dd64ea6e)
333
};
334
335
static const sph_u64 IV256[] = {
336
C64e(0xeb98a3412c20d3eb), C64e(0x92cdbe7b9cb245c1),
337
C64e(0x1c93519160d4c7fa), C64e(0x260082d67e508a03),
338
C64e(0xa4239e267726b945), C64e(0xe0fb1a48d41a9477),
339
C64e(0xcdb5ab26026b177a), C64e(0x56f024420fff2fa8),
340
C64e(0x71a396897f2e4d75), C64e(0x1d144908f77de262),
341
C64e(0x277695f776248f94), C64e(0x87d5b6574780296c),
342
C64e(0x5c5e272dac8e0d6c), C64e(0x518450c657057a0f),
343
C64e(0x7be4d367702412ea), C64e(0x89e3ab13d31cd769)
344
};
345
346
static const sph_u64 IV384[] = {
347
C64e(0x481e3bc6d813398a), C64e(0x6d3b5e894ade879b),
348
C64e(0x63faea68d480ad2e), C64e(0x332ccb21480f8267),
349
C64e(0x98aec84d9082b928), C64e(0xd455ea3041114249),
350
C64e(0x36f555b2924847ec), C64e(0xc7250a93baf43ce1),
351
C64e(0x569b7f8a27db454c), C64e(0x9efcbd496397af0e),
352
C64e(0x589fc27d26aa80cd), C64e(0x80c08b8c9deb2eda),
353
C64e(0x8a7981e8f8d5373a), C64e(0xf43967adddd17a71),
354
C64e(0xa9b4d3bda475d394), C64e(0x976c3fba9842737f)
355
};
356
357
static const sph_u64 IV512[] = {
358
C64e(0x6fd14b963e00aa17), C64e(0x636a2e057a15d543),
359
C64e(0x8a225e8d0c97ef0b), C64e(0xe9341259f2b3c361),
360
C64e(0x891da0c1536f801e), C64e(0x2aa9056bea2b6d80),
361
C64e(0x588eccdb2075baa6), C64e(0xa90f3a76baf83bf7),
362
C64e(0x0169e60541e34a69), C64e(0x46b58a8e2e6fe65a),
363
C64e(0x1047a7d0c1843c24), C64e(0x3b6e71b12d5ac199),
364
C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
365
C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
366
};
367
368
#else
369
370
static const sph_u32 C[] = {
371
C32e(0x72d5dea2), C32e(0xdf15f867), C32e(0x7b84150a),
372
C32e(0xb7231557), C32e(0x81abd690), C32e(0x4d5a87f6),
373
C32e(0x4e9f4fc5), C32e(0xc3d12b40), C32e(0xea983ae0),
374
C32e(0x5c45fa9c), C32e(0x03c5d299), C32e(0x66b2999a),
375
C32e(0x660296b4), C32e(0xf2bb538a), C32e(0xb556141a),
376
C32e(0x88dba231), C32e(0x03a35a5c), C32e(0x9a190edb),
377
C32e(0x403fb20a), C32e(0x87c14410), C32e(0x1c051980),
378
C32e(0x849e951d), C32e(0x6f33ebad), C32e(0x5ee7cddc),
379
C32e(0x10ba1392), C32e(0x02bf6b41), C32e(0xdc786515),
380
C32e(0xf7bb27d0), C32e(0x0a2c8139), C32e(0x37aa7850),
381
C32e(0x3f1abfd2), C32e(0x410091d3), C32e(0x422d5a0d),
382
C32e(0xf6cc7e90), C32e(0xdd629f9c), C32e(0x92c097ce),
383
C32e(0x185ca70b), C32e(0xc72b44ac), C32e(0xd1df65d6),
384
C32e(0x63c6fc23), C32e(0x976e6c03), C32e(0x9ee0b81a),
385
C32e(0x2105457e), C32e(0x446ceca8), C32e(0xeef103bb),
386
C32e(0x5d8e61fa), C32e(0xfd9697b2), C32e(0x94838197),
387
C32e(0x4a8e8537), C32e(0xdb03302f), C32e(0x2a678d2d),
388
C32e(0xfb9f6a95), C32e(0x8afe7381), C32e(0xf8b8696c),
389
C32e(0x8ac77246), C32e(0xc07f4214), C32e(0xc5f4158f),
390
C32e(0xbdc75ec4), C32e(0x75446fa7), C32e(0x8f11bb80),
391
C32e(0x52de75b7), C32e(0xaee488bc), C32e(0x82b8001e),
392
C32e(0x98a6a3f4), C32e(0x8ef48f33), C32e(0xa9a36315),
393
C32e(0xaa5f5624), C32e(0xd5b7f989), C32e(0xb6f1ed20),
394
C32e(0x7c5ae0fd), C32e(0x36cae95a), C32e(0x06422c36),
395
C32e(0xce293543), C32e(0x4efe983d), C32e(0x533af974),
396
C32e(0x739a4ba7), C32e(0xd0f51f59), C32e(0x6f4e8186),
397
C32e(0x0e9dad81), C32e(0xafd85a9f), C32e(0xa7050667),
398
C32e(0xee34626a), C32e(0x8b0b28be), C32e(0x6eb91727),
399
C32e(0x47740726), C32e(0xc680103f), C32e(0xe0a07e6f),
400
C32e(0xc67e487b), C32e(0x0d550aa5), C32e(0x4af8a4c0),
401
C32e(0x91e3e79f), C32e(0x978ef19e), C32e(0x86767281),
402
C32e(0x50608dd4), C32e(0x7e9e5a41), C32e(0xf3e5b062),
403
C32e(0xfc9f1fec), C32e(0x4054207a), C32e(0xe3e41a00),
404
C32e(0xcef4c984), C32e(0x4fd794f5), C32e(0x9dfa95d8),
405
C32e(0x552e7e11), C32e(0x24c354a5), C32e(0x5bdf7228),
406
C32e(0xbdfe6e28), C32e(0x78f57fe2), C32e(0x0fa5c4b2),
407
C32e(0x05897cef), C32e(0xee49d32e), C32e(0x447e9385),
408
C32e(0xeb28597f), C32e(0x705f6937), C32e(0xb324314a),
409
C32e(0x5e8628f1), C32e(0x1dd6e465), C32e(0xc71b7704),
410
C32e(0x51b920e7), C32e(0x74fe43e8), C32e(0x23d4878a),
411
C32e(0x7d29e8a3), C32e(0x927694f2), C32e(0xddcb7a09),
412
C32e(0x9b30d9c1), C32e(0x1d1b30fb), C32e(0x5bdc1be0),
413
C32e(0xda24494f), C32e(0xf29c82bf), C32e(0xa4e7ba31),
414
C32e(0xb470bfff), C32e(0x0d324405), C32e(0xdef8bc48),
415
C32e(0x3baefc32), C32e(0x53bbd339), C32e(0x459fc3c1),
416
C32e(0xe0298ba0), C32e(0xe5c905fd), C32e(0xf7ae090f),
417
C32e(0x94703412), C32e(0x4290f134), C32e(0xa271b701),
418
C32e(0xe344ed95), C32e(0xe93b8e36), C32e(0x4f2f984a),
419
C32e(0x88401d63), C32e(0xa06cf615), C32e(0x47c1444b),
420
C32e(0x8752afff), C32e(0x7ebb4af1), C32e(0xe20ac630),
421
C32e(0x4670b6c5), C32e(0xcc6e8ce6), C32e(0xa4d5a456),
422
C32e(0xbd4fca00), C32e(0xda9d844b), C32e(0xc83e18ae),
423
C32e(0x7357ce45), C32e(0x3064d1ad), C32e(0xe8a6ce68),
424
C32e(0x145c2567), C32e(0xa3da8cf2), C32e(0xcb0ee116),
425
C32e(0x33e90658), C32e(0x9a94999a), C32e(0x1f60b220),
426
C32e(0xc26f847b), C32e(0xd1ceac7f), C32e(0xa0d18518),
427
C32e(0x32595ba1), C32e(0x8ddd19d3), C32e(0x509a1cc0),
428
C32e(0xaaa5b446), C32e(0x9f3d6367), C32e(0xe4046bba),
429
C32e(0xf6ca19ab), C32e(0x0b56ee7e), C32e(0x1fb179ea),
430
C32e(0xa9282174), C32e(0xe9bdf735), C32e(0x3b3651ee),
431
C32e(0x1d57ac5a), C32e(0x7550d376), C32e(0x3a46c2fe),
432
C32e(0xa37d7001), C32e(0xf735c1af), C32e(0x98a4d842),
433
C32e(0x78edec20), C32e(0x9e6b6779), C32e(0x41836315),
434
C32e(0xea3adba8), C32e(0xfac33b4d), C32e(0x32832c83),
435
C32e(0xa7403b1f), C32e(0x1c2747f3), C32e(0x5940f034),
436
C32e(0xb72d769a), C32e(0xe73e4e6c), C32e(0xd2214ffd),
437
C32e(0xb8fd8d39), C32e(0xdc5759ef), C32e(0x8d9b0c49),
438
C32e(0x2b49ebda), C32e(0x5ba2d749), C32e(0x68f3700d),
439
C32e(0x7d3baed0), C32e(0x7a8d5584), C32e(0xf5a5e9f0),
440
C32e(0xe4f88e65), C32e(0xa0b8a2f4), C32e(0x36103b53),
441
C32e(0x0ca8079e), C32e(0x753eec5a), C32e(0x91689492),
442
C32e(0x56e8884f), C32e(0x5bb05c55), C32e(0xf8babc4c),
443
C32e(0xe3bb3b99), C32e(0xf387947b), C32e(0x75daf4d6),
444
C32e(0x726b1c5d), C32e(0x64aeac28), C32e(0xdc34b36d),
445
C32e(0x6c34a550), C32e(0xb828db71), C32e(0xf861e2f2),
446
C32e(0x108d512a), C32e(0xe3db6433), C32e(0x59dd75fc),
447
C32e(0x1cacbcf1), C32e(0x43ce3fa2), C32e(0x67bbd13c),
448
C32e(0x02e843b0), C32e(0x330a5bca), C32e(0x8829a175),
449
C32e(0x7f34194d), C32e(0xb416535c), C32e(0x923b94c3),
450
C32e(0x0e794d1e), C32e(0x797475d7), C32e(0xb6eeaf3f),
451
C32e(0xeaa8d4f7), C32e(0xbe1a3921), C32e(0x5cf47e09),
452
C32e(0x4c232751), C32e(0x26a32453), C32e(0xba323cd2),
453
C32e(0x44a3174a), C32e(0x6da6d5ad), C32e(0xb51d3ea6),
454
C32e(0xaff2c908), C32e(0x83593d98), C32e(0x916b3c56),
455
C32e(0x4cf87ca1), C32e(0x7286604d), C32e(0x46e23ecc),
456
C32e(0x086ec7f6), C32e(0x2f9833b3), C32e(0xb1bc765e),
457
C32e(0x2bd666a5), C32e(0xefc4e62a), C32e(0x06f4b6e8),
458
C32e(0xbec1d436), C32e(0x74ee8215), C32e(0xbcef2163),
459
C32e(0xfdc14e0d), C32e(0xf453c969), C32e(0xa77d5ac4),
460
C32e(0x06585826), C32e(0x7ec11416), C32e(0x06e0fa16),
461
C32e(0x7e90af3d), C32e(0x28639d3f), C32e(0xd2c9f2e3),
462
C32e(0x009bd20c), C32e(0x5faace30), C32e(0xb7d40c30),
463
C32e(0x742a5116), C32e(0xf2e03298), C32e(0x0deb30d8),
464
C32e(0xe3cef89a), C32e(0x4bc59e7b), C32e(0xb5f17992),
465
C32e(0xff51e66e), C32e(0x048668d3), C32e(0x9b234d57),
466
C32e(0xe6966731), C32e(0xcce6a6f3), C32e(0x170a7505),
467
C32e(0xb17681d9), C32e(0x13326cce), C32e(0x3c175284),
468
C32e(0xf805a262), C32e(0xf42bcbb3), C32e(0x78471547),
469
C32e(0xff465482), C32e(0x23936a48), C32e(0x38df5807),
470
C32e(0x4e5e6565), C32e(0xf2fc7c89), C32e(0xfc86508e),
471
C32e(0x31702e44), C32e(0xd00bca86), C32e(0xf04009a2),
472
C32e(0x3078474e), C32e(0x65a0ee39), C32e(0xd1f73883),
473
C32e(0xf75ee937), C32e(0xe42c3abd), C32e(0x2197b226),
474
C32e(0x0113f86f), C32e(0xa344edd1), C32e(0xef9fdee7),
475
C32e(0x8ba0df15), C32e(0x762592d9), C32e(0x3c85f7f6),
476
C32e(0x12dc42be), C32e(0xd8a7ec7c), C32e(0xab27b07e),
477
C32e(0x538d7dda), C32e(0xaa3ea8de), C32e(0xaa25ce93),
478
C32e(0xbd0269d8), C32e(0x5af643fd), C32e(0x1a7308f9),
479
C32e(0xc05fefda), C32e(0x174a19a5), C32e(0x974d6633),
480
C32e(0x4cfd216a), C32e(0x35b49831), C32e(0xdb411570),
481
C32e(0xea1e0fbb), C32e(0xedcd549b), C32e(0x9ad063a1),
482
C32e(0x51974072), C32e(0xf6759dbf), C32e(0x91476fe2)
483
};
484
485
/*
 * Round-constant accessors (32-bit variant).
 *
 * The table C holds eight 32-bit words per round r: the four "even"
 * words w3, w2, w1, w0 followed by the four "odd" words w3, w2, w1, w0.
 */
#define Ceven_w3(r)   (C[((r) << 3) + 0])
#define Ceven_w2(r)   (C[((r) << 3) + 1])
#define Ceven_w1(r)   (C[((r) << 3) + 2])
#define Ceven_w0(r)   (C[((r) << 3) + 3])
#define Codd_w3(r)    (C[((r) << 3) + 4])
#define Codd_w2(r)    (C[((r) << 3) + 5])
#define Codd_w1(r)    (C[((r) << 3) + 6])
#define Codd_w0(r)    (C[((r) << 3) + 7])

/*
 * S: apply Sb to each of the four 32-bit parts of four state variables.
 *
 * x0..x3 are name prefixes completed with the digits 3, 2, 1, 0. `cb` is
 * a constant-accessor prefix (`Ceven_` or `Codd_`) completed with
 * w3..w0, and r is the round index handed to that accessor.
 */
#define S(x0, x1, x2, x3, cb, r)   do { \
		Sb(x0 ## 3, x1 ## 3, x2 ## 3, x3 ## 3, cb ## w3(r)); \
		Sb(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, cb ## w2(r)); \
		Sb(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, cb ## w1(r)); \
		Sb(x0 ## 0, x1 ## 0, x2 ## 0, x3 ## 0, cb ## w0(r)); \
	} while (0)

/*
 * L: apply Lb to each of the four 32-bit parts of eight state variables
 * (x0..x7 are name prefixes completed with the digits 3, 2, 1, 0).
 */
#define L(x0, x1, x2, x3, x4, x5, x6, x7)   do { \
		Lb(x0 ## 3, x1 ## 3, x2 ## 3, x3 ## 3, \
			x4 ## 3, x5 ## 3, x6 ## 3, x7 ## 3); \
		Lb(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, \
			x4 ## 2, x5 ## 2, x6 ## 2, x7 ## 2); \
		Lb(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, \
			x4 ## 1, x5 ## 1, x6 ## 1, x7 ## 1); \
		Lb(x0 ## 0, x1 ## 0, x2 ## 0, x3 ## 0, \
			x4 ## 0, x5 ## 0, x6 ## 0, x7 ## 0); \
	} while (0)
511
512
/*
 * Wz: swap adjacent groups of n bits inside each of the four 32-bit parts
 * (x##3 .. x##0) of a state variable. `c` is the mask selecting the low
 * group of every pair.
 */
#define Wz(x, c, n)   do { \
		sph_u32 t = (x ## 3 & (c)) << (n); \
		x ## 3 = ((x ## 3 >> (n)) & (c)) | t; \
		t = (x ## 2 & (c)) << (n); \
		x ## 2 = ((x ## 2 >> (n)) & (c)) | t; \
		t = (x ## 1 & (c)) << (n); \
		x ## 1 = ((x ## 1 >> (n)) & (c)) | t; \
		t = (x ## 0 & (c)) << (n); \
		x ## 0 = ((x ## 0 >> (n)) & (c)) | t; \
	} while (0)

/*
 * W0..W4 swap groups of 1, 2, 4, 8 and 16 bits within each 32-bit part.
 * W5 exchanges part 3 with part 2 and part 1 with part 0; W6 exchanges
 * part 3 with part 1 and part 2 with part 0.
 */
#define W0(x)   Wz(x, SPH_C32(0x55555555),  1)
#define W1(x)   Wz(x, SPH_C32(0x33333333),  2)
#define W2(x)   Wz(x, SPH_C32(0x0F0F0F0F),  4)
#define W3(x)   Wz(x, SPH_C32(0x00FF00FF),  8)
#define W4(x)   Wz(x, SPH_C32(0x0000FFFF), 16)
#define W5(x)   do { \
		sph_u32 t = x ## 3; \
		x ## 3 = x ## 2; \
		x ## 2 = t; \
		t = x ## 1; \
		x ## 1 = x ## 0; \
		x ## 0 = t; \
	} while (0)
#define W6(x)   do { \
		sph_u32 t = x ## 3; \
		x ## 3 = x ## 1; \
		x ## 1 = t; \
		t = x ## 2; \
		x ## 2 = x ## 0; \
		x ## 0 = t; \
	} while (0)
544
545
/*
 * DECL_STATE: declare the local working copy of the hash state for the
 * 32-bit variant: eight variables h0..h7, each split into four 32-bit
 * parts suffixed 3, 2, 1, 0, plus the scratch word `tmp` used by Sb.
 * The macro already contains the trailing semicolon.
 */
#define DECL_STATE \
	sph_u32 h03, h02, h01, h00, h13, h12, h11, h10; \
	sph_u32 h23, h22, h21, h20, h33, h32, h31, h30; \
	sph_u32 h43, h42, h41, h40, h53, h52, h51, h50; \
	sph_u32 h63, h62, h61, h60, h73, h72, h71, h70; \
	sph_u32 tmp;

/*
 * READ_STATE: load the working variables from (state)->H.narrow[0..31].
 * Variable h<i> occupies indices 4*i .. 4*i+3, in the order part 3,
 * part 2, part 1, part 0.
 */
#define READ_STATE(state)   do { \
		h03 = (state)->H.narrow[ 0]; \
		h02 = (state)->H.narrow[ 1]; \
		h01 = (state)->H.narrow[ 2]; \
		h00 = (state)->H.narrow[ 3]; \
		h13 = (state)->H.narrow[ 4]; \
		h12 = (state)->H.narrow[ 5]; \
		h11 = (state)->H.narrow[ 6]; \
		h10 = (state)->H.narrow[ 7]; \
		h23 = (state)->H.narrow[ 8]; \
		h22 = (state)->H.narrow[ 9]; \
		h21 = (state)->H.narrow[10]; \
		h20 = (state)->H.narrow[11]; \
		h33 = (state)->H.narrow[12]; \
		h32 = (state)->H.narrow[13]; \
		h31 = (state)->H.narrow[14]; \
		h30 = (state)->H.narrow[15]; \
		h43 = (state)->H.narrow[16]; \
		h42 = (state)->H.narrow[17]; \
		h41 = (state)->H.narrow[18]; \
		h40 = (state)->H.narrow[19]; \
		h53 = (state)->H.narrow[20]; \
		h52 = (state)->H.narrow[21]; \
		h51 = (state)->H.narrow[22]; \
		h50 = (state)->H.narrow[23]; \
		h63 = (state)->H.narrow[24]; \
		h62 = (state)->H.narrow[25]; \
		h61 = (state)->H.narrow[26]; \
		h60 = (state)->H.narrow[27]; \
		h73 = (state)->H.narrow[28]; \
		h72 = (state)->H.narrow[29]; \
		h71 = (state)->H.narrow[30]; \
		h70 = (state)->H.narrow[31]; \
	} while (0)

/*
 * WRITE_STATE: store the working variables back into
 * (state)->H.narrow[0..31], using the same layout as READ_STATE.
 */
#define WRITE_STATE(state)   do { \
		(state)->H.narrow[ 0] = h03; \
		(state)->H.narrow[ 1] = h02; \
		(state)->H.narrow[ 2] = h01; \
		(state)->H.narrow[ 3] = h00; \
		(state)->H.narrow[ 4] = h13; \
		(state)->H.narrow[ 5] = h12; \
		(state)->H.narrow[ 6] = h11; \
		(state)->H.narrow[ 7] = h10; \
		(state)->H.narrow[ 8] = h23; \
		(state)->H.narrow[ 9] = h22; \
		(state)->H.narrow[10] = h21; \
		(state)->H.narrow[11] = h20; \
		(state)->H.narrow[12] = h33; \
		(state)->H.narrow[13] = h32; \
		(state)->H.narrow[14] = h31; \
		(state)->H.narrow[15] = h30; \
		(state)->H.narrow[16] = h43; \
		(state)->H.narrow[17] = h42; \
		(state)->H.narrow[18] = h41; \
		(state)->H.narrow[19] = h40; \
		(state)->H.narrow[20] = h53; \
		(state)->H.narrow[21] = h52; \
		(state)->H.narrow[22] = h51; \
		(state)->H.narrow[23] = h50; \
		(state)->H.narrow[24] = h63; \
		(state)->H.narrow[25] = h62; \
		(state)->H.narrow[26] = h61; \
		(state)->H.narrow[27] = h60; \
		(state)->H.narrow[28] = h73; \
		(state)->H.narrow[29] = h72; \
		(state)->H.narrow[30] = h71; \
		(state)->H.narrow[31] = h70; \
	} while (0)
621
622
/*
 * INPUT_BUF1: decode the 64-byte block found at `buf` into sixteen 32-bit
 * words (m03 .. m30, declared here) and XOR them into the first half of
 * the working state (h0..h3).
 *
 * The macro declares variables, so it must be expanded at a point where
 * declarations are allowed, and INPUT_BUF2 must be expanded in the same
 * scope afterwards. It expects `buf` and the DECL_STATE variables to be
 * visible. The final statement already carries its semicolon.
 */
#define INPUT_BUF1 \
	sph_u32 m03 = dec32e_aligned(buf +  0); \
	sph_u32 m02 = dec32e_aligned(buf +  4); \
	sph_u32 m01 = dec32e_aligned(buf +  8); \
	sph_u32 m00 = dec32e_aligned(buf + 12); \
	sph_u32 m13 = dec32e_aligned(buf + 16); \
	sph_u32 m12 = dec32e_aligned(buf + 20); \
	sph_u32 m11 = dec32e_aligned(buf + 24); \
	sph_u32 m10 = dec32e_aligned(buf + 28); \
	sph_u32 m23 = dec32e_aligned(buf + 32); \
	sph_u32 m22 = dec32e_aligned(buf + 36); \
	sph_u32 m21 = dec32e_aligned(buf + 40); \
	sph_u32 m20 = dec32e_aligned(buf + 44); \
	sph_u32 m33 = dec32e_aligned(buf + 48); \
	sph_u32 m32 = dec32e_aligned(buf + 52); \
	sph_u32 m31 = dec32e_aligned(buf + 56); \
	sph_u32 m30 = dec32e_aligned(buf + 60); \
	h03 ^= m03; \
	h02 ^= m02; \
	h01 ^= m01; \
	h00 ^= m00; \
	h13 ^= m13; \
	h12 ^= m12; \
	h11 ^= m11; \
	h10 ^= m10; \
	h23 ^= m23; \
	h22 ^= m22; \
	h21 ^= m21; \
	h20 ^= m20; \
	h33 ^= m33; \
	h32 ^= m32; \
	h31 ^= m31; \
	h30 ^= m30;

/*
 * INPUT_BUF2: XOR the message words decoded by INPUT_BUF1 into the second
 * half of the working state (h4..h7).
 */
#define INPUT_BUF2 \
	h43 ^= m03; \
	h42 ^= m02; \
	h41 ^= m01; \
	h40 ^= m00; \
	h53 ^= m13; \
	h52 ^= m12; \
	h51 ^= m11; \
	h50 ^= m10; \
	h63 ^= m23; \
	h62 ^= m22; \
	h61 ^= m21; \
	h60 ^= m20; \
	h73 ^= m33; \
	h72 ^= m32; \
	h71 ^= m31; \
	h70 ^= m30;
673
674
static const sph_u32 IV224[] = {
675
C32e(0x2dfedd62), C32e(0xf99a98ac), C32e(0xae7cacd6), C32e(0x19d634e7),
676
C32e(0xa4831005), C32e(0xbc301216), C32e(0xb86038c6), C32e(0xc9661494),
677
C32e(0x66d9899f), C32e(0x2580706f), C32e(0xce9ea31b), C32e(0x1d9b1adc),
678
C32e(0x11e8325f), C32e(0x7b366e10), C32e(0xf994857f), C32e(0x02fa06c1),
679
C32e(0x1b4f1b5c), C32e(0xd8c840b3), C32e(0x97f6a17f), C32e(0x6e738099),
680
C32e(0xdcdf93a5), C32e(0xadeaa3d3), C32e(0xa431e8de), C32e(0xc9539a68),
681
C32e(0x22b4a98a), C32e(0xec86a1e4), C32e(0xd574ac95), C32e(0x9ce56cf0),
682
C32e(0x15960dea), C32e(0xb5ab2bbf), C32e(0x9611dcf0), C32e(0xdd64ea6e)
683
};
684
685
static const sph_u32 IV256[] = {
686
C32e(0xeb98a341), C32e(0x2c20d3eb), C32e(0x92cdbe7b), C32e(0x9cb245c1),
687
C32e(0x1c935191), C32e(0x60d4c7fa), C32e(0x260082d6), C32e(0x7e508a03),
688
C32e(0xa4239e26), C32e(0x7726b945), C32e(0xe0fb1a48), C32e(0xd41a9477),
689
C32e(0xcdb5ab26), C32e(0x026b177a), C32e(0x56f02442), C32e(0x0fff2fa8),
690
C32e(0x71a39689), C32e(0x7f2e4d75), C32e(0x1d144908), C32e(0xf77de262),
691
C32e(0x277695f7), C32e(0x76248f94), C32e(0x87d5b657), C32e(0x4780296c),
692
C32e(0x5c5e272d), C32e(0xac8e0d6c), C32e(0x518450c6), C32e(0x57057a0f),
693
C32e(0x7be4d367), C32e(0x702412ea), C32e(0x89e3ab13), C32e(0xd31cd769)
694
};
695
696
static const sph_u32 IV384[] = {
697
C32e(0x481e3bc6), C32e(0xd813398a), C32e(0x6d3b5e89), C32e(0x4ade879b),
698
C32e(0x63faea68), C32e(0xd480ad2e), C32e(0x332ccb21), C32e(0x480f8267),
699
C32e(0x98aec84d), C32e(0x9082b928), C32e(0xd455ea30), C32e(0x41114249),
700
C32e(0x36f555b2), C32e(0x924847ec), C32e(0xc7250a93), C32e(0xbaf43ce1),
701
C32e(0x569b7f8a), C32e(0x27db454c), C32e(0x9efcbd49), C32e(0x6397af0e),
702
C32e(0x589fc27d), C32e(0x26aa80cd), C32e(0x80c08b8c), C32e(0x9deb2eda),
703
C32e(0x8a7981e8), C32e(0xf8d5373a), C32e(0xf43967ad), C32e(0xddd17a71),
704
C32e(0xa9b4d3bd), C32e(0xa475d394), C32e(0x976c3fba), C32e(0x9842737f)
705
};
706
707
static const sph_u32 IV512[] = {
708
C32e(0x6fd14b96), C32e(0x3e00aa17), C32e(0x636a2e05), C32e(0x7a15d543),
709
C32e(0x8a225e8d), C32e(0x0c97ef0b), C32e(0xe9341259), C32e(0xf2b3c361),
710
C32e(0x891da0c1), C32e(0x536f801e), C32e(0x2aa9056b), C32e(0xea2b6d80),
711
C32e(0x588eccdb), C32e(0x2075baa6), C32e(0xa90f3a76), C32e(0xbaf83bf7),
712
C32e(0x0169e605), C32e(0x41e34a69), C32e(0x46b58a8e), C32e(0x2e6fe65a),
713
C32e(0x1047a7d0), C32e(0xc1843c24), C32e(0x3b6e71b1), C32e(0x2d5ac199),
714
C32e(0xcf57f6ec), C32e(0x9db1f856), C32e(0xa706887c), C32e(0x5716b156),
715
C32e(0xe3c2fcdf), C32e(0xe68517fb), C32e(0x545a4678), C32e(0xcc8cdd4b)
716
};
717
718
#endif
719
720
/*
 * SL(ro): one round inside a loop. Expands to SLu(r + ro, ro), so a
 * variable named `r` (the base round index of the current iteration) must
 * be in scope where SL is used.
 */
#define SL(ro)   SLu(r + ro, ro)

/*
 * SLu(r, ro): one complete round.
 *
 *  1. S with the "even" constants of round r on h0, h2, h4, h6;
 *  2. S with the "odd" constants of round r on h1, h3, h5, h7;
 *  3. L mixing the even variables with the odd ones;
 *  4. the bit/word swap W<ro> on each odd variable.
 *
 * `ro` is pasted onto `W`, so it must be a literal digit 0..6 naming one
 * of the W0..W6 macros; `r` may be any expression accepted by the
 * constant accessors.
 */
#define SLu(r, ro)   do { \
		S(h0, h2, h4, h6, Ceven_, r); \
		S(h1, h3, h5, h7, Codd_, r); \
		L(h0, h2, h4, h6, h1, h3, h5, h7); \
		W ## ro(h1); \
		W ## ro(h3); \
		W ## ro(h5); \
		W ## ro(h7); \
	} while (0)
731
732
/*
 * E8: run the 42 rounds (round indices 0..41) on the working state
 * declared by DECL_STATE. Round r always uses the swap macro W<r mod 7>.
 * Four expansions exist; they differ only in how much of the round
 * sequence is unrolled:
 *
 *   small footprint, 64-bit : loop of 6 iterations x 7 unrolled rounds
 *   small footprint, 32-bit : loop of 42 iterations, swap chosen by switch
 *   normal, 64-bit          : all 42 rounds unrolled
 *   normal, 32-bit          : loop of 6 iterations x 7 unrolled rounds
 */
#if SPH_SMALL_FOOTPRINT_JH

#if SPH_JH_64

/*
 * The "small footprint" 64-bit version just uses a partially unrolled
 * loop.
 */

#define E8   do { \
		unsigned r; \
		for (r = 0; r < 42; r += 7) { \
			SL(0); \
			SL(1); \
			SL(2); \
			SL(3); \
			SL(4); \
			SL(5); \
			SL(6); \
		} \
	} while (0)

#else

/*
 * Small footprint, 32-bit: one round per iteration; g tracks r modulo 7
 * and selects the swap macro for the round.
 */

#define E8   do { \
		unsigned r, g; \
		for (r = g = 0; r < 42; r ++) { \
			S(h0, h2, h4, h6, Ceven_, r); \
			S(h1, h3, h5, h7, Codd_, r); \
			L(h0, h2, h4, h6, h1, h3, h5, h7); \
			switch (g) { \
			case 0: \
				W0(h1); \
				W0(h3); \
				W0(h5); \
				W0(h7); \
				break; \
			case 1: \
				W1(h1); \
				W1(h3); \
				W1(h5); \
				W1(h7); \
				break; \
			case 2: \
				W2(h1); \
				W2(h3); \
				W2(h5); \
				W2(h7); \
				break; \
			case 3: \
				W3(h1); \
				W3(h3); \
				W3(h5); \
				W3(h7); \
				break; \
			case 4: \
				W4(h1); \
				W4(h3); \
				W4(h5); \
				W4(h7); \
				break; \
			case 5: \
				W5(h1); \
				W5(h3); \
				W5(h5); \
				W5(h7); \
				break; \
			case 6: \
				W6(h1); \
				W6(h3); \
				W6(h5); \
				W6(h7); \
				break; \
			} \
			if (++ g == 7) \
				g = 0; \
		} \
	} while (0)

#endif

#else

#if SPH_JH_64

/*
 * On a "true 64-bit" architecture, we can unroll at will.
 */

#define E8   do { \
		SLu( 0, 0); \
		SLu( 1, 1); \
		SLu( 2, 2); \
		SLu( 3, 3); \
		SLu( 4, 4); \
		SLu( 5, 5); \
		SLu( 6, 6); \
		SLu( 7, 0); \
		SLu( 8, 1); \
		SLu( 9, 2); \
		SLu(10, 3); \
		SLu(11, 4); \
		SLu(12, 5); \
		SLu(13, 6); \
		SLu(14, 0); \
		SLu(15, 1); \
		SLu(16, 2); \
		SLu(17, 3); \
		SLu(18, 4); \
		SLu(19, 5); \
		SLu(20, 6); \
		SLu(21, 0); \
		SLu(22, 1); \
		SLu(23, 2); \
		SLu(24, 3); \
		SLu(25, 4); \
		SLu(26, 5); \
		SLu(27, 6); \
		SLu(28, 0); \
		SLu(29, 1); \
		SLu(30, 2); \
		SLu(31, 3); \
		SLu(32, 4); \
		SLu(33, 5); \
		SLu(34, 6); \
		SLu(35, 0); \
		SLu(36, 1); \
		SLu(37, 2); \
		SLu(38, 3); \
		SLu(39, 4); \
		SLu(40, 5); \
		SLu(41, 6); \
	} while (0)

#else

/*
 * We are not aiming at a small footprint, but we are still using a
 * 32-bit implementation. Full loop unrolling would smash the L1
 * cache on some "big" architectures (32 kB L1 cache).
 */

#define E8   do { \
		unsigned r; \
		for (r = 0; r < 42; r += 7) { \
			SL(0); \
			SL(1); \
			SL(2); \
			SL(3); \
			SL(4); \
			SL(5); \
			SL(6); \
		} \
	} while (0)

#endif

#endif
890
891
static void
892
jh_init(sph_jh_context *sc, const void *iv)
893
{
894
sc->ptr = 0;
895
#if SPH_JH_64
896
memcpy(sc->H.wide, iv, sizeof sc->H.wide);
897
#else
898
memcpy(sc->H.narrow, iv, sizeof sc->H.narrow);
899
#endif
900
#if SPH_64
901
sc->block_count = 0;
902
#else
903
sc->block_count_high = 0;
904
sc->block_count_low = 0;
905
#endif
906
}
907
908
static void
909
jh_core(sph_jh_context *sc, const void *data, size_t len)
910
{
911
unsigned char *buf;
912
size_t ptr;
913
DECL_STATE
914
915
buf = sc->buf;
916
ptr = sc->ptr;
917
if (len < (sizeof sc->buf) - ptr) {
918
memcpy(buf + ptr, data, len);
919
ptr += len;
920
sc->ptr = ptr;
921
return;
922
}
923
924
READ_STATE(sc);
925
while (len > 0) {
926
size_t clen;
927
928
clen = (sizeof sc->buf) - ptr;
929
if (clen > len)
930
clen = len;
931
memcpy(buf + ptr, data, clen);
932
ptr += clen;
933
data = (const unsigned char *)data + clen;
934
len -= clen;
935
if (ptr == sizeof sc->buf) {
936
INPUT_BUF1;
937
E8;
938
INPUT_BUF2;
939
#if SPH_64
940
sc->block_count ++;
941
#else
942
if ((sc->block_count_low = SPH_T32(
943
sc->block_count_low + 1)) == 0)
944
sc->block_count_high ++;
945
#endif
946
ptr = 0;
947
}
948
}
949
WRITE_STATE(sc);
950
sc->ptr = ptr;
951
}
952
953
static void
954
jh_close(sph_jh_context *sc, unsigned ub, unsigned n,
955
void *dst, size_t out_size_w32, const void *iv)
956
{
957
unsigned z;
958
unsigned char buf[128];
959
size_t numz, u;
960
#if SPH_64
961
sph_u64 l0, l1;
962
#else
963
sph_u32 l0, l1, l2, l3;
964
#endif
965
966
z = 0x80 >> n;
967
buf[0] = ((ub & -z) | z) & 0xFF;
968
if (sc->ptr == 0 && n == 0) {
969
numz = 47;
970
} else {
971
numz = 111 - sc->ptr;
972
}
973
memset(buf + 1, 0, numz);
974
#if SPH_64
975
l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3) + n;
976
l1 = SPH_T64(sc->block_count >> 55);
977
sph_enc64be(buf + numz + 1, l1);
978
sph_enc64be(buf + numz + 9, l0);
979
#else
980
l0 = SPH_T32(sc->block_count_low << 9) + (sc->ptr << 3) + n;
981
l1 = SPH_T32(sc->block_count_low >> 23)
982
+ SPH_T32(sc->block_count_high << 9);
983
l2 = SPH_T32(sc->block_count_high >> 23);
984
l3 = 0;
985
sph_enc32be(buf + numz + 1, l3);
986
sph_enc32be(buf + numz + 5, l2);
987
sph_enc32be(buf + numz + 9, l1);
988
sph_enc32be(buf + numz + 13, l0);
989
#endif
990
jh_core(sc, buf, numz + 17);
991
#if SPH_JH_64
992
for (u = 0; u < 8; u ++)
993
enc64e(buf + (u << 3), sc->H.wide[u + 8]);
994
#else
995
for (u = 0; u < 16; u ++)
996
enc32e(buf + (u << 2), sc->H.narrow[u + 16]);
997
#endif
998
memcpy(dst, buf + ((16 - out_size_w32) << 2), out_size_w32 << 2);
999
jh_init(sc, iv);
1000
}
1001
1002
/* see sph_jh.h */
1003
void
1004
sph_jh224_init(void *cc)
1005
{
1006
jh_init(cc, IV224);
1007
}
1008
1009
/*
 * Process len bytes from data in a JH-224 computation (delegates to
 * jh_core()). See sph_jh.h.
 */
void
sph_jh224(void *cc, const void *data, size_t len)
{
	jh_core(cc, data, len);
}
1015
1016
/* see sph_jh.h */
1017
void
1018
sph_jh224_close(void *cc, void *dst)
1019
{
1020
jh_close(cc, 0, 0, dst, 7, IV224);
1021
}
1022
1023
/* see sph_jh.h */
1024
void
1025
sph_jh224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1026
{
1027
jh_close(cc, ub, n, dst, 7, IV224);
1028
}
1029
1030
/* see sph_jh.h */
1031
void
1032
sph_jh256_init(void *cc)
1033
{
1034
jh_init(cc, IV256);
1035
}
1036
1037
/*
 * Process len bytes from data in a JH-256 computation (delegates to
 * jh_core()). See sph_jh.h.
 */
void
sph_jh256(void *cc, const void *data, size_t len)
{
	jh_core(cc, data, len);
}
1043
1044
/* see sph_jh.h */
1045
void
1046
sph_jh256_close(void *cc, void *dst)
1047
{
1048
jh_close(cc, 0, 0, dst, 8, IV256);
1049
}
1050
1051
/* see sph_jh.h */
1052
void
1053
sph_jh256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1054
{
1055
jh_close(cc, ub, n, dst, 8, IV256);
1056
}
1057
1058
/* see sph_jh.h */
1059
void
1060
sph_jh384_init(void *cc)
1061
{
1062
jh_init(cc, IV384);
1063
}
1064
1065
/*
 * Process len bytes from data in a JH-384 computation (delegates to
 * jh_core()). See sph_jh.h.
 */
void
sph_jh384(void *cc, const void *data, size_t len)
{
	jh_core(cc, data, len);
}
1071
1072
/* see sph_jh.h */
1073
void
1074
sph_jh384_close(void *cc, void *dst)
1075
{
1076
jh_close(cc, 0, 0, dst, 12, IV384);
1077
}
1078
1079
/* see sph_jh.h */
1080
void
1081
sph_jh384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1082
{
1083
jh_close(cc, ub, n, dst, 12, IV384);
1084
}
1085
1086
/* see sph_jh.h */
1087
void
1088
sph_jh512_init(void *cc)
1089
{
1090
jh_init(cc, IV512);
1091
}
1092
1093
/*
 * Process len bytes from data in a JH-512 computation (delegates to
 * jh_core()). See sph_jh.h.
 */
void
sph_jh512(void *cc, const void *data, size_t len)
{
	jh_core(cc, data, len);
}
1099
1100
/* see sph_jh.h */
1101
void
1102
sph_jh512_close(void *cc, void *dst)
1103
{
1104
jh_close(cc, 0, 0, dst, 16, IV512);
1105
}
1106
1107
/* see sph_jh.h */
1108
void
1109
sph_jh512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1110
{
1111
jh_close(cc, ub, n, dst, 16, IV512);
1112
}
1113
1114
#ifdef __cplusplus
1115
}
1116
#endif
1117
1118