Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_blake2/impl/blake2b.c
12 views
1
/*
2
BLAKE2 reference source code package - optimized C implementations
3
4
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
5
6
To the extent possible under law, the author(s) have dedicated all copyright
7
and related and neighboring rights to this software to the public domain
8
worldwide. This software is distributed without any warranty.
9
10
You should have received a copy of the CC0 Public Domain Dedication along with
11
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
12
*/
13
14
#include <stdint.h>
15
#include <string.h>
16
#include <stdio.h>
17
18
#include "blake2.h"
19
#include "blake2-impl.h"
20
21
#include "blake2-config.h"
22
23
#if defined(_MSC_VER)
24
#include <intrin.h>
25
#endif
26
27
#if defined(HAVE_SSE2)
28
#include <emmintrin.h>
29
// MSVC only defines _mm_set_epi64x for x86_64...
30
#if defined(_MSC_VER) && !defined(_M_X64)
31
/* Fallback for 32-bit MSVC builds: assembles a 128-bit vector from two
   64-bit values by splitting each into its high and low 32-bit halves.
   u1 supplies the upper 64 bits, u0 the lower 64 bits. */
static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
{
  const int hi_of_u1 = ( int )( u1 >> 32 );
  const int lo_of_u1 = ( int )u1;
  const int hi_of_u0 = ( int )( u0 >> 32 );
  const int lo_of_u0 = ( int )u0;

  return _mm_set_epi32( hi_of_u1, lo_of_u1, hi_of_u0, lo_of_u0 );
}
35
#endif
36
#endif
37
38
#if defined(HAVE_SSSE3)
39
#include <tmmintrin.h>
40
#endif
41
#if defined(HAVE_SSE4_1)
42
#include <smmintrin.h>
43
#endif
44
#if defined(HAVE_AVX)
45
#include <immintrin.h>
46
#endif
47
#if defined(HAVE_XOP) && !defined(_MSC_VER)
48
#include <x86intrin.h>
49
#endif
50
51
52
53
#include "blake2b-round.h"
54
55
/* BLAKE2b initialization vector (eight 64-bit words).
   In this file it is XORed with the parameter block by blake2b_init_param()
   and loaded into rows 3 and 4 of the working state by blake2b_compress(). */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL, /* IV[0] */
  0xbb67ae8584caa73bULL, /* IV[1] */
  0x3c6ef372fe94f82bULL, /* IV[2] */
  0xa54ff53a5f1d36f1ULL, /* IV[3] */
  0x510e527fade682d1ULL, /* IV[4] */
  0x9b05688c2b3e6c1fULL, /* IV[5] */
  0x1f83d9abfb41bd6bULL, /* IV[6] */
  0x5be0cd19137e2179ULL  /* IV[7] */
};
62
63
/* Permutation table with one row per round (12 rows of 16 indices).
   Every row is a permutation of 0..15; rows 10 and 11 repeat rows 0 and 1.
   NOTE(review): nothing visible in this file indexes the table directly;
   check blake2b-round.h before removing it. */
static const uint8_t blake2b_sigma[12][16] =
{
  /*  0 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /*  1 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  /*  2 */ { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  /*  3 */ {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  /*  4 */ {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  /*  5 */ {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  /*  6 */ { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  /*  7 */ { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  /*  8 */ {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  /*  9 */ { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
  /* 10 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* 11 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
78
79
80
/* Some helper functions, not necessarily useful */
81
/* Sets every bit of the second finalization flag word, S->f[1].
   Always returns 0. */
static inline int blake2b_set_lastnode( blake2b_state *S )
{
  S->f[1] = ~0ULL; /* all ones */
  return 0;
}
86
87
/* Resets the second finalization flag word, S->f[1], to zero.
   Always returns 0. */
static inline int blake2b_clear_lastnode( blake2b_state *S )
{
  S->f[1] = 0ULL;
  return 0;
}
92
93
/* Marks the state as being on its final block by setting S->f[0] to all
   ones.  When S->last_node is nonzero, S->f[1] is set to all ones as well.
   Always returns 0. */
static inline int blake2b_set_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    S->f[1] = ~0ULL; /* same store blake2b_set_lastnode() performs */
  }

  S->f[0] = ~0ULL;
  return 0;
}
100
101
/* Clears the final-block flag S->f[0].  When S->last_node is nonzero,
   S->f[1] is cleared as well.  Always returns 0. */
static inline int blake2b_clear_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    S->f[1] = 0ULL; /* same store blake2b_clear_lastnode() performs */
  }

  S->f[0] = 0ULL;
  return 0;
}
108
109
110
/* Adds inc to the two-word counter held in S->t, where t[0] is the low
   word and t[1] receives the carry.  Always returns 0. */
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
  /* Do the addition in a single 128-bit integer and split it back. */
  __uint128_t counter = ( __uint128_t )S->t[1] << 64;
  counter |= S->t[0];
  counter += inc;
  S->t[0] = ( uint64_t )counter;
  S->t[1] = ( uint64_t )( counter >> 64 );
#else
  /* Portable form: the low word wrapped around iff it ends up below inc. */
  S->t[0] += inc;
  const int carry = ( S->t[0] < inc );
  S->t[1] += carry;
#endif
  return 0;
}
124
125
126
// Parameter-related functions
127
/* Stores digest_length into P->digest_length.  No validation is done here.
   Always returns 0. */
static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
{
  P->digest_length = digest_length;
  return 0;
}
132
133
/* Stores fanout into P->fanout.  Always returns 0. */
static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
{
  P->fanout = fanout;
  return 0;
}
138
139
/* Stores depth into P->depth.  Always returns 0. */
static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
{
  P->depth = depth;
  return 0;
}
144
145
/* Stores leaf_length into P->leaf_length.  Always returns 0. */
static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
{
  P->leaf_length = leaf_length;
  return 0;
}
150
151
/* Stores node_offset into P->node_offset.  Always returns 0. */
static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
{
  P->node_offset = node_offset;
  return 0;
}
156
157
/* Stores node_depth into P->node_depth.  Always returns 0. */
static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
{
  P->node_depth = node_depth;
  return 0;
}
162
163
/* Stores inner_length into P->inner_length.  Always returns 0. */
static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
{
  P->inner_length = inner_length;
  return 0;
}
168
169
/* Copies exactly BLAKE2B_SALTBYTES bytes from salt into P->salt.
   The caller must supply a buffer of at least that size.
   Always returns 0. */
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
{
  memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
  return 0;
}
174
175
/* Copies exactly BLAKE2B_PERSONALBYTES bytes from personal into
   P->personal.  The caller must supply a buffer of at least that size.
   Always returns 0. */
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
{
  memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
  return 0;
}
180
181
/* Zeroes the whole state and then loads the eight IV words into S->h.
   Always returns 0. */
static inline int blake2b_init0( blake2b_state *S )
{
  const size_t iv_words = sizeof blake2b_IV / sizeof blake2b_IV[0];

  memset( S, 0, sizeof *S );

  for( size_t w = 0; w < iv_words; ++w )
  {
    S->h[w] = blake2b_IV[w];
  }

  return 0;
}
189
190
191
192
/* Forward declarations of the externally visible functions defined later in
   this file.  The extern "C" wrapper gives them C linkage when the file is
   compiled as C++. */
#if defined(__cplusplus)
extern "C" {
#endif
int blake2b_init( blake2b_state *S, size_t outlen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
/* One-shot helper: init (keyed when keylen is nonzero), update, final. */
int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif
204
205
/* init xors IV with input parameter block */
206
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
207
{
208
uint8_t *p, *h, *v;
209
//blake2b_init0( S );
210
v = ( uint8_t * )( blake2b_IV );
211
h = ( uint8_t * )( S->h );
212
p = ( uint8_t * )( P );
213
/* IV XOR ParamBlock */
214
memset( S, 0, sizeof( blake2b_state ) );
215
216
for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
217
218
S->outlen = P->digest_length;
219
return 0;
220
}
221
222
223
/* Some sort of default parameter block initialization, for sequential blake2b */
224
225
int blake2b_init( blake2b_state *S, size_t outlen )
226
{
227
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
228
229
const blake2b_param P =
230
{
231
( uint8_t ) outlen,
232
0,
233
1,
234
1,
235
0,
236
0,
237
0,
238
0,
239
{0},
240
{0},
241
{0}
242
};
243
return blake2b_init_param( S, &P );
244
}
245
246
/* Keyed initialization.
   S       state to initialize
   outlen  requested output length, 1..BLAKE2B_OUTBYTES
   key     pointer to keylen bytes of key material (must not be NULL)
   keylen  key length, 1..BLAKE2B_KEYBYTES
   The key is copied into a zero-padded block of BLAKE2B_BLOCKBYTES bytes,
   fed through blake2b_update(), and the temporary block is wiped afterwards.
   Returns 0 on success, -1 on invalid arguments or if any step fails. */
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;

  /* A NULL key would reach memcpy() below, which is undefined behaviour. */
  if ( ( !key ) || ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;

  /* Positional initializer: first entry is the output length, second the
     key length; the remaining entries match those used by blake2b_init(). */
  const blake2b_param P =
  {
    ( uint8_t ) outlen,
    ( uint8_t ) keylen,
    1,
    1,
    0,
    0,
    0,
    0,
    {0},
    {0},
    {0}
  };

  /* Propagate failure instead of reporting success with an unusable state. */
  if( blake2b_init_param( S, &P ) < 0 )
    return -1;

  {
    uint8_t block[BLAKE2B_BLOCKBYTES];
    memset( block, 0, BLAKE2B_BLOCKBYTES );
    memcpy( block, key, keylen );
    const int rc = blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
    secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */

    if( rc < 0 )
      return -1;
  }
  return 0;
}
279
280
/* Compression step: mixes one message block into the chaining value S->h.
   S      state; h, t and f are read, only h is written
   block  message block; the code below reads 128 bytes from it
   The twelve rounds are performed by the ROUND macro (blake2b-round.h).
   NOTE(review): ROUND is invoked with only a round number, so it presumably
   refers to the locals declared here (rowNl/rowNh, b0/b1, t0/t1, r16/r24,
   m0..m15) by name -- confirm in blake2b-round.h before renaming anything.
   Always returns 0. */
static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
  /* Working state: four rows, each split into a low and a high 128-bit half. */
  __m128i row1l, row1h;
  __m128i row2l, row2h;
  __m128i row3l, row3h;
  __m128i row4l, row4h;
  /* Scratch registers; not referenced by any statement visible here. */
  __m128i b0, b1;
  __m128i t0, t1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  /* Byte-permutation constants, only built with SSSE3 and without XOP.
     Each pattern rotates the bytes inside both 64-bit halves (by 2 and by 3
     positions); presumably shuffle masks for 16- and 24-bit rotations. */
  const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
  const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
#if defined(HAVE_SSE4_1)
  /* SSE4.1 build: keep the message as eight 128-bit vectors. */
  const __m128i m0 = LOADU( block + 00 );
  const __m128i m1 = LOADU( block + 16 );
  const __m128i m2 = LOADU( block + 32 );
  const __m128i m3 = LOADU( block + 48 );
  const __m128i m4 = LOADU( block + 64 );
  const __m128i m5 = LOADU( block + 80 );
  const __m128i m6 = LOADU( block + 96 );
  const __m128i m7 = LOADU( block + 112 );
#else
  /* Otherwise: sixteen 64-bit words read through a uint64_t* cast.
     NOTE(review): this reads the words in host byte order and does not
     check the alignment of block. */
  const uint64_t m0 = ( ( uint64_t * )block )[ 0];
  const uint64_t m1 = ( ( uint64_t * )block )[ 1];
  const uint64_t m2 = ( ( uint64_t * )block )[ 2];
  const uint64_t m3 = ( ( uint64_t * )block )[ 3];
  const uint64_t m4 = ( ( uint64_t * )block )[ 4];
  const uint64_t m5 = ( ( uint64_t * )block )[ 5];
  const uint64_t m6 = ( ( uint64_t * )block )[ 6];
  const uint64_t m7 = ( ( uint64_t * )block )[ 7];
  const uint64_t m8 = ( ( uint64_t * )block )[ 8];
  const uint64_t m9 = ( ( uint64_t * )block )[ 9];
  const uint64_t m10 = ( ( uint64_t * )block )[10];
  const uint64_t m11 = ( ( uint64_t * )block )[11];
  const uint64_t m12 = ( ( uint64_t * )block )[12];
  const uint64_t m13 = ( ( uint64_t * )block )[13];
  const uint64_t m14 = ( ( uint64_t * )block )[14];
  const uint64_t m15 = ( ( uint64_t * )block )[15];
#endif
  /* Rows 1-2 come from the chaining value, row 3 from the first half of the
     IV, row 4 from the second half of the IV XORed with the counter t and
     the finalization flags f. */
  row1l = LOADU( &S->h[0] );
  row1h = LOADU( &S->h[2] );
  row2l = LOADU( &S->h[4] );
  row2h = LOADU( &S->h[6] );
  row3l = LOADU( &blake2b_IV[0] );
  row3h = LOADU( &blake2b_IV[2] );
  row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
  row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
  /* Twelve rounds, fully unrolled. */
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );
  /* Feed-forward: h[0..3] ^= row1 ^ row3, h[4..7] ^= row2 ^ row4. */
  row1l = _mm_xor_si128( row3l, row1l );
  row1h = _mm_xor_si128( row3h, row1h );
  STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
  STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
  row2l = _mm_xor_si128( row4l, row2l );
  row2h = _mm_xor_si128( row4h, row2h );
  STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
  STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
  return 0;
}
349
350
351
/* Absorbs inlen bytes from in into the state.
   S->buf is a two-block buffer.  Input is appended until it would overflow;
   only then is the first block compressed and the second block moved to the
   front.  Input that fits is merely buffered, so up to two blocks stay
   pending for blake2b_final().  Always returns 0. */
int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
{
  while( inlen > 0 )
  {
    const uint32_t used = S->buflen;
    const uint32_t room = 2 * BLAKE2B_BLOCKBYTES - used;

    if( inlen <= room )
    {
      /* Everything left fits: buffer it and stop without compressing. */
      memcpy( S->buf + used, in, inlen );
      S->buflen += ( uint32_t ) inlen;
      in += inlen;
      inlen = 0;
    }
    else
    {
      /* Top the buffer up to two full blocks ... */
      memcpy( S->buf + used, in, room );
      S->buflen += room;

      /* ... consume the first block ... */
      blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
      blake2b_compress( S, S->buf );

      /* ... and slide the second block down to the start of the buffer. */
      memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
      S->buflen -= BLAKE2B_BLOCKBYTES;

      in += room;
      inlen -= room;
    }
  }

  return 0;
}
380
381
382
/* Finishes the hash and writes outlen bytes of S->h to out.
   outlen must equal the length chosen at init time (S->outlen); otherwise
   -1 is returned and nothing is written.  Returns 0 on success.
   NOTE(review): out is not checked for NULL here, and the digest is copied
   straight from S->h in host byte order. */
int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
{
  if( outlen != S->outlen )
  {
    return -1;
  }

  /* More than one block pending: compress the first, keep the remainder. */
  if( S->buflen > BLAKE2B_BLOCKBYTES )
  {
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );
    S->buflen -= BLAKE2B_BLOCKBYTES;
    memmove( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
  }

  /* Last block: count its bytes, raise the final flag(s), zero-pad the rest
     of the buffer, then compress. */
  blake2b_increment_counter( S, S->buflen );
  blake2b_set_lastblock( S );
  memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen );
  blake2b_compress( S, S->buf );

  memcpy( out, S->h, outlen );
  return 0;
}
401
402
403
int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
404
{
405
blake2b_state S[1];
406
407
/* Verify parameters */
408
if ( NULL == in && inlen > 0 ) return -1;
409
410
if ( NULL == out ) return -1;
411
412
if( NULL == key && keylen > 0 ) return -1;
413
414
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
415
416
if( keylen > BLAKE2B_KEYBYTES ) return -1;
417
418
if( keylen )
419
{
420
if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
421
}
422
else
423
{
424
if( blake2b_init( S, outlen ) < 0 ) return -1;
425
}
426
427
if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
428
return blake2b_final( S, out, outlen );
429
}
430
431
#if defined(SUPERCOP)
432
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
433
{
434
return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
435
}
436
#endif
437
438