GitHub Repository: stenzek/duckstation
Path: blob/master/dep/lzma/src/Sha256.c
/* Sha256.c -- SHA-256 Hash
2024-03-01 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
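
/*
  Illustrative usage sketch (assumes the CSha256 type, Byte, and the
  SHA256_DIGEST_SIZE constant declared in "Sha256.h" / "7zTypes.h";
  buf / bufSize stand for caller-provided data):

    CSha256 ctx;
    Byte digest[SHA256_DIGEST_SIZE];     // 32 bytes

    Sha256Prepare();                     // optional: one-time CPU feature detection
    Sha256_Init(&ctx);
    Sha256_Update(&ctx, buf, bufSize);   // may be called repeatedly
    Sha256_Final(&ctx, digest);          // writes the digest and re-inits ctx
*/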

#include "Precomp.h"

#include <string.h>

#include "CpuArch.h"
#include "RotateDefs.h"
#include "Sha256.h"

#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif

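/* Compile-time detection: Z7_COMPILER_SHA256_SUPPORTED is defined when the
   compiler/target can build the hardware-accelerated Sha256_UpdateBlocks_HW
   code path (compiled in a separate translation unit). */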
#ifdef MY_CPU_X86_OR_AMD64
  #if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
     || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
     || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
     || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \
     || defined(_MSC_VER) && (_MSC_VER >= 1200)
    #define Z7_COMPILER_SHA256_SUPPORTED
  #endif
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)

  #if defined(__ARM_FEATURE_SHA2) \
   || defined(__ARM_FEATURE_CRYPTO)
    #define Z7_COMPILER_SHA256_SUPPORTED
  #else
    #if defined(MY_CPU_ARM64) \
     || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
     || defined(Z7_MSC_VER_ORIGINAL)
    #if defined(__ARM_FP) && \
        (  defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
        || defined(__GNUC__) && (__GNUC__ >= 6) \
        ) \
     || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
    #if defined(MY_CPU_ARM64) \
     || !defined(Z7_CLANG_VERSION) \
     || defined(__ARM_NEON) && \
        (Z7_CLANG_VERSION < 170000 || \
         Z7_CLANG_VERSION > 170001)
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
    #endif
    #endif
  #endif
#endif

void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);

#ifdef Z7_COMPILER_SHA256_SUPPORTED
  void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);

  static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
  static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW;

  #define SHA256_UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else
  #define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks
#endif

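/* Selects which block-update implementation the context uses:
   SHA256_ALGO_SW forces the portable C code, SHA256_ALGO_HW requires the
   hardware path (fails if unavailable), and SHA256_ALGO_DEFAULT uses the
   implementation chosen by Sha256Prepare(). Returns False for an
   unsupported or unknown algo value. */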
BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
{
  SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;

#ifdef Z7_COMPILER_SHA256_SUPPORTED
  if (algo != SHA256_ALGO_SW)
  {
    if (algo == SHA256_ALGO_DEFAULT)
      func = g_SHA256_FUNC_UPDATE_BLOCKS;
    else
    {
      if (algo != SHA256_ALGO_HW)
        return False;
      func = g_SHA256_FUNC_UPDATE_BLOCKS_HW;
      if (!func)
        return False;
    }
  }
#else
  if (algo > 1)
    return False;
#endif

  p->func_UpdateBlocks = func;
  return True;
}

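/* STEP_PRE / STEP_MAIN control how far the schedule and round loops are
   unrolled (rounds per loop iteration); Z7_SHA256_UNROLL enables the more
   aggressively unrolled T4/T8 round macros. Z7_SHA256_BIG_W selects a full
   64-entry W[] message schedule instead of a rolling 16-word window. */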
/* define it for speed optimization */

#ifdef Z7_SFX
  #define STEP_PRE 1
  #define STEP_MAIN 1
#else
  #define STEP_PRE 2
  #define STEP_MAIN 4
  // #define Z7_SHA256_UNROLL
#endif

#undef Z7_SHA256_BIG_W
#if STEP_MAIN != 16
  #define Z7_SHA256_BIG_W
#endif


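/* Initial hash values H0..H7 from FIPS 180-4: the first 32 bits of the
   fractional parts of the square roots of the first eight primes. */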
void Sha256_InitState(CSha256 *p)
{
  p->count = 0;
  p->state[0] = 0x6a09e667;
  p->state[1] = 0xbb67ae85;
  p->state[2] = 0x3c6ef372;
  p->state[3] = 0xa54ff53a;
  p->state[4] = 0x510e527f;
  p->state[5] = 0x9b05688c;
  p->state[6] = 0x1f83d9ab;
  p->state[7] = 0x5be0cd19;
}

void Sha256_Init(CSha256 *p)
{
  p->func_UpdateBlocks =
#ifdef Z7_COMPILER_SHA256_SUPPORTED
      g_SHA256_FUNC_UPDATE_BLOCKS;
#else
      NULL;
#endif
  Sha256_InitState(p);
}

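/* FIPS 180-4 functions: S0/S1 are the "big sigma" rotations used in the
   compression rounds, s0/s1 the "small sigma" rotations used in the message
   schedule; Ch is the "choose" function and Maj the "majority" function. */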
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))

#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))


#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))

#define blk2_main(j, i)  s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))

#ifdef Z7_SHA256_BIG_W
// we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
#define w(j, i)      W[(size_t)(j) + i]
#define blk2(j, i)  (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
#if STEP_MAIN == 16
#define w(j, i)  W[(i) & 15]
#else
#define w(j, i)  W[((size_t)(j) + (i)) & 15]
#endif
#define blk2(j, i)  (w(j, i) += blk2_main(j, i))
#endif

#define W_MAIN(i)  blk2(j, i)


#define T1(wx, i) \
    tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    h = g; \
    g = f; \
    f = e; \
    e = d + tmp; \
    tmp += S0(a) + Maj(a, b, c); \
    d = c; \
    c = b; \
    b = a; \
    a = tmp; \

#define R1_PRE(i)  T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)

#if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
#define R2_MAIN(i) \
    R1_MAIN(i) \
    R1_MAIN(i + 1) \

#endif



#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8

#define T4( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    tmp = h; \
    h += d; \
    d = tmp + S0(a) + Maj(a, b, c); \

#define R4( wx, i) \
    T4 ( a,b,c,d,e,f,g,h, wx, (i  )); \
    T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
    T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
    T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \

#define R4_PRE(i)  R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)


#define T8( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    d += h; \
    h += S0(a) + Maj(a, b, c); \

#define R8( wx, i) \
    T8 ( a,b,c,d,e,f,g,h, wx, i  ); \
    T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
    T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
    T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
    T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
    T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
    T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
    T8 ( b,c,d,e,f,g,h,a, wx, i+7); \

#define R8_PRE(i)  R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)

#endif

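/* The 64 SHA-256 round constants K[0..63] (FIPS 180-4): the first 32 bits of
   the fractional parts of the cube roots of the first 64 primes. The table is
   deliberately non-static (note the commented-out "static"), presumably so a
   separately compiled implementation can reference it as well. */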
// static
extern MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];

MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64] = {
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

#define K SHA256_K_ARRAY

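/* Portable C compression function: processes numBlocks consecutive 64-byte
   blocks from data and updates the eight 32-bit state words in place. */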
Z7_NO_INLINE
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  UInt32 W
#ifdef Z7_SHA256_BIG_W
      [64];
#else
      [16];
#endif

  unsigned j;

  UInt32 a,b,c,d,e,f,g,h;

#if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
  UInt32 tmp;
#endif

  a = state[0];
  b = state[1];
  c = state[2];
  d = state[3];
  e = state[4];
  f = state[5];
  g = state[6];
  h = state[7];

  while (numBlocks)
  {

    for (j = 0; j < 16; j += STEP_PRE)
    {
#if STEP_PRE > 4

  #if STEP_PRE < 8
      R4_PRE(0);
  #else
      R8_PRE(0);
    #if STEP_PRE == 16
      R8_PRE(8);
    #endif
  #endif

#else

      R1_PRE(0)
  #if STEP_PRE >= 2
      R1_PRE(1)
    #if STEP_PRE >= 4
      R1_PRE(2)
      R1_PRE(3)
    #endif
  #endif

#endif
    }

    for (j = 16; j < 64; j += STEP_MAIN)
    {
#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8

  #if STEP_MAIN < 8
      R4_MAIN(0)
  #else
      R8_MAIN(0)
    #if STEP_MAIN == 16
      R8_MAIN(8)
    #endif
  #endif

#else

      R1_MAIN(0)
  #if STEP_MAIN >= 2
      R1_MAIN(1)
    #if STEP_MAIN >= 4
      R2_MAIN(2)
      #if STEP_MAIN >= 8
      R2_MAIN(4)
      R2_MAIN(6)
        #if STEP_MAIN >= 16
      R2_MAIN(8)
      R2_MAIN(10)
      R2_MAIN(12)
      R2_MAIN(14)
        #endif
      #endif
    #endif
  #endif
#endif
    }

    a += state[0]; state[0] = a;
    b += state[1]; state[1] = b;
    c += state[2]; state[2] = c;
    d += state[3]; state[3] = d;
    e += state[4]; state[4] = e;
    f += state[5]; state[5] = f;
    g += state[6]; state[6] = g;
    h += state[7]; state[7] = h;

    data += 64;
    numBlocks--;
  }

  /* Wipe variables */
  /* memset(W, 0, sizeof(W)); */
}

#undef S0
#undef S1
#undef s0
#undef s1
#undef K

#define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)

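/* Streaming update: any partial block is accumulated in p->buffer (64 bytes);
   complete 64-byte blocks are passed to the selected block-update function. */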
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
  if (size == 0)
    return;

  {
    unsigned pos = (unsigned)p->count & 0x3F;
    unsigned num;

    p->count += size;

    num = 64 - pos;
    if (num > size)
    {
      memcpy(p->buffer + pos, data, size);
      return;
    }

    if (pos != 0)
    {
      size -= num;
      memcpy(p->buffer + pos, data, num);
      data += num;
      Sha256_UpdateBlock(p);
    }
  }
  {
    size_t numBlocks = size >> 6;
    SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
    size &= 0x3F;
    if (size == 0)
      return;
    data += (numBlocks << 6);
    memcpy(p->buffer, data, size);
  }
}

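/* Finalization (FIPS 180-4 padding): append 0x80, zero-pad, then store the
   total message length in bits as a 64-bit big-endian value in the last
   8 bytes of the final block. The digest is written big-endian, and the
   context is re-initialized so it can be reused for a new hash. */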
void Sha256_Final(CSha256 *p, Byte *digest)
{
  unsigned pos = (unsigned)p->count & 0x3F;
  unsigned i;

  p->buffer[pos++] = 0x80;

  if (pos > (64 - 8))
  {
    while (pos != 64) { p->buffer[pos++] = 0; }
    // memset(&p->buf.buffer[pos], 0, 64 - pos);
    Sha256_UpdateBlock(p);
    pos = 0;
  }

  /*
  if (pos & 3)
  {
    p->buffer[pos] = 0;
    p->buffer[pos + 1] = 0;
    p->buffer[pos + 2] = 0;
    pos += 3;
    pos &= ~3;
  }
  {
    for (; pos < 64 - 8; pos += 4)
      *(UInt32 *)(&p->buffer[pos]) = 0;
  }
  */

  memset(&p->buffer[pos], 0, (64 - 8) - pos);

  {
    UInt64 numBits = (p->count << 3);
    SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32))
    SetBe32(p->buffer + 64 - 4, (UInt32)(numBits))
  }

  Sha256_UpdateBlock(p);

  for (i = 0; i < 8; i += 2)
  {
    UInt32 v0 = p->state[i];
    UInt32 v1 = p->state[(size_t)i + 1];
    SetBe32(digest    , v0)
    SetBe32(digest + 4, v1)
    digest += 8;
  }

  Sha256_InitState(p);
}

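/* One-time runtime dispatch: if the CPU supports SHA acceleration
   (SHA-NI + SSSE3 on x86/x64, or the ARM SHA2 crypto extensions),
   select Sha256_UpdateBlocks_HW as the default block-update function. */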
void Sha256Prepare(void)
{
#ifdef Z7_COMPILER_SHA256_SUPPORTED
  SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
  f = Sha256_UpdateBlocks;
  f_hw = NULL;
#ifdef MY_CPU_X86_OR_AMD64
  #ifndef USE_MY_MM
  if (CPU_IsSupported_SHA()
      && CPU_IsSupported_SSSE3()
      // && CPU_IsSupported_SSE41()
      )
  #endif
#else
  if (CPU_IsSupported_SHA2())
#endif
  {
    // printf("\n========== HW SHA256 ======== \n");
    f = f_hw = Sha256_UpdateBlocks_HW;
  }
  g_SHA256_FUNC_UPDATE_BLOCKS = f;
  g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw;
#endif
}


#undef S0
#undef S1
#undef s0
#undef s1
#undef Ch
#undef Maj
#undef W_MAIN
#undef W_PRE
#undef w
#undef blk2_main
#undef blk2
#undef T1
#undef T4
#undef T8
#undef R1_PRE
#undef R1_MAIN
#undef R2_MAIN
#undef R4
#undef R4_PRE
#undef R4_MAIN
#undef R8
#undef R8_PRE
#undef R8_MAIN
#undef STEP_PRE
#undef STEP_MAIN
#undef Z7_SHA256_BIG_W
#undef Z7_SHA256_UNROLL
#undef Z7_COMPILER_SHA256_SUPPORTED