-- init.lua (as rendered by CoCalc)
-- GitHub Repository: Roblox/luau
-- Path: blob/master/bench/other/boatbomber-HashLib/init.lua
--[=[------------------------------------------------------------------------------------------------------------------------
-- HashLib by Egor Skriptunoff, boatbomber, and howmanysmall

--------------------------------------------------------------------------------------------------------------------------

Module was originally written by Egor Skriptunoff and distributed under an MIT license.
It can be found here: https://github.com/Egor-Skriptunoff/pure_lua_SHA/blob/master/sha2.lua

That version was around 3000 lines long, and supported Lua versions 5.1, 5.2, 5.3, and 5.4, and LuaJIT.
Although that is super cool, Roblox only uses Lua 5.1, so that was extreme overkill.

I, boatbomber, worked to port it to Roblox in a way that doesn't overcomplicate it with support of unreachable
cases. Then, howmanysmall did some final optimizations that really squeeze out all the performance possible.
It's gotten stupid fast, thanks to her!

After quite a bit of work and benchmarking, this is what we were left with.
Enjoy!

--------------------------------------------------------------------------------------------------------------------------

DESCRIPTION:
	This module contains functions to calculate hash digests:
		MD5, SHA-1,
		SHA-224, SHA-256, SHA-512/224, SHA-512/256, SHA-384, SHA-512,
		SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128, SHAKE256,
		HMAC
	Additionally, it has a few extra utility functions:
		hex_to_bin
		base64_to_bin
		bin_to_base64
	Written in pure Lua.
USAGE:
	Input data should be a string
	Result (hash digest) is returned in hexadecimal representation as a string of lowercase hex digits.
	Simplest usage example:
		local HashLib = require(script.HashLib)
		local your_hash = HashLib.sha256("your string")
API:
		HashLib.md5
		HashLib.sha1
	SHA2 hash functions:
		HashLib.sha224
		HashLib.sha256
		HashLib.sha512_224
		HashLib.sha512_256
		HashLib.sha384
		HashLib.sha512
	SHA3 hash functions:
		HashLib.sha3_224
		HashLib.sha3_256
		HashLib.sha3_384
		HashLib.sha3_512
		HashLib.shake128
		HashLib.shake256
	Misc utilities:
		HashLib.hmac (Applicable to any hash function from this module except SHAKE*)
		HashLib.hex_to_bin
		HashLib.base64_to_bin
		HashLib.bin_to_base64

--]=]
---------------------------------------------------------------------------

local Base64 = require(script.Base64)

--------------------------------------------------------------------------------
-- LOCALIZATION FOR VM OPTIMIZATIONS
--------------------------------------------------------------------------------

local ipairs = ipairs

--------------------------------------------------------------------------------
-- 32-BIT BITWISE FUNCTIONS
--------------------------------------------------------------------------------
-- Only low 32 bits of function arguments matter, high bits are ignored
-- The result of all functions (except HEX) is an integer inside "correct range":
-- for "bit" library:    (-TWO_POW_31)..(TWO_POW_31-1)
-- for "bit32" library:        0..(TWO_POW_32-1)
-- The aliases below are all bound to the "bit32" library.
local bit32_band = bit32.band -- 2 arguments
local bit32_bor = bit32.bor -- 2 arguments
local bit32_bxor = bit32.bxor -- 2..5 arguments
local bit32_lshift = bit32.lshift -- second argument is integer 0..31
local bit32_rshift = bit32.rshift -- second argument is integer 0..31
local bit32_lrotate = bit32.lrotate -- second argument is integer 0..31
local bit32_rrotate = bit32.rrotate -- second argument is integer 0..31

--------------------------------------------------------------------------------
-- CREATING OPTIMIZED INNER LOOP
--------------------------------------------------------------------------------
-- Arrays of SHA2 "magic numbers" (in "INT64" and "FFI" branches "*_lo" arrays contain 64-bit values)
-- They start out empty here; they are filled in elsewhere in the file.
local sha2_K_lo, sha2_K_hi, sha2_H_lo, sha2_H_hi, sha3_RC_lo, sha3_RC_hi = {}, {}, {}, {}, {}, {}

-- Per-digest-width tables; note that [256] and [512] alias the shared arrays
-- declared above rather than owning separate tables.
local sha2_H_ext256 = {
	[224] = {},
	[256] = sha2_H_hi,
}

local sha2_H_ext512_lo, sha2_H_ext512_hi = {
	[384] = {},
	[512] = sha2_H_lo,
}, {
	[384] = {},
	[512] = sha2_H_hi,
}

local md5_K, md5_sha1_H = {}, { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 }

-- Rotation schedule used by md5_feed_64. The table is indexed by the current
-- right-rotation amount and yields the next one, so each MD5 round walks a
-- closed cycle of four amounts:
--   25 -> 20 -> 15 -> 10 -> 25      27 -> 23 -> 18 -> 12 -> 27
--   28 -> 21 -> 16 ->  9 -> 28      26 -> 22 -> 17 -> 11 -> 26
-- Slots that are never used as a rotation amount hold 0.
local md5_next_shift = {
	0, 0, 0, 0, 0, 0, 0, 0, -- [1..8]
	28, 25, 26, 27, -- [9..12]
	0, 0, -- [13..14]
	10, 9, 11, 12, -- [15..18]
	0, -- [19]
	15, 16, 17, 18, -- [20..23]
	0, -- [24]
	20, 22, 23, 21, -- [25..28]
}
local HEX64, XOR64A5, lanes_index_base -- defined only for branches that internally use 64-bit integers: "INT64" and "FFI"
local common_W = {} -- temporary table shared between all calculations (to avoid creating new temporary table every time)
local K_lo_modulo, hi_factor, hi_factor_keccak = 4294967296, 0, 0

-- Powers of two used as multipliers / divisors / moduli to emulate shifts with
-- plain double arithmetic.
local TWO_POW_NEG_56 = 2 ^ -56
local TWO_POW_NEG_17 = 2 ^ -17

local TWO_POW_2 = 2 ^ 2
local TWO_POW_3 = 2 ^ 3
local TWO_POW_4 = 2 ^ 4
local TWO_POW_5 = 2 ^ 5
local TWO_POW_6 = 2 ^ 6
local TWO_POW_7 = 2 ^ 7
local TWO_POW_8 = 2 ^ 8
local TWO_POW_9 = 2 ^ 9
local TWO_POW_10 = 2 ^ 10
local TWO_POW_11 = 2 ^ 11
local TWO_POW_12 = 2 ^ 12
local TWO_POW_13 = 2 ^ 13
local TWO_POW_14 = 2 ^ 14
local TWO_POW_15 = 2 ^ 15
local TWO_POW_16 = 2 ^ 16
local TWO_POW_17 = 2 ^ 17
local TWO_POW_18 = 2 ^ 18
local TWO_POW_19 = 2 ^ 19
local TWO_POW_20 = 2 ^ 20
local TWO_POW_21 = 2 ^ 21
local TWO_POW_22 = 2 ^ 22
local TWO_POW_23 = 2 ^ 23
local TWO_POW_24 = 2 ^ 24
local TWO_POW_25 = 2 ^ 25
local TWO_POW_26 = 2 ^ 26
local TWO_POW_27 = 2 ^ 27
local TWO_POW_28 = 2 ^ 28
local TWO_POW_29 = 2 ^ 29
local TWO_POW_30 = 2 ^ 30
local TWO_POW_31 = 2 ^ 31
local TWO_POW_32 = 2 ^ 32
local TWO_POW_40 = 2 ^ 40

local TWO56_POW_7 = 256 ^ 7 -- numerically equal to 2 ^ 56
-- Implementation for Lua 5.1/5.2 (with or without bitwise library available)

--- Runs the 64-round SHA-256 style compression over whole 64-byte blocks.
-- Reads `size` bytes of `str` starting right after byte offset `offs`
-- (i.e. the first byte consumed is `str:byte(offs + 1)`), one 64-byte block
-- at a time, and updates the eight chaining words `H[1..8]` in place.
-- @param H    array of eight 32-bit chaining words; overwritten with the new state
-- @param str  input string
-- @param offs number of bytes of `str` to skip (>= 0)
-- @param size number of bytes to process (>= 0, multiple of 64)
-- Round constants are read from `sha2_K_hi` and the message schedule is built
-- in the shared scratch table `common_W`, so this function is not reentrant.
local function sha256_feed_64(H, str, offs, size)
	-- offs >= 0, size >= 0, size is multiple of 64
	local W, K = common_W, sha2_K_hi
	local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
	for pos = offs, offs + size - 1, 64 do
		-- Load 16 big-endian 32-bit words. `pos` is advanced locally as a read
		-- cursor; this relies on the loop variable being a per-iteration copy,
		-- so the outer stepping by 64 is unaffected.
		for j = 1, 16 do
			pos = pos + 4
			local a, b, c, d = string.byte(str, pos - 3, pos)
			W[j] = ((a * 256 + b) * 256 + c) * 256 + d
		end

		-- Extend the schedule to 64 words. The sums are deliberately left
		-- unreduced: the bit32 functions only look at the low 32 bits.
		for j = 17, 64 do
			local a, b = W[j - 15], W[j - 2]
			W[j] = bit32_bxor(bit32_rrotate(a, 7), bit32_lrotate(a, 14), bit32_rshift(a, 3))
				+ bit32_bxor(bit32_lrotate(b, 15), bit32_lrotate(b, 13), bit32_rshift(b, 10))
				+ W[j - 7]
				+ W[j - 16]
		end

		local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8
		for j = 1, 64 do
			-- `-1 - e` has the same low 32 bits as the bitwise NOT of e.
			local z = bit32_bxor(bit32_rrotate(e, 6), bit32_rrotate(e, 11), bit32_lrotate(e, 7))
				+ bit32_band(e, f)
				+ bit32_band(-1 - e, g)
				+ h
				+ K[j]
				+ W[j]
			h = g
			g = f
			f = e
			e = z + d
			d = c
			c = b
			b = a
			-- At this point d, c, b hold the previous c, b, a; the two band
			-- terms together form the majority function of those three words.
			a = z
				+ bit32_band(d, c)
				+ bit32_band(a, bit32_bxor(d, c))
				+ bit32_bxor(bit32_rrotate(a, 2), bit32_rrotate(a, 13), bit32_lrotate(a, 10))
		end

		-- Fold the working variables back into the chaining state, reducing to 32 bits.
		h1, h2, h3, h4 = (a + h1) % 4294967296, (b + h2) % 4294967296, (c + h3) % 4294967296, (d + h4) % 4294967296
		h5, h6, h7, h8 = (e + h5) % 4294967296, (f + h6) % 4294967296, (g + h7) % 4294967296, (h + h8) % 4294967296
	end

	H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8
end
--- Runs the 80-round SHA-512 style compression over whole 128-byte blocks,
-- emulating 64-bit words as pairs of 32-bit halves.
-- Reads `size` bytes of `str` starting right after byte offset `offs` and
-- updates the chaining state in place.
-- @param H_lo array of eight low 32-bit halves of the chaining words; overwritten
-- @param H_hi array of eight high 32-bit halves of the chaining words; overwritten
-- @param str  input string
-- @param offs number of bytes of `str` to skip (>= 0)
-- @param size number of bytes to process (>= 0, multiple of 128)
-- Round constants are read from `sha2_K_lo` / `sha2_K_hi`; the schedule lives in
-- the shared scratch table `common_W`, so this function is not reentrant.
local function sha512_feed_128(H_lo, H_hi, str, offs, size)
	-- offs >= 0, size >= 0, size is multiple of 128
	-- W1_hi, W1_lo, W2_hi, W2_lo, ...   Wk_hi = W[2*k-1], Wk_lo = W[2*k]
	local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi
	local h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo =
		H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
	local h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi =
		H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
	for pos = offs, offs + size - 1, 128 do
		-- Load 32 big-endian 32-bit words (16 hi/lo pairs). `pos` is advanced
		-- locally as a read cursor; the outer stepping by 128 is unaffected.
		for j = 1, 16 * 2 do
			pos = pos + 4
			local a, b, c, d = string.byte(str, pos - 3, pos)
			W[j] = ((a * 256 + b) * 256 + c) * 256 + d
		end

		-- Extend the schedule to 80 hi/lo pairs. The low half is summed first;
		-- (tmp1 - tmp2) / 2^32 is the carry that is then added into the high half.
		for jj = 34, 160, 2 do
			local a_lo, a_hi, b_lo, b_hi = W[jj - 30], W[jj - 31], W[jj - 4], W[jj - 5]
			local tmp1 = bit32_bxor(
				bit32_rshift(a_lo, 1) + bit32_lshift(a_hi, 31),
				bit32_rshift(a_lo, 8) + bit32_lshift(a_hi, 24),
				bit32_rshift(a_lo, 7) + bit32_lshift(a_hi, 25)
			) % 4294967296 + bit32_bxor(
				bit32_rshift(b_lo, 19) + bit32_lshift(b_hi, 13),
				bit32_lshift(b_lo, 3) + bit32_rshift(b_hi, 29),
				bit32_rshift(b_lo, 6) + bit32_lshift(b_hi, 26)
			) % 4294967296 + W[jj - 14] + W[jj - 32]

			local tmp2 = tmp1 % 4294967296
			W[jj - 1] = bit32_bxor(
				bit32_rshift(a_hi, 1) + bit32_lshift(a_lo, 31),
				bit32_rshift(a_hi, 8) + bit32_lshift(a_lo, 24),
				bit32_rshift(a_hi, 7)
			) + bit32_bxor(
				bit32_rshift(b_hi, 19) + bit32_lshift(b_lo, 13),
				bit32_lshift(b_hi, 3) + bit32_rshift(b_lo, 29),
				bit32_rshift(b_hi, 6)
			) + W[jj - 15] + W[jj - 33] + (tmp1 - tmp2) / 4294967296

			W[jj] = tmp2
		end

		local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
		local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
		for j = 1, 80 do
			local jj = 2 * j
			-- Low half of the round sum; `-1 - x` has the low 32 bits of NOT x.
			local tmp1 = bit32_bxor(
				bit32_rshift(e_lo, 14) + bit32_lshift(e_hi, 18),
				bit32_rshift(e_lo, 18) + bit32_lshift(e_hi, 14),
				bit32_lshift(e_lo, 23) + bit32_rshift(e_hi, 9)
			) % 4294967296 + (bit32_band(e_lo, f_lo) + bit32_band(-1 - e_lo, g_lo)) % 4294967296 + h_lo + K_lo[j] + W[jj]

			local z_lo = tmp1 % 4294967296
			-- High half, plus the carry out of the low half.
			local z_hi = bit32_bxor(
				bit32_rshift(e_hi, 14) + bit32_lshift(e_lo, 18),
				bit32_rshift(e_hi, 18) + bit32_lshift(e_lo, 14),
				bit32_lshift(e_hi, 23) + bit32_rshift(e_lo, 9)
			) + bit32_band(e_hi, f_hi) + bit32_band(-1 - e_hi, g_hi) + h_hi + K_hi[j] + W[jj - 1] + (tmp1 - z_lo) / 4294967296

			h_lo = g_lo
			h_hi = g_hi
			g_lo = f_lo
			g_hi = f_hi
			f_lo = e_lo
			f_hi = e_hi
			tmp1 = z_lo + d_lo
			e_lo = tmp1 % 4294967296
			e_hi = z_hi + d_hi + (tmp1 - e_lo) / 4294967296
			d_lo = c_lo
			d_hi = c_hi
			c_lo = b_lo
			c_hi = b_hi
			b_lo = a_lo
			b_hi = a_hi
			-- From here b holds the previous a, so the rotations below act on it.
			tmp1 = z_lo
				+ (bit32_band(d_lo, c_lo) + bit32_band(b_lo, bit32_bxor(d_lo, c_lo))) % 4294967296
				+ bit32_bxor(
						bit32_rshift(b_lo, 28) + bit32_lshift(b_hi, 4),
						bit32_lshift(b_lo, 30) + bit32_rshift(b_hi, 2),
						bit32_lshift(b_lo, 25) + bit32_rshift(b_hi, 7)
					)
					% 4294967296
			a_lo = tmp1 % 4294967296
			a_hi = z_hi
				+ (bit32_band(d_hi, c_hi) + bit32_band(b_hi, bit32_bxor(d_hi, c_hi)))
				+ bit32_bxor(
					bit32_rshift(b_hi, 28) + bit32_lshift(b_lo, 4),
					bit32_lshift(b_hi, 30) + bit32_rshift(b_lo, 2),
					bit32_lshift(b_hi, 25) + bit32_rshift(b_lo, 7)
				)
				+ (tmp1 - a_lo) / 4294967296
		end

		-- Add the working variables into the chaining state as 64-bit sums:
		-- a_lo is reused as a temporary for each low-half sum, and the carry
		-- (sum - reduced) / 2^32 is propagated into the matching high half.
		a_lo = h1_lo + a_lo
		h1_lo = a_lo % 4294967296
		h1_hi = (h1_hi + a_hi + (a_lo - h1_lo) / 4294967296) % 4294967296
		a_lo = h2_lo + b_lo
		h2_lo = a_lo % 4294967296
		h2_hi = (h2_hi + b_hi + (a_lo - h2_lo) / 4294967296) % 4294967296
		a_lo = h3_lo + c_lo
		h3_lo = a_lo % 4294967296
		h3_hi = (h3_hi + c_hi + (a_lo - h3_lo) / 4294967296) % 4294967296
		a_lo = h4_lo + d_lo
		h4_lo = a_lo % 4294967296
		h4_hi = (h4_hi + d_hi + (a_lo - h4_lo) / 4294967296) % 4294967296
		a_lo = h5_lo + e_lo
		h5_lo = a_lo % 4294967296
		h5_hi = (h5_hi + e_hi + (a_lo - h5_lo) / 4294967296) % 4294967296
		a_lo = h6_lo + f_lo
		h6_lo = a_lo % 4294967296
		h6_hi = (h6_hi + f_hi + (a_lo - h6_lo) / 4294967296) % 4294967296
		a_lo = h7_lo + g_lo
		h7_lo = a_lo % 4294967296
		h7_hi = (h7_hi + g_hi + (a_lo - h7_lo) / 4294967296) % 4294967296
		a_lo = h8_lo + h_lo
		h8_lo = a_lo % 4294967296
		h8_hi = (h8_hi + h_hi + (a_lo - h8_lo) / 4294967296) % 4294967296
	end

	H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8] =
		h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
	H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8] =
		h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
end
--- Runs the 64-step MD5 compression over whole 64-byte blocks.
-- Reads `size` bytes of `str` starting right after byte offset `offs` and
-- updates the four chaining words `H[1..4]` in place.
-- @param H    array whose first four entries are the 32-bit chaining words; overwritten
-- @param str  input string
-- @param offs number of bytes of `str` to skip (>= 0)
-- @param size number of bytes to process (>= 0, multiple of 64)
-- Step constants are read from `md5_K`; message words are staged in the shared
-- scratch table `common_W`, so this function is not reentrant.
local function md5_feed_64(H, str, offs, size)
	-- offs >= 0, size >= 0, size is multiple of 64
	local W, K, md5_next_shift = common_W, md5_K, md5_next_shift
	local h1, h2, h3, h4 = H[1], H[2], H[3], H[4]
	for pos = offs, offs + size - 1, 64 do
		-- Load 16 little-endian 32-bit words. `pos` is advanced locally as a
		-- read cursor; the outer stepping by 64 is unaffected.
		for j = 1, 16 do
			pos = pos + 4
			local a, b, c, d = string.byte(str, pos - 3, pos)
			W[j] = ((d * 256 + c) * 256 + b) * 256 + a
		end

		local a, b, c, d = h1, h2, h3, h4

		-- In every group `s` is a right-rotation amount and md5_next_shift[s]
		-- yields the next amount of that group's four-step cycle. Sums are
		-- left unreduced because bit32 functions only use the low 32 bits, and
		-- `-1 - x` has the low 32 bits of NOT x.

		-- Steps 1..16: message words are used in order.
		local s = 25
		for j = 1, 16 do
			local F = bit32_rrotate(bit32_band(b, c) + bit32_band(-1 - b, d) + a + K[j] + W[j], s) + b
			s = md5_next_shift[s]
			a = d
			d = c
			c = b
			b = F
		end

		-- Steps 17..32: message word index is (5 * j - 4) % 16 + 1.
		s = 27
		for j = 17, 32 do
			local F = bit32_rrotate(bit32_band(d, b) + bit32_band(-1 - d, c) + a + K[j] + W[(5 * j - 4) % 16 + 1], s)
				+ b
			s = md5_next_shift[s]
			a = d
			d = c
			c = b
			b = F
		end

		-- Steps 33..48: message word index is (3 * j + 2) % 16 + 1.
		s = 28
		for j = 33, 48 do
			local F = bit32_rrotate(bit32_bxor(bit32_bxor(b, c), d) + a + K[j] + W[(3 * j + 2) % 16 + 1], s) + b
			s = md5_next_shift[s]
			a = d
			d = c
			c = b
			b = F
		end

		-- Steps 49..64: message word index is (j * 7 - 7) % 16 + 1.
		s = 26
		for j = 49, 64 do
			local F = bit32_rrotate(bit32_bxor(c, bit32_bor(b, -1 - d)) + a + K[j] + W[(j * 7 - 7) % 16 + 1], s) + b
			s = md5_next_shift[s]
			a = d
			d = c
			c = b
			b = F
		end

		-- Fold the working variables back into the chaining state, reducing to 32 bits.
		h1 = (a + h1) % 4294967296
		h2 = (b + h2) % 4294967296
		h3 = (c + h3) % 4294967296
		h4 = (d + h4) % 4294967296
	end

	H[1], H[2], H[3], H[4] = h1, h2, h3, h4
end
--- Runs the 80-step SHA-1 compression over whole 64-byte blocks.
-- Reads `size` bytes of `str` starting right after byte offset `offs` and
-- updates the five chaining words `H[1..5]` in place.
-- @param H    array of five 32-bit chaining words; overwritten with the new state
-- @param str  input string
-- @param offs number of bytes of `str` to skip (>= 0)
-- @param size number of bytes to process (>= 0, multiple of 64)
-- The message schedule is built in the shared scratch table `common_W`, so
-- this function is not reentrant.
local function sha1_feed_64(H, str, offs, size)
	-- offs >= 0, size >= 0, size is multiple of 64
	local W = common_W
	local h1, h2, h3, h4, h5 = H[1], H[2], H[3], H[4], H[5]
	for pos = offs, offs + size - 1, 64 do
		-- Load 16 big-endian 32-bit words. `pos` is advanced locally as a read
		-- cursor; the outer stepping by 64 is unaffected.
		for j = 1, 16 do
			pos = pos + 4
			local a, b, c, d = string.byte(str, pos - 3, pos)
			W[j] = ((a * 256 + b) * 256 + c) * 256 + d
		end

		-- Extend the schedule to 80 words.
		for j = 17, 80 do
			W[j] = bit32_lrotate(bit32_bxor(W[j - 3], W[j - 8], W[j - 14], W[j - 16]), 1)
		end

		-- Four groups of 20 steps; each group has its own mixing function and
		-- additive constant. `z` is left unreduced because the bit32 functions
		-- only use the low 32 bits; `-1 - b` has the low 32 bits of NOT b.
		local a, b, c, d, e = h1, h2, h3, h4, h5
		for j = 1, 20 do
			local z = bit32_lrotate(a, 5) + bit32_band(b, c) + bit32_band(-1 - b, d) + 0x5A827999 + W[j] + e -- constant = math.floor(TWO_POW_30 * sqrt(2))
			e = d
			d = c
			c = bit32_rrotate(b, 2)
			b = a
			a = z
		end

		for j = 21, 40 do
			local z = bit32_lrotate(a, 5) + bit32_bxor(b, c, d) + 0x6ED9EBA1 + W[j] + e -- TWO_POW_30 * sqrt(3)
			e = d
			d = c
			c = bit32_rrotate(b, 2)
			b = a
			a = z
		end

		for j = 41, 60 do
			local z = bit32_lrotate(a, 5) + bit32_band(d, c) + bit32_band(b, bit32_bxor(d, c)) + 0x8F1BBCDC + W[j] + e -- TWO_POW_30 * sqrt(5)
			e = d
			d = c
			c = bit32_rrotate(b, 2)
			b = a
			a = z
		end

		for j = 61, 80 do
			local z = bit32_lrotate(a, 5) + bit32_bxor(b, c, d) + 0xCA62C1D6 + W[j] + e -- TWO_POW_30 * sqrt(10)
			e = d
			d = c
			c = bit32_rrotate(b, 2)
			b = a
			a = z
		end

		-- Fold the working variables back into the chaining state, reducing to 32 bits.
		h1 = (a + h1) % 4294967296
		h2 = (b + h2) % 4294967296
		h3 = (c + h3) % 4294967296
		h4 = (d + h4) % 4294967296
		h5 = (e + h5) % 4294967296
	end

	H[1], H[2], H[3], H[4], H[5] = h1, h2, h3, h4, h5
end
--- Absorbs whole blocks of input into a 25-lane Keccak state and applies the
-- 24-round permutation after each block. Every 64-bit lane is emulated as a
-- pair of 32-bit halves.
-- @param lanes_lo array of 25 low 32-bit lane halves; updated in place
-- @param lanes_hi array of 25 high 32-bit lane halves; updated in place
-- @param str  input string
-- @param offs number of bytes of `str` to skip (>= 0)
-- @param size number of bytes to process (>= 0, multiple of block_size_in_bytes)
-- @param block_size_in_bytes bytes absorbed per permutation (positive multiple of 8)
-- Round constants are read from `sha3_RC_lo` / `sha3_RC_hi`.
-- NOTE(review): the high half of lane 1 is updated by addition in the last
-- step of each round and is stored back unreduced, so it may exceed 32 bits;
-- presumably readers of the state reduce it - confirm against the callers.
local function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes)
	-- This is an example of a Lua function having 79 local variables :-)
	-- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
	local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi
	local qwords_qty = block_size_in_bytes / 8
	for pos = offs, offs + size - 1, block_size_in_bytes do
		-- XOR the block into the first qwords_qty lanes, 8 bytes per lane, each
		-- half read as a little-endian 32-bit word. `pos` is advanced locally as
		-- a read cursor; the outer stepping is unaffected.
		for j = 1, qwords_qty do
			local a, b, c, d = string.byte(str, pos + 1, pos + 4)
			lanes_lo[j] = bit32_bxor(lanes_lo[j], ((d * 256 + c) * 256 + b) * 256 + a)
			pos = pos + 8
			a, b, c, d = string.byte(str, pos - 3, pos)
			lanes_hi[j] = bit32_bxor(lanes_hi[j], ((d * 256 + c) * 256 + b) * 256 + a)
		end

		-- Pull the whole state into locals for the duration of the permutation.
		local L01_lo, L01_hi, L02_lo, L02_hi, L03_lo, L03_hi, L04_lo, L04_hi, L05_lo, L05_hi, L06_lo, L06_hi, L07_lo, L07_hi, L08_lo, L08_hi, L09_lo, L09_hi, L10_lo, L10_hi, L11_lo, L11_hi, L12_lo, L12_hi, L13_lo, L13_hi, L14_lo, L14_hi, L15_lo, L15_hi, L16_lo, L16_hi, L17_lo, L17_hi, L18_lo, L18_hi, L19_lo, L19_hi, L20_lo, L20_hi, L21_lo, L21_hi, L22_lo, L22_hi, L23_lo, L23_hi, L24_lo, L24_hi, L25_lo, L25_hi =
			lanes_lo[1], lanes_hi[1],
			lanes_lo[2], lanes_hi[2],
			lanes_lo[3], lanes_hi[3],
			lanes_lo[4], lanes_hi[4],
			lanes_lo[5], lanes_hi[5],
			lanes_lo[6], lanes_hi[6],
			lanes_lo[7], lanes_hi[7],
			lanes_lo[8], lanes_hi[8],
			lanes_lo[9], lanes_hi[9],
			lanes_lo[10], lanes_hi[10],
			lanes_lo[11], lanes_hi[11],
			lanes_lo[12], lanes_hi[12],
			lanes_lo[13], lanes_hi[13],
			lanes_lo[14], lanes_hi[14],
			lanes_lo[15], lanes_hi[15],
			lanes_lo[16], lanes_hi[16],
			lanes_lo[17], lanes_hi[17],
			lanes_lo[18], lanes_hi[18],
			lanes_lo[19], lanes_hi[19],
			lanes_lo[20], lanes_hi[20],
			lanes_lo[21], lanes_hi[21],
			lanes_lo[22], lanes_hi[22],
			lanes_lo[23], lanes_hi[23],
			lanes_lo[24], lanes_hi[24],
			lanes_lo[25], lanes_hi[25]

		for round_idx = 1, 24 do
			-- Column parities: Ck is the XOR of the five lanes k, k+5, k+10, k+15, k+20.
			local C1_lo = bit32_bxor(L01_lo, L06_lo, L11_lo, L16_lo, L21_lo)
			local C1_hi = bit32_bxor(L01_hi, L06_hi, L11_hi, L16_hi, L21_hi)
			local C2_lo = bit32_bxor(L02_lo, L07_lo, L12_lo, L17_lo, L22_lo)
			local C2_hi = bit32_bxor(L02_hi, L07_hi, L12_hi, L17_hi, L22_hi)
			local C3_lo = bit32_bxor(L03_lo, L08_lo, L13_lo, L18_lo, L23_lo)
			local C3_hi = bit32_bxor(L03_hi, L08_hi, L13_hi, L18_hi, L23_hi)
			local C4_lo = bit32_bxor(L04_lo, L09_lo, L14_lo, L19_lo, L24_lo)
			local C4_hi = bit32_bxor(L04_hi, L09_hi, L14_hi, L19_hi, L24_hi)
			local C5_lo = bit32_bxor(L05_lo, L10_lo, L15_lo, L20_lo, L25_lo)
			local C5_hi = bit32_bxor(L05_hi, L10_hi, L15_hi, L20_hi, L25_hi)

			-- For each column in turn: D mixes two column parities (one of them
			-- rotated left by one bit across the 64-bit pair), T0..T4 are the
			-- column's lanes XORed with D, and each T is then rotated as a 64-bit
			-- value and written to a different lane. Rotations are spelled with
			-- multiply / divide / modulo by powers of two instead of bit32 shifts.
			-- Left-shift products with a shift of 20 or more are reduced
			-- % TWO_POW_32 (presumably so the sum stays exactly representable in a
			-- double); smaller shifts are left unreduced because the bit32
			-- functions only use the low 32 bits.
			local D_lo = bit32_bxor(C1_lo, C3_lo * 2 + (C3_hi % TWO_POW_32 - C3_hi % TWO_POW_31) / TWO_POW_31)
			local D_hi = bit32_bxor(C1_hi, C3_hi * 2 + (C3_lo % TWO_POW_32 - C3_lo % TWO_POW_31) / TWO_POW_31)

			local T0_lo = bit32_bxor(D_lo, L02_lo)
			local T0_hi = bit32_bxor(D_hi, L02_hi)
			local T1_lo = bit32_bxor(D_lo, L07_lo)
			local T1_hi = bit32_bxor(D_hi, L07_hi)
			local T2_lo = bit32_bxor(D_lo, L12_lo)
			local T2_hi = bit32_bxor(D_hi, L12_hi)
			local T3_lo = bit32_bxor(D_lo, L17_lo)
			local T3_hi = bit32_bxor(D_hi, L17_hi)
			local T4_lo = bit32_bxor(D_lo, L22_lo)
			local T4_hi = bit32_bxor(D_hi, L22_hi)

			L02_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_20) / TWO_POW_20 + T1_hi * TWO_POW_12
			L02_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_20) / TWO_POW_20 + T1_lo * TWO_POW_12
			L07_lo = (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_19) / TWO_POW_19 + T3_hi * TWO_POW_13
			L07_hi = (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_19) / TWO_POW_19 + T3_lo * TWO_POW_13
			L12_lo = T0_lo * 2 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_31) / TWO_POW_31
			L12_hi = T0_hi * 2 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_31) / TWO_POW_31
			L17_lo = T2_lo * TWO_POW_10 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_22) / TWO_POW_22
			L17_hi = T2_hi * TWO_POW_10 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_22) / TWO_POW_22
			L22_lo = T4_lo * TWO_POW_2 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_30) / TWO_POW_30
			L22_hi = T4_hi * TWO_POW_2 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_30) / TWO_POW_30

			D_lo = bit32_bxor(C2_lo, C4_lo * 2 + (C4_hi % TWO_POW_32 - C4_hi % TWO_POW_31) / TWO_POW_31)
			D_hi = bit32_bxor(C2_hi, C4_hi * 2 + (C4_lo % TWO_POW_32 - C4_lo % TWO_POW_31) / TWO_POW_31)

			T0_lo = bit32_bxor(D_lo, L03_lo)
			T0_hi = bit32_bxor(D_hi, L03_hi)
			T1_lo = bit32_bxor(D_lo, L08_lo)
			T1_hi = bit32_bxor(D_hi, L08_hi)
			T2_lo = bit32_bxor(D_lo, L13_lo)
			T2_hi = bit32_bxor(D_hi, L13_hi)
			T3_lo = bit32_bxor(D_lo, L18_lo)
			T3_hi = bit32_bxor(D_hi, L18_hi)
			T4_lo = bit32_bxor(D_lo, L23_lo)
			T4_hi = bit32_bxor(D_hi, L23_hi)

			L03_lo = (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_21) / TWO_POW_21 + T2_hi * TWO_POW_11
			L03_hi = (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_21) / TWO_POW_21 + T2_lo * TWO_POW_11
			L08_lo = (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_3) / TWO_POW_3 + T4_hi * TWO_POW_29 % TWO_POW_32
			L08_hi = (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_3) / TWO_POW_3 + T4_lo * TWO_POW_29 % TWO_POW_32
			L13_lo = T1_lo * TWO_POW_6 + (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_26) / TWO_POW_26
			L13_hi = T1_hi * TWO_POW_6 + (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_26) / TWO_POW_26
			L18_lo = T3_lo * TWO_POW_15 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_17) / TWO_POW_17
			L18_hi = T3_hi * TWO_POW_15 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_17) / TWO_POW_17
			L23_lo = (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_2) / TWO_POW_2 + T0_hi * TWO_POW_30 % TWO_POW_32
			L23_hi = (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_2) / TWO_POW_2 + T0_lo * TWO_POW_30 % TWO_POW_32

			D_lo = bit32_bxor(C3_lo, C5_lo * 2 + (C5_hi % TWO_POW_32 - C5_hi % TWO_POW_31) / TWO_POW_31)
			D_hi = bit32_bxor(C3_hi, C5_hi * 2 + (C5_lo % TWO_POW_32 - C5_lo % TWO_POW_31) / TWO_POW_31)

			T0_lo = bit32_bxor(D_lo, L04_lo)
			T0_hi = bit32_bxor(D_hi, L04_hi)
			T1_lo = bit32_bxor(D_lo, L09_lo)
			T1_hi = bit32_bxor(D_hi, L09_hi)
			T2_lo = bit32_bxor(D_lo, L14_lo)
			T2_hi = bit32_bxor(D_hi, L14_hi)
			T3_lo = bit32_bxor(D_lo, L19_lo)
			T3_hi = bit32_bxor(D_hi, L19_hi)
			T4_lo = bit32_bxor(D_lo, L24_lo)
			T4_hi = bit32_bxor(D_hi, L24_hi)

			L04_lo = T3_lo * TWO_POW_21 % TWO_POW_32 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_11) / TWO_POW_11
			L04_hi = T3_hi * TWO_POW_21 % TWO_POW_32 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_11) / TWO_POW_11
			L09_lo = T0_lo * TWO_POW_28 % TWO_POW_32 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_4) / TWO_POW_4
			L09_hi = T0_hi * TWO_POW_28 % TWO_POW_32 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_4) / TWO_POW_4
			L14_lo = T2_lo * TWO_POW_25 % TWO_POW_32 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_7) / TWO_POW_7
			L14_hi = T2_hi * TWO_POW_25 % TWO_POW_32 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_7) / TWO_POW_7
			L19_lo = (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_8) / TWO_POW_8 + T4_hi * TWO_POW_24 % TWO_POW_32
			L19_hi = (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_8) / TWO_POW_8 + T4_lo * TWO_POW_24 % TWO_POW_32
			L24_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_9) / TWO_POW_9 + T1_hi * TWO_POW_23 % TWO_POW_32
			L24_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_9) / TWO_POW_9 + T1_lo * TWO_POW_23 % TWO_POW_32

			D_lo = bit32_bxor(C4_lo, C1_lo * 2 + (C1_hi % TWO_POW_32 - C1_hi % TWO_POW_31) / TWO_POW_31)
			D_hi = bit32_bxor(C4_hi, C1_hi * 2 + (C1_lo % TWO_POW_32 - C1_lo % TWO_POW_31) / TWO_POW_31)

			T0_lo = bit32_bxor(D_lo, L05_lo)
			T0_hi = bit32_bxor(D_hi, L05_hi)
			T1_lo = bit32_bxor(D_lo, L10_lo)
			T1_hi = bit32_bxor(D_hi, L10_hi)
			T2_lo = bit32_bxor(D_lo, L15_lo)
			T2_hi = bit32_bxor(D_hi, L15_hi)
			T3_lo = bit32_bxor(D_lo, L20_lo)
			T3_hi = bit32_bxor(D_hi, L20_hi)
			T4_lo = bit32_bxor(D_lo, L25_lo)
			T4_hi = bit32_bxor(D_hi, L25_hi)

			L05_lo = T4_lo * TWO_POW_14 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_18) / TWO_POW_18
			L05_hi = T4_hi * TWO_POW_14 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_18) / TWO_POW_18
			L10_lo = T1_lo * TWO_POW_20 % TWO_POW_32 + (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_12) / TWO_POW_12
			L10_hi = T1_hi * TWO_POW_20 % TWO_POW_32 + (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_12) / TWO_POW_12
			L15_lo = T3_lo * TWO_POW_8 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_24) / TWO_POW_24
			L15_hi = T3_hi * TWO_POW_8 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_24) / TWO_POW_24
			L20_lo = T0_lo * TWO_POW_27 % TWO_POW_32 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_5) / TWO_POW_5
			L20_hi = T0_hi * TWO_POW_27 % TWO_POW_32 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_5) / TWO_POW_5
			L25_lo = (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_25) / TWO_POW_25 + T2_hi * TWO_POW_7
			L25_hi = (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_25) / TWO_POW_25 + T2_lo * TWO_POW_7

			D_lo = bit32_bxor(C5_lo, C2_lo * 2 + (C2_hi % TWO_POW_32 - C2_hi % TWO_POW_31) / TWO_POW_31)
			D_hi = bit32_bxor(C5_hi, C2_hi * 2 + (C2_lo % TWO_POW_32 - C2_lo % TWO_POW_31) / TWO_POW_31)

			-- First column: lane 1 is only XORed with D (no rotation, stays in
			-- place), so only four temporaries are needed here.
			T1_lo = bit32_bxor(D_lo, L06_lo)
			T1_hi = bit32_bxor(D_hi, L06_hi)
			T2_lo = bit32_bxor(D_lo, L11_lo)
			T2_hi = bit32_bxor(D_hi, L11_hi)
			T3_lo = bit32_bxor(D_lo, L16_lo)
			T3_hi = bit32_bxor(D_hi, L16_hi)
			T4_lo = bit32_bxor(D_lo, L21_lo)
			T4_hi = bit32_bxor(D_hi, L21_hi)

			L06_lo = T2_lo * TWO_POW_3 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_29) / TWO_POW_29
			L06_hi = T2_hi * TWO_POW_3 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_29) / TWO_POW_29
			L11_lo = T4_lo * TWO_POW_18 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_14) / TWO_POW_14
			L11_hi = T4_hi * TWO_POW_18 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_14) / TWO_POW_14
			L16_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_28) / TWO_POW_28 + T1_hi * TWO_POW_4
			L16_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_28) / TWO_POW_28 + T1_lo * TWO_POW_4
			L21_lo = (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_23) / TWO_POW_23 + T3_hi * TWO_POW_9
			L21_hi = (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_23) / TWO_POW_23 + T3_lo * TWO_POW_9

			L01_lo = bit32_bxor(D_lo, L01_lo)
			L01_hi = bit32_bxor(D_hi, L01_hi)

			-- Non-linear row step: each lane becomes x XOR ((NOT y) AND z) for
			-- lanes x, y, z of the same group of five. `-1 - y` has the low 32
			-- bits of NOT y. The multiple assignments matter: every right-hand
			-- side is evaluated from the old values before any lane is written.
			-- In rows 2..5 the same assignment also permutes lanes within the row.
			L01_lo, L02_lo, L03_lo, L04_lo, L05_lo =
				bit32_bxor(L01_lo, bit32_band(-1 - L02_lo, L03_lo)),
				bit32_bxor(L02_lo, bit32_band(-1 - L03_lo, L04_lo)),
				bit32_bxor(L03_lo, bit32_band(-1 - L04_lo, L05_lo)),
				bit32_bxor(L04_lo, bit32_band(-1 - L05_lo, L01_lo)),
				bit32_bxor(L05_lo, bit32_band(-1 - L01_lo, L02_lo))
			L01_hi, L02_hi, L03_hi, L04_hi, L05_hi =
				bit32_bxor(L01_hi, bit32_band(-1 - L02_hi, L03_hi)),
				bit32_bxor(L02_hi, bit32_band(-1 - L03_hi, L04_hi)),
				bit32_bxor(L03_hi, bit32_band(-1 - L04_hi, L05_hi)),
				bit32_bxor(L04_hi, bit32_band(-1 - L05_hi, L01_hi)),
				bit32_bxor(L05_hi, bit32_band(-1 - L01_hi, L02_hi))
			L06_lo, L07_lo, L08_lo, L09_lo, L10_lo =
				bit32_bxor(L09_lo, bit32_band(-1 - L10_lo, L06_lo)),
				bit32_bxor(L10_lo, bit32_band(-1 - L06_lo, L07_lo)),
				bit32_bxor(L06_lo, bit32_band(-1 - L07_lo, L08_lo)),
				bit32_bxor(L07_lo, bit32_band(-1 - L08_lo, L09_lo)),
				bit32_bxor(L08_lo, bit32_band(-1 - L09_lo, L10_lo))
			L06_hi, L07_hi, L08_hi, L09_hi, L10_hi =
				bit32_bxor(L09_hi, bit32_band(-1 - L10_hi, L06_hi)),
				bit32_bxor(L10_hi, bit32_band(-1 - L06_hi, L07_hi)),
				bit32_bxor(L06_hi, bit32_band(-1 - L07_hi, L08_hi)),
				bit32_bxor(L07_hi, bit32_band(-1 - L08_hi, L09_hi)),
				bit32_bxor(L08_hi, bit32_band(-1 - L09_hi, L10_hi))
			L11_lo, L12_lo, L13_lo, L14_lo, L15_lo =
				bit32_bxor(L12_lo, bit32_band(-1 - L13_lo, L14_lo)),
				bit32_bxor(L13_lo, bit32_band(-1 - L14_lo, L15_lo)),
				bit32_bxor(L14_lo, bit32_band(-1 - L15_lo, L11_lo)),
				bit32_bxor(L15_lo, bit32_band(-1 - L11_lo, L12_lo)),
				bit32_bxor(L11_lo, bit32_band(-1 - L12_lo, L13_lo))
			L11_hi, L12_hi, L13_hi, L14_hi, L15_hi =
				bit32_bxor(L12_hi, bit32_band(-1 - L13_hi, L14_hi)),
				bit32_bxor(L13_hi, bit32_band(-1 - L14_hi, L15_hi)),
				bit32_bxor(L14_hi, bit32_band(-1 - L15_hi, L11_hi)),
				bit32_bxor(L15_hi, bit32_band(-1 - L11_hi, L12_hi)),
				bit32_bxor(L11_hi, bit32_band(-1 - L12_hi, L13_hi))
			L16_lo, L17_lo, L18_lo, L19_lo, L20_lo =
				bit32_bxor(L20_lo, bit32_band(-1 - L16_lo, L17_lo)),
				bit32_bxor(L16_lo, bit32_band(-1 - L17_lo, L18_lo)),
				bit32_bxor(L17_lo, bit32_band(-1 - L18_lo, L19_lo)),
				bit32_bxor(L18_lo, bit32_band(-1 - L19_lo, L20_lo)),
				bit32_bxor(L19_lo, bit32_band(-1 - L20_lo, L16_lo))
			L16_hi, L17_hi, L18_hi, L19_hi, L20_hi =
				bit32_bxor(L20_hi, bit32_band(-1 - L16_hi, L17_hi)),
				bit32_bxor(L16_hi, bit32_band(-1 - L17_hi, L18_hi)),
				bit32_bxor(L17_hi, bit32_band(-1 - L18_hi, L19_hi)),
				bit32_bxor(L18_hi, bit32_band(-1 - L19_hi, L20_hi)),
				bit32_bxor(L19_hi, bit32_band(-1 - L20_hi, L16_hi))
			L21_lo, L22_lo, L23_lo, L24_lo, L25_lo =
				bit32_bxor(L23_lo, bit32_band(-1 - L24_lo, L25_lo)),
				bit32_bxor(L24_lo, bit32_band(-1 - L25_lo, L21_lo)),
				bit32_bxor(L25_lo, bit32_band(-1 - L21_lo, L22_lo)),
				bit32_bxor(L21_lo, bit32_band(-1 - L22_lo, L23_lo)),
				bit32_bxor(L22_lo, bit32_band(-1 - L23_lo, L24_lo))
			L21_hi, L22_hi, L23_hi, L24_hi, L25_hi =
				bit32_bxor(L23_hi, bit32_band(-1 - L24_hi, L25_hi)),
				bit32_bxor(L24_hi, bit32_band(-1 - L25_hi, L21_hi)),
				bit32_bxor(L25_hi, bit32_band(-1 - L21_hi, L22_hi)),
				bit32_bxor(L21_hi, bit32_band(-1 - L22_hi, L23_hi)),
				bit32_bxor(L22_hi, bit32_band(-1 - L23_hi, L24_hi))

			-- Mix the per-round constant into lane 1.
			L01_lo = bit32_bxor(L01_lo, RC_lo[round_idx])
			L01_hi = L01_hi + RC_hi[round_idx] -- RC_hi[] is either 0 or 0x80000000, so we could use fast addition instead of slow XOR
		end

		-- Write the permuted state back to the caller's arrays.
		lanes_lo[1], lanes_hi[1] = L01_lo, L01_hi
		lanes_lo[2], lanes_hi[2] = L02_lo, L02_hi
		lanes_lo[3], lanes_hi[3] = L03_lo, L03_hi
		lanes_lo[4], lanes_hi[4] = L04_lo, L04_hi
		lanes_lo[5], lanes_hi[5] = L05_lo, L05_hi
		lanes_lo[6], lanes_hi[6] = L06_lo, L06_hi
		lanes_lo[7], lanes_hi[7] = L07_lo, L07_hi
		lanes_lo[8], lanes_hi[8] = L08_lo, L08_hi
		lanes_lo[9], lanes_hi[9] = L09_lo, L09_hi
		lanes_lo[10], lanes_hi[10] = L10_lo, L10_hi
		lanes_lo[11], lanes_hi[11] = L11_lo, L11_hi
		lanes_lo[12], lanes_hi[12] = L12_lo, L12_hi
		lanes_lo[13], lanes_hi[13] = L13_lo, L13_hi
		lanes_lo[14], lanes_hi[14] = L14_lo, L14_hi
		lanes_lo[15], lanes_hi[15] = L15_lo, L15_hi
		lanes_lo[16], lanes_hi[16] = L16_lo, L16_hi
		lanes_lo[17], lanes_hi[17] = L17_lo, L17_hi
		lanes_lo[18], lanes_hi[18] = L18_lo, L18_hi
		lanes_lo[19], lanes_hi[19] = L19_lo, L19_hi
		lanes_lo[20], lanes_hi[20] = L20_lo, L20_hi
		lanes_lo[21], lanes_hi[21] = L21_lo, L21_hi
		lanes_lo[22], lanes_hi[22] = L22_lo, L22_hi
		lanes_lo[23], lanes_hi[23] = L23_lo, L23_hi
		lanes_lo[24], lanes_hi[24] = L24_lo, L24_hi
		lanes_lo[25], lanes_hi[25] = L25_lo, L25_hi
	end
end
791

792
--------------------------------------------------------------------------------
793
-- MAGIC NUMBERS CALCULATOR
794
--------------------------------------------------------------------------------
795
-- Q:
796
--    Is 53-bit "double" math enough to calculate square roots and cube roots of primes with 64 correct bits after decimal point?
797
-- A:
798
--    Yes, 53-bit "double" arithmetic is enough.
799
--    We could obtain first 40 bits by direct calculation of p^(1/3) and next 40 bits by one step of Newton's method.
800
do
	-- Multiplies two long integers (little-endian arrays of digits in base
	-- TWO_POW_24) and a small integer `factor`.
	-- Returns the first `result_length` digits of (src1 * src2 * factor) and a
	-- floating point approximation of that same value.
	local function mul(src1, src2, factor, result_length)
		local digits = table.create(result_length)
		local carry, approx, scale = 0, 0, 1
		local len1, len2 = #src1, #src2

		for j = 1, result_length do
			-- sum up every digit pair whose positions add up to j + 1
			local first_k = math.max(1, j + 1 - len2)
			local last_k = math.min(j, len1)
			for k = first_k, last_k do
				-- "int32" is not enough for the product, so "factor" must be a "double"
				carry = carry + factor * src1[k] * src2[j + 1 - k]
			end

			local digit = carry % TWO_POW_24
			digits[j] = math.floor(digit)
			carry = (carry - digit) / TWO_POW_24
			approx = approx + digit * scale
			scale = scale * TWO_POW_24
		end

		return digits, approx
	end

	-- `step[n % 6]` is the increment that walks the candidates 2, 3, 5, 7, 11, 13, 17, ...
	-- (starting from 4, then skipping multiples of 2 and 3).
	local idx, p = 0, 4
	local step, one = { 4, 1, 2, -2, 2 }, { 1 }
	-- The first eight square roots go to the SHA-512 IV tables, the next eight to the SHA-384 ones.
	local sqrt_hi, sqrt_lo = sha2_H_hi, sha2_H_lo

	repeat
		p = p + step[p % 6]
		local d = 1
		repeat
			d = d + step[d % 6]
			if d * d > p then
				-- no divisor found up to sqrt(p): p is the next prime number

				-- cube root: direct calculation first, then a correction term computed
				-- from the long-integer residual
				local cbrt = p ^ (1 / 3)
				local cbrt_digits = mul(table.create(1, math.floor(cbrt * TWO_POW_40)), one, 1, 2)
				local _, cbrt_delta = mul(cbrt_digits, mul(cbrt_digits, cbrt_digits, 1, 4), -1, 4)
				local hi = cbrt_digits[2] % 65536 * 65536 + math.floor(cbrt_digits[1] / 256)
				local lo = cbrt_digits[1] % 256 * 16777216
					+ math.floor(cbrt_delta * (TWO_POW_NEG_56 / 3) * cbrt / p)

				if idx < 16 then
					-- square root of the same prime, needed only for the first 16 primes
					local sqrt = math.sqrt(p)
					local sqrt_digits = mul(table.create(1, math.floor(sqrt * TWO_POW_40)), one, 1, 2)
					local _, sqrt_delta = mul(sqrt_digits, sqrt_digits, -1, 2)
					local root_hi = sqrt_digits[2] % 65536 * 65536 + math.floor(sqrt_digits[1] / 256)
					local root_lo = sqrt_digits[1] % 256 * 16777216
						+ math.floor(sqrt_delta * TWO_POW_NEG_17 / sqrt)

					local slot = idx % 8 + 1
					sha2_H_ext256[224][slot] = root_lo
					sqrt_hi[slot], sqrt_lo[slot] = root_hi, root_lo + root_hi * hi_factor
					if slot > 7 then
						-- eight values stored: switch the destination to the SHA-384 tables
						sqrt_hi, sqrt_lo = sha2_H_ext512_hi[384], sha2_H_ext512_lo[384]
					end
				end

				idx = idx + 1
				sha2_K_hi[idx], sha2_K_lo[idx] = hi, lo % K_lo_modulo + hi * hi_factor
				break
			end
		until p % d == 0
	until idx > 79
end
858

859
-- Calculating IVs for SHA512/224 and SHA512/256
-- Each IV is obtained by XOR-ing the SHA-512 IV with 0xA5 bytes and then
-- feeding one pre-padded 128-byte block containing the text "SHA-512/<width>".
for width = 224, 256, 32 do
	local H_lo, H_hi = {}, nil

	if not XOR64A5 then
		-- state is kept as separate low / high 32-bit halves
		H_hi = {}
		for j = 1, 8 do
			H_lo[j] = bit32_bxor(sha2_H_lo[j], 0xA5A5A5A5) % 4294967296
			H_hi[j] = bit32_bxor(sha2_H_hi[j], 0xA5A5A5A5) % 4294967296
		end
	else
		for j = 1, 8 do
			H_lo[j] = XOR64A5(sha2_H_lo[j])
		end
	end

	-- 11 bytes of text + "\128" + 115 zero bytes + "\88" (= 88 bits of message) = 128 bytes
	local seed_block = "SHA-512/" .. tostring(width) .. "\128" .. string.rep("\0", 115) .. "\88"
	sha512_feed_128(H_lo, H_hi, seed_block, 0, 128)

	sha2_H_ext512_lo[width] = H_lo
	sha2_H_ext512_hi[width] = H_hi
end
878

879
-- Constants for MD5
-- md5_K[i] holds abs(sin(i)) scaled by TWO_POW_16 twice, assembled from two
-- 16-bit halves.
do
	for idx = 1, 64 do
		-- we can't use formula math.floor(abs(sin(idx))*TWO_POW_32) because its result may be beyond integer range on Lua built with 32-bit integers
		local scaled = math.abs(math.sin(idx)) * TWO_POW_16
		local upper_half, remainder = math.modf(scaled)
		local lower_half = math.floor(remainder * TWO_POW_16)
		md5_K[idx] = upper_half * 65536 + lower_half
	end
end
887

888
-- Constants for SHA3
-- The 24 round constants are produced bit by bit from a small shift register.
do
	local sh_reg = 29

	-- Returns the lowest bit of the register and advances the register by one step.
	local function next_bit()
		local bit = sh_reg % 2
		sh_reg = bit32_bxor((sh_reg - bit) / 2, 142 * bit)
		return bit
	end

	for idx = 1, 24 do
		local lo = 0
		local m = nil

		-- m runs through 1, 2, 8, 128, 32768, 2^31
		for _ = 1, 6 do
			if m then
				m = m * m * 2
			else
				m = 1
			end

			lo = lo + next_bit() * m
		end

		-- the seventh bit of the round goes to the high word, weighted by the last m
		local hi = next_bit() * m
		sha3_RC_hi[idx], sha3_RC_lo[idx] = hi, lo + hi * hi_factor_keccak
	end
end
908

909
--------------------------------------------------------------------------------
910
-- MAIN FUNCTIONS
911
--------------------------------------------------------------------------------
912
-- Computes a SHA-256 family digest (`width` selects the IV in sha2_H_ext256 and
-- the number of 32-bit registers printed).
-- With `message` given, returns the lowercase hex digest of that string.
-- Without it, returns a function for chunk-by-chunk loading: call it with each
-- chunk, then call it without an argument to obtain the digest.
local function sha256ext(width, message)
	-- Create an instance (private objects for current calculation)
	local Array256 = sha2_H_ext256[width] -- # == 8
	local length, tail = 0, ""
	local H = table.create(8)
	for reg = 1, 8 do
		H[reg] = Array256[reg]
	end

	local function partial(message_part)
		if not message_part then
			-- Finalization; runs only once, later calls return the cached digest
			if tail then
				-- tail, then "\128", then zeros so that the total (with 7 length bytes) is a multiple of 64
				local final_blocks = table.create(10)
				final_blocks[1] = tail
				final_blocks[2] = "\128"
				final_blocks[3] = string.rep("\0", (-9 - length) % 64 + 1)
				tail = nil

				-- Assuming user data length is shorter than (TWO_POW_53)-9 bytes
				-- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes
				-- convert "byte-counter" to "bit-counter" and move decimal point to the left
				length = length * (8 / TWO56_POW_7)
				for j = 4, 10 do
					-- peel off the next most significant byte
					length = length % 1 * 256
					final_blocks[j] = string.char(math.floor(length))
				end

				local padded = table.concat(final_blocks)
				sha256_feed_64(H, padded, 0, #padded)

				local max_reg = width / 32
				for j = 1, max_reg do
					H[j] = string.format("%08x", H[j] % 4294967296)
				end

				H = table.concat(H, "", 1, max_reg)
			end

			return H
		end

		local partLength = #message_part
		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		length = length + partLength
		local offs = 0
		local tailLength = #tail
		if tail ~= "" and tailLength + partLength >= 64 then
			-- complete the buffered partial block with the head of the new chunk
			offs = 64 - tailLength
			sha256_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64)
			tail = ""
		end

		-- feed all whole blocks directly, keep the remainder for later
		local size = partLength - offs
		local size_tail = size % 64
		sha256_feed_64(H, message_part, offs, size - size_tail)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the SHA256 digest of a message
	return partial(message)()
end
981

982
-- Computes a SHA-512 family digest (`width` selects the IV in
-- sha2_H_ext512_lo / sha2_H_ext512_hi and the length of the printed digest).
-- With `message` given, returns the lowercase hex digest of that string.
-- Without it, returns a function for chunk-by-chunk loading: call it with each
-- chunk, then call it without an argument to obtain the digest.
local function sha512ext(width, message)
	-- Create an instance (private objects for current calculation)
	local length, tail = 0, ""
	local H_lo = table.pack(table.unpack(sha2_H_ext512_lo[width]))
	local H_hi = not HEX64 and table.pack(table.unpack(sha2_H_ext512_hi[width]))

	local function partial(message_part)
		if not message_part then
			-- Finalization; runs only once, later calls return the cached digest
			if tail then
				-- tail, then "\128", then zeros so that the total (with 7 length bytes) is a multiple of 128
				local final_blocks = table.create(3)
				final_blocks[1] = tail
				final_blocks[2] = "\128"
				final_blocks[3] = string.rep("\0", (-17 - length) % 128 + 9)
				tail = nil

				-- Assuming user data length is shorter than (TWO_POW_53)-17 bytes
				-- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes
				-- convert "byte-counter" to "bit-counter" and move floating point to the left
				length = length * (8 / TWO56_POW_7)
				for j = 4, 10 do
					-- peel off the next most significant byte
					length = length % 1 * 256
					final_blocks[j] = string.char(math.floor(length))
				end

				local padded = table.concat(final_blocks)
				sha512_feed_128(H_lo, H_hi, padded, 0, #padded)

				local max_reg = math.ceil(width / 64)
				if not HEX64 then
					-- each 64-bit register is printed as its high half followed by its low half
					for j = 1, max_reg do
						local hi_hex = string.format("%08x", H_hi[j] % 4294967296)
						local lo_hex = string.format("%08x", H_lo[j] % 4294967296)
						H_lo[j] = hi_hex .. lo_hex
					end

					H_hi = nil
				else
					for j = 1, max_reg do
						H_lo[j] = HEX64(H_lo[j])
					end
				end

				-- width / 4 hex digits; the last register may be only partially used
				H_lo = string.sub(table.concat(H_lo, "", 1, max_reg), 1, width / 4)
			end

			return H_lo
		end

		local partLength = #message_part
		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		length = length + partLength
		local offs = 0
		if tail ~= "" and #tail + partLength >= 128 then
			-- complete the buffered partial block with the head of the new chunk
			offs = 128 - #tail
			sha512_feed_128(H_lo, H_hi, tail .. string.sub(message_part, 1, offs), 0, 128)
			tail = ""
		end

		-- feed all whole blocks directly, keep the remainder for later
		local size = partLength - offs
		local size_tail = size % 128
		sha512_feed_128(H_lo, H_hi, message_part, offs, size - size_tail)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the SHA512 digest of a message
	return partial(message)()
end
1059

1060
-- Computes the MD5 digest.
-- With `message` given, returns the lowercase hex digest of that string.
-- Without it, returns a function for chunk-by-chunk loading: call it with each
-- chunk, then call it without an argument to obtain the digest.
local function md5(message)
	-- Create an instance (private objects for current calculation)
	local length, tail = 0, ""
	local H = table.create(4)
	for reg = 1, 4 do
		H[reg] = md5_sha1_H[reg]
	end

	local function partial(message_part)
		if not message_part then
			-- Finalization; runs only once, later calls return the cached digest
			if tail then
				-- tail, then "\128", then zeros so that the total (with 8 length bytes) is a multiple of 64
				local final_blocks = table.create(3)
				final_blocks[1] = tail
				final_blocks[2] = "\128"
				final_blocks[3] = string.rep("\0", (-9 - length) % 64)
				tail = nil

				-- convert "byte-counter" to "bit-counter"
				length = length * 8
				-- the bit counter is appended as 8 bytes, least significant byte first
				for j = 4, 11 do
					local low_byte = length % 256
					final_blocks[j] = string.char(low_byte)
					length = (length - low_byte) / 256
				end

				local padded = table.concat(final_blocks)
				md5_feed_64(H, padded, 0, #padded)

				for j = 1, 4 do
					H[j] = string.format("%08x", H[j] % 4294967296)
				end

				-- reverse the byte order inside every 32-bit word
				local words_hex = table.concat(H)
				H = string.gsub(words_hex, "(..)(..)(..)(..)", "%4%3%2%1")
			end

			return H
		end

		local partLength = #message_part
		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		length = length + partLength
		local offs = 0
		if tail ~= "" and #tail + partLength >= 64 then
			-- complete the buffered partial block with the head of the new chunk
			offs = 64 - #tail
			md5_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64)
			tail = ""
		end

		-- feed all whole blocks directly, keep the remainder for later
		local size = partLength - offs
		local size_tail = size % 64
		md5_feed_64(H, message_part, offs, size - size_tail)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the MD5 digest of a message
	return partial(message)()
end
1121

1122
-- Computes the SHA-1 digest.
-- With `message` given, returns the lowercase hex digest of that string.
-- Without it, returns a function for chunk-by-chunk loading: call it with each
-- chunk, then call it without an argument to obtain the digest.
local function sha1(message)
	-- Create an instance (private objects for current calculation)
	local H = table.pack(table.unpack(md5_sha1_H))
	local length, tail = 0, ""

	local function partial(message_part)
		if not message_part then
			-- Finalization; runs only once, later calls return the cached digest
			if tail then
				-- tail, then "\128", then zeros so that the total (with 7 length bytes) is a multiple of 64
				local final_blocks = table.create(10)
				final_blocks[1] = tail
				final_blocks[2] = "\128"
				final_blocks[3] = string.rep("\0", (-9 - length) % 64 + 1)
				tail = nil

				-- Assuming user data length is shorter than (TWO_POW_53)-9 bytes
				-- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes
				-- convert "byte-counter" to "bit-counter" and move decimal point to the left
				length = length * (8 / TWO56_POW_7)
				for j = 4, 10 do
					-- peel off the next most significant byte
					length = length % 1 * 256
					final_blocks[j] = string.char(math.floor(length))
				end

				local padded = table.concat(final_blocks)
				sha1_feed_64(H, padded, 0, #padded)

				for j = 1, 5 do
					H[j] = string.format("%08x", H[j] % 4294967296)
				end

				H = table.concat(H)
			end

			return H
		end

		local partLength = #message_part
		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		length = length + partLength
		local offs = 0
		if tail ~= "" and #tail + partLength >= 64 then
			-- complete the buffered partial block with the head of the new chunk
			offs = 64 - #tail
			sha1_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64)
			tail = ""
		end

		-- feed all whole blocks directly, keep the remainder for later
		local size = partLength - offs
		local size_tail = size % 64
		sha1_feed_64(H, message_part, offs, size - size_tail)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the SHA-1 digest of a message
	return partial(message)()
end
1184

1185
-- Shared implementation of SHA3-* and SHAKE*.
--   block_size_in_bytes  - size of one absorbed block, a multiple of 8
--   digest_size_in_bytes - number of digest bytes to produce; a negative value
--                          makes the final result a function that returns the
--                          next N digest bytes (hex) on every call (default N = 1)
--   is_SHAKE             - selects the SHAKE padding instead of the SHA3 one
--   message              - optional string; when omitted a chunk-by-chunk
--                          loader function is returned
local function keccak(block_size_in_bytes, digest_size_in_bytes, is_SHAKE, message)
	if type(digest_size_in_bytes) ~= "number" then
		-- NIST FIPS 202 defines SHAKE(message, num_bits), while the shake128 / shake256
		-- wrappers of this module take (message, digest_size_in_bytes).
		-- It's easy to mix up or forget the size argument, hence the check.
		error("Argument 'digest_size_in_bytes' must be a number", 2)
	end

	-- Create an instance (private objects for current calculation)
	local tail = ""
	local lanes_lo = table.create(25, 0)
	local lanes_hi = hi_factor_keccak == 0 and table.create(25, 0)
	local result

	--~     pad the input N using the pad function, yielding a padded bit string P with a length divisible by r (such that n = len(P)/r is integer),
	--~     break P into n consecutive r-bit pieces P0, ..., Pn-1 (last is zero-padded)
	--~     initialize the state S to a string of b 0 bits.
	--~     absorb the input into the state: For each block Pi,
	--~         extend Pi at the end by a string of c 0 bits, yielding one of length b,
	--~         XOR that with S and
	--~         apply the block permutation f to the result, yielding a new state S
	--~     initialize Z to be the empty string
	--~     while the length of Z is less than d:
	--~         append the first r bits of S to Z
	--~         if Z is still less than d bits long, apply f to S, yielding a new state S.
	--~     truncate Z to d bits
	local function partial(message_part)
		if not message_part then
			-- Finalization; runs only once, later calls return the cached result
			if tail then
				-- append the following bits to the message: for usual SHA3: 011(0*)1, for SHAKE: 11111(0*)1
				local gap_start = is_SHAKE and 31 or 6
				if #tail + 1 == block_size_in_bytes then
					-- only one byte left in the block: first and last padding bits share it
					tail = tail .. string.char(gap_start + 128)
				else
					tail = tail
						.. string.char(gap_start)
						.. string.rep("\0", (-2 - #tail) % block_size_in_bytes)
						.. "\128"
				end

				keccak_feed(lanes_lo, lanes_hi, tail, 0, #tail, block_size_in_bytes)
				tail = nil

				local lanes_used = 0
				local total_lanes = math.floor(block_size_in_bytes / 8)
				local qwords = {}

				-- Returns not more than 'qwords_qty' qwords as hex ('qwords_qty' might be
				-- non-integer) plus the number of bytes returned.
				-- Doesn't go across keccak-buffer boundary; block_size_in_bytes is a
				-- multiple of 8, so the buffer contains an integer number of qwords.
				local function get_next_qwords_of_digest(qwords_qty)
					if lanes_used >= total_lanes then
						-- the buffer is exhausted: feed 8 zero bytes to obtain a fresh one
						keccak_feed(lanes_lo, lanes_hi, "\0\0\0\0\0\0\0\0", 0, 8, 8)
						lanes_used = 0
					end

					qwords_qty = math.floor(math.min(qwords_qty, total_lanes - lanes_used))
					if hi_factor_keccak == 0 then
						for j = 1, qwords_qty do
							local lane = lanes_used + j
							qwords[j] = string.format("%08x", lanes_hi[lane] % 4294967296)
								.. string.format("%08x", lanes_lo[lane] % 4294967296)
						end
					else
						for j = 1, qwords_qty do
							qwords[j] = HEX64(lanes_lo[lanes_used + j - 1 + lanes_index_base])
						end
					end

					lanes_used = lanes_used + qwords_qty

					-- reverse the byte order inside every qword
					local hex = string.gsub(
						table.concat(qwords, "", 1, qwords_qty),
						"(..)(..)(..)(..)(..)(..)(..)(..)",
						"%8%7%6%5%4%3%2%1"
					)
					return hex, qwords_qty * 8
				end

				local parts = {} -- digest parts
				local last_part, last_part_size = "", 0

				-- Returns 'bytes_needed' bytes (as hex), for arbitrary integer 'bytes_needed'
				local function get_next_part_of_digest(bytes_needed)
					bytes_needed = bytes_needed or 1

					if bytes_needed <= last_part_size then
						-- the leftover of the previously fetched qword is enough
						last_part_size = last_part_size - bytes_needed
						local part_size_in_nibbles = bytes_needed * 2
						local head = string.sub(last_part, 1, part_size_in_nibbles)
						last_part = string.sub(last_part, part_size_in_nibbles + 1)
						return head
					end

					local parts_qty = 0
					if last_part_size > 0 then
						-- start with whatever is left over
						parts_qty = 1
						parts[parts_qty] = last_part
						bytes_needed = bytes_needed - last_part_size
					end

					-- repeats until the length is enough
					while bytes_needed >= 8 do
						local next_part, next_part_size = get_next_qwords_of_digest(bytes_needed / 8)
						parts_qty = parts_qty + 1
						parts[parts_qty] = next_part
						bytes_needed = bytes_needed - next_part_size
					end

					if bytes_needed > 0 then
						-- fetch one more qword and take only its first bytes
						last_part, last_part_size = get_next_qwords_of_digest(1)
						parts_qty = parts_qty + 1
						parts[parts_qty] = get_next_part_of_digest(bytes_needed)
					else
						last_part, last_part_size = "", 0
					end

					return table.concat(parts, "", 1, parts_qty)
				end

				if digest_size_in_bytes < 0 then
					result = get_next_part_of_digest
				else
					result = get_next_part_of_digest(digest_size_in_bytes)
				end
			end

			return result
		end

		local partLength = #message_part
		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		local offs = 0
		if tail ~= "" and #tail + partLength >= block_size_in_bytes then
			-- complete the buffered partial block with the head of the new chunk
			offs = block_size_in_bytes - #tail
			local first_block = tail .. string.sub(message_part, 1, offs)
			keccak_feed(lanes_lo, lanes_hi, first_block, 0, block_size_in_bytes, block_size_in_bytes)
			tail = ""
		end

		-- feed all whole blocks directly, keep the remainder for later
		local size = partLength - offs
		local size_tail = size % block_size_in_bytes
		keccak_feed(lanes_lo, lanes_hi, message_part, offs, size - size_tail, block_size_in_bytes)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the SHA3 digest of a message
	return partial(message)()
end
1343

1344
-- Converts a string of hexadecimal digits (normally two of them) into the
-- single character with that code.
local function HexToBinFunction(hh)
	local char_code = tonumber(hh, 16)
	return string.char(char_code)
end
1347

1348
-- Converts hexadecimal representation to binary string: every pair of hex
-- digits is replaced by the corresponding byte, other characters are kept.
local function hex2bin(hex_string)
	-- keep only the first result of gsub (the second one is the match count)
	local binary_string = string.gsub(hex_string, "%x%x", HexToBinFunction)
	return binary_string
end
1351

1352
-- Two-way base64 lookup table:
--   character -> 6-bit code (both the standard and the URL-safe alphabets; padding maps to -1)
--   6-bit code -> character of the standard alphabet (-1 maps to the padding "=")
local base64_symbols = {
	-- code 62
	["+"] = 62,
	["-"] = 62,
	[62] = "+",
	-- code 63
	["/"] = 63,
	["_"] = 63,
	[63] = "/",
	-- padding
	["="] = -1,
	["."] = -1,
	[-1] = "=",
}

-- Codes 0..61 are assigned to A-Z, a-z and 0-9, in that order
local symbol_index = 0
for _, pair in ipairs({ "AZ", "az", "09" }) do
	local first_ascii, last_ascii = string.byte(pair, 1, 2)
	for ascii = first_ascii, last_ascii do
		local ch = string.char(ascii)
		base64_symbols[ch] = symbol_index
		base64_symbols[symbol_index] = ch
		symbol_index = symbol_index + 1
	end
end
1373

1374
-- Converts binary string to base64 representation (standard alphabet, padded with "=").
local function bin2base64(binary_string)
	local stringLength = #binary_string
	local result = table.create(math.ceil(stringLength / 3))
	local length = 0

	for pos = 1, #binary_string, 3 do
		-- The extra "\0" acts as a sentinel: `b3` / `sentinel` are nil exactly when
		-- the group holds fewer than 2 / 3 real bytes.
		local group = string.sub(binary_string, pos, pos + 2) .. "\0"
		local b1, b2, b3, sentinel = string.byte(group, 1, -1)

		local third_code = -1
		if b3 then
			third_code = b2 % 16 * 4 + math.floor(b3 / 64)
		end

		local fourth_code = -1
		if sentinel then
			fourth_code = b3 % 64
		end

		length = length + 1
		result[length] = base64_symbols[math.floor(b1 / 4)]
			.. base64_symbols[b1 % 4 * 16 + math.floor(b2 / 16)]
			.. base64_symbols[third_code]
			.. base64_symbols[fourth_code]
	end

	return table.concat(result)
end
1390

1391
-- Converts base64 representation to binary string.
-- Accepts the standard and the URL-safe alphabets, "=" or "." as padding, and
-- ignores whitespace. Input without padding is accepted as well: a trailing
-- group of 2 or 3 symbols is decoded into 1 or 2 bytes (a single dangling
-- symbol cannot encode a byte and is ignored).
-- Raises an error when the input contains a character outside the alphabet.
local function base642bin(base64_string)
	local stripped = string.gsub(base64_string, "%s+", "")
	local result, result_qty = {}, 0
	-- Number of bytes to keep from a decoded group; lowered by every padding symbol seen
	local chars_qty = 3
	-- Codes of the symbols collected so far for the current 4-symbol group
	local s1, s2, s3 = 0, 0, 0
	local filled = 0

	for ch in string.gmatch(stripped, ".") do
		local code = base64_symbols[ch]
		if code == nil then
			-- previously this surfaced as "attempt to compare nil with number"
			error("Invalid base64 character: '" .. ch .. "'", 2)
		end

		if code < 0 then
			-- padding symbol: contributes zero bits and shortens the output
			chars_qty = chars_qty - 1
			code = 0
		end

		filled = filled + 1
		if filled == 1 then
			s1 = code
		elseif filled == 2 then
			s2 = code
		elseif filled == 3 then
			s3 = code
		else
			-- fourth symbol: the group is complete, emit up to three bytes
			local c1 = s1 * 4 + math.floor(s2 / 16)
			local c2 = (s2 % 16) * 16 + math.floor(s3 / 4)
			local c3 = (s3 % 4) * 64 + code
			result_qty = result_qty + 1
			result[result_qty] = string.sub(string.char(c1, c2, c3), 1, chars_qty)
			filled = 0
		end
	end

	if filled >= 2 then
		-- Unpadded input: `filled` symbols carry (filled - 1) bytes, minus one byte
		-- for every padding symbol already counted in chars_qty.
		local bytes_qty = filled + chars_qty - 4
		if bytes_qty > 0 then
			if filled < 3 then
				s3 = 0
			end

			local c1 = s1 * 4 + math.floor(s2 / 16)
			local c2 = (s2 % 16) * 16 + math.floor(s3 / 4)
			result_qty = result_qty + 1
			result[result_qty] = string.sub(string.char(c1, c2), 1, bytes_qty)
		end
	end

	return table.concat(result)
end
1413

1414
local block_size_for_HMAC -- this table will be initialized at the end of the module
1415
--local function pad_and_xor(str, result_length, byte_for_xor)
1416
--	return string.gsub(str, ".", function(c)
1417
--		return string.char(bit32_bxor(string.byte(c), byte_for_xor))
1418
--	end) .. string.rep(string.char(byte_for_xor), result_length - #str)
1419
--end
1420

1421
-- For the sake of speed of converting hexes to strings, there's a map of the conversions here
-- Keys are the 256 two-digit lowercase hex strings, values are the matching one-byte strings.
local BinaryStringMap = {}
for Index = 0, 255 do
	local HexPair = string.format("%02x", Index)
	BinaryStringMap[HexPair] = string.char(Index)
end
1426

1427
-- Update 02.14.20 - added AsBinary for easy GameAnalytics replacement.
-- Computes HMAC with one of the hash functions registered in block_size_for_HMAC.
--   hash_func - hash function of this module (anything else raises "Unknown hash function")
--   key       - string
--   message   - optional string; when omitted a chunk-by-chunk loader function is returned
--   AsBinary  - when truthy and `message` is given, the result is returned as a
--               binary string instead of hex (not applied in chunk-by-chunk mode)
local function hmac(hash_func, key, message, AsBinary)
	-- Create an instance (private objects for current calculation)
	local block_size = block_size_for_HMAC[hash_func]
	if not block_size then
		error("Unknown hash function", 2)
	end

	local KeyLength = #key
	if KeyLength > block_size then
		-- a key longer than one block is replaced by its (binary) digest
		key = string.gsub(hash_func(key), "%x%x", HexToBinFunction)
		KeyLength = #key
	end

	-- Returns the key with every byte XOR-ed with `mask`
	local function xor_key(mask)
		return (string.gsub(key, ".", function(c)
			return string.char(bit32_bxor(string.byte(c), mask))
		end))
	end

	local pad_length = block_size - KeyLength

	-- Inner hash: starts with the key XOR 0x36, padded up to the block size
	local append = hash_func()(xor_key(0x36) .. string.rep("6", pad_length)) -- 6 = string.char(0x36)

	local result

	local function partial(message_part)
		if message_part then
			if result then
				error("Adding more chunks is not allowed after receiving the result", 2)
			end

			append(message_part)
			return partial
		end

		if not result then
			-- Outer hash: key XOR 0x5c, padded up to the block size, followed by the binary inner digest
			result = hash_func(
				xor_key(0x5c)
					.. string.rep("\\", pad_length) -- \ = string.char(0x5c)
					.. (string.gsub(append(), "%x%x", HexToBinFunction))
			)
		end

		return result
	end

	if not message then
		-- Return function for chunk-by-chunk loading of a message
		-- User should feed every chunk of the message as single argument to this function and finally get HMAC by invoking this function without an argument
		return partial
	end

	-- Actually perform calculations and return the HMAC of a message
	local FinalMessage = partial(message)()
	if AsBinary then
		return (string.gsub(FinalMessage, "%x%x", BinaryStringMap))
	end

	return FinalMessage
end
1477

1478
-- Public interface of the module.
-- Every hash function returns the hex digest when called with a message, or a
-- chunk-by-chunk loader function when called without one.
local sha = {}

sha.md5 = md5
sha.sha1 = sha1

-- SHA2 hash functions:
function sha.sha224(message)
	return sha256ext(224, message)
end

function sha.sha256(message)
	return sha256ext(256, message)
end

function sha.sha512_224(message)
	return sha512ext(224, message)
end

function sha.sha512_256(message)
	return sha512ext(256, message)
end

function sha.sha384(message)
	return sha512ext(384, message)
end

function sha.sha512(message)
	return sha512ext(512, message)
end

-- SHA3 hash functions:
-- keccak receives the block size (1600 - 2 * width) / 8 and the digest size width / 8, both in bytes
function sha.sha3_224(message)
	return keccak((1600 - 2 * 224) / 8, 224 / 8, false, message)
end

function sha.sha3_256(message)
	return keccak((1600 - 2 * 256) / 8, 256 / 8, false, message)
end

function sha.sha3_384(message)
	return keccak((1600 - 2 * 384) / 8, 384 / 8, false, message)
end

function sha.sha3_512(message)
	return keccak((1600 - 2 * 512) / 8, 512 / 8, false, message)
end

-- SHAKE takes the message first and the wanted digest size in bytes second
function sha.shake128(message, digest_size_in_bytes)
	return keccak((1600 - 2 * 128) / 8, digest_size_in_bytes, true, message)
end

function sha.shake256(message, digest_size_in_bytes)
	return keccak((1600 - 2 * 256) / 8, digest_size_in_bytes, true, message)
end

-- misc utilities:
sha.hmac = hmac -- HMAC(hash_func, key, message) is applicable to any hash function from this module except SHAKE*
sha.hex_to_bin = hex2bin -- converts hexadecimal representation to binary string
sha.base64_to_bin = base642bin -- converts base64 representation to binary string
sha.bin_to_base64 = bin2base64 -- converts binary string to base64 representation
sha.base64_encode = Base64.Encode
sha.base64_decode = Base64.Decode
1539

1540
-- Block size in bytes of every hash function usable with HMAC, keyed by the
-- function itself (SHAKE* is intentionally absent).
block_size_for_HMAC = {}

for _, hash_func in ipairs({ sha.md5, sha.sha1, sha.sha224, sha.sha256 }) do
	block_size_for_HMAC[hash_func] = 64
end

for _, hash_func in ipairs({ sha.sha512_224, sha.sha512_256, sha.sha384, sha.sha512 }) do
	block_size_for_HMAC[hash_func] = 128
end

-- SHA3: the block size is what remains of the 1600-bit state after twice the digest width
block_size_for_HMAC[sha.sha3_224] = (1600 - 2 * 224) / 8
block_size_for_HMAC[sha.sha3_256] = (1600 - 2 * 256) / 8
block_size_for_HMAC[sha.sha3_384] = (1600 - 2 * 384) / 8
block_size_for_HMAC[sha.sha3_512] = (1600 - 2 * 512) / 8

return sha
1556

1557
Product

Resources

Company