Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/third_party/mini-lz4.js
4129 views
1
/*
MiniLZ4: Minimal LZ4 block decoding and encoding.

based off of node-lz4, https://github.com/pierrec/node-lz4

====
Copyright (c) 2012 Pierre Curto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
====

changes have the same license
*/
// MiniLZ4 is built by this IIFE as a single module object exposing raw
// LZ4 block compress/uncompress plus a chunked package compressor.
// NOTE(review): `assert` is assumed to exist in the enclosing
// environment — this file does not define it.
var MiniLZ4 = (function() {

var exports = {};
/**
36
* Decode a block. Assumptions: input contains all sequences of a
37
* chunk, output is large enough to receive the decoded data.
38
* If the output buffer is too small, an error will be thrown.
39
* If the returned value is negative, an error occured at the returned offset.
40
*
41
* @param {ArrayBufferView} input input data
42
* @param {ArrayBufferView} output output data
43
* @param {number=} sIdx
44
* @param {number=} eIdx
45
* @return {number} number of decoded bytes
46
* @private
47
*/
48
exports.uncompress = function (input, output, sIdx, eIdx) {
49
sIdx = sIdx || 0
50
eIdx = eIdx || (input.length - sIdx)
51
// Process each sequence in the incoming data
52
for (var i = sIdx, n = eIdx, j = 0; i < n;) {
53
var token = input[i++]
54
55
// Literals
56
var literals_length = (token >> 4)
57
if (literals_length > 0) {
58
// length of literals
59
var l = literals_length + 240
60
while (l === 255) {
61
l = input[i++]
62
literals_length += l
63
}
64
65
// Copy the literals
66
var end = i + literals_length
67
while (i < end) output[j++] = input[i++]
68
69
// End of buffer?
70
if (i === n) return j
71
}
72
73
// Match copy
74
// 2 bytes offset (little endian)
75
var offset = input[i++] | (input[i++] << 8)
76
77
// XXX 0 is an invalid offset value
78
if (offset === 0) return j
79
if (offset > j) return -(i-2)
80
81
// length of match copy
82
var match_length = (token & 0xf)
83
var l = match_length + 240
84
while (l === 255) {
85
l = input[i++]
86
match_length += l
87
}
88
89
// Copy the match
90
var pos = j - offset // position of the match copy in the current output
91
var end = j + match_length + 4 // minmatch = 4
92
while (j < end) output[j++] = output[pos++]
93
}
94
95
return j
96
}
97
98
// --- LZ4 format constants and shared compressor state ---

// Inputs at or above this size are rejected by the compressor.
var maxInputSize = 0x7E000000;

// A match must cover at least 4 bytes (LZ4 "minmatch").
var minMatch = 4;

// uint32() optimization: 4-byte sequences are hashed down to a
// 16-bit table index.
var hashLog = 16;
var hashShift = (minMatch * 8) - hashLog;
var hashSize = 1 << hashLog;

var copyLength = 8;
// The last 5 bytes of a block must be emitted as literals (LZ4 specs).
var lastLiterals = 5;
// Match search stops this many bytes before the end of the input.
var mfLimit = copyLength + minMatch;
// Controls how aggressively the scan skips ahead when no match is found.
var skipStrength = 6;

// Token layout: low 4 bits = match length, high 4 bits = literal length.
var mlBits = 4;
var mlMask = (1 << mlBits) - 1;
var runBits = 8 - mlBits;
var runMask = (1 << runBits) - 1;

// Multiplicative hashing constant. XXX uint32( 2654435761 )
var hasher = 2654435761;

// The compressor's hash computation assumes a 16-bit right shift.
assert(hashShift === 16);
// Maps sequence hash -> position + 1 (0 means "unseen"). Int16 entries
// are sufficient because chunks are kept below 2^15 bytes.
var hashTable = new Int16Array(1 << 16);
// Zero-filled buffer used to wipe hashTable between blocks.
var empty = new Int16Array(hashTable.length);
// CompressBound returns the maximum length of a lz4 block, given it's uncompressed length
123
exports.compressBound = function (isize) {
124
return isize > maxInputSize
125
? 0
126
: (isize + (isize/255) + 16) | 0
127
}
128
129
/** @param {number=} sIdx
130
@param {number=} eIdx */
131
exports.compress = function (src, dst, sIdx, eIdx) {
132
hashTable.set(empty);
133
return compressBlock(src, dst, 0, sIdx || 0, eIdx || dst.length)
134
}
135
136
/**
 * Compress src into a single raw LZ4 block, writing into dst[sIdx..eIdx).
 *
 * @param {ArrayBufferView} src uncompressed input bytes
 * @param {ArrayBufferView} dst output buffer
 * @param {number} pos scan position in src to start from
 * @param {number} sIdx start offset in dst
 * @param {number} eIdx end offset in dst
 * @return {number} number of bytes written to dst, or 0 if no match was
 *   found (input stored uncompressed by the caller)
 * @throws {Error} if src is too large or dst is smaller than compressBound
 */
function compressBlock (src, dst, pos, sIdx, eIdx) {
	// XXX var Hash = uint32() // Reusable unsigned 32 bits integer
	var dpos = sIdx
	var dlen = eIdx - sIdx
	var anchor = 0  // start of the pending (not yet emitted) literal run

	if (src.length >= maxInputSize) throw new Error("input too large")

	// Minimum of input bytes for compression (LZ4 specs)
	if (src.length > mfLimit) {
		var n = exports.compressBound(src.length)
		if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n)

		var
			step  = 1
		,	findMatchAttempts = (1 << skipStrength) + 3
		// Keep last few bytes incompressible (LZ4 specs):
		// last 5 bytes must be literals
		,	srcLength = src.length - mfLimit

		while (pos + minMatch < srcLength) {
			// Find a match
			// min match of 4 bytes aka sequence
			var sequenceLowBits = src[pos+1]<<8 | src[pos]
			var sequenceHighBits = src[pos+3]<<8 | src[pos+2]
			// compute hash for the current sequence
			var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift;
			/* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits)
							.multiply(hasher)
							.shiftr(hashShift)
							.toNumber() */
			// get the position of the sequence matching the hash
			// NB. since 2 different sequences may have the same hash
			// it is double-checked below
			// do -1 to distinguish between initialized and uninitialized values
			var ref = hashTable[hash] - 1
			// save position of current sequence in hash table
			hashTable[hash] = pos + 1

			// No usable match if: first occurrence of this hash, or the
			// candidate is beyond the 64K offset window, or the bytes at
			// the candidate differ (hash collision).
			if ( ref < 0 ||
				((pos - ref) >>> 16) > 0 ||
				(
					((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) ||
					((src[ref+1]<<8 | src[ref]) != sequenceLowBits )
				)
			) {
				// increase step if nothing found within limit
				// (skip-strength heuristic: accelerate through
				// incompressible regions)
				step = findMatchAttempts++ >> skipStrength
				pos += step
				continue
			}

			// Reset the skip accelerator after a successful match.
			findMatchAttempts = (1 << skipStrength) + 3

			// got a match
			var literals_length = pos - anchor
			var offset = pos - ref

			// minMatch already verified
			pos += minMatch
			ref += minMatch

			// move to the end of the match (>=minMatch)
			var match_length = pos
			while (pos < srcLength && src[pos] == src[ref]) {
				pos++
				ref++
			}

			// match length (extra bytes beyond minMatch)
			match_length = pos - match_length

			// token: low nibble holds the (possibly saturated) match length
			var token = match_length < mlMask ? match_length : mlMask

			// encode literals length (high nibble, 255-extension bytes
			// when the run is >= runMask)
			if (literals_length >= runMask) {
				// add match length to the token
				dst[dpos++] = (runMask << mlBits) + token
				for (var len = literals_length - runMask; len > 254; len -= 255) {
					dst[dpos++] = 255
				}
				dst[dpos++] = len
			} else {
				// add match length to the token
				dst[dpos++] = (literals_length << mlBits) + token
			}

			// write literals
			for (var i = 0; i < literals_length; i++) {
				dst[dpos++] = src[anchor+i]
			}

			// encode offset (2 bytes, little endian)
			dst[dpos++] = offset
			dst[dpos++] = (offset >> 8)

			// encode match length extension bytes when the nibble saturated
			if (match_length >= mlMask) {
				match_length -= mlMask
				while (match_length >= 255) {
					match_length -= 255
					dst[dpos++] = 255
				}

				dst[dpos++] = match_length
			}

			anchor = pos
		}
	}

	// cannot compress input (no match was ever emitted)
	if (anchor == 0) return 0

	// Write last literals
	// encode literals length
	// (literals_length is function-scoped via var hoisting)
	literals_length = src.length - anchor
	if (literals_length >= runMask) {
		// add match length to the token
		dst[dpos++] = (runMask << mlBits)
		for (var ln = literals_length - runMask; ln > 254; ln -= 255) {
			dst[dpos++] = 255
		}
		dst[dpos++] = ln
	} else {
		// add match length to the token
		dst[dpos++] = (literals_length << mlBits)
	}

	// write literals
	pos = anchor
	while (pos < src.length) {
		dst[dpos++] = src[pos++]
	}

	return dpos
}
exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea
277
278
/**
 * Compress an entire package buffer as a series of independently
 * compressed CHUNK_SIZE chunks (so the reader can decompress chunks
 * on demand).
 *
 * @param {ArrayBuffer} data the package contents
 * @param {boolean=} verify if true, immediately decompress each chunk and
 *   assert it round-trips to the original bytes
 * @return {Object} object holding the concatenated compressed bytes
 *   ('data', with spare room for two decompressed chunks), per-chunk
 *   'offsets' and 'sizes', a 'successes' list (1 = compressed, 0 = stored
 *   raw), and the 'cachedOffset'/'cachedIndexes'/'cachedChunks' fields.
 */
exports.compressPackage = function(data, verify) {
	if (verify) {
		// Scratch buffer for round-trip checks. Declared here but used in
		// the loop below — fine, since `var` is function-scoped.
		var temp = new Uint8Array(exports.CHUNK_SIZE);
	}
	// compress the data in chunks
	assert(data instanceof ArrayBuffer);
	data = new Uint8Array(data);
	console.log('compressing package of size ' + data.length);
	var compressedChunks = [];
	var successes = [];
	var offset = 0;
	var total = 0;
	while (offset < data.length) {
		var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE);
		//console.log('compress a chunk ' + [offset, total, data.length]);
		offset += exports.CHUNK_SIZE;
		var bound = exports.compressBound(chunk.length);
		var compressed = new Uint8Array(bound);
		var compressedSize = exports.compress(chunk, compressed);
		if (compressedSize > 0) {
			assert(compressedSize <= bound);
			// Trim to the bytes actually written.
			compressed = compressed.subarray(0, compressedSize);
			compressedChunks.push(compressed);
			total += compressedSize;
			successes.push(1);
			if (verify) {
				var back = exports.uncompress(compressed, temp);
				assert(back === chunk.length, [back, chunk.length]);
				for (var i = 0; i < chunk.length; i++) {
					assert(chunk[i] === temp[i]);
				}
			}
		} else {
			// compress() returns 0 when it found no matches; store the
			// chunk uncompressed.
			assert(compressedSize === 0);
			// failure to compress :(
			compressedChunks.push(chunk);
			total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size
			successes.push(0);
		}
	}
	data = null; // XXX null out pack['data'] too?
	var compressedData = {
		'data': new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunk, in one fast array
		'cachedOffset': total,
		'cachedIndexes': [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing
		'cachedChunks': [null, null],
		'offsets': [], // chunk# => start in compressed data
		'sizes': [],
		'successes': successes, // 1 if chunk is compressed
	};
	// Concatenate all chunks into the single 'data' array and record
	// where each one starts.
	offset = 0;
	for (var i = 0; i < compressedChunks.length; i++) {
		compressedData['data'].set(compressedChunks[i], offset);
		compressedData['offsets'][i] = offset;
		compressedData['sizes'][i] = compressedChunks[i].length
		offset += compressedChunks[i].length;
	}
	console.log('compressed package into ' + [compressedData['data'].length]);
	assert(offset === total);
	return compressedData;
};
assert(exports.CHUNK_SIZE < (1 << 15)); // we use 16-bit ints as the type of the hash table, chunk size must be smaller

return exports;

})();

// CommonJS export for node; in other environments MiniLZ4 stays a
// plain (file-level) variable.
if (typeof module != 'undefined') {
	module.exports = MiniLZ4;
}