Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/third_party/mini-lz4.js
4129 views
1
/*
MiniLZ4: Minimal LZ4 block decoding and encoding.

based off of node-lz4, https://github.com/pierrec/node-lz4

====
Copyright (c) 2012 Pierre Curto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
====

changes have the same license
*/
// MiniLZ4 is built by this IIFE as a single module object exposing raw
// LZ4 block compress/uncompress plus a chunked package compressor.
// NOTE(review): `assert` is assumed to exist in the enclosing
// environment — this file does not define it.
var MiniLZ4 = (function() {

var exports = {};
/**
36
* Decode a block. Assumptions: input contains all sequences of a
37
* chunk, output is large enough to receive the decoded data.
38
* If the output buffer is too small, an error will be thrown.
39
* If the returned value is negative, an error occured at the returned offset.
40
*
41
* @param {ArrayBufferView} input input data
42
* @param {ArrayBufferView} output output data
43
* @param {number=} sIdx
44
* @param {number=} eIdx
45
* @return {number} number of decoded bytes
46
* @private
47
*/
48
exports.uncompress = function (input, output, sIdx, eIdx) {
49
sIdx = sIdx || 0
50
eIdx = eIdx || (input.length - sIdx)
51
// Process each sequence in the incoming data
52
for (var i = sIdx, n = eIdx, j = 0; i < n;) {
53
var token = input[i++]
54
55
// Literals
56
var literals_length = (token >> 4)
57
if (literals_length > 0) {
58
// length of literals
59
var l = literals_length + 240
60
while (l === 255) {
61
l = input[i++]
62
literals_length += l
63
}
64
65
// Copy the literals
66
var end = i + literals_length
67
while (i < end) output[j++] = input[i++]
68
69
// End of buffer?
70
if (i === n) return j
71
}
72
73
// Match copy
74
// 2 bytes offset (little endian)
75
var offset = input[i++] | (input[i++] << 8)
76
77
// XXX 0 is an invalid offset value
78
if (offset === 0) return j
79
if (offset > j) return -(i-2)
80
81
// length of match copy
82
var match_length = (token & 0xf)
83
var l = match_length + 240
84
while (l === 255) {
85
l = input[i++]
86
match_length += l
87
}
88
89
// Copy the match
90
var pos = j - offset // position of the match copy in the current output
91
var end = j + match_length + 4 // minmatch = 4
92
while (j < end) output[j++] = output[pos++]
93
}
94
95
return j
96
}
97
98
// --- LZ4 format constants and shared compressor state ---

// Inputs at or above this size are rejected by the compressor.
var maxInputSize = 0x7E000000;

// A match must cover at least 4 bytes (LZ4 "minmatch").
var minMatch = 4;

// uint32() optimization: 4-byte sequences are hashed down to a
// 16-bit table index.
var hashLog = 16;
var hashShift = (minMatch * 8) - hashLog;
var hashSize = 1 << hashLog;

var copyLength = 8;
// The last 5 bytes of a block must be emitted as literals (LZ4 specs).
var lastLiterals = 5;
// Match search stops this many bytes before the end of the input.
var mfLimit = copyLength + minMatch;
// Controls how aggressively the scan skips ahead when no match is found.
var skipStrength = 6;

// Token layout: low 4 bits = match length, high 4 bits = literal length.
var mlBits = 4;
var mlMask = (1 << mlBits) - 1;
var runBits = 8 - mlBits;
var runMask = (1 << runBits) - 1;

// Multiplicative hashing constant. XXX uint32( 2654435761 )
var hasher = 2654435761;

// The compressor's hash computation assumes a 16-bit right shift.
assert(hashShift === 16);
// Maps sequence hash -> position + 1 (0 means "unseen"). Int16 entries
// are sufficient because chunks are kept below 2^15 bytes.
var hashTable = new Int16Array(1 << 16);
// Zero-filled buffer used to wipe hashTable between blocks.
var empty = new Int16Array(hashTable.length);
// CompressBound returns the maximum length of a lz4 block, given it's uncompressed length
123
exports.compressBound = function (isize) {
124
return isize > maxInputSize
125
? 0
126
: (isize + (isize/255) + 16) | 0
127
}
128
129
/** @param {number=} sIdx
130
@param {number=} eIdx */
131
exports.compress = function (src, dst, sIdx, eIdx) {
132
hashTable.set(empty);
133
return compressBlock(src, dst, 0, sIdx || 0, eIdx || dst.length)
134
}
135
136
/**
 * Compress src into a single raw LZ4 block, writing into dst[sIdx..eIdx).
 *
 * @param {ArrayBufferView} src uncompressed input bytes
 * @param {ArrayBufferView} dst output buffer
 * @param {number} pos scan position in src to start from
 * @param {number} sIdx start offset in dst
 * @param {number} eIdx end offset in dst
 * @return {number} number of bytes written to dst, or 0 if no match was
 *   found (input stored uncompressed by the caller)
 * @throws {Error} if src is too large or dst is smaller than compressBound
 */
function compressBlock (src, dst, pos, sIdx, eIdx) {
	// XXX var Hash = uint32() // Reusable unsigned 32 bits integer
	var dpos = sIdx
	var dlen = eIdx - sIdx
	var anchor = 0  // start of the pending (not yet emitted) literal run

	if (src.length >= maxInputSize) throw new Error("input too large")

	// Minimum of input bytes for compression (LZ4 specs)
	if (src.length > mfLimit) {
		var n = exports.compressBound(src.length)
		if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n)

		var
			step  = 1
		,	findMatchAttempts = (1 << skipStrength) + 3
		// Keep last few bytes incompressible (LZ4 specs):
		// last 5 bytes must be literals
		,	srcLength = src.length - mfLimit

		while (pos + minMatch < srcLength) {
			// Find a match
			// min match of 4 bytes aka sequence
			var sequenceLowBits = src[pos+1]<<8 | src[pos]
			var sequenceHighBits = src[pos+3]<<8 | src[pos+2]
			// compute hash for the current sequence
			var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift;
			/* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits)
							.multiply(hasher)
							.shiftr(hashShift)
							.toNumber() */
			// get the position of the sequence matching the hash
			// NB. since 2 different sequences may have the same hash
			// it is double-checked below
			// do -1 to distinguish between initialized and uninitialized values
			var ref = hashTable[hash] - 1
			// save position of current sequence in hash table
			hashTable[hash] = pos + 1

			// No usable match if: first occurrence of this hash, or the
			// candidate is beyond the 64K offset window, or the bytes at
			// the candidate differ (hash collision).
			if ( ref < 0 ||
				((pos - ref) >>> 16) > 0 ||
				(
					((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) ||
					((src[ref+1]<<8 | src[ref]) != sequenceLowBits )
				)
			) {
				// increase step if nothing found within limit
				// (skip-strength heuristic: accelerate through
				// incompressible regions)
				step = findMatchAttempts++ >> skipStrength
				pos += step
				continue
			}

			// Reset the skip accelerator after a successful match.
			findMatchAttempts = (1 << skipStrength) + 3

			// got a match
			var literals_length = pos - anchor
			var offset = pos - ref

			// minMatch already verified
			pos += minMatch
			ref += minMatch

			// move to the end of the match (>=minMatch)
			var match_length = pos
			while (pos < srcLength && src[pos] == src[ref]) {
				pos++
				ref++
			}

			// match length (extra bytes beyond minMatch)
			match_length = pos - match_length

			// token: low nibble holds the (possibly saturated) match length
			var token = match_length < mlMask ? match_length : mlMask

			// encode literals length (high nibble, 255-extension bytes
			// when the run is >= runMask)
			if (literals_length >= runMask) {
				// add match length to the token
				dst[dpos++] = (runMask << mlBits) + token
				for (var len = literals_length - runMask; len > 254; len -= 255) {
					dst[dpos++] = 255
				}
				dst[dpos++] = len
			} else {
				// add match length to the token
				dst[dpos++] = (literals_length << mlBits) + token
			}

			// write literals
			for (var i = 0; i < literals_length; i++) {
				dst[dpos++] = src[anchor+i]
			}

			// encode offset (2 bytes, little endian)
			dst[dpos++] = offset
			dst[dpos++] = (offset >> 8)

			// encode match length extension bytes when the nibble saturated
			if (match_length >= mlMask) {
				match_length -= mlMask
				while (match_length >= 255) {
					match_length -= 255
					dst[dpos++] = 255
				}

				dst[dpos++] = match_length
			}

			anchor = pos
		}
	}

	// cannot compress input (no match was ever emitted)
	if (anchor == 0) return 0

	// Write last literals
	// encode literals length
	// (literals_length is function-scoped via var hoisting)
	literals_length = src.length - anchor
	if (literals_length >= runMask) {
		// add match length to the token
		dst[dpos++] = (runMask << mlBits)
		for (var ln = literals_length - runMask; ln > 254; ln -= 255) {
			dst[dpos++] = 255
		}
		dst[dpos++] = ln
	} else {
		// add match length to the token
		dst[dpos++] = (literals_length << mlBits)
	}

	// write literals
	pos = anchor
	while (pos < src.length) {
		dst[dpos++] = src[pos++]
	}

	return dpos
}
exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea
277
278
/**
 * Compress an entire package buffer as a series of independently
 * compressed CHUNK_SIZE chunks (so the reader can decompress chunks
 * on demand).
 *
 * @param {ArrayBuffer} data the package contents
 * @param {boolean=} verify if true, immediately decompress each chunk and
 *   assert it round-trips to the original bytes
 * @return {Object} object holding the concatenated compressed bytes
 *   ('data', with spare room for two decompressed chunks), per-chunk
 *   'offsets' and 'sizes', a 'successes' list (1 = compressed, 0 = stored
 *   raw), and the 'cachedOffset'/'cachedIndexes'/'cachedChunks' fields.
 */
exports.compressPackage = function(data, verify) {
	if (verify) {
		// Scratch buffer for round-trip checks. Declared here but used in
		// the loop below — fine, since `var` is function-scoped.
		var temp = new Uint8Array(exports.CHUNK_SIZE);
	}
	// compress the data in chunks
	assert(data instanceof ArrayBuffer);
	data = new Uint8Array(data);
	console.log('compressing package of size ' + data.length);
	var compressedChunks = [];
	var successes = [];
	var offset = 0;
	var total = 0;
	while (offset < data.length) {
		var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE);
		//console.log('compress a chunk ' + [offset, total, data.length]);
		offset += exports.CHUNK_SIZE;
		var bound = exports.compressBound(chunk.length);
		var compressed = new Uint8Array(bound);
		var compressedSize = exports.compress(chunk, compressed);
		if (compressedSize > 0) {
			assert(compressedSize <= bound);
			// Trim to the bytes actually written.
			compressed = compressed.subarray(0, compressedSize);
			compressedChunks.push(compressed);
			total += compressedSize;
			successes.push(1);
			if (verify) {
				var back = exports.uncompress(compressed, temp);
				assert(back === chunk.length, [back, chunk.length]);
				for (var i = 0; i < chunk.length; i++) {
					assert(chunk[i] === temp[i]);
				}
			}
		} else {
			// compress() returns 0 when it found no matches; store the
			// chunk uncompressed.
			assert(compressedSize === 0);
			// failure to compress :(
			compressedChunks.push(chunk);
			total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size
			successes.push(0);
		}
	}
	data = null; // XXX null out pack['data'] too?
	var compressedData = {
		'data': new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunk, in one fast array
		'cachedOffset': total,
		'cachedIndexes': [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing
		'cachedChunks': [null, null],
		'offsets': [], // chunk# => start in compressed data
		'sizes': [],
		'successes': successes, // 1 if chunk is compressed
	};
	// Concatenate all chunks into the single 'data' array and record
	// where each one starts.
	offset = 0;
	for (var i = 0; i < compressedChunks.length; i++) {
		compressedData['data'].set(compressedChunks[i], offset);
		compressedData['offsets'][i] = offset;
		compressedData['sizes'][i] = compressedChunks[i].length
		offset += compressedChunks[i].length;
	}
	console.log('compressed package into ' + [compressedData['data'].length]);
	assert(offset === total);
	return compressedData;
};
assert(exports.CHUNK_SIZE < (1 << 15)); // we use 16-bit ints as the type of the hash table, chunk size must be smaller

return exports;

})();

// CommonJS export for node; in other environments MiniLZ4 stays a
// plain (file-level) variable.
if (typeof module != 'undefined') {
	module.exports = MiniLZ4;
}