GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmzstd/lib/compress/zstd_opt.c
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"

#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)

#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE     (1<<30)

#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


/*-*************************************
*  Price functions for optimal parser
***************************************/

#if 0    /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif

/* ZSTD_bitWeight() :
 * provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}

/* ZSTD_fracWeight() :
 * provide fractional-bit "cost" of a stat,
 * using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    /* Fweight was meant for "Fractional weight"
     * but it's effectively a value between 1 and 2
     * using fixed point arithmetic */
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}
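
/* Editor's note (illustrative, not part of the upstream sources):
 * with BITCOST_ACCURACY == 8, one "bit" of price is BITCOST_MULTIPLIER == 256,
 * and ZSTD_fracWeight(s) returns roughly (log2(s+1) + 1) * 256, interpolating
 * linearly between powers of 2.
 * Worked example: rawStat = 5  =>  stat = 6, hb = 2,
 *   BWeight = 2*256 = 512, FWeight = (6<<8)>>2 = 384, weight = 896 = 3.5 "bits",
 * close to the exact log2(6) + 1 ~= 3.58.
 * Prices below are typically formed as WEIGHT(sum) - WEIGHT(freq),
 * i.e. an estimate of log2(sum/freq), the information content of one symbol. */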

#if (DEBUGLEVEL>=2)
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(int price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
#endif

static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}


static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    size_t n;
    U32 total = 0;
    for (n=0; n<nbElts; n++) {
        total += table[n];
    }
    return total;
}

typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;

static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
            (unsigned)lastEltIndex+1, (unsigned)shift );
    assert(shift < 30);
    for (s=0; s<lastEltIndex+1; s++) {
        unsigned const base = base1 ? 1 : (table[s]>0);
        unsigned const newStat = base + (table[s] >> shift);
        sum += newStat;
        table[s] = newStat;
    }
    return sum;
}

/* ZSTD_scaleStats() :
 * reduce all elt frequencies in table if sum too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
    U32 const factor = prevsum >> logTarget;
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (factor <= 1) return prevsum;
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
}
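
/* Editor's note (illustrative): ZSTD_scaleStats() keeps each table's sum near
 * 2^logTarget between blocks, so older statistics decay but are never forgotten.
 * Example (assumed figures): prevsum = 9000, logTarget = 11 (target 2048)
 *   => factor = 9000 >> 11 = 4, shift = ZSTD_highbit32(4) = 2,
 * and every frequency becomes 1 + (freq >> 2), guaranteeing a non-zero base. */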

/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    and init from zero if there is none,
 *    using src for literals stats, and baseline stats for sequence symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
            int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */

        /* heuristic: use pre-defined stats for too small inputs */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
            optPtr->priceType = zop_predef;
        }

        assert(optPtr->symbolCosts != NULL);
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {

            /* huffman stats covering the full value set : table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

            if (compressedLiterals) {
                /* generate literals statistics from huffman table */
                unsigned lit;
                assert(optPtr->litFreq != NULL);
                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                    assert(bitCost <= scaleLog);
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litSum += optPtr->litFreq[lit];
            }   }

            {   unsigned ll;
                FSE_CState_t llstate;
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                optPtr->litLengthSum = 0;
                for (ll=0; ll<=MaxLL; ll++) {
                    U32 const scaleLog = 10;   /* scale to 1K */
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                    assert(bitCost < scaleLog);
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
            }   }

            {   unsigned ml;
                FSE_CState_t mlstate;
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                optPtr->matchLengthSum = 0;
                for (ml=0; ml<=MaxML; ml++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                    assert(bitCost < scaleLog);
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
            }   }

            {   unsigned of;
                FSE_CState_t ofstate;
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                optPtr->offCodeSum = 0;
                for (of=0; of<=MaxOff; of++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                    assert(bitCost < scaleLog);
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
            }   }

        } else {  /* first block, no dictionary */

            assert(optPtr->litFreq != NULL);
            if (compressedLiterals) {
                /* base initial cost of literals on direct frequency within src */
                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
            }

            {   unsigned const baseLLfreqs[MaxLL+1] = {
                    4, 2, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
            }

            {   unsigned ml;
                for (ml=0; ml<=MaxML; ml++)
                    optPtr->matchLengthFreq[ml] = 1;
            }
            optPtr->matchLengthSum = MaxML+1;

            {   unsigned const baseOFCfreqs[MaxOff+1] = {
                    6, 2, 1, 1, 2, 3, 4, 4,
                    4, 3, 2, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
            }

        }

    } else {   /* new block : scale down accumulated statistics */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (which length can be 0).
 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel)
{
    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
    if (litLength == 0) return 0;

    if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */

    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistic used */

    /* dynamic statistics */
    {   U32 price = optPtr->litSumBasePrice * litLength;
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
        U32 u;
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
        for (u=0; u < litLength; u++) {
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
            price -= litPrice;
        }
        return price;
    }
}
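
/* Editor's note (illustrative): with dynamic statistics the loop above charges,
 * for each literal c, approximately log2(litSum / litFreq[c]) fractional bits:
 * price = litLength * WEIGHT(litSum) - sum over the segment of WEIGHT(litFreq[c]).
 * The per-literal WEIGHT is capped at litSumBasePrice - BITCOST_MULTIPLIER,
 * so even a very frequent literal still costs at least one fractional bit. */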

/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    if (optPtr->priceType == zop_predef)
        return WEIGHT(litLength, optLevel);

    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
     * because it isn't representable in the zstd format.
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
     * In such a case, the block would be all literals.
     */
    if (litLength == ZSTD_BLOCKSIZE_MAX)
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
             + optPtr->litLengthSumBasePrice
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    }
}

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence.
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
 */
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offBase,
                   U32 const matchLength,
                   const optState_t* const optPtr,
                   int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(offBase);
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
        return WEIGHT(mlBase, optLevel)
             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */

    /* dynamic statistics */
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    if ((optLevel<2) /*static*/ && offCode >= 20)
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */

    /* match Length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    }

    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}
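
/* Editor's note (illustrative): offBase is the sumtype produced by ZSTD_storeSeq(),
 * so ZSTD_highbit32(offBase) plays the role of the offset code: the match price is
 * the statistical cost of that code plus offCode raw bits for the offset remainder,
 * plus the statistical cost of the matchLength code and its ML_bits extra bits,
 * plus the small constant BITCOST_MULTIPLIER/5 nudging the parser toward fewer,
 * longer sequences. */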

/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offBase, U32 matchLength)
{
    /* literals */
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* offset code : follows storeSeq() numeric representation */
    {   U32 const offCode = ZSTD_highbit32(offBase);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match Length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}


/* ZSTD_readMINMATCH() :
 * function safe only for comparisons
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}
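
/* Editor's note (illustrative): for length==3 a full 32-bit word is still read,
 * then shifted so that only 3 bytes take part in the comparison; the shift
 * direction depends on endianness because the extra byte sits at a different
 * end of the loaded word. Example on little-endian: bytes b0 b1 b2 b3 load as
 * 0xb3b2b1b0, and <<8 discards b3, keeping exactly b0..b2. Both operands of a
 * comparison go through the same transform, so equality means "first 3 bytes match". */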


/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
                                       U32* nextToUpdate3,
                                       const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 * @param ip assumed <= iend-8 .
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @return : nb of positions added */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertBt1(
                const ZSTD_MatchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;   /* Update Hash Table */

    assert(windowLow > 0);
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}
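
/* Editor's note (illustrative): the "binary tree" lives inside ms->chainTable,
 * a rolling buffer addressed through btMask: position p owns the pair
 * bt[2*(p & btMask)] (link to the subtree of smaller suffixes) and
 * bt[2*(p & btMask) + 1] (link to the larger ones). ZSTD_insertBt1() descends
 * from the hash-table head and re-threads those links so the tree stays sorted
 * by suffix, while positions at or below btLow fall out of the rolling window
 * and their links are cut via the dummy32 sink. */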

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateTree_internal(
                ZSTD_MatchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32
ZSTD_insertBtAndGetAllMatches (
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                ZSTD_MatchState_t* ms,
                U32* nextToUpdate3,
                const BYTE* const ip, const BYTE* const iLimit,
                const ZSTD_dictMode_e dictMode,
                const U32 rep[ZSTD_REP_NUM],
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                const U32 lengthToBeat,
                const U32 mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_MatchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = curr+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }  /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
        }   }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }   }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}

typedef U32 (*ZSTD_getAllMatchesFn)(
    ZSTD_match_t*,
    ZSTD_MatchState_t*,
    U32*,
    const BYTE*,
    const BYTE*,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0,
    U32 const lengthToBeat);

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_MatchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    if (ip < ms->window.base + ms->nextToUpdate)
        return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}

#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
            ZSTD_match_t* matches,                             \
            ZSTD_MatchState_t* ms,                             \
            U32* nextToUpdate3,                                \
            const BYTE* ip,                                    \
            const BYTE* const iHighLimit,                      \
            const U32 rep[ZSTD_REP_NUM],                       \
            U32 const ll0,                                     \
            U32 const lengthToBeat)                            \
    {                                                          \
        return ZSTD_btGetAllMatches_internal(                  \
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    }

#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }

static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}
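
/* Editor's note (illustrative): the GEN_* macros above stamp out one specialized
 * search function per (dictMode, mls) pair so the hot loop is compiled with those
 * parameters as constants; ZSTD_selectBtGetAllMatches() merely indexes this 3x4
 * table, e.g. mls==5 with ZSTD_noDict picks ZSTD_btGetAllMatches_noDict_5. */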

/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    RawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate */
    U32 endPosInBlock;        /* End position of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}
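
/* Editor's note (illustrative): posInSequence counts bytes consumed inside the
 * current raw sequence, literals first and then match bytes. Example (assumed
 * figures): with seq[pos] = {litLength=5, matchLength=20} and posInSequence=3,
 * skipping 10 bytes lands at posInSequence=13, i.e. 8 bytes into the match;
 * skipping 30 bytes instead consumes the whole sequence, advances pos, and
 * carries the remaining 8 bytes into the next sequence (if any). */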

/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < minMatch by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough,
 * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
 * into 'matches'. Maintains the correct ordering of 'matches'.
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                      U32 minMatch)
{
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

    /* Ensure that current block position is not outside of the match */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < minMatch) {
        return;
    }

    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
                 candidateOffBase, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffBase;
        (*nbMatches)++;
    }
}

/* ZSTD_optLdm_processMatchCandidate():
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes,
                                  U32 minMatch)
{
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
}


/*-*******************************
*  Optimal parser
*********************************/

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        (void)table;
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif

#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
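
/* Editor's note (illustrative): LL_INCPRICE(l) is the marginal cost of growing a
 * literal run from l-1 to l literals, excluding the literal byte itself (that part
 * is LIT_PRICE). It can be negative: when the run crosses from 0 to 1 the litLength
 * code changes, and a non-zero litlen may be cheaper than ll0 under the current
 * statistics, which is exactly what the "(LL_INCPRICE(1) < 0)" test in the parser
 * below exploits. */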

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t
ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
                               SeqStore_t* seqStore,
                               U32 rep[ZSTD_REP_NUM],
                               const void* src, size_t srcSize,
                               const int optLevel,
                               const ZSTD_dictMode_e dictMode)
{
    optState_t* const optStatePtr = &ms->opt;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    const ZSTD_compressionParameters* const cParams = &ms->cParams;

    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    ZSTD_optimal_t lastStretch;
    ZSTD_optLdm_t optLdm;

    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));

    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;

        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                              (U32)(ip-istart), (U32)(iend-ip),
                                              minMatch);
            if (!nbMatches) {
                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
                ip++;
                continue;
            }

            /* Match found: let's store this solution, and eventually find more candidates.
             * During this forward pass, @opt is used to store stretches,
             * defined as "a match followed by N literals".
             * Note how this is different from a Sequence, which is "N literals followed by a match".
             * Storing stretches allows us to store different match predecessors
             * for each literal position part of a literals run. */

            /* initialize opt[0] */
            opt[0].mlen = 0;  /* there are only literals so far */
            opt[0].litlen = litlen;
            /* No need to include the actual price of the literals before the first match
             * because it is static for the duration of the forward pass, and is included
             * in every subsequent price. But, we include the literal length because
             * the cost variation of litlen depends on the value of litlen.
             */
            opt[0].price = LL_PRICE(litlen);
            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));

            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                U32 const maxOffBase = matches[nbMatches-1].off;
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
                    lastStretch.litlen = 0;
                    lastStretch.mlen = maxML;
                    lastStretch.off = maxOffBase;
                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
                                maxML, sufficient_len);
                    cur = 0;
                    last_pos = maxML;
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting position == 0 */
            assert(opt[0].price >= 0);
            {   U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE;
                    opt[pos].mlen = 0;
                    opt[pos].litlen = litlen + pos;
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offBase = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    for ( ; pos <= end ; pos++ ) {
                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
                        int const sequencePrice = opt[0].price + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                    pos, ZSTD_fCost(sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offBase;
                        opt[pos].litlen = 0; /* end of match */
                        opt[pos].price = sequencePrice + LL_PRICE(0);
                    }
                }
                last_pos = pos-1;
                opt[pos].price = ZSTD_MAX_PRICE;
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur <= ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);

            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = opt[cur-1].litlen + 1;
                int const price = opt[cur-1].price
                                + LIT_PRICE(ip+cur-1)
                                + LL_INCPRICE(litlen);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    ZSTD_optimal_t const prevMatch = opt[cur];
                    DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
                    opt[cur] = opt[cur-1];
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                    if ( (optLevel >= 1) /* additional check only for higher modes */
                      && (prevMatch.litlen == 0) /* replace a match */
                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
                      && LIKELY(ip + cur < iend)
                      ) {
                        /* check next position, in case it would be cheaper */
                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
                        if ( (with1literal < withMoreLiterals)
                          && (with1literal < opt[cur+1].price) ) {
                            /* update offset history - before it disappears */
                            U32 const prev = cur - prevMatch.mlen;
                            Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
                            assert(cur >= prevMatch.mlen);
                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
                            opt[cur+1] = prevMatch;  /* mlen & offbase */
                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
                            opt[cur+1].litlen = 1;
                            opt[cur+1].price = with1literal;
                            if (last_pos < cur+1) last_pos = cur+1;
                        }
                    }
                } else {
                    DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
                                (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
                }
            }

            /* Offset history is not updated during match comparison.
             * Do it here, now that the match is selected and confirmed.
             */
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
            assert(cur >= opt[cur].mlen);
            if (opt[cur].litlen == 0) {
                /* just finished a match => alter offset history */
                U32 const prev = cur - opt[cur].mlen;
                Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
            }

            /* last match must start at a minimum distance of 8 from oend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

            if ( (optLevel==0) /*static_test*/
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
            }

            assert(opt[cur].price >= 0);
            {   U32 const ll0 = (opt[cur].litlen == 0);
                int const previousPrice = opt[cur].price;
                int const basePrice = previousPrice + LL_PRICE(0);
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                U32 matchNb;

                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                                  (U32)(inr-istart), (U32)(iend-inr),
                                                  minMatch);

                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {   U32 const longestML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
                                (int)(inr-istart), cur, nbMatches, longestML);

                    if ( (longestML > sufficient_len)
                      || (cur + longestML >= ZSTD_OPT_NUM)
                      || (ip + cur + longestML >= iend) ) {
                        lastStretch.mlen = longestML;
                        lastStretch.off = matches[nbMatches-1].off;
                        lastStretch.litlen = 0;
                        last_pos = cur + longestML;
                        goto _shortestPath;
                }   }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);

                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) {
                                /* fill empty positions, for future comparisons */
                                last_pos++;
                                opt[last_pos].price = ZSTD_MAX_PRICE;
                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
                            }
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = 0;
                            opt[pos].price = price;
                        } else {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
            opt[last_pos+1].price = ZSTD_MAX_PRICE;
        }  /* for (cur = 1; cur <= last_pos; cur++) */

        lastStretch = opt[last_pos];
        assert(cur >= lastStretch.mlen);
        cur = last_pos - lastStretch.mlen;

_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
        assert(opt[0].mlen == 0);
        assert(last_pos >= lastStretch.mlen);
        assert(cur == last_pos - lastStretch.mlen);

        if (lastStretch.mlen==0) {
            /* no solution : all matches have been converted into literals */
            assert(lastStretch.litlen == (ip - anchor) + last_pos);
            ip += last_pos;
            continue;
        }
        assert(lastStretch.off > 0);

        /* Update offset history */
        if (lastStretch.litlen == 0) {
            /* finishing on a match : update offset history */
            Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
            ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
        } else {
            ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
            assert(cur >= lastStretch.litlen);
            cur -= lastStretch.litlen;
        }

        /* Let's write the shortest path solution.
         * It is stored in @opt in reverse order,
         * starting from @storeEnd (==cur+2),
         * effectively partially @opt overwriting.
         * Content is changed too:
         * - So far, @opt stored stretches, aka a match followed by literals
         * - Now, it will store sequences, aka literals followed by a match
         */
        {   U32 const storeEnd = cur + 2;
            U32 storeStart = storeEnd;
            U32 stretchPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                        last_pos, cur); (void)last_pos;
            assert(storeEnd < ZSTD_OPT_SIZE);
            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
            if (lastStretch.litlen > 0) {
                /* last "sequence" is unfinished: just a bunch of literals */
                opt[storeEnd].litlen = lastStretch.litlen;
                opt[storeEnd].mlen = 0;
                storeStart = storeEnd-1;
                opt[storeStart] = lastStretch;
} {
1392
opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
1393
storeStart = storeEnd;
1394
}
1395
while (1) {
1396
ZSTD_optimal_t nextStretch = opt[stretchPos];
1397
opt[storeStart].litlen = nextStretch.litlen;
1398
DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1399
opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1400
if (nextStretch.mlen == 0) {
1401
/* reaching beginning of segment */
1402
break;
1403
}
1404
storeStart--;
1405
opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1406
assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1407
stretchPos -= nextStretch.litlen + nextStretch.mlen;
1408
}

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore");
            {   U32 storePos;
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offBase = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
                                (int)(anchor - istart), (unsigned)llen, (unsigned)mlen);

                    if (mlen==0) {   /* only literals => must be last "sequence", actually starting a new stream of sequences */
                        assert(storePos == storeEnd);   /* must be last sequence */
                        ip = anchor + llen;   /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;   /* will finish */
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
                    anchor += advance;
                    ip = anchor;
            }   }
            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);

            /* update all costs */
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
#endif /* build exclusions */

#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt0(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}
#endif

#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt2(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}
#endif
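
/* Note : the optLevel parameter above selects the price model used by the
 * generic parser : 0 (btopt) relies on the faster whole-bit weights and allows
 * early loop aborts, while 2 (btultra, btultra2) uses fractional-bit weights
 * for more accurate cost estimates at extra cpu cost. */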

#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
#endif



#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
 * this function cannot error out, its narrow contract must be respected.
 */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
                          SeqStore_t* seqStore,
                          U32 rep[ZSTD_REP_NUM],
                          const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];   /* updated rep codes will sink here */
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2's complement) */

    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt */

    /* invalidate first scan from history, only keep entropy stats */
    ZSTD_resetSeqStore(seqStore);
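    /* Rewinding window.base by srcSize (and raising dictLimit / lowLimit to the
     * matching index) re-labels everything indexed by the first pass as out of
     * window, so the next pass rebuilds the match finder from scratch while
     * ms->opt keeps the entropy statistics gathered above. */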
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;

}

size_t ZSTD_compressBlock_btultra(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btultra2(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-pass strategy:
     * this strategy makes a first pass over the first block to collect statistics,
     * in order to seed the next round's statistics with them.
     * After the 1st pass, the function forgets its history, and starts a new block.
     * Consequently, this can only work if no data has been previously loaded into the tables,
     * aka, no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on first block),
     * the cost is 2x cpu time on the first block. */
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    if ( (ms->opt.litLengthSum==0)                          /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)      /* no dictionary */
      && (curr == ms->window.dictLimit)                     /* start of frame, nothing already loaded nor skipped */
      && (srcSize > ZSTD_PREDEF_THRESHOLD)                  /* input large enough to not employ default stats */
      ) {
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
    }

    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
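
/* Usage sketch (illustrative only) : callers do not invoke this block compressor
 * directly; btultra2 is reached through the public API, either via a high
 * compression level or by forcing the strategy explicitly. The client code below
 * is an assumption about typical usage, kept under #if 0 so it is never compiled
 * as part of this file; error handling is omitted for brevity. */
#if 0
#include <zstd.h>

static size_t compress_with_btultra2(void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t csize;
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 22);     /* top levels map to btultra2 */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btultra2);  /* or request the strategy directly */
    csize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeCCtx(cctx);
    return csize;   /* check with ZSTD_isError() in real code */
}
#endif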
#endif

#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
#endif

#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
#endif

/* note : no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries
 * and only applies to the first block (no prefix) */