CoCalc -- zstd

GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/zstd/lib/compress/zstd_opt.c
⁴⁸³⁷⁸ views
1
/*
2
 * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10

11
#include "zstd_compress_internal.h"
12
#include "hist.h"
13
#include "zstd_opt.h"
14

15

16
#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
#define ZSTD_MAX_PRICE     (1<<30)   /* NOTE(review): presumably an upper bound / sentinel for prices ; not referenced in this part of the file - confirm */
18

19
#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize <= ZSTD_PREDEF_THRESHOLD on the first block, symbols' cost is assumed static, directly determined by pre-defined distributions (unless entropy tables are flagged valid, see ZSTD_rescaleFreqs()) */
20

21

22
/*-*************************************
23
*  Price functions for optimal parser
24
***************************************/
25

26
/* Price unit selection :
 * BITCOST_ACCURACY   : nb of fractional bits carried by a price
 * BITCOST_MULTIPLIER : value representing one full bit (1 << BITCOST_ACCURACY)
 * WEIGHT(stat, opt)  : converts a statistic into a weight.
 * Both `#if 0` / `#elif 0` variants are disabled (kept for tests) ;
 * the compiled variant is the last one, which picks ZSTD_fracWeight() when opt is non-zero
 * and ZSTD_bitWeight() otherwise. */
#if 0    /* approximation at bit level (for tests) */
27
#  define BITCOST_ACCURACY 0
28
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29
#  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30
#elif 0  /* fractional bit accuracy (for tests) */
31
#  define BITCOST_ACCURACY 8
32
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33
#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
34
#else    /* opt==approx, ultra==accurate */
35
#  define BITCOST_ACCURACY 8
36
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37
#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38
#endif
39

40
/* ZSTD_bitWeight() :
 * coarse weight of a statistic :
 * ZSTD_highbit32(stat+1), expressed in BITCOST_MULTIPLIER units.
 * The +1 keeps the argument of ZSTD_highbit32() non-zero when stat==0. */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    U32 const highBit = ZSTD_highbit32(stat + 1);
    return highBit * BITCOST_MULTIPLIER;
}
44

45
/* ZSTD_fracWeight() :
 * weight of a statistic with fractional accuracy.
 * The result is the sum of
 *  - an integer part  : ZSTD_highbit32(rawStat+1) * BITCOST_MULTIPLIER
 *  - a fraction part  : (rawStat+1), scaled up by BITCOST_ACCURACY bits,
 *                       then scaled down by the high bit position. */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const shifted = rawStat + 1;
    U32 const topBit = ZSTD_highbit32(shifted);
    U32 const integerPart = topBit * BITCOST_MULTIPLIER;
    U32 const fractionPart = (shifted << BITCOST_ACCURACY) >> topBit;
    assert(topBit + BITCOST_ACCURACY < 31);
    return integerPart + fractionPart;
}
55

56
#if (DEBUGLEVEL>=2)
/* ZSTD_fCost() :
 * debug helper, only compiled when DEBUGLEVEL>=2.
 * @return : price converted into a fractional amount of bytes
 *           (price divided by 8 * BITCOST_MULTIPLIER).
 * Intended for debug messages only. */
MEM_STATIC double ZSTD_fCost(U32 price)
{
    double const unitsPerByte = (double)(BITCOST_MULTIPLIER*8);
    return (double)price / unitsPerByte;
}
#endif
65

66
/* ZSTD_compressedLiterals() :
 * @return : 1 unless literal compression is disabled
 *           (optPtr->literalCompressionMode == ZSTD_ps_disable), in which case 0 */
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    if (optPtr->literalCompressionMode == ZSTD_ps_disable) {
        return 0;
    }
    return 1;
}
70

71
/* ZSTD_setBasePrices() :
 * refresh the cached "base price" of each symbol family,
 * i.e. the WEIGHT() of the family's frequency sum.
 * The literals base price is only refreshed when literals are compressed. */
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    /* sequence symbols : always maintained */
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);

    /* literals : statistics only exist when literals get compressed */
    if (ZSTD_compressedLiterals(optPtr)) {
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    }
}
79

80

81
/* sum_u32() :
 * @return : sum of the nbElts first elements of table (wraps on 32-bit overflow) */
static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    U32 acc = 0;
    size_t remaining;
    for (remaining = nbElts; remaining > 0; remaining--) {
        acc += table[remaining - 1];
    }
    return acc;
}
90

91
/* ZSTD_downscaleStats() :
 * shift every element of table right by `shift` bits, then add 1,
 * so that no element ends up at zero.
 * Covers indexes [0, lastEltIndex].
 * @return : sum of elements after the update */
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
{
    U32 const nbElts = lastEltIndex + 1;
    U32 total = 0;
    U32 idx;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)nbElts, (unsigned)shift);
    assert(shift < 30);
    for (idx = 0; idx < nbElts; idx++) {
        unsigned const reduced = 1 + (table[idx] >> shift);
        table[idx] = reduced;
        total += reduced;
    }
    return total;
}
102

103
/* ZSTD_scaleStats() :
104
 * reduce all elements in table is sum too large
105
 * return the resulting sum of elements */
106
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107
{
108
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
109
    U32 const factor = prevsum >> logTarget;
110
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111
    assert(logTarget < 30);
112
    if (factor <= 1) return prevsum;
113
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114
}
115

116
/* ZSTD_rescaleFreqs() :
117
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
118
 *    take hints from dictionary if there is one
119
 *    and init from zero if there is none,
120
 *    using src for literals stats, and baseline stats for sequence symbols
121
 * otherwise downscale existing stats, to be used as seed for next block.
122
 */
123
static void
124
ZSTD_rescaleFreqs(optState_t* const optPtr,
125
            const BYTE* const src, size_t const srcSize,
126
                  int const optLevel)
127
{
128
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
129
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130
    optPtr->priceType = zop_dynamic;
131

132
    if (optPtr->litLengthSum == 0) {  /* first block : init */
133
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
134
            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
135
            optPtr->priceType = zop_predef;
136
        }
137

138
        assert(optPtr->symbolCosts != NULL);
139
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140
            /* huffman table presumed generated by dictionary */
141
            optPtr->priceType = zop_dynamic;
142

143
            if (compressedLiterals) {
144
                unsigned lit;
145
                assert(optPtr->litFreq != NULL);
146
                optPtr->litSum = 0;
147
                for (lit=0; lit<=MaxLit; lit++) {
148
                    U32 const scaleLog = 11;   /* scale to 2K */
149
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
150
                    assert(bitCost <= scaleLog);
151
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
152
                    optPtr->litSum += optPtr->litFreq[lit];
153
            }   }
154

155
            {   unsigned ll;
156
                FSE_CState_t llstate;
157
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
158
                optPtr->litLengthSum = 0;
159
                for (ll=0; ll<=MaxLL; ll++) {
160
                    U32 const scaleLog = 10;   /* scale to 1K */
161
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
162
                    assert(bitCost < scaleLog);
163
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
164
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
165
            }   }
166

167
            {   unsigned ml;
168
                FSE_CState_t mlstate;
169
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
170
                optPtr->matchLengthSum = 0;
171
                for (ml=0; ml<=MaxML; ml++) {
172
                    U32 const scaleLog = 10;
173
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
174
                    assert(bitCost < scaleLog);
175
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
176
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
177
            }   }
178

179
            {   unsigned of;
180
                FSE_CState_t ofstate;
181
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
182
                optPtr->offCodeSum = 0;
183
                for (of=0; of<=MaxOff; of++) {
184
                    U32 const scaleLog = 10;
185
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
186
                    assert(bitCost < scaleLog);
187
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
188
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
189
            }   }
190

191
        } else {  /* not a dictionary */
192

193
            assert(optPtr->litFreq != NULL);
194
            if (compressedLiterals) {
195
                unsigned lit = MaxLit;
196
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
197
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
198
            }
199

200
            {   unsigned const baseLLfreqs[MaxLL+1] = {
201
                    4, 2, 1, 1, 1, 1, 1, 1,
202
                    1, 1, 1, 1, 1, 1, 1, 1,
203
                    1, 1, 1, 1, 1, 1, 1, 1,
204
                    1, 1, 1, 1, 1, 1, 1, 1,
205
                    1, 1, 1, 1
206
                };
207
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
208
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
209
            }
210

211
            {   unsigned ml;
212
                for (ml=0; ml<=MaxML; ml++)
213
                    optPtr->matchLengthFreq[ml] = 1;
214
            }
215
            optPtr->matchLengthSum = MaxML+1;
216

217
            {   unsigned const baseOFCfreqs[MaxOff+1] = {
218
                    6, 2, 1, 1, 2, 3, 4, 4,
219
                    4, 3, 2, 1, 1, 1, 1, 1,
220
                    1, 1, 1, 1, 1, 1, 1, 1,
221
                    1, 1, 1, 1, 1, 1, 1, 1
222
                };
223
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
224
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
225
            }
226

227

228
        }
229

230
    } else {   /* new block : re-use previous statistics, scaled down */
231

232
        if (compressedLiterals)
233
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
234
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
235
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
236
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
237
    }
238

239
    ZSTD_setBasePrices(optPtr, optLevel);
240
}
241

242
/* ZSTD_rawLiteralsCost() :
243
 * price of literals (only) in specified segment (which length can be 0).
244
 * does not include price of literalLength symbol */
245
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
246
                                const optState_t* const optPtr,
247
                                int optLevel)
248
{
249
    if (litLength == 0) return 0;
250

251
    if (!ZSTD_compressedLiterals(optPtr))
252
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
253

254
    if (optPtr->priceType == zop_predef)
255
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
256

257
    /* dynamic statistics */
258
    {   U32 price = litLength * optPtr->litSumBasePrice;
259
        U32 u;
260
        for (u=0; u < litLength; u++) {
261
            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
262
            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
263
        }
264
        return price;
265
    }
266
}
267

268
/* ZSTD_litLengthPrice() :
269
 * cost of literalLength symbol */
270
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
271
{
272
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
273
    if (optPtr->priceType == zop_predef)
274
        return WEIGHT(litLength, optLevel);
275
    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
276
     * because it isn't representable in the zstd format. So instead just
277
     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
278
     * would be all literals.
279
     */
280
    if (litLength == ZSTD_BLOCKSIZE_MAX)
281
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
282

283
    /* dynamic statistics */
284
    {   U32 const llCode = ZSTD_LLcode(litLength);
285
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
286
             + optPtr->litLengthSumBasePrice
287
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
288
    }
289
}
290

291
/* ZSTD_getMatchPrice() :
292
 * Provides the cost of the match part (offset + matchLength) of a sequence
293
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
294
 * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
295
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
296
 */
297
FORCE_INLINE_TEMPLATE U32
298
ZSTD_getMatchPrice(U32 const offcode,
299
                   U32 const matchLength,
300
             const optState_t* const optPtr,
301
                   int const optLevel)
302
{
303
    U32 price;
304
    U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
305
    U32 const mlBase = matchLength - MINMATCH;
306
    assert(matchLength >= MINMATCH);
307

308
    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
309
        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
310

311
    /* dynamic statistics */
312
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
313
    if ((optLevel<2) /*static*/ && offCode >= 20)
314
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
315

316
    /* match Length */
317
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
318
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
319
    }
320

321
    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
322

323
    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
324
    return price;
325
}
326

327
/* ZSTD_updateStats() :
328
 * assumption : literals + litLengtn <= iend */
329
static void ZSTD_updateStats(optState_t* const optPtr,
330
                             U32 litLength, const BYTE* literals,
331
                             U32 offsetCode, U32 matchLength)
332
{
333
    /* literals */
334
    if (ZSTD_compressedLiterals(optPtr)) {
335
        U32 u;
336
        for (u=0; u < litLength; u++)
337
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
338
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
339
    }
340

341
    /* literal Length */
342
    {   U32 const llCode = ZSTD_LLcode(litLength);
343
        optPtr->litLengthFreq[llCode]++;
344
        optPtr->litLengthSum++;
345
    }
346

347
    /* offset code : expected to follow storeSeq() numeric representation */
348
    {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
349
        assert(offCode <= MaxOff);
350
        optPtr->offCodeFreq[offCode]++;
351
        optPtr->offCodeSum++;
352
    }
353

354
    /* match Length */
355
    {   U32 const mlBase = matchLength - MINMATCH;
356
        U32 const mlCode = ZSTD_MLcode(mlBase);
357
        optPtr->matchLengthFreq[mlCode]++;
358
        optPtr->matchLengthSum++;
359
    }
360
}
361

362

363
/* ZSTD_readMINMATCH() :
364
 * function safe only for comparisons
365
 * assumption : memPtr must be at least 4 bytes before end of buffer */
366
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
367
{
368
    switch (length)
369
    {
370
    default :
371
    case 4 : return MEM_read32(memPtr);
372
    case 3 : if (MEM_isLittleEndian())
373
                return MEM_read32(memPtr)<<8;
374
             else
375
                return MEM_read32(memPtr)>>8;
376
    }
377
}
378

379

380
/* Update hashTable3 up to ip (excluded)
381
   Assumption : always within prefix (i.e. not within extDict) */
382
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
383
                                              U32* nextToUpdate3,
384
                                              const BYTE* const ip)
385
{
386
    U32* const hashTable3 = ms->hashTable3;
387
    U32 const hashLog3 = ms->hashLog3;
388
    const BYTE* const base = ms->window.base;
389
    U32 idx = *nextToUpdate3;
390
    U32 const target = (U32)(ip - base);
391
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
392
    assert(hashLog3 > 0);
393

394
    while(idx < target) {
395
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
396
        idx++;
397
    }
398

399
    *nextToUpdate3 = target;
400
    return hashTable3[hash3];
401
}
402

403

404
/*-*************************************
405
*  Binary Tree search
406
***************************************/
407
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 * The tree lives in ms->chainTable, used as a rolling buffer :
 * each position owns 2 consecutive U32 cells (at 2*(index & btMask)),
 * the first one linking to a "smaller" candidate, the second one to a "larger" candidate.
 * The search starts from the position stored in the hash table for `ip`,
 * and is bounded by (1 << searchLog) comparisons.
 * @param ip assumed <= iend-8 .
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @param mls nb of bytes used by the hash (forwarded to ZSTD_hashPtr())
 * @param extDict when non-zero, candidates below dictLimit are read from dictBase
 * @return : nb of positions added */
411
static U32 ZSTD_insertBt1(
412
                const ZSTD_matchState_t* ms,
413
                const BYTE* const ip, const BYTE* const iend,
414
                U32 const target,
415
                U32 const mls, const int extDict)
416
{
417
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
418
    U32*   const hashTable = ms->hashTable;
419
    U32    const hashLog = cParams->hashLog;
420
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
421
    U32*   const bt = ms->chainTable;
422
    U32    const btLog  = cParams->chainLog - 1;   /* 2 cells per position => half as many positions as cells */
423
    U32    const btMask = (1 << btLog) - 1;   /* NOTE(review): signed shift here, while ZSTD_insertBtAndGetAllMatches() uses 1U << btLog ; equivalent as long as btLog < 31 */
424
    U32 matchIndex = hashTable[h];   /* first candidate : previous position registered for this hash */
425
    size_t commonLengthSmaller=0, commonLengthLarger=0;   /* nb of bytes known to be shared with every remaining candidate on each side */
426
    const BYTE* const base = ms->window.base;
427
    const BYTE* const dictBase = ms->window.dictBase;
428
    const U32 dictLimit = ms->window.dictLimit;
429
    const BYTE* const dictEnd = dictBase + dictLimit;
430
    const BYTE* const prefixStart = base + dictLimit;
431
    const BYTE* match;
432
    const U32 curr = (U32)(ip-base);
433
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;   /* indexes <= btLow are treated as beyond tree size */
434
    U32* smallerPtr = bt + 2*(curr&btMask);   /* cell to fill with the next "smaller" candidate */
435
    U32* largerPtr  = smallerPtr + 1;         /* cell to fill with the next "larger" candidate */
436
    U32 dummy32;   /* to be nullified at the end */
437
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
440
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
441
    U32 matchEndIdx = curr+8+1;   /* guarantees a return value of at least 1 */
442
    size_t bestLength = 8;
443
    U32 nbCompares = 1U << cParams->searchLog;   /* upper bound on nb of candidates visited */
444
#ifdef ZSTD_C_PREDICT
445
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
446
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
447
    predictedSmall += (predictedSmall>0);
448
    predictedLarge += (predictedLarge>0);
449
#endif /* ZSTD_C_PREDICT */
450

451
    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
452

453
    assert(curr <= target);
454
    assert(ip <= iend-8);   /* required for h calculation */
455
    hashTable[h] = curr;   /* Update Hash Table */
456

457
    assert(windowLow > 0);
458
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
459
        U32* const nextPtr = bt + 2*(matchIndex & btMask);   /* the 2 cells owned by the candidate */
460
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
461
        assert(matchIndex < curr);
462

463
#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
464
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
465
        if (matchIndex == predictedSmall) {
466
            /* no need to check length, result known */
467
            *smallerPtr = matchIndex;
468
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
469
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
470
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
471
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
472
            continue;
473
        }
474
        if (matchIndex == predictedLarge) {
475
            *largerPtr = matchIndex;
476
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
477
            largerPtr = nextPtr;
478
            matchIndex = nextPtr[0];
479
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
480
            continue;
481
        }
482
#endif
483

484
        /* extend the common length, reading the candidate from the prefix or from the dictionary segment */
        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
485
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
486
            match = base + matchIndex;
487
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
488
        } else {
489
            match = dictBase + matchIndex;
490
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
491
            if (matchIndex+matchLength >= dictLimit)
492
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
493
        }
494

495
        /* track the longest match, and the furthest end index reached by a match */
        if (matchLength > bestLength) {
496
            bestLength = matchLength;
497
            if (matchLength > matchEndIdx - matchIndex)
498
                matchEndIdx = matchIndex + (U32)matchLength;
499
        }
500

501
        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
502
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
503
        }
504

505
        /* first differing byte decides on which side of the current position the candidate goes */
        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
506
            /* match is smaller than current */
507
            *smallerPtr = matchIndex;             /* update smaller idx */
508
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
509
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
510
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
511
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
512
        } else {
513
            /* match is larger than current */
514
            *largerPtr = matchIndex;
515
            commonLengthLarger = matchLength;
516
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
517
            largerPtr = nextPtr;
518
            matchIndex = nextPtr[0];
519
    }   }
520

521
    /* terminate both open links ; a pointer redirected to dummy32 leaves the table untouched */
    *smallerPtr = *largerPtr = 0;
522
    {   U32 positions = 0;
523
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
524
        assert(matchEndIdx > curr + 8);
525
        return MAX(positions, matchEndIdx - (curr + 8));
526
    }
527
}
528

529
FORCE_INLINE_TEMPLATE
530
void ZSTD_updateTree_internal(
531
                ZSTD_matchState_t* ms,
532
                const BYTE* const ip, const BYTE* const iend,
533
                const U32 mls, const ZSTD_dictMode_e dictMode)
534
{
535
    const BYTE* const base = ms->window.base;
536
    U32 const target = (U32)(ip - base);
537
    U32 idx = ms->nextToUpdate;
538
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
539
                idx, target, dictMode);
540

541
    while(idx < target) {
542
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
543
        assert(idx < (U32)(idx + forward));
544
        idx += forward;
545
    }
546
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
547
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
548
    ms->nextToUpdate = target;
549
}
550

551
/* ZSTD_updateTree():
 * Non-template entry point : brings the binary tree up to date until @ip,
 * using the minMatch value stored in ms->cParams and the ZSTD_noDict mode.
 */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend)
{
    U32 const mls = ms->cParams.minMatch;
    ZSTD_updateTree_internal(ms, ip, iend, mls, ZSTD_noDict);
}
554

555
/* ZSTD_insertBtAndGetAllMatches():
 * Collects match candidates for position @ip into @matches and, on the normal
 * path, inserts @ip into the binary tree (ms->hashTable / ms->chainTable).
 * Candidates are gathered in this order :
 *  1. repeat offsets taken from @rep,
 *  2. when mls==3 and nothing long enough was found yet, the 3-bytes hash table,
 *  3. the binary tree of the current window,
 *  4. in ZSTD_dictMatchState mode, the (read-only) binary tree of the dictionary.
 * A candidate is stored only when strictly longer than the best length so far,
 * so @matches ends up ordered by increasing length.
 * The starting threshold is (@lengthToBeat - 1).
 * @return : nb of entries written into @matches.
 * Side effects : ms->nextToUpdate is updated on every exit except the early
 * return of the repcode stage, which also leaves the tree untouched.
 */
FORCE_INLINE_TEMPLATE
556
U32 ZSTD_insertBtAndGetAllMatches (
557
                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
558
                    ZSTD_matchState_t* ms,
559
                    U32* nextToUpdate3,
560
                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
561
                    const U32 rep[ZSTD_REP_NUM],
562
                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
563
                    const U32 lengthToBeat,
564
                    U32 const mls /* template */)
565
{
566
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
567
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
568
    const BYTE* const base = ms->window.base;
569
    U32 const curr = (U32)(ip-base);
570
    U32 const hashLog = cParams->hashLog;
571
    U32 const minMatch = (mls==3) ? 3 : 4;
572
    U32* const hashTable = ms->hashTable;
573
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
574
    U32 matchIndex  = hashTable[h];
575
    U32* const bt   = ms->chainTable;
576
    U32 const btLog = cParams->chainLog - 1;
577
    U32 const btMask= (1U << btLog) - 1;
578
    size_t commonLengthSmaller=0, commonLengthLarger=0;
579
    const BYTE* const dictBase = ms->window.dictBase;
580
    U32 const dictLimit = ms->window.dictLimit;
581
    const BYTE* const dictEnd = dictBase + dictLimit;
582
    const BYTE* const prefixStart = base + dictLimit;
583
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
584
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
585
    U32 const matchLow = windowLow ? windowLow : 1;
586
    /* each tree node owns 2 consecutive U32 slots : [0] is used as the "smaller" link, [1] as the "larger" link */
    U32* smallerPtr = bt + 2*(curr&btMask);
587
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
588
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
589
    U32 dummy32;   /* to be nullified at the end */
590
    U32 mnum = 0;
591
    U32 nbCompares = 1U << cParams->searchLog;
592

593
    /* dictMatchState-only parameters :
     * in any other mode they receive neutral values (NULL, 0, or the main table's log) and are not used for searching */
    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
594
    const ZSTD_compressionParameters* const dmsCParams =
595
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
596
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
597
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
598
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
599
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
600
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
601
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
602
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
603
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
604
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
605

606
    /* a candidate gets recorded only when strictly longer than bestLength */
    size_t bestLength = lengthToBeat-1;
607
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
608

609
    /* check repCode */
610
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
611
    /* repCode iterates over [ll0, ZSTD_REP_NUM + ll0) :
     * when ll0==1, rep[0] itself is skipped and the last candidate (repCode==ZSTD_REP_NUM) is rep[0]-1 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
612
        U32 repCode;
613
        for (repCode = ll0; repCode < lastR; repCode++) {
614
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
615
            U32 const repIndex = curr - repOffset;
616
            U32 repLen = 0;
617
            assert(curr >= dictLimit);
618
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
619
                /* We must validate the repcode offset because when we're using a dictionary the
620
                 * valid offset range shrinks when the dictionary goes out of bounds.
621
                 */
622
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
623
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
624
                }
625
            } else {  /* repIndex < dictLimit || repIndex >= curr */
626
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
627
                                             dmsBase + repIndex - dmsIndexDelta :
628
                                             dictBase + repIndex;
629
                assert(curr >= windowLow);
630
                if ( dictMode == ZSTD_extDict
631
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
632
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
633
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
634
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
635
                }
636
                if (dictMode == ZSTD_dictMatchState
637
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
638
                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
639
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
640
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
641
            }   }
642
            /* save longer solution */
643
            if (repLen > bestLength) {
644
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
645
                            repCode, ll0, repOffset, repLen);
646
                bestLength = repLen;
647
                matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
648
                matches[mnum].len = (U32)repLen;
649
                mnum++;
650
                /* early exit : curr is not inserted into the tree and ms->nextToUpdate is left unchanged */
                if ( (repLen > sufficient_len)
651
                   | (ip+repLen == iLimit) ) {  /* best possible */
652
                    return mnum;
653
    }   }   }   }
654

655
    /* HC3 match finder */
656
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
657
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
658
        if ((matchIndex3 >= matchLow)
659
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
660
            size_t mlen;
661
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
662
                const BYTE* const match = base + matchIndex3;
663
                mlen = ZSTD_count(ip, match, iLimit);
664
            } else {
665
                const BYTE* const match = dictBase + matchIndex3;
666
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
667
            }
668

669
            /* save best solution */
670
            if (mlen >= mls /* == 3 > bestLength */) {
671
                DEBUGLOG(8, "found small match with hlog3, of length %u",
672
                            (U32)mlen);
673
                bestLength = mlen;
674
                assert(curr > matchIndex3);
675
                assert(mnum==0);  /* no prior solution */
676
                matches[0].off = STORE_OFFSET(curr - matchIndex3);
677
                matches[0].len = (U32)mlen;
678
                mnum = 1;
679
                if ( (mlen > sufficient_len) |
680
                     (ip+mlen == iLimit) ) {  /* best possible length */
681
                    ms->nextToUpdate = curr+1;  /* skip insertion */
682
                    return 1;
683
        }   }   }
684
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
685
    }  /* if (mls == 3) */
686

687
    hashTable[h] = curr;   /* Update Hash Table */
688

689
    /* Descend the tree from the previous head of the hash bucket.
     * Each visited node is attached to one of curr's pending links (smallerPtr / largerPtr),
     * and any node longer than bestLength is recorded as a match. */
    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
690
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
691
        const BYTE* match;
692
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
693
        assert(curr > matchIndex);
694

695
        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
696
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
697
            match = base + matchIndex;
698
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
699
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
700
        } else {
701
            match = dictBase + matchIndex;
702
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
703
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
704
            if (matchIndex+matchLength >= dictLimit)
705
                match = base + matchIndex;   /* prepare for match[matchLength] read */
706
        }
707

708
        if (matchLength > bestLength) {
709
            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
710
                    (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
711
            assert(matchEndIdx > matchIndex);
712
            if (matchLength > matchEndIdx - matchIndex)
713
                matchEndIdx = matchIndex + (U32)matchLength;
714
            bestLength = matchLength;
715
            matches[mnum].off = STORE_OFFSET(curr - matchIndex);
716
            matches[mnum].len = (U32)matchLength;
717
            mnum++;
718
            if ( (matchLength > ZSTD_OPT_NUM)
719
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
721
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
722
        }   }
723

724
        /* first differing byte decides on which side of curr this node belongs */
        if (match[matchLength] < ip[matchLength]) {
725
            /* match smaller than current */
726
            *smallerPtr = matchIndex;             /* update smaller idx */
727
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
728
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
729
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
730
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
731
        } else {
732
            *largerPtr = matchIndex;
733
            commonLengthLarger = matchLength;
734
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
735
            largerPtr = nextPtr;
736
            matchIndex = nextPtr[0];
737
    }   }
738

739
    /* terminate both pending links (either one may point to dummy32) */
    *smallerPtr = *largerPtr = 0;
740

741
    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
742
    /* Dictionary tree : searched with the remaining nbCompares budget.
     * dmsBt is only read here : no link of the dictionary tree is rewritten. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
743
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
744
        U32 dictMatchIndex = dms->hashTable[dmsH];
745
        const U32* const dmsBt = dms->chainTable;
746
        commonLengthSmaller = commonLengthLarger = 0;
747
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
748
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
749
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
750
            const BYTE* match = dmsBase + dictMatchIndex;
751
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
752
            if (dictMatchIndex+matchLength >= dmsHighLimit)
753
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
754

755
            if (matchLength > bestLength) {
756
                /* translate the dictionary index into the index space of the current window */
                matchIndex = dictMatchIndex + dmsIndexDelta;
757
                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
758
                        (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
759
                if (matchLength > matchEndIdx - matchIndex)
760
                    matchEndIdx = matchIndex + (U32)matchLength;
761
                bestLength = matchLength;
762
                matches[mnum].off = STORE_OFFSET(curr - matchIndex);
763
                matches[mnum].len = (U32)matchLength;
764
                mnum++;
765
                if ( (matchLength > ZSTD_OPT_NUM)
766
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
767
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
768
            }   }
769

770
            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
771
            if (match[matchLength] < ip[matchLength]) {
772
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
773
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
774
            } else {
775
                /* match is larger than current */
776
                commonLengthLarger = matchLength;
777
                dictMatchIndex = nextPtr[0];
778
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
779

780
    assert(matchEndIdx > curr+8);
781
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
782
    return mnum;
783
}
784

785
/* ZSTD_getAllMatchesFn :
 * Signature shared by all specialized match collectors.
 * Parameters are named after those of ZSTD_btGetAllMatches_internal(),
 * minus its two template parameters (dictMode, mls).
 */
typedef U32 (*ZSTD_getAllMatchesFn)(
    ZSTD_match_t* matches,
    ZSTD_matchState_t* ms,
    U32* nextToUpdate3,
    const BYTE* ip,
    const BYTE* iHighLimit,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0,
    U32 const lengthToBeat);
794

795
/* ZSTD_btGetAllMatches_internal():
 * Template body of the match collectors.
 * When @ip lies before the next position to index (ms->nextToUpdate),
 * nothing is searched and 0 is returned.
 * Otherwise the tree is first updated up to @ip, then @ip itself is
 * inserted and searched by ZSTD_insertBtAndGetAllMatches().
 * @return : nb of matches written into @matches.
 */
FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_matchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);

    {   const BYTE* const firstUnindexed = ms->window.base + ms->nextToUpdate;
        if (ip >= firstUnindexed) {
            ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
            return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3,
                                                 ip, iHighLimit, dictMode,
                                                 rep, ll0, lengthToBeat, mls);
        }
    }

    return 0;   /* skipped area */
}
814

815
/* ZSTD_BT_GET_ALL_MATCHES_FN() :
 * Builds the name of the specialized collector for a given (dictMode, mls) pair,
 * e.g. (noDict, 3) => ZSTD_btGetAllMatches_noDict_3 */
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
816

817
/* GEN_ZSTD_BT_GET_ALL_MATCHES_() :
 * Defines one static collector which forwards its arguments to
 * ZSTD_btGetAllMatches_internal(), passing ZSTD_<dictMode> and mls as constants. */
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
818
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
819
            ZSTD_match_t* matches,                             \
820
            ZSTD_matchState_t* ms,                             \
821
            U32* nextToUpdate3,                                \
822
            const BYTE* ip,                                    \
823
            const BYTE* const iHighLimit,                      \
824
            const U32 rep[ZSTD_REP_NUM],                       \
825
            U32 const ll0,                                     \
826
            U32 const lengthToBeat)                            \
827
    {                                                          \
828
        return ZSTD_btGetAllMatches_internal(                  \
829
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
830
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
831
    }
832

833
/* GEN_ZSTD_BT_GET_ALL_MATCHES() :
 * Defines the 4 collectors (mls = 3, 4, 5, 6) of one dictMode */
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
834
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
835
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
836
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
837
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
838

839
/* Instantiation : 3 dictModes x 4 mls values = 12 static collectors */
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
840
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
841
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
842

843
/* ZSTD_BT_GET_ALL_MATCHES_ARRAY() :
 * Brace-enclosed initializer listing the 4 collectors of one dictMode,
 * ordered by mls (3, 4, 5, 6), so that entry [mls-3] is the collector for mls */
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
844
    {                                            \
845
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
846
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
847
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
848
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
849
    }
850

851
static ZSTD_getAllMatchesFn
852
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
853
{
854
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
855
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
856
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
857
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
858
    };
859
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
860
    assert((U32)dictMode < 3);
861
    assert(mls - 3 < 4);
862
    return getAllMatchesFns[(int)dictMode][mls - 3];
863
}
864

865
/*************************
866
*  LDM helper functions  *
867
*************************/
868

869
/* Struct containing info needed to make decision about ldm inclusion */
870
typedef struct {
871
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
872
    U32 startPosInBlock;      /* Start position of the current match candidate */
873
    U32 endPosInBlock;        /* End position of the current match candidate */
874
    U32 offset;               /* Offset of the match candidate */
875
} ZSTD_optLdm_t;
876

877
/* ZSTD_optLdm_skipRawSeqStoreBytes():
878
 * Moves forward in @rawSeqStore by @nbBytes,
879
 * which will update the fields 'pos' and 'posInSequence'.
880
 */
881
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
882
{
883
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
884
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
885
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
886
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
887
            currPos -= currSeq.litLength + currSeq.matchLength;
888
            rawSeqStore->pos++;
889
        } else {
890
            rawSeqStore->posInSequence = currPos;
891
            break;
892
        }
893
    }
894
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
895
        rawSeqStore->posInSequence = 0;
896
    }
897
}
898

899
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
900
 * Calculates the beginning and end of the next match in the current block.
901
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
902
 */
903
static void
904
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
905
                                       U32 blockBytesRemaining)
906
{
907
    rawSeq currSeq;
908
    U32 currBlockEndPos;
909
    U32 literalsBytesRemaining;
910
    U32 matchBytesRemaining;
911

912
    /* Setting match end position to MAX to ensure we never use an LDM during this block */
913
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
914
        optLdm->startPosInBlock = UINT_MAX;
915
        optLdm->endPosInBlock = UINT_MAX;
916
        return;
917
    }
918
    /* Calculate appropriate bytes left in matchLength and litLength
919
     * after adjusting based on ldmSeqStore->posInSequence */
920
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
921
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
922
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
923
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
924
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
925
            0;
926
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
927
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
928
            currSeq.matchLength;
929

930
    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
931
    if (literalsBytesRemaining >= blockBytesRemaining) {
932
        optLdm->startPosInBlock = UINT_MAX;
933
        optLdm->endPosInBlock = UINT_MAX;
934
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
935
        return;
936
    }
937

938
    /* Matches may be < MINMATCH by this process. In that case, we will reject them
939
       when we are deciding whether or not to add the ldm */
940
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
941
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
942
    optLdm->offset = currSeq.offset;
943

944
    if (optLdm->endPosInBlock > currBlockEndPos) {
945
        /* Match ends after the block ends, we can't use the whole match */
946
        optLdm->endPosInBlock = currBlockEndPos;
947
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
948
    } else {
949
        /* Consume nb of bytes equal to size of sequence left */
950
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
951
    }
952
}
953

954
/* ZSTD_optLdm_maybeAddMatch():
955
 * Adds a match if it's long enough,
956
 * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
957
 * into 'matches'. Maintains the correct ordering of 'matches'.
958
 */
959
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
960
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
961
{
962
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
963
    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
964
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
965

966
    /* Ensure that current block position is not outside of the match */
967
    if (currPosInBlock < optLdm->startPosInBlock
968
      || currPosInBlock >= optLdm->endPosInBlock
969
      || candidateMatchLength < MINMATCH) {
970
        return;
971
    }
972

973
    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
974
        U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
975
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
976
                 candidateOffCode, candidateMatchLength, currPosInBlock);
977
        matches[*nbMatches].len = candidateMatchLength;
978
        matches[*nbMatches].off = candidateOffCode;
979
        (*nbMatches)++;
980
    }
981
}
982

983
/* ZSTD_optLdm_processMatchCandidate():
984
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
985
 */
986
static void
987
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
988
                                  ZSTD_match_t* matches, U32* nbMatches,
989
                                  U32 currPosInBlock, U32 remainingBytes)
990
{
991
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
992
        return;
993
    }
994

995
    if (currPosInBlock >= optLdm->endPosInBlock) {
996
        if (currPosInBlock > optLdm->endPosInBlock) {
997
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
998
             * at the end of a match from the ldm seq store, and will often be some bytes
999
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1000
             */
1001
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1002
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1003
        }
1004
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1005
    }
1006
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1007
}
1008

1009

1010
/*-*******************************
1011
*  Optimal parser
1012
*********************************/
1013

1014
/* ZSTD_totalLen():
 * @return : sum of the literal length and match length stored in @sol */
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
    U32 const total = sol.litlen + sol.mlen;
    return total;
}
1018

1019
#if 0 /* debug */

/* listStats():
 * Debug helper (compiled out) : prints entries [0, lastEltID] of @table
 * through RAWLOG, separated by commas and followed by a newline. */
static void
listStats(const U32* table, int lastEltID)
{
    int idx;
    (void)table;   /* keeps the parameter referenced whatever RAWLOG expands to */
    for (idx = 0; idx <= lastEltID; idx++) {
        /* RAWLOG(2, "%3i:%3i,  ", idx, table[idx]); */
        RAWLOG(2, "%4i,", table[idx]);
    }
    RAWLOG(2, " \n");
}

#endif
1035

1036
FORCE_INLINE_TEMPLATE size_t
1037
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1038
                               seqStore_t* seqStore,
1039
                               U32 rep[ZSTD_REP_NUM],
1040
                         const void* src, size_t srcSize,
1041
                         const int optLevel,
1042
                         const ZSTD_dictMode_e dictMode)
1043
{
1044
    optState_t* const optStatePtr = &ms->opt;
1045
    const BYTE* const istart = (const BYTE*)src;
1046
    const BYTE* ip = istart;
1047
    const BYTE* anchor = istart;
1048
    const BYTE* const iend = istart + srcSize;
1049
    const BYTE* const ilimit = iend - 8;
1050
    const BYTE* const base = ms->window.base;
1051
    const BYTE* const prefixStart = base + ms->window.dictLimit;
1052
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
1053

1054
    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1055

1056
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1057
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1058
    U32 nextToUpdate3 = ms->nextToUpdate;
1059

1060
    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1061
    ZSTD_match_t* const matches = optStatePtr->matchTable;
1062
    ZSTD_optimal_t lastSequence;
1063
    ZSTD_optLdm_t optLdm;
1064

1065
    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1066
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1067
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1068

1069
    /* init */
1070
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1071
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1072
    assert(optLevel <= 2);
1073
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1074
    ip += (ip==prefixStart);
1075

1076
    /* Match Loop */
1077
    while (ip < ilimit) {
1078
        U32 cur, last_pos = 0;
1079

1080
        /* find first match */
1081
        {   U32 const litlen = (U32)(ip - anchor);
1082
            U32 const ll0 = !litlen;
1083
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1084
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1085
                                              (U32)(ip-istart), (U32)(iend - ip));
1086
            if (!nbMatches) { ip++; continue; }
1087

1088
            /* initialize opt[0] */
1089
            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1090
            opt[0].mlen = 0;  /* means is_a_literal */
1091
            opt[0].litlen = litlen;
1092
            /* We don't need to include the actual price of the literals because
1093
             * it is static for the duration of the forward pass, and is included
1094
             * in every price. We include the literal length to avoid negative
1095
             * prices when we subtract the previous literal length.
1096
             */
1097
            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1098

1099
            /* large match -> immediate encoding */
1100
            {   U32 const maxML = matches[nbMatches-1].len;
1101
                U32 const maxOffcode = matches[nbMatches-1].off;
1102
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1103
                            nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
1104

1105
                if (maxML > sufficient_len) {
1106
                    lastSequence.litlen = litlen;
1107
                    lastSequence.mlen = maxML;
1108
                    lastSequence.off = maxOffcode;
1109
                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1110
                                maxML, sufficient_len);
1111
                    cur = 0;
1112
                    last_pos = ZSTD_totalLen(lastSequence);
1113
                    goto _shortestPath;
1114
            }   }
1115

1116
            /* set prices for first matches starting position == 0 */
1117
            assert(opt[0].price >= 0);
1118
            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1119
                U32 pos;
1120
                U32 matchNb;
1121
                for (pos = 1; pos < minMatch; pos++) {
1122
                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
1123
                }
1124
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1125
                    U32 const offcode = matches[matchNb].off;
1126
                    U32 const end = matches[matchNb].len;
1127
                    for ( ; pos <= end ; pos++ ) {
1128
                        U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
1129
                        U32 const sequencePrice = literalsPrice + matchPrice;
1130
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1131
                                    pos, ZSTD_fCost(sequencePrice));
1132
                        opt[pos].mlen = pos;
1133
                        opt[pos].off = offcode;
1134
                        opt[pos].litlen = litlen;
1135
                        opt[pos].price = (int)sequencePrice;
1136
                }   }
1137
                last_pos = pos-1;
1138
            }
1139
        }
1140

1141
        /* check further positions */
1142
        for (cur = 1; cur <= last_pos; cur++) {
1143
            const BYTE* const inr = ip + cur;
1144
            assert(cur < ZSTD_OPT_NUM);
1145
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
1146

1147
            /* Fix current position with one literal if cheaper */
1148
            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1149
                int const price = opt[cur-1].price
1150
                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1151
                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1152
                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1153
                assert(price < 1000000000); /* overflow check */
1154
                if (price <= opt[cur].price) {
1155
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1156
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1157
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1158
                    opt[cur].mlen = 0;
1159
                    opt[cur].off = 0;
1160
                    opt[cur].litlen = litlen;
1161
                    opt[cur].price = price;
1162
                } else {
1163
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1164
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1165
                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1166
                }
1167
            }
1168

1169
            /* Set the repcodes of the current position. We must do it here
1170
             * because we rely on the repcodes of the 2nd to last sequence being
1171
             * correct to set the next chunks repcodes during the backward
1172
             * traversal.
1173
             */
1174
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1175
            assert(cur >= opt[cur].mlen);
1176
            if (opt[cur].mlen != 0) {
1177
                U32 const prev = cur - opt[cur].mlen;
1178
                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1179
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1180
            } else {
1181
                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1182
            }
1183

1184
            /* last match must start at a minimum distance of 8 from oend */
1185
            if (inr > ilimit) continue;
1186

1187
            if (cur == last_pos) break;
1188

1189
            if ( (optLevel==0) /*static_test*/
1190
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1191
                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1192
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1193
            }
1194

1195
            assert(opt[cur].price >= 0);
1196
            {   U32 const ll0 = (opt[cur].mlen != 0);
1197
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1198
                U32 const previousPrice = (U32)opt[cur].price;
1199
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1200
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1201
                U32 matchNb;
1202

1203
                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1204
                                                  (U32)(inr-istart), (U32)(iend-inr));
1205

1206
                if (!nbMatches) {
1207
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
1208
                    continue;
1209
                }
1210

1211
                {   U32 const maxML = matches[nbMatches-1].len;
1212
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1213
                                inr-istart, cur, nbMatches, maxML);
1214

1215
                    if ( (maxML > sufficient_len)
1216
                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
1217
                        lastSequence.mlen = maxML;
1218
                        lastSequence.off = matches[nbMatches-1].off;
1219
                        lastSequence.litlen = litlen;
1220
                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1221
                        last_pos = cur + ZSTD_totalLen(lastSequence);
1222
                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
1223
                        goto _shortestPath;
1224
                }   }
1225

1226
                /* set prices using matches found at position == cur */
1227
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1228
                    U32 const offset = matches[matchNb].off;
1229
                    U32 const lastML = matches[matchNb].len;
1230
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1231
                    U32 mlen;
1232

1233
                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1234
                                matchNb, matches[matchNb].off, lastML, litlen);
1235

1236
                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
1237
                        U32 const pos = cur + mlen;
1238
                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1239

1240
                        if ((pos > last_pos) || (price < opt[pos].price)) {
1241
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1242
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1243
                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
1244
                            opt[pos].mlen = mlen;
1245
                            opt[pos].off = offset;
1246
                            opt[pos].litlen = litlen;
1247
                            opt[pos].price = price;
1248
                        } else {
1249
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1250
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1251
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1252
                        }
1253
            }   }   }
1254
        }  /* for (cur = 1; cur <= last_pos; cur++) */
1255

1256
        lastSequence = opt[last_pos];
1257
        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
1258
        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
1259

1260
_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
1261
        assert(opt[0].mlen == 0);
1262

1263
        /* Set the next chunk's repcodes based on the repcodes of the beginning
1264
         * of the last match, and the last sequence. This avoids us having to
1265
         * update them while traversing the sequences.
1266
         */
1267
        if (lastSequence.mlen != 0) {
1268
            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1269
            ZSTD_memcpy(rep, &reps, sizeof(reps));
1270
        } else {
1271
            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1272
        }
1273

1274
        {   U32 const storeEnd = cur + 1;
1275
            U32 storeStart = storeEnd;
1276
            U32 seqPos = cur;
1277

1278
            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1279
                        last_pos, cur); (void)last_pos;
1280
            assert(storeEnd < ZSTD_OPT_NUM);
1281
            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1282
                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1283
            opt[storeEnd] = lastSequence;
1284
            while (seqPos > 0) {
1285
                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1286
                storeStart--;
1287
                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1288
                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1289
                opt[storeStart] = opt[seqPos];
1290
                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1291
            }
1292

1293
            /* save sequences */
1294
            DEBUGLOG(6, "sending selected sequences into seqStore")
1295
            {   U32 storePos;
1296
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1297
                    U32 const llen = opt[storePos].litlen;
1298
                    U32 const mlen = opt[storePos].mlen;
1299
                    U32 const offCode = opt[storePos].off;
1300
                    U32 const advance = llen + mlen;
1301
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1302
                                anchor - istart, (unsigned)llen, (unsigned)mlen);
1303

1304
                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
1305
                        assert(storePos == storeEnd);   /* must be last sequence */
1306
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
1307
                        continue;   /* will finish */
1308
                    }
1309

1310
                    assert(anchor + llen <= iend);
1311
                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1312
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
1313
                    anchor += advance;
1314
                    ip = anchor;
1315
            }   }
1316
            ZSTD_setBasePrices(optStatePtr, optLevel);
1317
        }
1318
    }   /* while (ip < ilimit) */
1319

1320
    /* Return the last literals size */
1321
    return (size_t)(iend - anchor);
1322
}
1323

1324
static size_t ZSTD_compressBlock_opt0(
1325
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1326
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1327
{
1328
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1329
}
1330

1331
static size_t ZSTD_compressBlock_opt2(
1332
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1333
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1334
{
1335
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1336
}
1337

1338
size_t ZSTD_compressBlock_btopt(
1339
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1340
        const void* src, size_t srcSize)
1341
{
1342
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1343
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1344
}
1345

1346

1347

1348

1349
/* ZSTD_initStats_ultra():
1350
 * make a first compression pass, just to seed stats with more accurate starting values.
1351
 * only works on first block, with no dictionary and no ldm.
1352
 * this function cannot error, hence its contract must be respected.
1353
 */
1354
static void
1355
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1356
                     seqStore_t* seqStore,
1357
                     U32 rep[ZSTD_REP_NUM],
1358
               const void* src, size_t srcSize)
1359
{
1360
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
1361
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1362

1363
    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1364
    assert(ms->opt.litLengthSum == 0);    /* first block */
1365
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
1366
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
1367
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
1368

1369
    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
1370

1371
    /* invalidate first scan from history */
1372
    ZSTD_resetSeqStore(seqStore);
1373
    ms->window.base -= srcSize;
1374
    ms->window.dictLimit += (U32)srcSize;
1375
    ms->window.lowLimit = ms->window.dictLimit;
1376
    ms->nextToUpdate = ms->window.dictLimit;
1377

1378
}
1379

1380
size_t ZSTD_compressBlock_btultra(
1381
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1382
        const void* src, size_t srcSize)
1383
{
1384
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1385
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1386
}
1387

1388
size_t ZSTD_compressBlock_btultra2(
1389
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1390
        const void* src, size_t srcSize)
1391
{
1392
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1393
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1394

1395
    /* 2-pass strategy:
1396
     * this strategy makes a first pass over first block to collect statistics
1397
     * and seed next round's statistics with it.
1398
     * After 1st pass, function forgets everything, and starts a new block.
1399
     * Consequently, this can only work if no data has been previously loaded in tables,
1400
     * aka, no dictionary, no prefix, no ldm preprocessing.
1401
     * The compression ratio gain is generally small (~0.5% on first block),
1402
     * the cost is 2x cpu time on first block. */
1403
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1404
    if ( (ms->opt.litLengthSum==0)   /* first block */
1405
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
1406
      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
1407
      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
1408
      && (srcSize > ZSTD_PREDEF_THRESHOLD)
1409
      ) {
1410
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1411
    }
1412

1413
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1414
}
1415

1416
size_t ZSTD_compressBlock_btopt_dictMatchState(
1417
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1418
        const void* src, size_t srcSize)
1419
{
1420
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1421
}
1422

1423
size_t ZSTD_compressBlock_btultra_dictMatchState(
1424
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1425
        const void* src, size_t srcSize)
1426
{
1427
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1428
}
1429

1430
size_t ZSTD_compressBlock_btopt_extDict(
1431
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1432
        const void* src, size_t srcSize)
1433
{
1434
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1435
}
1436

1437
size_t ZSTD_compressBlock_btultra_extDict(
1438
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1439
        const void* src, size_t srcSize)
1440
{
1441
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1442
}
1443

1444
/* note : there is no btultra2 variant for extDict or dictMatchState,
 * because btultra2 is not meant to work with dictionaries
 * and only applies to the first block (no prefix) */
1447

1448
Product

Resources

Company