Path: blob/master/Utilities/cmzstd/lib/compress/zstd_opt.c
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"


#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE     (1<<30)

#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


/*-*************************************
*  Price functions for optimal parser
***************************************/

#if 0    /* approximation at bit level (for tests) */
#  define BITCOST_ACCURACY 0
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy (for tests) */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif

/* ZSTD_bitWeight() :
 * provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}

/* ZSTD_fracWeight() :
 * provide fractional-bit "cost" of a stat,
 * using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    /* Fweight was meant for "Fractional weight"
     * but it's effectively a value between 1 and 2
     * using fixed point arithmetic */
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}
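
/* Illustrative note : for stat in [2^hb, 2^(hb+1)),
 * ZSTD_fracWeight(stat) ~= (hb + stat/2^hb) * BITCOST_MULTIPLIER,
 * a linear interpolation between (hb+1) and (hb+2) "bits".
 * Example : rawStat=5 => stat=6, hb=2, BWeight=2*256=512,
 * FWeight=(6<<8)>>2=384, weight=896 = 3.5 * BITCOST_MULTIPLIER.
 * Prices are always computed as differences of two weights,
 * so the constant +1 bias cancels out. */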

#if (DEBUGLEVEL>=2)
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(int price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
#endif

static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}


static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    size_t n;
    U32 total = 0;
    for (n=0; n<nbElts; n++) {
        total += table[n];
    }
    return total;
}

typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;

static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
            (unsigned)lastEltIndex+1, (unsigned)shift );
    assert(shift < 30);
    for (s=0; s<lastEltIndex+1; s++) {
        unsigned const base = base1 ? 1 : (table[s]>0);
        unsigned const newStat = base + (table[s] >> shift);
        sum += newStat;
        table[s] = newStat;
    }
    return sum;
}

/* ZSTD_scaleStats() :
 * reduce all elt frequencies in table if sum too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
    U32 const factor = prevsum >> logTarget;
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (factor <= 1) return prevsum;
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
}
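
/* Worked example (illustrative) : if the table sums to 2^14 = 16384 and
 * logTarget == 11, then factor = 16384 >> 11 = 8, shift = highbit32(8) = 3,
 * and each frequency f becomes 1 + (f >> 3). The resulting sum is about
 * 16384/8 + nbElts, i.e. close to the 2^11 target, while keeping every
 * symbol representable (base_1guaranteed). */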

/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    and init from zero if there is none,
 *    using src for literals stats, and baseline stats for sequence symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
                  int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */

        /* heuristic: use pre-defined stats for too small inputs */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
            optPtr->priceType = zop_predef;
        }

        assert(optPtr->symbolCosts != NULL);
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {

            /* huffman stats covering the full value set : table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

            if (compressedLiterals) {
                /* generate literals statistics from huffman table */
                unsigned lit;
                assert(optPtr->litFreq != NULL);
                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                    assert(bitCost <= scaleLog);
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litSum += optPtr->litFreq[lit];
            }   }

            {   unsigned ll;
                FSE_CState_t llstate;
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                optPtr->litLengthSum = 0;
                for (ll=0; ll<=MaxLL; ll++) {
                    U32 const scaleLog = 10;   /* scale to 1K */
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                    assert(bitCost < scaleLog);
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
            }   }

            {   unsigned ml;
                FSE_CState_t mlstate;
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                optPtr->matchLengthSum = 0;
                for (ml=0; ml<=MaxML; ml++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                    assert(bitCost < scaleLog);
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
            }   }

            {   unsigned of;
                FSE_CState_t ofstate;
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                optPtr->offCodeSum = 0;
                for (of=0; of<=MaxOff; of++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                    assert(bitCost < scaleLog);
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
            }   }

        } else {  /* first block, no dictionary */

            assert(optPtr->litFreq != NULL);
            if (compressedLiterals) {
                /* base initial cost of literals on direct frequency within src */
                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
            }

            {   unsigned const baseLLfreqs[MaxLL+1] = {
                    4, 2, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
            }

            {   unsigned ml;
                for (ml=0; ml<=MaxML; ml++)
                    optPtr->matchLengthFreq[ml] = 1;
            }
            optPtr->matchLengthSum = MaxML+1;

            {   unsigned const baseOFCfreqs[MaxOff+1] = {
                    6, 2, 1, 1, 2, 3, 4, 4,
                    4, 3, 2, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
            }

        }

    } else {   /* new block : scale down accumulated statistics */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}
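
/* Illustrative note : when seeding from a dictionary table above, a symbol
 * whose Huffman code costs bitCost=3 bits receives litFreq = 1<<(11-3) = 256
 * out of a total scaled to ~2K, so freq/sum ~= 2^-3 and the dynamic price
 * model reproduces roughly the 3-bit cost observed in the dictionary. */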

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (which length can be 0).
 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel)
{
    if (litLength == 0) return 0;

    if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */

    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistic used */

    /* dynamic statistics */
    {   U32 price = optPtr->litSumBasePrice * litLength;
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
        U32 u;
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
        for (u=0; u < litLength; u++) {
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
            price -= litPrice;
        }
        return price;
    }
}

/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    if (optPtr->priceType == zop_predef)
        return WEIGHT(litLength, optLevel);

    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
     * because it isn't representable in the zstd format.
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
     * In such a case, the block would be all literals.
     */
    if (litLength == ZSTD_BLOCKSIZE_MAX)
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
             + optPtr->litLengthSumBasePrice
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    }
}
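
/* Illustrative note : the dynamic prices above all follow the same shape :
 * price(sym) ~= extraBits*BITCOST_MULTIPLIER + WEIGHT(sum) - WEIGHT(freq[sym])
 *            ~= (extraBits + log2(sum/freq[sym])) * BITCOST_MULTIPLIER .
 * Example : litLengthFreq[llCode]=32 with litLengthSum=2048 prices the symbol
 * at about log2(2048/32) = 6 bits, plus LL_bits[llCode] raw bits. */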

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence.
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
 */
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offBase,
                   U32 const matchLength,
             const optState_t* const optPtr,
                   int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(offBase);
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
        return WEIGHT(mlBase, optLevel)
             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */

    /* dynamic statistics */
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    if ((optLevel<2) /*static*/ && offCode >= 20)
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */

    /* match Length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    }

    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}
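
/* Illustrative note : at optLevel<2, an offset with offCode=25 pays an extra
 * (25-19)*2 = 12 whole bits of handicap, steering the parser toward closer
 * matches; btultra (optLevel==2) applies no such handicap. The final
 * BITCOST_MULTIPLIER/5 term adds ~0.2 bit to every match, biasing ties
 * toward fewer sequences. */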

/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offBase, U32 matchLength)
{
    /* literals */
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* offset code : follows storeSeq() numeric representation */
    {   U32 const offCode = ZSTD_highbit32(offBase);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match Length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}


/* ZSTD_readMINMATCH() :
 * function safe only for comparisons
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}
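
/* Illustrative note : for length==3, MEM_read32() fetches bytes b0..b3 ;
 * on little-endian, (read32 << 8) discards b3, and on big-endian (read32 >> 8)
 * does the same, so two 3-byte sequences compare equal iff b0,b1,b2 match.
 * The shifted value is not the bytes' numeric value, hence "safe only for
 * comparisons". */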

/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                              U32* nextToUpdate3,
                                              const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 *  @param ip assumed <= iend-8 .
 *  @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 *  @return : nb of positions added */
static U32 ZSTD_insertBt1(
                const ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;   /* Update Hash Table */

    assert(windowLow > 0);
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}
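
/* Illustrative note : ms->chainTable is reused here as a rolling buffer of
 * (smaller, larger) child pairs : position idx owns bt[2*(idx&btMask)] and
 * bt[2*(idx&btMask)+1]. Positions at distance > btMask from curr fall off
 * the tree (btLow). The value returned by ZSTD_insertBt1() lets the caller
 * fast-forward over long repetitive regions instead of inserting every
 * position. */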

FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}
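
/* Note : ms->nextToUpdate marks the first position not yet inserted into the
 * tree; the tree is thus filled lazily, just before each search. */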

FORCE_INLINE_TEMPLATE U32
ZSTD_insertBtAndGetAllMatches (
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                ZSTD_matchState_t* ms,
                U32* nextToUpdate3,
                const BYTE* const ip, const BYTE* const iLimit,
                const ZSTD_dictMode_e dictMode,
                const U32 rep[ZSTD_REP_NUM],
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                const U32 lengthToBeat,
                const U32 mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = curr+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }  /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
        }   }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }   }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}
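
/* Note : matches[] is kept in strictly increasing length order : a candidate
 * is only appended when it beats bestLength, so matches[mnum-1] is always the
 * longest match found. Consumers (including ZSTD_optLdm_maybeAddMatch())
 * rely on this invariant. */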

typedef U32 (*ZSTD_getAllMatchesFn)(
    ZSTD_match_t*,
    ZSTD_matchState_t*,
    U32*,
    const BYTE*,
    const BYTE*,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0,
    U32 const lengthToBeat);

FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_matchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    if (ip < ms->window.base + ms->nextToUpdate)
        return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}

#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
            ZSTD_match_t* matches,                             \
            ZSTD_matchState_t* ms,                             \
            U32* nextToUpdate3,                                \
            const BYTE* ip,                                    \
            const BYTE* const iHighLimit,                      \
            const U32 rep[ZSTD_REP_NUM],                       \
            U32 const ll0,                                     \
            U32 const lengthToBeat)                            \
    {                                                          \
        return ZSTD_btGetAllMatches_internal(                  \
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    }

#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }

static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}
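
/* Illustrative example : with minMatch=5 and no dictionary, the selector
 * above returns ZSTD_btGetAllMatches_noDict_5, one of the 3 dictModes x
 * 4 mls = 12 specializations generated by the macros. */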

/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate */
    U32 endPosInBlock;        /* End position of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}

/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}
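
/* Worked example (illustrative) : with currSeq = {litLength=5, matchLength=10}
 * and posInSequence=3, skipping 4 bytes gives currPos=7 < 15, so only
 * posInSequence advances to 7 ; skipping 20 bytes gives currPos=23 >= 15,
 * so the sequence is consumed (pos++) and the remaining currPos=8 carries
 * into the next sequence. */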

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough,
 * based on its 'matchStartPosInBlock' and 'matchEndPosInBlock',
 * into 'matches'. Maintains the correct ordering of 'matches'.
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

    /* Ensure that current block position is not outside of the match */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < MINMATCH) {
        return;
    }

    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
                 candidateOffBase, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffBase;
        (*nbMatches)++;
    }
}

/* ZSTD_optLdm_processMatchCandidate():
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes)
{
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}


/*-*******************************
*  Optimal parser
*********************************/

static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
    return sol.litlen + sol.mlen;
}

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        (void)table;
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif
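
/* Overview (illustrative) of the optimal parser below : opt[pos] holds the
 * cheapest known way to encode the next `pos` bytes. The forward pass relaxes
 * each position, either by extending with one literal, or with every match
 * found at that position ; a match longer than sufficient_len ends the series
 * immediately. The backward pass (_shortestPath) then walks the winning chain
 * and emits its sequences. */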

FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                               seqStore_t* seqStore,
                               U32 rep[ZSTD_REP_NUM],
                         const void* src, size_t srcSize,
                         const int optLevel,
                         const ZSTD_dictMode_e dictMode)
{
    optState_t* const optStatePtr = &ms->opt;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    const ZSTD_compressionParameters* const cParams = &ms->cParams;

    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    ZSTD_optimal_t lastSequence;
    ZSTD_optLdm_t optLdm;

    ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));

    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;

        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                              (U32)(ip-istart), (U32)(iend - ip));
            if (!nbMatches) { ip++; continue; }

            /* initialize opt[0] */
            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
            opt[0].mlen = 0;  /* means is_a_literal */
            opt[0].litlen = litlen;
            /* We don't need to include the actual price of the literals because
             * it is static for the duration of the forward pass, and is included
             * in every price. We include the literal length to avoid negative
             * prices when we subtract the previous literal length.
             */
            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);

            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                U32 const maxOffBase = matches[nbMatches-1].off;
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
                    lastSequence.litlen = litlen;
                    lastSequence.mlen = maxML;
                    lastSequence.off = maxOffBase;
                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
                                maxML, sufficient_len);
                    cur = 0;
                    last_pos = ZSTD_totalLen(lastSequence);
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting position == 0 */
            assert(opt[0].price >= 0);
            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offBase = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    for ( ; pos <= end ; pos++ ) {
                        U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
                        U32 const sequencePrice = literalsPrice + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                    pos, ZSTD_fCost((int)sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offBase;
                        opt[pos].litlen = litlen;
                        opt[pos].price = (int)sequencePrice;
                }   }
                last_pos = pos-1;
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur < ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)

            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                int const price = opt[cur-1].price
                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
                    opt[cur].mlen = 0;
                    opt[cur].off = 0;
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                } else {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
                }
            }

            /* Set the repcodes of the current position. We must do it here
             * because we rely on the repcodes of the 2nd to last sequence being
             * correct to set the next chunk's repcodes during the backward
             * traversal.
             */
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
            assert(cur >= opt[cur].mlen);
            if (opt[cur].mlen != 0) {
                U32 const prev = cur - opt[cur].mlen;
                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
            } else {
                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
            }

            /* last match must start at a minimum distance of 8 from oend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

            if ( (optLevel==0) /*static_test*/
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
            }

            assert(opt[cur].price >= 0);
            {   U32 const ll0 = (opt[cur].mlen != 0);
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = (U32)opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                U32 matchNb;

                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                                  (U32)(inr-istart), (U32)(iend-inr));

                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {   U32 const maxML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
                                inr-istart, cur, nbMatches, maxML);

                    if ( (maxML > sufficient_len)
                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
                        lastSequence.mlen = maxML;
                        lastSequence.off = matches[nbMatches-1].off;
                        lastSequence.litlen = litlen;
                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
                        last_pos = cur + ZSTD_totalLen(lastSequence);
                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
                        goto _shortestPath;
                }   }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
                                matchNb, matches[matchNb].off, lastML, litlen);

                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = litlen;
                            opt[pos].price = price;
                        } else {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
        }  /* for (cur = 1; cur <= last_pos; cur++) */

        lastSequence = opt[last_pos];
        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
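
        /* Here, cur indexes the start of the last selected stretch :
         * stepping back by ZSTD_totalLen(opt[seqPos]) repeatedly (below)
         * rewinds the winning chain of sequences, which is then copied
         * into opt[storeStart..storeEnd] in forward order. */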

_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
        assert(opt[0].mlen == 0);

        /* Set the next chunk's repcodes based on the repcodes of the beginning
         * of the last match, and the last sequence. This avoids us having to
         * update them while traversing the sequences.
         */
        if (lastSequence.mlen != 0) {
            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
            ZSTD_memcpy(rep, &reps, sizeof(reps));
        } else {
            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
        }

        {   U32 const storeEnd = cur + 1;
            U32 storeStart = storeEnd;
            U32 seqPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                        last_pos, cur); (void)last_pos;
            assert(storeEnd < ZSTD_OPT_NUM);
            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
            opt[storeEnd] = lastSequence;
            while (seqPos > 0) {
                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
                storeStart--;
                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
                opt[storeStart] = opt[seqPos];
                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
            }

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore")
            {   U32 storePos;
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offBase = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
                                anchor - istart, (unsigned)llen, (unsigned)mlen);

                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
                        assert(storePos == storeEnd);   /* must be last sequence */
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;   /* will finish */
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
                    anchor += advance;
                    ip = anchor;
            }   }
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

static size_t ZSTD_compressBlock_opt0(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}

static size_t ZSTD_compressBlock_opt2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}

size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}




/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
 * this function cannot error out, its narrow contract must be respected.
 */
static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                     seqStore_t* seqStore,
                     U32 rep[ZSTD_REP_NUM],
               const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */

    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/

    /* invalidate first scan from history, only keep entropy stats */
    ZSTD_resetSeqStore(seqStore);
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;

}
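
/* Note on the rewind above : shifting window.base back by srcSize and raising
 * dictLimit/lowLimit by the same amount keeps every index consistent, while
 * all positions inserted during the first pass now sit below lowLimit and can
 * no longer be returned as matches. Only the entropy statistics accumulated
 * in ms->opt survive into the second pass. */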
"ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);1410return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);1411}14121413size_t ZSTD_compressBlock_btultra2(1414ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1415const void* src, size_t srcSize)1416{1417U32 const curr = (U32)((const BYTE*)src - ms->window.base);1418DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);14191420/* 2-passes strategy:1421* this strategy makes a first pass over first block to collect statistics1422* in order to seed next round's statistics with it.1423* After 1st pass, function forgets history, and starts a new block.1424* Consequently, this can only work if no data has been previously loaded in tables,1425* aka, no dictionary, no prefix, no ldm preprocessing.1426* The compression ratio gain is generally small (~0.5% on first block),1427** the cost is 2x cpu time on first block. */1428assert(srcSize <= ZSTD_BLOCKSIZE_MAX);1429if ( (ms->opt.litLengthSum==0) /* first block */1430&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */1431&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */1432&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */1433&& (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */1434) {1435ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);1436}14371438return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);1439}14401441size_t ZSTD_compressBlock_btopt_dictMatchState(1442ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1443const void* src, size_t srcSize)1444{1445return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);1446}14471448size_t ZSTD_compressBlock_btultra_dictMatchState(1449ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1450const void* src, size_t srcSize)1451{1452return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);1453}14541455size_t ZSTD_compressBlock_btopt_extDict(1456ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1457const void* src, size_t srcSize)1458{1459return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);1460}14611462size_t ZSTD_compressBlock_btultra_extDict(1463ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1464const void* src, size_t srcSize)1465{1466return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);1467}14681469/* note : no btultra2 variant for extDict nor dictMatchState,1470* because btultra2 is not meant to work with dictionaries1471* and is only specific for the first block (no prefix) */147214731474