Path: Utilities/cmzstd/lib/compress/zstd_compress_internal.h
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
#  include "zstdmt_compress.h"
#endif
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */

#if defined (__cplusplus)
extern "C" {
#endif

/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;

/***********************************************
*  Entropy buffer statistics structs and funcs *
***********************************************/
/** ZSTD_hufCTablesMetadata_t :
 *  Stores Literals Block Type for a super-block in hType, and
 *  huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of huffman tree description in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    symbolEncodingType_e hType;
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
    size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;

/** ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    symbolEncodingType_e llType;
    symbolEncodingType_e ofType;
    symbolEncodingType_e mlType;
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
    size_t fseTablesSize;
    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;

/** ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(
                    const seqStore_t* seqStorePtr,
                    const ZSTD_entropyCTables_t* prevEntropy,
                          ZSTD_entropyCTables_t* nextEntropy,
                    const ZSTD_CCtx_params* cctxParams,
                          ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                          void* workspace, size_t wkspSize);

/*********************************
*  Compression internals structs *
*********************************/

typedef struct {
    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
  rawSeq* seq;          /* The start of the sequences */
  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
  size_t size;          /* The number of sequences. <= capacity. */
  size_t capacity;      /* The capacity starting from `seq` pointer */
} rawSeqStore_t;

typedef struct {
    U32 idx;            /* Index in array of ZSTD_Sequence */
    U32 posInSequence;  /* Position within sequence at idx */
    size_t posInSrc;    /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;

UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
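
/* Illustrative sketch (not part of the original header) : how the read cursor of a
 * rawSeqStore_t is typically advanced. The helper name `rawSeqStore_consume` is
 * hypothetical; the invariants (pos <= size, posInSequence bounded by the current
 * sequence's litLength + matchLength) are the ones documented above. */
#if 0
static void rawSeqStore_consume(rawSeqStore_t* store, size_t nbBytes)
{
    while (nbBytes > 0 && store->pos < store->size) {
        rawSeq const s = store->seq[store->pos];
        size_t const remaining = (size_t)s.litLength + s.matchLength - store->posInSequence;
        if (nbBytes < remaining) {
            store->posInSequence += nbBytes;   /* reading stops inside current sequence */
            return;
        }
        nbBytes -= remaining;                  /* current sequence fully consumed */
        store->pos++;
        store->posInSequence = 0;
    }
}
#endif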
typedef struct {
    int price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq) */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_paramSwitch_e literalCompressionMode;
} optState_t;

typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;

#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_matchState_t ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
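
/* Illustrative sketch (not part of the original header) : resolving a match index
 * back to a pointer under the ZSTD_window_t convention above. Indexes below
 * dictLimit belong to the extDict segment (relative to dictBase), indexes at or
 * above dictLimit belong to the current prefix (relative to base). */
#if 0
static const BYTE* windowIndexToPtr(const ZSTD_window_t* window, U32 matchIndex)
{
    return (matchIndex < window->dictLimit)
         ? window->dictBase + matchIndex    /* extDict segment */
         : window->base + matchIndex;       /* current prefix */
}
#endif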
struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for re-use of tag table */
    U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */

    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;

    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
    const rawSeqStore_t* ldmSeqStore;

    /* Controls prefetching in some dictMatchState matchfinders.
     * This behavior is controlled from the cctx ms.
     * This parameter has no effect in the cdict ms. */
    int prefetchCDictTables;

    /* When == 0, lazy match finders insert every position.
     * When != 0, lazy match finders only insert positions they search.
     * This allows them to skip much faster over incompressible data,
     * at a small cost to compression ratio.
     */
    int lazySkipping;
};

typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

typedef struct {
    BYTE const* split;
    U32 hash;
    U32 checksum;
    ldmEntry_t* bucket;
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64

typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

typedef struct {
    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;
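
/* Illustrative sketch (not part of the original header) : how hashLog and
 * bucketSizeLog cooperate. Following ZSTD_ldm_getBucket() in zstd_ldm.c, the LDM
 * hash table is addressed in buckets of (1 << bucketSizeLog) ldmEntry_t, so a hash
 * selects a bucket of candidates rather than a single slot. */
#if 0
static ldmEntry_t* getBucket(ldmState_t* ldmState, size_t hash,
                             ldmParams_t const ldmParams)
{
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
}
#endif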
typedef struct {
    int collectSequences;
    ZSTD_Sequence* seqStart;
    size_t seqIndex;
    size_t maxSequences;
} SeqCollector;

struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_paramSwitch_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_sequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting */
    ZSTD_paramSwitch_e useBlockSplitter;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_paramSwitch_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;

    /* Controls prefetching in some dictMatchState matchfinders */
    ZSTD_paramSwitch_e prefetchCDictTables;

    /* Controls whether zstd will fall back to an internal matchfinder
     * if the external matchfinder returns an error code. */
    int enableMatchFinderFallback;

    /* Indicates whether an external matchfinder has been referenced.
     * Users can't set this externally.
     * It is set internally in ZSTD_registerSequenceProducer(). */
    int useSequenceProducer;

    /* Adjust the max block size */
    size_t maxBlockSize;

    /* Controls repcode search in external sequence parsing */
    ZSTD_paramSwitch_e searchForExternalRepcodes;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)

/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/**
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    seqStore_t fullSeqStoreChunk;
    seqStore_t firstHalfSeqStore;
    seqStore_t secondHalfSeqStore;
    seqStore_t currSeqStore;
    seqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;

/* Context for block-level external matchfinder API */
typedef struct {
  void* mState;
  ZSTD_sequenceProducer_F* mFinder;
  ZSTD_Sequence* seqBuffer;
  size_t seqBufferCapacity;
} ZSTD_externalMatchCtx;
struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif

    /* Tracing */
#if ZSTD_TRACE
    ZSTD_TraceCtx traceCtx;
#endif

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;

    /* Workspace for external matchfinder */
    ZSTD_externalMatchCtx externalMatchCtx;
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;

typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;
typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_cParamMode_e;

typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 * because it's the format it's stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_noCompressBlock() :
 * Writes uncompressed block to dst buffer from given src.
 * Returns the size of the block */
MEM_STATIC size_t
ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}

MEM_STATIC size_t
ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
    MEM_writeLE24(op, cBlockHeader);
    op[3] = src;
    return 4;
}
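
/* Worked example (illustrative, not part of the original header) : the 24-bit
 * block header written above packs, per RFC 8878, Last_Block in bit 0, Block_Type
 * in bits 1-2, and Block_Size in bits 3-23. A final raw block of 1000 bytes gives
 *     header = 1 + (bt_raw<<1) + (1000<<3) = 0x1F41,
 * stored little-endian as the 3 bytes 41 1F 00, followed by the 1000 raw bytes. */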
/* ZSTD_minGain() :
 * minimum compression required
 * to generate a compress block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
    return (srcSize >> minlog) + 2;
}

MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_ps_enable:
        return 0;
    case ZSTD_ps_disable:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        ZSTD_FALLTHROUGH;
    case ZSTD_ps_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
{
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}


#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
#define OFFSET_TO_OFFBASE(o)  (assert((o)>0), o + ZSTD_REP_NUM)
#define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
#define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
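
/* Illustrative sketch (not part of the original header) : round-trips through the
 * offBase sum-type defined above. Values 1..ZSTD_REP_NUM denote repcode ids, while
 * anything larger is a real offset shifted up by ZSTD_REP_NUM (== 3). */
#if 0
static void offBase_roundtrip_example(void)
{
    U32 const fromRep    = REPCODE_TO_OFFBASE(2);    /* == 2    : repcode id 2 */
    U32 const fromOffset = OFFSET_TO_OFFBASE(1000);  /* == 1003 : real offset  */
    assert(OFFBASE_IS_REPCODE(fromRep)   && OFFBASE_TO_REPCODE(fromRep)   == 2);
    assert(OFFBASE_IS_OFFSET(fromOffset) && OFFBASE_TO_OFFSET(fromOffset) == 1000);
}
#endif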
/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
 *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
 *  @matchLength : must be >= MINMATCH
 *  Allowed to over-read literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
              size_t litLength, const BYTE* literals, const BYTE* litLimit,
              U32 offBase,
              size_t matchLength)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
               pos, (U32)litLength, (U32)matchLength, (U32)offBase);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offBase = offBase;

    /* match Length */
    assert(matchLength >= MINMATCH);
    {   size_t const mlBase = matchLength - MINMATCH;
        if (mlBase>0xFFFF) {
            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
        }
        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
    }

    seqStorePtr->sequences++;
}

/* ZSTD_updateRep() :
 * updates in-place @rep (array of repeat offsets)
 * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
 */
MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{
    if (OFFBASE_IS_OFFSET(offBase)) {  /* full offset */
        rep[2] = rep[1];
        rep[1] = rep[0];
        rep[0] = OFFBASE_TO_OFFSET(offBase);
    } else {   /* repcode */
        U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
        if (repCode > 0) {  /* note : if repCode==0, no change */
            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            rep[1] = rep[0];
            rep[0] = currentOffset;
        } else {   /* repCode == 0 */
            /* nothing to do */
        }
    }
}
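
/* Worked example (illustrative, not part of the original header) : starting from
 * the history {rep[0]=8, rep[1]=4, rep[2]=1} :
 *  - a full offset 16 shifts the history to {16, 8, 4} ;
 *  - repcode id 1 with ll0==0 gives repCode==0 : history unchanged ;
 *  - repcode id 2 with ll0==0 gives repCode==1 : history becomes {4, 8, 1} ;
 *  - repcode id 1 with ll0==1 (empty literals) also gives repCode==1 : {4, 8, 1}. */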
typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{
    repcodes_t newReps;
    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
    ZSTD_updateRep(newReps.rep, offBase, ll0);
    return newReps;
}


/*-*************************************
*  Match length counter
***************************************/
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
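
/* Illustrative sketch (not part of the original header) : ZSTD_count() returns the
 * length of the common prefix of pIn and pMatch, never reading pIn at or beyond
 * pInLimit. It compares word-at-a-time, then uses ZSTD_NbCommonBytes() on the
 * first differing word. */
#if 0
static void count_example(void)
{
    BYTE const in[]    = { 'a','b','c','d','e','f','g','h','X' };
    BYTE const match[] = { 'a','b','c','d','e','f','g','h','Y' };
    assert(ZSTD_count(in, match, in + sizeof(in)) == 8);  /* common prefix "abcdefgh" */
}
#endif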
/** ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continue starting from iStart
*/
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
*  Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }


MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    /* Although some of these hashes do support hBits up to 64, some do not.
     * To be on the safe side, always avoid hBits > 32. */
    assert(hBits <= 32);

    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
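
/* Illustrative sketch (not part of the original header) : a matchfinder typically
 * indexes its hash table with ZSTD_hashPtr(). With hBits == hashLog the result
 * always fits in a table of (1 << hashLog) entries; `mls` selects how many leading
 * bytes participate in the hash. `ms`, `ip` and `curr` are hypothetical locals. */
#if 0
{
    U32 const hashLog = 17;                             /* table of 1<<17 entries */
    size_t const h = ZSTD_hashPtr(ip, hashLog, 6);      /* hash the 6 bytes at ip */
    U32 const matchIndex = ms->hashTable[h];            /* candidate from the table */
    ms->hashTable[h] = curr;                            /* insert current position */
}
#endif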
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
    /* Although some of these hashes do support hBits up to 64, some do not.
     * To be on the safe side, always avoid hBits > 32. */
    assert(hBits <= 32);

    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
    case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
    case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
    case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
    case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
    }
}


/** ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
      if (exponent & 1) power *= base;
      exponent >>= 1;
      base *= base;
    }
    return power;
}

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/** ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/** ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/** ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
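
/* Illustrative sketch (not part of the original header) : rotating the window by
 * one byte must agree with recomputing the hash from scratch; this equivalence is
 * what makes the hash "rolling". All arithmetic is modulo 2^64. */
#if 0
static void rollingHash_example(void)
{
    BYTE const data[] = "abcdefgh";
    U32 const windowLen = 4;
    U64 const primePower = ZSTD_rollingHash_primePower(windowLen);
    U64 hash = ZSTD_rollingHash_compute(data, windowLen);            /* hash of "abcd" */
    hash = ZSTD_rollingHash_rotate(hash, data[0], data[windowLen], primePower);
    assert(hash == ZSTD_rollingHash_compute(data + 1, windowLen));   /* hash of "bcde" */
}
#endif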
/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/**
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
}

/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/**
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
            ZSTD_noDict;
}

/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif

/**
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}

/**
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  U32 cycleLog,
                                                  U32 maxDist,
                                                  U32 loadedDictEnd,
                                                  void const* src,
                                                  void const* srcEnd)
{
    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
            return 1;
        }
    }
    return curr > ZSTD_CURRENT_MAX;
}
/**
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
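
/* Worked example (illustrative, not part of the original header; toy-scale numbers,
 * far below the bounds the asserts above expect) : with cycleLog==2 (cycleSize==4),
 * maxDist==8 and curr==103 :
 *     currentCycle = 103 & 3 = 3 ;  newCurrent = 3 + 0 + MAX(8,4) = 11 ;
 *     correction  = 103 - 11 = 92.
 * The low cycleLog bits are preserved (11 & 3 == 103 & 3), and every index within
 * maxDist of curr remains valid after subtracting the correction. */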
/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}
/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             *
             * We also have to invalidate the dictionary if ZSTD_window_update() has detected
             * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
             * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
             * dictMatchState, so setting it to NULL is not a problem.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)" ";
    window->dictBase = (BYTE const*)" ";
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}
/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the old extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}

/**
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}
/**
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.dictLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When computing the lowest prefix index we need to take the dictionary into account to handle
     * the edge case where the dictionary and the source are contiguous in memory.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}



/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif

/* Short Cache */

/* Normally, zstd matchfinders follow this flow:
 *     1. Compute hash at ip
 *     2. Load index from hashTable[hash]
 *     3. Check if *ip == *(base + index)
 * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
 *
 * Short cache is an optimization which allows us to avoid step 3 most of the time
 * when the data doesn't actually match. With short cache, the flow becomes:
 *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
 *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
 *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
 *
 * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
 * dictMatchState matchfinders.
 */
#define ZSTD_SHORT_CACHE_TAG_BITS 8
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)

/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
 * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
    size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
    U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
    assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
    hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
}

/* Helper function for short cache matchfinders.
 * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
    U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
    U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
    return tag1 == tag2;
}
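
/* Illustrative sketch (not part of the original header) : how a dictMatchState
 * matchfinder consumes the packed (index, tag) entries written above. `hashAndTag`
 * and `dmsHashTable` are hypothetical locals; the point is that a tag mismatch
 * skips the (likely cache-missing) dictionary dereference entirely. */
#if 0
{
    U32 const packed = dmsHashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS];
    if (ZSTD_comparePackedTags(packed, hashAndTag)) {      /* tags agree : worth a look */
        U32 const matchIndex = packed >> ZSTD_SHORT_CACHE_TAG_BITS;
        /* ... dereference dictBase + matchIndex and verify the match ... */
    }   /* else : almost certainly no match, skip the memory access */
}
#endif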
#if defined (__cplusplus)
}
#endif

/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number supposed already checked
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);
/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict,size_t dictSize,
                                 const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);

/** ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/** ZSTD_CCtx_trace() :
 *  Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
 * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
 * Note that the block delimiter must include the last literals of the block.
 */
size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
        ZSTD_sequencePosition* seqPos,
        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
        const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);

/* Returns the number of bytes to move the current read position back by.
 * Only non-zero if we ended up splitting a sequence.
 * Otherwise, it may return a ZSTD error if something went wrong.
 *
 * This function will attempt to scan through blockSize bytes
 * represented by the sequences in @inSeqs,
 * storing any (partial) sequences.
 *
 * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
 * avoid splitting a match, or to avoid splitting a match such that it would produce a match
 * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
 */
size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
        const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* ===============================================================
 * Deprecated definitions that are still used internally to avoid
 * deprecation warnings. These functions are exactly equivalent to
 * their public variants, but avoid the deprecation warnings.
 * =============================================================== */

size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);

size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
                                    void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize);

size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize);

size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);


#endif /* ZSTD_COMPRESS_H */