Path: blob/master/Utilities/cmzstd/lib/compress/zstd_compress.c
3158 views
/*1* Copyright (c) Meta Platforms, Inc. and affiliates.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/910/*-*************************************11* Dependencies12***************************************/13#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */14#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */15#include "../common/mem.h"16#include "hist.h" /* HIST_countFast_wksp */17#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */18#include "../common/fse.h"19#include "../common/huf.h"20#include "zstd_compress_internal.h"21#include "zstd_compress_sequences.h"22#include "zstd_compress_literals.h"23#include "zstd_fast.h"24#include "zstd_double_fast.h"25#include "zstd_lazy.h"26#include "zstd_opt.h"27#include "zstd_ldm.h"28#include "zstd_compress_superblock.h"29#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */3031/* ***************************************************************32* Tuning parameters33*****************************************************************/34/*!35* COMPRESS_HEAPMODE :36* Select how default decompression function ZSTD_compress() allocates its context,37* on stack (0, default), or into heap (1).38* Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.39*/40#ifndef ZSTD_COMPRESS_HEAPMODE41# define ZSTD_COMPRESS_HEAPMODE 042#endif4344/*!45* ZSTD_HASHLOG3_MAX :46* Maximum size of the hash table dedicated to find 3-bytes matches,47* in log format, aka 17 => 1 << 17 == 128Ki positions.48* This structure is only used in zstd_opt.49* Since allocation is centralized for all strategies, it has to be known here.50* The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,51* so that zstd_opt.c doesn't need to know about this constant.52*/53#ifndef ZSTD_HASHLOG3_MAX54# define ZSTD_HASHLOG3_MAX 1755#endif5657/*-*************************************58* Helper functions59***************************************/60/* ZSTD_compressBound()61* Note that the result from this function is only valid for62* the one-pass compression functions.63* When employing the streaming mode,64* if flushes are frequently altering the size of blocks,65* the overhead from block headers can make the compressed data larger66* than the return value of ZSTD_compressBound().67*/68size_t ZSTD_compressBound(size_t srcSize) {69size_t const r = ZSTD_COMPRESSBOUND(srcSize);70if (r==0) return ERROR(srcSize_wrong);71return r;72}737475/*-*************************************76* Context memory management77***************************************/78struct ZSTD_CDict_s {79const void* dictContent;80size_t dictContentSize;81ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */82U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */83ZSTD_cwksp workspace;84ZSTD_matchState_t matchState;85ZSTD_compressedBlockState_t cBlockState;86ZSTD_customMem customMem;87U32 dictID;88int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */89ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use90* row-based matchfinder. Unless the cdict is reloaded, we will use91* the same greedy/lazy matchfinder at compression time.92*/93}; /* typedef'd to ZSTD_CDict within "zstd.h" */9495ZSTD_CCtx* ZSTD_createCCtx(void)96{97return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);98}99100static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)101{102assert(cctx != NULL);103ZSTD_memset(cctx, 0, sizeof(*cctx));104cctx->customMem = memManager;105cctx->bmi2 = ZSTD_cpuSupportsBmi2();106{ size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);107assert(!ZSTD_isError(err));108(void)err;109}110}111112ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)113{114ZSTD_STATIC_ASSERT(zcss_init==0);115ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));116if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;117{ ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);118if (!cctx) return NULL;119ZSTD_initCCtx(cctx, customMem);120return cctx;121}122}123124ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)125{126ZSTD_cwksp ws;127ZSTD_CCtx* cctx;128if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */129if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */130ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);131132cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));133if (cctx == NULL) return NULL;134135ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));136ZSTD_cwksp_move(&cctx->workspace, &ws);137cctx->staticSize = workspaceSize;138139/* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */140if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;141cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));142cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));143cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);144cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());145return cctx;146}147148/**149* Clears and frees all of the dictionaries in the CCtx.150*/151static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)152{153ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);154ZSTD_freeCDict(cctx->localDict.cdict);155ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));156ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));157cctx->cdict = NULL;158}159160static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)161{162size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;163size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);164return bufferSize + cdictSize;165}166167static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)168{169assert(cctx != NULL);170assert(cctx->staticSize == 0);171ZSTD_clearAllDicts(cctx);172#ifdef ZSTD_MULTITHREAD173ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;174#endif175ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);176}177178size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)179{180if (cctx==NULL) return 0; /* support free on NULL */181RETURN_ERROR_IF(cctx->staticSize, memory_allocation,182"not compatible with static CCtx");183{ int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);184ZSTD_freeCCtxContent(cctx);185if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem);186}187return 0;188}189190191static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)192{193#ifdef ZSTD_MULTITHREAD194return ZSTDMT_sizeof_CCtx(cctx->mtctx);195#else196(void)cctx;197return 0;198#endif199}200201202size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)203{204if (cctx==NULL) return 0; /* support sizeof on NULL */205/* cctx may be in the workspace */206return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))207+ ZSTD_cwksp_sizeof(&cctx->workspace)208+ ZSTD_sizeof_localDict(cctx->localDict)209+ ZSTD_sizeof_mtctx(cctx);210}211212size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)213{214return ZSTD_sizeof_CCtx(zcs); /* same object */215}216217/* private API call, for dictBuilder only */218const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }219220/* Returns true if the strategy supports using a row based matchfinder */221static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {222return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);223}224225/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder226* for this compression.227*/228static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {229assert(mode != ZSTD_ps_auto);230return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);231}232233/* Returns row matchfinder usage given an initial mode and cParams */234static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,235const ZSTD_compressionParameters* const cParams) {236#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)237int const kHasSIMD128 = 1;238#else239int const kHasSIMD128 = 0;240#endif241if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */242mode = ZSTD_ps_disable;243if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;244if (kHasSIMD128) {245if (cParams->windowLog > 14) mode = ZSTD_ps_enable;246} else {247if (cParams->windowLog > 17) mode = ZSTD_ps_enable;248}249return mode;250}251252/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */253static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,254const ZSTD_compressionParameters* const cParams) {255if (mode != ZSTD_ps_auto) return mode;256return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;257}258259/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */260static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,261const ZSTD_paramSwitch_e useRowMatchFinder,262const U32 forDDSDict) {263assert(useRowMatchFinder != ZSTD_ps_auto);264/* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.265* We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.266*/267return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));268}269270/* Returns ZSTD_ps_enable if compression parameters are such that we should271* enable long distance matching (wlog >= 27, strategy >= btopt).272* Returns ZSTD_ps_disable otherwise.273*/274static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,275const ZSTD_compressionParameters* const cParams) {276if (mode != ZSTD_ps_auto) return mode;277return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;278}279280static int ZSTD_resolveExternalSequenceValidation(int mode) {281return mode;282}283284/* Resolves maxBlockSize to the default if no value is present. */285static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) {286if (maxBlockSize == 0) {287return ZSTD_BLOCKSIZE_MAX;288} else {289return maxBlockSize;290}291}292293static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) {294if (value != ZSTD_ps_auto) return value;295if (cLevel < 10) {296return ZSTD_ps_disable;297} else {298return ZSTD_ps_enable;299}300}301302/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.303* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */304static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {305return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast;306}307308static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(309ZSTD_compressionParameters cParams)310{311ZSTD_CCtx_params cctxParams;312/* should not matter, as all cParams are presumed properly defined */313ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);314cctxParams.cParams = cParams;315316/* Adjust advanced params according to cParams */317cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);318if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {319ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);320assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);321assert(cctxParams.ldmParams.hashRateLog < 32);322}323cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);324cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);325cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);326cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);327cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes,328cctxParams.compressionLevel);329assert(!ZSTD_checkCParams(cParams));330return cctxParams;331}332333static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(334ZSTD_customMem customMem)335{336ZSTD_CCtx_params* params;337if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;338params = (ZSTD_CCtx_params*)ZSTD_customCalloc(339sizeof(ZSTD_CCtx_params), customMem);340if (!params) { return NULL; }341ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);342params->customMem = customMem;343return params;344}345346ZSTD_CCtx_params* ZSTD_createCCtxParams(void)347{348return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);349}350351size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)352{353if (params == NULL) { return 0; }354ZSTD_customFree(params, params->customMem);355return 0;356}357358size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)359{360return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);361}362363size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {364RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");365ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));366cctxParams->compressionLevel = compressionLevel;367cctxParams->fParams.contentSizeFlag = 1;368return 0;369}370371#define ZSTD_NO_CLEVEL 0372373/**374* Initializes `cctxParams` from `params` and `compressionLevel`.375* @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.376*/377static void378ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,379const ZSTD_parameters* params,380int compressionLevel)381{382assert(!ZSTD_checkCParams(params->cParams));383ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));384cctxParams->cParams = params->cParams;385cctxParams->fParams = params->fParams;386/* Should not matter, as all cParams are presumed properly defined.387* But, set it for tracing anyway.388*/389cctxParams->compressionLevel = compressionLevel;390cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams);391cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, ¶ms->cParams);392cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams);393cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);394cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);395cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);396DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",397cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);398}399400size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)401{402RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");403FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");404ZSTD_CCtxParams_init_internal(cctxParams, ¶ms, ZSTD_NO_CLEVEL);405return 0;406}407408/**409* Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.410* @param params Validated zstd parameters.411*/412static void ZSTD_CCtxParams_setZstdParams(413ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)414{415assert(!ZSTD_checkCParams(params->cParams));416cctxParams->cParams = params->cParams;417cctxParams->fParams = params->fParams;418/* Should not matter, as all cParams are presumed properly defined.419* But, set it for tracing anyway.420*/421cctxParams->compressionLevel = ZSTD_NO_CLEVEL;422}423424ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)425{426ZSTD_bounds bounds = { 0, 0, 0 };427428switch(param)429{430case ZSTD_c_compressionLevel:431bounds.lowerBound = ZSTD_minCLevel();432bounds.upperBound = ZSTD_maxCLevel();433return bounds;434435case ZSTD_c_windowLog:436bounds.lowerBound = ZSTD_WINDOWLOG_MIN;437bounds.upperBound = ZSTD_WINDOWLOG_MAX;438return bounds;439440case ZSTD_c_hashLog:441bounds.lowerBound = ZSTD_HASHLOG_MIN;442bounds.upperBound = ZSTD_HASHLOG_MAX;443return bounds;444445case ZSTD_c_chainLog:446bounds.lowerBound = ZSTD_CHAINLOG_MIN;447bounds.upperBound = ZSTD_CHAINLOG_MAX;448return bounds;449450case ZSTD_c_searchLog:451bounds.lowerBound = ZSTD_SEARCHLOG_MIN;452bounds.upperBound = ZSTD_SEARCHLOG_MAX;453return bounds;454455case ZSTD_c_minMatch:456bounds.lowerBound = ZSTD_MINMATCH_MIN;457bounds.upperBound = ZSTD_MINMATCH_MAX;458return bounds;459460case ZSTD_c_targetLength:461bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;462bounds.upperBound = ZSTD_TARGETLENGTH_MAX;463return bounds;464465case ZSTD_c_strategy:466bounds.lowerBound = ZSTD_STRATEGY_MIN;467bounds.upperBound = ZSTD_STRATEGY_MAX;468return bounds;469470case ZSTD_c_contentSizeFlag:471bounds.lowerBound = 0;472bounds.upperBound = 1;473return bounds;474475case ZSTD_c_checksumFlag:476bounds.lowerBound = 0;477bounds.upperBound = 1;478return bounds;479480case ZSTD_c_dictIDFlag:481bounds.lowerBound = 0;482bounds.upperBound = 1;483return bounds;484485case ZSTD_c_nbWorkers:486bounds.lowerBound = 0;487#ifdef ZSTD_MULTITHREAD488bounds.upperBound = ZSTDMT_NBWORKERS_MAX;489#else490bounds.upperBound = 0;491#endif492return bounds;493494case ZSTD_c_jobSize:495bounds.lowerBound = 0;496#ifdef ZSTD_MULTITHREAD497bounds.upperBound = ZSTDMT_JOBSIZE_MAX;498#else499bounds.upperBound = 0;500#endif501return bounds;502503case ZSTD_c_overlapLog:504#ifdef ZSTD_MULTITHREAD505bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;506bounds.upperBound = ZSTD_OVERLAPLOG_MAX;507#else508bounds.lowerBound = 0;509bounds.upperBound = 0;510#endif511return bounds;512513case ZSTD_c_enableDedicatedDictSearch:514bounds.lowerBound = 0;515bounds.upperBound = 1;516return bounds;517518case ZSTD_c_enableLongDistanceMatching:519bounds.lowerBound = (int)ZSTD_ps_auto;520bounds.upperBound = (int)ZSTD_ps_disable;521return bounds;522523case ZSTD_c_ldmHashLog:524bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;525bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;526return bounds;527528case ZSTD_c_ldmMinMatch:529bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;530bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;531return bounds;532533case ZSTD_c_ldmBucketSizeLog:534bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;535bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;536return bounds;537538case ZSTD_c_ldmHashRateLog:539bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;540bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;541return bounds;542543/* experimental parameters */544case ZSTD_c_rsyncable:545bounds.lowerBound = 0;546bounds.upperBound = 1;547return bounds;548549case ZSTD_c_forceMaxWindow :550bounds.lowerBound = 0;551bounds.upperBound = 1;552return bounds;553554case ZSTD_c_format:555ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);556bounds.lowerBound = ZSTD_f_zstd1;557bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */558return bounds;559560case ZSTD_c_forceAttachDict:561ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);562bounds.lowerBound = ZSTD_dictDefaultAttach;563bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */564return bounds;565566case ZSTD_c_literalCompressionMode:567ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);568bounds.lowerBound = (int)ZSTD_ps_auto;569bounds.upperBound = (int)ZSTD_ps_disable;570return bounds;571572case ZSTD_c_targetCBlockSize:573bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;574bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;575return bounds;576577case ZSTD_c_srcSizeHint:578bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;579bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;580return bounds;581582case ZSTD_c_stableInBuffer:583case ZSTD_c_stableOutBuffer:584bounds.lowerBound = (int)ZSTD_bm_buffered;585bounds.upperBound = (int)ZSTD_bm_stable;586return bounds;587588case ZSTD_c_blockDelimiters:589bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;590bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;591return bounds;592593case ZSTD_c_validateSequences:594bounds.lowerBound = 0;595bounds.upperBound = 1;596return bounds;597598case ZSTD_c_useBlockSplitter:599bounds.lowerBound = (int)ZSTD_ps_auto;600bounds.upperBound = (int)ZSTD_ps_disable;601return bounds;602603case ZSTD_c_useRowMatchFinder:604bounds.lowerBound = (int)ZSTD_ps_auto;605bounds.upperBound = (int)ZSTD_ps_disable;606return bounds;607608case ZSTD_c_deterministicRefPrefix:609bounds.lowerBound = 0;610bounds.upperBound = 1;611return bounds;612613case ZSTD_c_prefetchCDictTables:614bounds.lowerBound = (int)ZSTD_ps_auto;615bounds.upperBound = (int)ZSTD_ps_disable;616return bounds;617618case ZSTD_c_enableSeqProducerFallback:619bounds.lowerBound = 0;620bounds.upperBound = 1;621return bounds;622623case ZSTD_c_maxBlockSize:624bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;625bounds.upperBound = ZSTD_BLOCKSIZE_MAX;626return bounds;627628case ZSTD_c_searchForExternalRepcodes:629bounds.lowerBound = (int)ZSTD_ps_auto;630bounds.upperBound = (int)ZSTD_ps_disable;631return bounds;632633default:634bounds.error = ERROR(parameter_unsupported);635return bounds;636}637}638639/* ZSTD_cParam_clampBounds:640* Clamps the value into the bounded range.641*/642static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)643{644ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);645if (ZSTD_isError(bounds.error)) return bounds.error;646if (*value < bounds.lowerBound) *value = bounds.lowerBound;647if (*value > bounds.upperBound) *value = bounds.upperBound;648return 0;649}650651#define BOUNDCHECK(cParam, val) { \652RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \653parameter_outOfBound, "Param out of bounds"); \654}655656657static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)658{659switch(param)660{661case ZSTD_c_compressionLevel:662case ZSTD_c_hashLog:663case ZSTD_c_chainLog:664case ZSTD_c_searchLog:665case ZSTD_c_minMatch:666case ZSTD_c_targetLength:667case ZSTD_c_strategy:668return 1;669670case ZSTD_c_format:671case ZSTD_c_windowLog:672case ZSTD_c_contentSizeFlag:673case ZSTD_c_checksumFlag:674case ZSTD_c_dictIDFlag:675case ZSTD_c_forceMaxWindow :676case ZSTD_c_nbWorkers:677case ZSTD_c_jobSize:678case ZSTD_c_overlapLog:679case ZSTD_c_rsyncable:680case ZSTD_c_enableDedicatedDictSearch:681case ZSTD_c_enableLongDistanceMatching:682case ZSTD_c_ldmHashLog:683case ZSTD_c_ldmMinMatch:684case ZSTD_c_ldmBucketSizeLog:685case ZSTD_c_ldmHashRateLog:686case ZSTD_c_forceAttachDict:687case ZSTD_c_literalCompressionMode:688case ZSTD_c_targetCBlockSize:689case ZSTD_c_srcSizeHint:690case ZSTD_c_stableInBuffer:691case ZSTD_c_stableOutBuffer:692case ZSTD_c_blockDelimiters:693case ZSTD_c_validateSequences:694case ZSTD_c_useBlockSplitter:695case ZSTD_c_useRowMatchFinder:696case ZSTD_c_deterministicRefPrefix:697case ZSTD_c_prefetchCDictTables:698case ZSTD_c_enableSeqProducerFallback:699case ZSTD_c_maxBlockSize:700case ZSTD_c_searchForExternalRepcodes:701default:702return 0;703}704}705706size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)707{708DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);709if (cctx->streamStage != zcss_init) {710if (ZSTD_isUpdateAuthorized(param)) {711cctx->cParamsChanged = 1;712} else {713RETURN_ERROR(stage_wrong, "can only set params in cctx init stage");714} }715716switch(param)717{718case ZSTD_c_nbWorkers:719RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,720"MT not compatible with static alloc");721break;722723case ZSTD_c_compressionLevel:724case ZSTD_c_windowLog:725case ZSTD_c_hashLog:726case ZSTD_c_chainLog:727case ZSTD_c_searchLog:728case ZSTD_c_minMatch:729case ZSTD_c_targetLength:730case ZSTD_c_strategy:731case ZSTD_c_ldmHashRateLog:732case ZSTD_c_format:733case ZSTD_c_contentSizeFlag:734case ZSTD_c_checksumFlag:735case ZSTD_c_dictIDFlag:736case ZSTD_c_forceMaxWindow:737case ZSTD_c_forceAttachDict:738case ZSTD_c_literalCompressionMode:739case ZSTD_c_jobSize:740case ZSTD_c_overlapLog:741case ZSTD_c_rsyncable:742case ZSTD_c_enableDedicatedDictSearch:743case ZSTD_c_enableLongDistanceMatching:744case ZSTD_c_ldmHashLog:745case ZSTD_c_ldmMinMatch:746case ZSTD_c_ldmBucketSizeLog:747case ZSTD_c_targetCBlockSize:748case ZSTD_c_srcSizeHint:749case ZSTD_c_stableInBuffer:750case ZSTD_c_stableOutBuffer:751case ZSTD_c_blockDelimiters:752case ZSTD_c_validateSequences:753case ZSTD_c_useBlockSplitter:754case ZSTD_c_useRowMatchFinder:755case ZSTD_c_deterministicRefPrefix:756case ZSTD_c_prefetchCDictTables:757case ZSTD_c_enableSeqProducerFallback:758case ZSTD_c_maxBlockSize:759case ZSTD_c_searchForExternalRepcodes:760break;761762default: RETURN_ERROR(parameter_unsupported, "unknown parameter");763}764return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);765}766767size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,768ZSTD_cParameter param, int value)769{770DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);771switch(param)772{773case ZSTD_c_format :774BOUNDCHECK(ZSTD_c_format, value);775CCtxParams->format = (ZSTD_format_e)value;776return (size_t)CCtxParams->format;777778case ZSTD_c_compressionLevel : {779FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");780if (value == 0)781CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */782else783CCtxParams->compressionLevel = value;784if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;785return 0; /* return type (size_t) cannot represent negative values */786}787788case ZSTD_c_windowLog :789if (value!=0) /* 0 => use default */790BOUNDCHECK(ZSTD_c_windowLog, value);791CCtxParams->cParams.windowLog = (U32)value;792return CCtxParams->cParams.windowLog;793794case ZSTD_c_hashLog :795if (value!=0) /* 0 => use default */796BOUNDCHECK(ZSTD_c_hashLog, value);797CCtxParams->cParams.hashLog = (U32)value;798return CCtxParams->cParams.hashLog;799800case ZSTD_c_chainLog :801if (value!=0) /* 0 => use default */802BOUNDCHECK(ZSTD_c_chainLog, value);803CCtxParams->cParams.chainLog = (U32)value;804return CCtxParams->cParams.chainLog;805806case ZSTD_c_searchLog :807if (value!=0) /* 0 => use default */808BOUNDCHECK(ZSTD_c_searchLog, value);809CCtxParams->cParams.searchLog = (U32)value;810return (size_t)value;811812case ZSTD_c_minMatch :813if (value!=0) /* 0 => use default */814BOUNDCHECK(ZSTD_c_minMatch, value);815CCtxParams->cParams.minMatch = (U32)value;816return CCtxParams->cParams.minMatch;817818case ZSTD_c_targetLength :819BOUNDCHECK(ZSTD_c_targetLength, value);820CCtxParams->cParams.targetLength = (U32)value;821return CCtxParams->cParams.targetLength;822823case ZSTD_c_strategy :824if (value!=0) /* 0 => use default */825BOUNDCHECK(ZSTD_c_strategy, value);826CCtxParams->cParams.strategy = (ZSTD_strategy)value;827return (size_t)CCtxParams->cParams.strategy;828829case ZSTD_c_contentSizeFlag :830/* Content size written in frame header _when known_ (default:1) */831DEBUGLOG(4, "set content size flag = %u", (value!=0));832CCtxParams->fParams.contentSizeFlag = value != 0;833return (size_t)CCtxParams->fParams.contentSizeFlag;834835case ZSTD_c_checksumFlag :836/* A 32-bits content checksum will be calculated and written at end of frame (default:0) */837CCtxParams->fParams.checksumFlag = value != 0;838return (size_t)CCtxParams->fParams.checksumFlag;839840case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */841DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));842CCtxParams->fParams.noDictIDFlag = !value;843return !CCtxParams->fParams.noDictIDFlag;844845case ZSTD_c_forceMaxWindow :846CCtxParams->forceWindow = (value != 0);847return (size_t)CCtxParams->forceWindow;848849case ZSTD_c_forceAttachDict : {850const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;851BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);852CCtxParams->attachDictPref = pref;853return CCtxParams->attachDictPref;854}855856case ZSTD_c_literalCompressionMode : {857const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;858BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm);859CCtxParams->literalCompressionMode = lcm;860return CCtxParams->literalCompressionMode;861}862863case ZSTD_c_nbWorkers :864#ifndef ZSTD_MULTITHREAD865RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");866return 0;867#else868FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");869CCtxParams->nbWorkers = value;870return CCtxParams->nbWorkers;871#endif872873case ZSTD_c_jobSize :874#ifndef ZSTD_MULTITHREAD875RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");876return 0;877#else878/* Adjust to the minimum non-default value. */879if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)880value = ZSTDMT_JOBSIZE_MIN;881FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");882assert(value >= 0);883CCtxParams->jobSize = value;884return CCtxParams->jobSize;885#endif886887case ZSTD_c_overlapLog :888#ifndef ZSTD_MULTITHREAD889RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");890return 0;891#else892FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");893CCtxParams->overlapLog = value;894return CCtxParams->overlapLog;895#endif896897case ZSTD_c_rsyncable :898#ifndef ZSTD_MULTITHREAD899RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");900return 0;901#else902FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");903CCtxParams->rsyncable = value;904return CCtxParams->rsyncable;905#endif906907case ZSTD_c_enableDedicatedDictSearch :908CCtxParams->enableDedicatedDictSearch = (value!=0);909return (size_t)CCtxParams->enableDedicatedDictSearch;910911case ZSTD_c_enableLongDistanceMatching :912BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value);913CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;914return CCtxParams->ldmParams.enableLdm;915916case ZSTD_c_ldmHashLog :917if (value!=0) /* 0 ==> auto */918BOUNDCHECK(ZSTD_c_ldmHashLog, value);919CCtxParams->ldmParams.hashLog = (U32)value;920return CCtxParams->ldmParams.hashLog;921922case ZSTD_c_ldmMinMatch :923if (value!=0) /* 0 ==> default */924BOUNDCHECK(ZSTD_c_ldmMinMatch, value);925CCtxParams->ldmParams.minMatchLength = (U32)value;926return CCtxParams->ldmParams.minMatchLength;927928case ZSTD_c_ldmBucketSizeLog :929if (value!=0) /* 0 ==> default */930BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);931CCtxParams->ldmParams.bucketSizeLog = (U32)value;932return CCtxParams->ldmParams.bucketSizeLog;933934case ZSTD_c_ldmHashRateLog :935if (value!=0) /* 0 ==> default */936BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);937CCtxParams->ldmParams.hashRateLog = (U32)value;938return CCtxParams->ldmParams.hashRateLog;939940case ZSTD_c_targetCBlockSize :941if (value!=0) /* 0 ==> default */942BOUNDCHECK(ZSTD_c_targetCBlockSize, value);943CCtxParams->targetCBlockSize = (U32)value;944return CCtxParams->targetCBlockSize;945946case ZSTD_c_srcSizeHint :947if (value!=0) /* 0 ==> default */948BOUNDCHECK(ZSTD_c_srcSizeHint, value);949CCtxParams->srcSizeHint = value;950return (size_t)CCtxParams->srcSizeHint;951952case ZSTD_c_stableInBuffer:953BOUNDCHECK(ZSTD_c_stableInBuffer, value);954CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;955return CCtxParams->inBufferMode;956957case ZSTD_c_stableOutBuffer:958BOUNDCHECK(ZSTD_c_stableOutBuffer, value);959CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;960return CCtxParams->outBufferMode;961962case ZSTD_c_blockDelimiters:963BOUNDCHECK(ZSTD_c_blockDelimiters, value);964CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;965return CCtxParams->blockDelimiters;966967case ZSTD_c_validateSequences:968BOUNDCHECK(ZSTD_c_validateSequences, value);969CCtxParams->validateSequences = value;970return CCtxParams->validateSequences;971972case ZSTD_c_useBlockSplitter:973BOUNDCHECK(ZSTD_c_useBlockSplitter, value);974CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;975return CCtxParams->useBlockSplitter;976977case ZSTD_c_useRowMatchFinder:978BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);979CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;980return CCtxParams->useRowMatchFinder;981982case ZSTD_c_deterministicRefPrefix:983BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);984CCtxParams->deterministicRefPrefix = !!value;985return CCtxParams->deterministicRefPrefix;986987case ZSTD_c_prefetchCDictTables:988BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);989CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;990return CCtxParams->prefetchCDictTables;991992case ZSTD_c_enableSeqProducerFallback:993BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);994CCtxParams->enableMatchFinderFallback = value;995return CCtxParams->enableMatchFinderFallback;996997case ZSTD_c_maxBlockSize:998if (value!=0) /* 0 ==> default */999BOUNDCHECK(ZSTD_c_maxBlockSize, value);1000CCtxParams->maxBlockSize = value;1001return CCtxParams->maxBlockSize;10021003case ZSTD_c_searchForExternalRepcodes:1004BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value);1005CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value;1006return CCtxParams->searchForExternalRepcodes;10071008default: RETURN_ERROR(parameter_unsupported, "unknown parameter");1009}1010}10111012size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)1013{1014return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);1015}10161017size_t ZSTD_CCtxParams_getParameter(1018ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)1019{1020switch(param)1021{1022case ZSTD_c_format :1023*value = CCtxParams->format;1024break;1025case ZSTD_c_compressionLevel :1026*value = CCtxParams->compressionLevel;1027break;1028case ZSTD_c_windowLog :1029*value = (int)CCtxParams->cParams.windowLog;1030break;1031case ZSTD_c_hashLog :1032*value = (int)CCtxParams->cParams.hashLog;1033break;1034case ZSTD_c_chainLog :1035*value = (int)CCtxParams->cParams.chainLog;1036break;1037case ZSTD_c_searchLog :1038*value = CCtxParams->cParams.searchLog;1039break;1040case ZSTD_c_minMatch :1041*value = CCtxParams->cParams.minMatch;1042break;1043case ZSTD_c_targetLength :1044*value = CCtxParams->cParams.targetLength;1045break;1046case ZSTD_c_strategy :1047*value = (unsigned)CCtxParams->cParams.strategy;1048break;1049case ZSTD_c_contentSizeFlag :1050*value = CCtxParams->fParams.contentSizeFlag;1051break;1052case ZSTD_c_checksumFlag :1053*value = CCtxParams->fParams.checksumFlag;1054break;1055case ZSTD_c_dictIDFlag :1056*value = !CCtxParams->fParams.noDictIDFlag;1057break;1058case ZSTD_c_forceMaxWindow :1059*value = CCtxParams->forceWindow;1060break;1061case ZSTD_c_forceAttachDict :1062*value = CCtxParams->attachDictPref;1063break;1064case ZSTD_c_literalCompressionMode :1065*value = CCtxParams->literalCompressionMode;1066break;1067case ZSTD_c_nbWorkers :1068#ifndef ZSTD_MULTITHREAD1069assert(CCtxParams->nbWorkers == 0);1070#endif1071*value = CCtxParams->nbWorkers;1072break;1073case ZSTD_c_jobSize :1074#ifndef ZSTD_MULTITHREAD1075RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");1076#else1077assert(CCtxParams->jobSize <= INT_MAX);1078*value = (int)CCtxParams->jobSize;1079break;1080#endif1081case ZSTD_c_overlapLog :1082#ifndef ZSTD_MULTITHREAD1083RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");1084#else1085*value = CCtxParams->overlapLog;1086break;1087#endif1088case ZSTD_c_rsyncable :1089#ifndef ZSTD_MULTITHREAD1090RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");1091#else1092*value = CCtxParams->rsyncable;1093break;1094#endif1095case ZSTD_c_enableDedicatedDictSearch :1096*value = CCtxParams->enableDedicatedDictSearch;1097break;1098case ZSTD_c_enableLongDistanceMatching :1099*value = CCtxParams->ldmParams.enableLdm;1100break;1101case ZSTD_c_ldmHashLog :1102*value = CCtxParams->ldmParams.hashLog;1103break;1104case ZSTD_c_ldmMinMatch :1105*value = CCtxParams->ldmParams.minMatchLength;1106break;1107case ZSTD_c_ldmBucketSizeLog :1108*value = CCtxParams->ldmParams.bucketSizeLog;1109break;1110case ZSTD_c_ldmHashRateLog :1111*value = CCtxParams->ldmParams.hashRateLog;1112break;1113case ZSTD_c_targetCBlockSize :1114*value = (int)CCtxParams->targetCBlockSize;1115break;1116case ZSTD_c_srcSizeHint :1117*value = (int)CCtxParams->srcSizeHint;1118break;1119case ZSTD_c_stableInBuffer :1120*value = (int)CCtxParams->inBufferMode;1121break;1122case ZSTD_c_stableOutBuffer :1123*value = (int)CCtxParams->outBufferMode;1124break;1125case ZSTD_c_blockDelimiters :1126*value = (int)CCtxParams->blockDelimiters;1127break;1128case ZSTD_c_validateSequences :1129*value = (int)CCtxParams->validateSequences;1130break;1131case ZSTD_c_useBlockSplitter :1132*value = (int)CCtxParams->useBlockSplitter;1133break;1134case ZSTD_c_useRowMatchFinder :1135*value = (int)CCtxParams->useRowMatchFinder;1136break;1137case ZSTD_c_deterministicRefPrefix:1138*value = (int)CCtxParams->deterministicRefPrefix;1139break;1140case ZSTD_c_prefetchCDictTables:1141*value = (int)CCtxParams->prefetchCDictTables;1142break;1143case ZSTD_c_enableSeqProducerFallback:1144*value = CCtxParams->enableMatchFinderFallback;1145break;1146case ZSTD_c_maxBlockSize:1147*value = (int)CCtxParams->maxBlockSize;1148break;1149case ZSTD_c_searchForExternalRepcodes:1150*value = (int)CCtxParams->searchForExternalRepcodes;1151break;1152default: RETURN_ERROR(parameter_unsupported, "unknown parameter");1153}1154return 0;1155}11561157/** ZSTD_CCtx_setParametersUsingCCtxParams() :1158* just applies `params` into `cctx`1159* no action is performed, parameters are merely stored.1160* If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.1161* This is possible even if a compression is ongoing.1162* In which case, new parameters will be applied on the fly, starting with next compression job.1163*/1164size_t ZSTD_CCtx_setParametersUsingCCtxParams(1165ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)1166{1167DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");1168RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1169"The context is in the wrong stage!");1170RETURN_ERROR_IF(cctx->cdict, stage_wrong,1171"Can't override parameters with cdict attached (some must "1172"be inherited from the cdict).");11731174cctx->requestedParams = *params;1175return 0;1176}11771178size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)1179{1180ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */);1181DEBUGLOG(4, "ZSTD_CCtx_setCParams");1182/* only update if all parameters are valid */1183FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");1184FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), "");1185FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, cparams.chainLog), "");1186FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), "");1187FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), "");1188FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), "");1189FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), "");1190FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), "");1191return 0;1192}11931194size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams)1195{1196ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */);1197DEBUGLOG(4, "ZSTD_CCtx_setFParams");1198FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), "");1199FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), "");1200FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), "");1201return 0;1202}12031204size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params)1205{1206DEBUGLOG(4, "ZSTD_CCtx_setParams");1207/* First check cParams, because we want to update all or none. */1208FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");1209/* Next set fParams, because this could fail if the cctx isn't in init stage. */1210FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), "");1211/* Finally set cParams, which should succeed. */1212FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), "");1213return 0;1214}12151216size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)1217{1218DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize);1219RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1220"Can't set pledgedSrcSize when not in init stage.");1221cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;1222return 0;1223}12241225static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(1226int const compressionLevel,1227size_t const dictSize);1228static int ZSTD_dedicatedDictSearch_isSupported(1229const ZSTD_compressionParameters* cParams);1230static void ZSTD_dedicatedDictSearch_revertCParams(1231ZSTD_compressionParameters* cParams);12321233/**1234* Initializes the local dictionary using requested parameters.1235* NOTE: Initialization does not employ the pledged src size,1236* because the dictionary may be used for multiple compressions.1237*/1238static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)1239{1240ZSTD_localDict* const dl = &cctx->localDict;1241if (dl->dict == NULL) {1242/* No local dictionary. */1243assert(dl->dictBuffer == NULL);1244assert(dl->cdict == NULL);1245assert(dl->dictSize == 0);1246return 0;1247}1248if (dl->cdict != NULL) {1249/* Local dictionary already initialized. */1250assert(cctx->cdict == dl->cdict);1251return 0;1252}1253assert(dl->dictSize > 0);1254assert(cctx->cdict == NULL);1255assert(cctx->prefixDict.dict == NULL);12561257dl->cdict = ZSTD_createCDict_advanced2(1258dl->dict,1259dl->dictSize,1260ZSTD_dlm_byRef,1261dl->dictContentType,1262&cctx->requestedParams,1263cctx->customMem);1264RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");1265cctx->cdict = dl->cdict;1266return 0;1267}12681269size_t ZSTD_CCtx_loadDictionary_advanced(1270ZSTD_CCtx* cctx,1271const void* dict, size_t dictSize,1272ZSTD_dictLoadMethod_e dictLoadMethod,1273ZSTD_dictContentType_e dictContentType)1274{1275DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);1276RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1277"Can't load a dictionary when cctx is not in init stage.");1278ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */1279if (dict == NULL || dictSize == 0) /* no dictionary */1280return 0;1281if (dictLoadMethod == ZSTD_dlm_byRef) {1282cctx->localDict.dict = dict;1283} else {1284/* copy dictionary content inside CCtx to own its lifetime */1285void* dictBuffer;1286RETURN_ERROR_IF(cctx->staticSize, memory_allocation,1287"static CCtx can't allocate for an internal copy of dictionary");1288dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);1289RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation,1290"allocation failed for dictionary content");1291ZSTD_memcpy(dictBuffer, dict, dictSize);1292cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */1293cctx->localDict.dict = dictBuffer; /* read-only reference */1294}1295cctx->localDict.dictSize = dictSize;1296cctx->localDict.dictContentType = dictContentType;1297return 0;1298}12991300size_t ZSTD_CCtx_loadDictionary_byReference(1301ZSTD_CCtx* cctx, const void* dict, size_t dictSize)1302{1303return ZSTD_CCtx_loadDictionary_advanced(1304cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);1305}13061307size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)1308{1309return ZSTD_CCtx_loadDictionary_advanced(1310cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);1311}131213131314size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)1315{1316RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1317"Can't ref a dict when ctx not in init stage.");1318/* Free the existing local cdict (if any) to save memory. */1319ZSTD_clearAllDicts(cctx);1320cctx->cdict = cdict;1321return 0;1322}13231324size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)1325{1326RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1327"Can't ref a pool when ctx not in init stage.");1328cctx->pool = pool;1329return 0;1330}13311332size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)1333{1334return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);1335}13361337size_t ZSTD_CCtx_refPrefix_advanced(1338ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)1339{1340RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1341"Can't ref a prefix when ctx not in init stage.");1342ZSTD_clearAllDicts(cctx);1343if (prefix != NULL && prefixSize > 0) {1344cctx->prefixDict.dict = prefix;1345cctx->prefixDict.dictSize = prefixSize;1346cctx->prefixDict.dictContentType = dictContentType;1347}1348return 0;1349}13501351/*! ZSTD_CCtx_reset() :1352* Also dumps dictionary */1353size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)1354{1355if ( (reset == ZSTD_reset_session_only)1356|| (reset == ZSTD_reset_session_and_parameters) ) {1357cctx->streamStage = zcss_init;1358cctx->pledgedSrcSizePlusOne = 0;1359}1360if ( (reset == ZSTD_reset_parameters)1361|| (reset == ZSTD_reset_session_and_parameters) ) {1362RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1363"Reset parameters is only possible during init stage.");1364ZSTD_clearAllDicts(cctx);1365ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));1366return ZSTD_CCtxParams_reset(&cctx->requestedParams);1367}1368return 0;1369}137013711372/** ZSTD_checkCParams() :1373control CParam values remain within authorized range.1374@return : 0, or an error code if one value is beyond authorized range */1375size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)1376{1377BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);1378BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog);1379BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog);1380BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);1381BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch);1382BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);1383BOUNDCHECK(ZSTD_c_strategy, cParams.strategy);1384return 0;1385}13861387/** ZSTD_clampCParams() :1388* make CParam values within valid range.1389* @return : valid CParams */1390static ZSTD_compressionParameters1391ZSTD_clampCParams(ZSTD_compressionParameters cParams)1392{1393# define CLAMP_TYPE(cParam, val, type) { \1394ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \1395if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \1396else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \1397}1398# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)1399CLAMP(ZSTD_c_windowLog, cParams.windowLog);1400CLAMP(ZSTD_c_chainLog, cParams.chainLog);1401CLAMP(ZSTD_c_hashLog, cParams.hashLog);1402CLAMP(ZSTD_c_searchLog, cParams.searchLog);1403CLAMP(ZSTD_c_minMatch, cParams.minMatch);1404CLAMP(ZSTD_c_targetLength,cParams.targetLength);1405CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);1406return cParams;1407}14081409/** ZSTD_cycleLog() :1410* condition for correct operation : hashLog > 1 */1411U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)1412{1413U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);1414return hashLog - btScale;1415}14161417/** ZSTD_dictAndWindowLog() :1418* Returns an adjusted window log that is large enough to fit the source and the dictionary.1419* The zstd format says that the entire dictionary is valid if one byte of the dictionary1420* is within the window. So the hashLog and chainLog should be large enough to reference both1421* the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing1422* the hashLog and windowLog.1423* NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.1424*/1425static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)1426{1427const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;1428/* No dictionary ==> No change */1429if (dictSize == 0) {1430return windowLog;1431}1432assert(windowLog <= ZSTD_WINDOWLOG_MAX);1433assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */1434{1435U64 const windowSize = 1ULL << windowLog;1436U64 const dictAndWindowSize = dictSize + windowSize;1437/* If the window size is already large enough to fit both the source and the dictionary1438* then just use the window size. Otherwise adjust so that it fits the dictionary and1439* the window.1440*/1441if (windowSize >= dictSize + srcSize) {1442return windowLog; /* Window size large enough already */1443} else if (dictAndWindowSize >= maxWindowSize) {1444return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */1445} else {1446return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;1447}1448}1449}14501451/** ZSTD_adjustCParams_internal() :1452* optimize `cPar` for a specified input (`srcSize` and `dictSize`).1453* mostly downsize to reduce memory consumption and initialization latency.1454* `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.1455* `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.1456* note : `srcSize==0` means 0!1457* condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */1458static ZSTD_compressionParameters1459ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,1460unsigned long long srcSize,1461size_t dictSize,1462ZSTD_cParamMode_e mode,1463ZSTD_paramSwitch_e useRowMatchFinder)1464{1465const U64 minSrcSize = 513; /* (1<<9) + 1 */1466const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);1467assert(ZSTD_checkCParams(cPar)==0);14681469switch (mode) {1470case ZSTD_cpm_unknown:1471case ZSTD_cpm_noAttachDict:1472/* If we don't know the source size, don't make any1473* assumptions about it. We will already have selected1474* smaller parameters if a dictionary is in use.1475*/1476break;1477case ZSTD_cpm_createCDict:1478/* Assume a small source size when creating a dictionary1479* with an unknown source size.1480*/1481if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)1482srcSize = minSrcSize;1483break;1484case ZSTD_cpm_attachDict:1485/* Dictionary has its own dedicated parameters which have1486* already been selected. We are selecting parameters1487* for only the source.1488*/1489dictSize = 0;1490break;1491default:1492assert(0);1493break;1494}14951496/* resize windowLog if input is small enough, to use less memory */1497if ( (srcSize <= maxWindowResize)1498&& (dictSize <= maxWindowResize) ) {1499U32 const tSize = (U32)(srcSize + dictSize);1500static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;1501U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :1502ZSTD_highbit32(tSize-1) + 1;1503if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;1504}1505if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {1506U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);1507U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);1508if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;1509if (cycleLog > dictAndWindowLog)1510cPar.chainLog -= (cycleLog - dictAndWindowLog);1511}15121513if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)1514cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */15151516/* We can't use more than 32 bits of hash in total, so that means that we require:1517* (hashLog + 8) <= 32 && (chainLog + 8) <= 321518*/1519if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {1520U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;1521if (cPar.hashLog > maxShortCacheHashLog) {1522cPar.hashLog = maxShortCacheHashLog;1523}1524if (cPar.chainLog > maxShortCacheHashLog) {1525cPar.chainLog = maxShortCacheHashLog;1526}1527}152815291530/* At this point, we aren't 100% sure if we are using the row match finder.1531* Unless it is explicitly disabled, conservatively assume that it is enabled.1532* In this case it will only be disabled for small sources, so shrinking the1533* hash log a little bit shouldn't result in any ratio loss.1534*/1535if (useRowMatchFinder == ZSTD_ps_auto)1536useRowMatchFinder = ZSTD_ps_enable;15371538/* We can't hash more than 32-bits in total. So that means that we require:1539* (hashLog - rowLog + 8) <= 321540*/1541if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {1542/* Switch to 32-entry rows if searchLog is 5 (or more) */1543U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);1544U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;1545U32 const maxHashLog = maxRowHashLog + rowLog;1546assert(cPar.hashLog >= rowLog);1547if (cPar.hashLog > maxHashLog) {1548cPar.hashLog = maxHashLog;1549}1550}15511552return cPar;1553}15541555ZSTD_compressionParameters1556ZSTD_adjustCParams(ZSTD_compressionParameters cPar,1557unsigned long long srcSize,1558size_t dictSize)1559{1560cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */1561if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;1562return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);1563}15641565static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);1566static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);15671568static void ZSTD_overrideCParams(1569ZSTD_compressionParameters* cParams,1570const ZSTD_compressionParameters* overrides)1571{1572if (overrides->windowLog) cParams->windowLog = overrides->windowLog;1573if (overrides->hashLog) cParams->hashLog = overrides->hashLog;1574if (overrides->chainLog) cParams->chainLog = overrides->chainLog;1575if (overrides->searchLog) cParams->searchLog = overrides->searchLog;1576if (overrides->minMatch) cParams->minMatch = overrides->minMatch;1577if (overrides->targetLength) cParams->targetLength = overrides->targetLength;1578if (overrides->strategy) cParams->strategy = overrides->strategy;1579}15801581ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(1582const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)1583{1584ZSTD_compressionParameters cParams;1585if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {1586srcSizeHint = CCtxParams->srcSizeHint;1587}1588cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);1589if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;1590ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);1591assert(!ZSTD_checkCParams(cParams));1592/* srcSizeHint == 0 means 0 */1593return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);1594}15951596static size_t1597ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,1598const ZSTD_paramSwitch_e useRowMatchFinder,1599const U32 enableDedicatedDictSearch,1600const U32 forCCtx)1601{1602/* chain table size should be 0 for fast or row-hash strategies */1603size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)1604? ((size_t)1 << cParams->chainLog)1605: 0;1606size_t const hSize = ((size_t)1) << cParams->hashLog;1607U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;1608size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;1609/* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't1610* surrounded by redzones in ASAN. */1611size_t const tableSpace = chainSize * sizeof(U32)1612+ hSize * sizeof(U32)1613+ h3Size * sizeof(U32);1614size_t const optPotentialSpace =1615ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))1616+ ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))1617+ ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))1618+ ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))1619+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))1620+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));1621size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)1622? ZSTD_cwksp_aligned_alloc_size(hSize)1623: 0;1624size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))1625? optPotentialSpace1626: 0;1627size_t const slackSpace = ZSTD_cwksp_slack_space_required();16281629/* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */1630ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);1631assert(useRowMatchFinder != ZSTD_ps_auto);16321633DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",1634(U32)chainSize, (U32)hSize, (U32)h3Size);1635return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;1636}16371638/* Helper function for calculating memory requirements.1639* Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */1640static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {1641U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4;1642return blockSize / divider;1643}16441645static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(1646const ZSTD_compressionParameters* cParams,1647const ldmParams_t* ldmParams,1648const int isStatic,1649const ZSTD_paramSwitch_e useRowMatchFinder,1650const size_t buffInSize,1651const size_t buffOutSize,1652const U64 pledgedSrcSize,1653int useSequenceProducer,1654size_t maxBlockSize)1655{1656size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);1657size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);1658size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);1659size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)1660+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))1661+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));1662size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);1663size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));1664size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);16651666size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);1667size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);1668size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?1669ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;167016711672size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)1673+ ZSTD_cwksp_alloc_size(buffOutSize);16741675size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;16761677size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);1678size_t const externalSeqSpace = useSequenceProducer1679? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))1680: 0;16811682size_t const neededSpace =1683cctxSpace +1684entropySpace +1685blockStateSpace +1686ldmSpace +1687ldmSeqSpace +1688matchStateSize +1689tokenSpace +1690bufferSpace +1691externalSeqSpace;16921693DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);1694return neededSpace;1695}16961697size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)1698{1699ZSTD_compressionParameters const cParams =1700ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);1701ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,1702&cParams);17031704RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");1705/* estimateCCtxSize is for one-shot compression. So no buffers should1706* be needed. However, we still allocate two 0-sized buffers, which can1707* take space under ASAN. */1708return ZSTD_estimateCCtxSize_usingCCtxParams_internal(1709&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);1710}17111712size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)1713{1714ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);1715if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {1716/* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */1717size_t noRowCCtxSize;1718size_t rowCCtxSize;1719initialParams.useRowMatchFinder = ZSTD_ps_disable;1720noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);1721initialParams.useRowMatchFinder = ZSTD_ps_enable;1722rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);1723return MAX(noRowCCtxSize, rowCCtxSize);1724} else {1725return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);1726}1727}17281729static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)1730{1731int tier = 0;1732size_t largestSize = 0;1733static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};1734for (; tier < 4; ++tier) {1735/* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */1736ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);1737largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);1738}1739return largestSize;1740}17411742size_t ZSTD_estimateCCtxSize(int compressionLevel)1743{1744int level;1745size_t memBudget = 0;1746for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {1747/* Ensure monotonically increasing memory usage as compression level increases */1748size_t const newMB = ZSTD_estimateCCtxSize_internal(level);1749if (newMB > memBudget) memBudget = newMB;1750}1751return memBudget;1752}17531754size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)1755{1756RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");1757{ ZSTD_compressionParameters const cParams =1758ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);1759size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog);1760size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)1761? ((size_t)1 << cParams.windowLog) + blockSize1762: 0;1763size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)1764? ZSTD_compressBound(blockSize) + 11765: 0;1766ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, ¶ms->cParams);17671768return ZSTD_estimateCCtxSize_usingCCtxParams_internal(1769&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,1770ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);1771}1772}17731774size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)1775{1776ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);1777if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {1778/* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */1779size_t noRowCCtxSize;1780size_t rowCCtxSize;1781initialParams.useRowMatchFinder = ZSTD_ps_disable;1782noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);1783initialParams.useRowMatchFinder = ZSTD_ps_enable;1784rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);1785return MAX(noRowCCtxSize, rowCCtxSize);1786} else {1787return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);1788}1789}17901791static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)1792{1793ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);1794return ZSTD_estimateCStreamSize_usingCParams(cParams);1795}17961797size_t ZSTD_estimateCStreamSize(int compressionLevel)1798{1799int level;1800size_t memBudget = 0;1801for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {1802size_t const newMB = ZSTD_estimateCStreamSize_internal(level);1803if (newMB > memBudget) memBudget = newMB;1804}1805return memBudget;1806}18071808/* ZSTD_getFrameProgression():1809* tells how much data has been consumed (input) and produced (output) for current frame.1810* able to count progression inside worker threads (non-blocking mode).1811*/1812ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)1813{1814#ifdef ZSTD_MULTITHREAD1815if (cctx->appliedParams.nbWorkers > 0) {1816return ZSTDMT_getFrameProgression(cctx->mtctx);1817}1818#endif1819{ ZSTD_frameProgression fp;1820size_t const buffered = (cctx->inBuff == NULL) ? 0 :1821cctx->inBuffPos - cctx->inToCompress;1822if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);1823assert(buffered <= ZSTD_BLOCKSIZE_MAX);1824fp.ingested = cctx->consumedSrcSize + buffered;1825fp.consumed = cctx->consumedSrcSize;1826fp.produced = cctx->producedCSize;1827fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */1828fp.currentJobID = 0;1829fp.nbActiveWorkers = 0;1830return fp;1831} }18321833/*! ZSTD_toFlushNow()1834* Only useful for multithreading scenarios currently (nbWorkers >= 1).1835*/1836size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)1837{1838#ifdef ZSTD_MULTITHREAD1839if (cctx->appliedParams.nbWorkers > 0) {1840return ZSTDMT_toFlushNow(cctx->mtctx);1841}1842#endif1843(void)cctx;1844return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */1845}18461847static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,1848ZSTD_compressionParameters cParams2)1849{1850(void)cParams1;1851(void)cParams2;1852assert(cParams1.windowLog == cParams2.windowLog);1853assert(cParams1.chainLog == cParams2.chainLog);1854assert(cParams1.hashLog == cParams2.hashLog);1855assert(cParams1.searchLog == cParams2.searchLog);1856assert(cParams1.minMatch == cParams2.minMatch);1857assert(cParams1.targetLength == cParams2.targetLength);1858assert(cParams1.strategy == cParams2.strategy);1859}18601861void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)1862{1863int i;1864for (i = 0; i < ZSTD_REP_NUM; ++i)1865bs->rep[i] = repStartValue[i];1866bs->entropy.huf.repeatMode = HUF_repeat_none;1867bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;1868bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;1869bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;1870}18711872/*! ZSTD_invalidateMatchState()1873* Invalidate all the matches in the match finder tables.1874* Requires nextSrc and base to be set (can be NULL).1875*/1876static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)1877{1878ZSTD_window_clear(&ms->window);18791880ms->nextToUpdate = ms->window.dictLimit;1881ms->loadedDictEnd = 0;1882ms->opt.litLengthSum = 0; /* force reset of btopt stats */1883ms->dictMatchState = NULL;1884}18851886/**1887* Controls, for this matchState reset, whether the tables need to be cleared /1888* prepared for the coming compression (ZSTDcrp_makeClean), or whether the1889* tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a1890* subsequent operation will overwrite the table space anyways (e.g., copying1891* the matchState contents in from a CDict).1892*/1893typedef enum {1894ZSTDcrp_makeClean,1895ZSTDcrp_leaveDirty1896} ZSTD_compResetPolicy_e;18971898/**1899* Controls, for this matchState reset, whether indexing can continue where it1900* left off (ZSTDirp_continue), or whether it needs to be restarted from zero1901* (ZSTDirp_reset).1902*/1903typedef enum {1904ZSTDirp_continue,1905ZSTDirp_reset1906} ZSTD_indexResetPolicy_e;19071908typedef enum {1909ZSTD_resetTarget_CDict,1910ZSTD_resetTarget_CCtx1911} ZSTD_resetTarget_e;19121913/* Mixes bits in a 64 bits in a value, based on XXH3_rrmxmx */1914static U64 ZSTD_bitmix(U64 val, U64 len) {1915val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);1916val *= 0x9FB21C651E98DF25ULL;1917val ^= (val >> 35) + len ;1918val *= 0x9FB21C651E98DF25ULL;1919return val ^ (val >> 28);1920}19211922/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */1923static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {1924ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);1925}19261927static size_t1928ZSTD_reset_matchState(ZSTD_matchState_t* ms,1929ZSTD_cwksp* ws,1930const ZSTD_compressionParameters* cParams,1931const ZSTD_paramSwitch_e useRowMatchFinder,1932const ZSTD_compResetPolicy_e crp,1933const ZSTD_indexResetPolicy_e forceResetIndex,1934const ZSTD_resetTarget_e forWho)1935{1936/* disable chain table allocation for fast or row-based strategies */1937size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,1938ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))1939? ((size_t)1 << cParams->chainLog)1940: 0;1941size_t const hSize = ((size_t)1) << cParams->hashLog;1942U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;1943size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;19441945DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);1946assert(useRowMatchFinder != ZSTD_ps_auto);1947if (forceResetIndex == ZSTDirp_reset) {1948ZSTD_window_init(&ms->window);1949ZSTD_cwksp_mark_tables_dirty(ws);1950}19511952ms->hashLog3 = hashLog3;1953ms->lazySkipping = 0;19541955ZSTD_invalidateMatchState(ms);19561957assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */19581959ZSTD_cwksp_clear_tables(ws);19601961DEBUGLOG(5, "reserving table space");1962/* table Space */1963ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));1964ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));1965ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));1966RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,1967"failed a workspace allocation in ZSTD_reset_matchState");19681969DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);1970if (crp!=ZSTDcrp_leaveDirty) {1971/* reset tables only */1972ZSTD_cwksp_clean_tables(ws);1973}19741975if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {1976/* Row match finder needs an additional table of hashes ("tags") */1977size_t const tagTableSize = hSize;1978/* We want to generate a new salt in case we reset a Cctx, but we always want to use1979* 0 when we reset a Cdict */1980if(forWho == ZSTD_resetTarget_CCtx) {1981ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);1982ZSTD_advanceHashSalt(ms);1983} else {1984/* When we are not salting we want to always memset the memory */1985ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);1986ZSTD_memset(ms->tagTable, 0, tagTableSize);1987ms->hashSalt = 0;1988}1989{ /* Switch to 32-entry rows if searchLog is 5 (or more) */1990U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);1991assert(cParams->hashLog >= rowLog);1992ms->rowHashLog = cParams->hashLog - rowLog;1993}1994}19951996/* opt parser space */1997if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {1998DEBUGLOG(4, "reserving optimal parser space");1999ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));2000ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));2001ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));2002ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));2003ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));2004ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));2005}20062007ms->cParams = *cParams;20082009RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,2010"failed a workspace allocation in ZSTD_reset_matchState");2011return 0;2012}20132014/* ZSTD_indexTooCloseToMax() :2015* minor optimization : prefer memset() rather than reduceIndex()2016* which is measurably slow in some circumstances (reported for Visual Studio).2017* Works when re-using a context for a lot of smallish inputs :2018* if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,2019* memset() will be triggered before reduceIndex().2020*/2021#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)2022static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)2023{2024return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);2025}20262027/** ZSTD_dictTooBig():2028* When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in2029* one go generically. So we ensure that in that case we reset the tables to zero,2030* so that we can load as much of the dictionary as possible.2031*/2032static int ZSTD_dictTooBig(size_t const loadedDictSize)2033{2034return loadedDictSize > ZSTD_CHUNKSIZE_MAX;2035}20362037/*! ZSTD_resetCCtx_internal() :2038* @param loadedDictSize The size of the dictionary to be loaded2039* into the context, if any. If no dictionary is used, or the2040* dictionary is being attached / copied, then pass 0.2041* note : `params` are assumed fully validated at this stage.2042*/2043static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,2044ZSTD_CCtx_params const* params,2045U64 const pledgedSrcSize,2046size_t const loadedDictSize,2047ZSTD_compResetPolicy_e const crp,2048ZSTD_buffered_policy_e const zbuff)2049{2050ZSTD_cwksp* const ws = &zc->workspace;2051DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",2052(U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);2053assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));20542055zc->isFirstBlock = 1;20562057/* Set applied params early so we can modify them for LDM,2058* and point params at the applied params.2059*/2060zc->appliedParams = *params;2061params = &zc->appliedParams;20622063assert(params->useRowMatchFinder != ZSTD_ps_auto);2064assert(params->useBlockSplitter != ZSTD_ps_auto);2065assert(params->ldmParams.enableLdm != ZSTD_ps_auto);2066assert(params->maxBlockSize != 0);2067if (params->ldmParams.enableLdm == ZSTD_ps_enable) {2068/* Adjust long distance matching parameters */2069ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, ¶ms->cParams);2070assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);2071assert(params->ldmParams.hashRateLog < 32);2072}20732074{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));2075size_t const blockSize = MIN(params->maxBlockSize, windowSize);2076size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);2077size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)2078? ZSTD_compressBound(blockSize) + 12079: 0;2080size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)2081? windowSize + blockSize2082: 0;2083size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);20842085int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);2086int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);2087ZSTD_indexResetPolicy_e needsIndexReset =2088(indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;20892090size_t const neededSpace =2091ZSTD_estimateCCtxSize_usingCCtxParams_internal(2092¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,2093buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);2094int resizeWorkspace;20952096FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");20972098if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);20992100{ /* Check if workspace is large enough, alloc a new one if needed */2101int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;2102int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);2103resizeWorkspace = workspaceTooSmall || workspaceWasteful;2104DEBUGLOG(4, "Need %zu B workspace", neededSpace);2105DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);21062107if (resizeWorkspace) {2108DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",2109ZSTD_cwksp_sizeof(ws) >> 10,2110neededSpace >> 10);21112112RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");21132114needsIndexReset = ZSTDirp_reset;21152116ZSTD_cwksp_free(ws, zc->customMem);2117FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");21182119DEBUGLOG(5, "reserving object space");2120/* Statically sized space.2121* entropyWorkspace never moves,2122* though prev/next block swap places */2123assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));2124zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));2125RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");2126zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));2127RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");2128zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);2129RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");2130} }21312132ZSTD_cwksp_clear(ws);21332134/* init params */2135zc->blockState.matchState.cParams = params->cParams;2136zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;2137zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;2138zc->consumedSrcSize = 0;2139zc->producedCSize = 0;2140if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)2141zc->appliedParams.fParams.contentSizeFlag = 0;2142DEBUGLOG(4, "pledged content size : %u ; flag : %u",2143(unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);2144zc->blockSize = blockSize;21452146XXH64_reset(&zc->xxhState, 0);2147zc->stage = ZSTDcs_init;2148zc->dictID = 0;2149zc->dictContentSize = 0;21502151ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);21522153FORWARD_IF_ERROR(ZSTD_reset_matchState(2154&zc->blockState.matchState,2155ws,2156¶ms->cParams,2157params->useRowMatchFinder,2158crp,2159needsIndexReset,2160ZSTD_resetTarget_CCtx), "");21612162zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));21632164/* ldm hash table */2165if (params->ldmParams.enableLdm == ZSTD_ps_enable) {2166/* TODO: avoid memset? */2167size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;2168zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));2169ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));2170zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));2171zc->maxNbLdmSequences = maxNbLdmSeq;21722173ZSTD_window_init(&zc->ldmState.window);2174zc->ldmState.loadedDictEnd = 0;2175}21762177/* reserve space for block-level external sequences */2178if (params->useSequenceProducer) {2179size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);2180zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;2181zc->externalMatchCtx.seqBuffer =2182(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));2183}21842185/* buffers */21862187/* ZSTD_wildcopy() is used to copy into the literals buffer,2188* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.2189*/2190zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);2191zc->seqStore.maxNbLit = blockSize;21922193zc->bufferedPolicy = zbuff;2194zc->inBuffSize = buffInSize;2195zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);2196zc->outBuffSize = buffOutSize;2197zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);21982199/* ldm bucketOffsets table */2200if (params->ldmParams.enableLdm == ZSTD_ps_enable) {2201/* TODO: avoid memset? */2202size_t const numBuckets =2203((size_t)1) << (params->ldmParams.hashLog -2204params->ldmParams.bucketSizeLog);2205zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);2206ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);2207}22082209/* sequences storage */2210ZSTD_referenceExternalSequences(zc, NULL, 0);2211zc->seqStore.maxNbSeq = maxNbSeq;2212zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));2213zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));2214zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));22152216DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));2217assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));22182219zc->initialized = 1;22202221return 0;2222}2223}22242225/* ZSTD_invalidateRepCodes() :2226* ensures next compression will not use repcodes from previous block.2227* Note : only works with regular variant;2228* do not use with extDict variant ! */2229void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {2230int i;2231for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;2232assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));2233}22342235/* These are the approximate sizes for each strategy past which copying the2236* dictionary tables into the working context is faster than using them2237* in-place.2238*/2239static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {22408 KB, /* unused */22418 KB, /* ZSTD_fast */224216 KB, /* ZSTD_dfast */224332 KB, /* ZSTD_greedy */224432 KB, /* ZSTD_lazy */224532 KB, /* ZSTD_lazy2 */224632 KB, /* ZSTD_btlazy2 */224732 KB, /* ZSTD_btopt */22488 KB, /* ZSTD_btultra */22498 KB /* ZSTD_btultra2 */2250};22512252static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,2253const ZSTD_CCtx_params* params,2254U64 pledgedSrcSize)2255{2256size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];2257int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;2258return dedicatedDictSearch2259|| ( ( pledgedSrcSize <= cutoff2260|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN2261|| params->attachDictPref == ZSTD_dictForceAttach )2262&& params->attachDictPref != ZSTD_dictForceCopy2263&& !params->forceWindow ); /* dictMatchState isn't correctly2264* handled in _enforceMaxDist */2265}22662267static size_t2268ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,2269const ZSTD_CDict* cdict,2270ZSTD_CCtx_params params,2271U64 pledgedSrcSize,2272ZSTD_buffered_policy_e zbuff)2273{2274DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",2275(unsigned long long)pledgedSrcSize);2276{2277ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;2278unsigned const windowLog = params.cParams.windowLog;2279assert(windowLog != 0);2280/* Resize working context table params for input only, since the dict2281* has its own tables. */2282/* pledgedSrcSize == 0 means 0! */22832284if (cdict->matchState.dedicatedDictSearch) {2285ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);2286}22872288params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,2289cdict->dictContentSize, ZSTD_cpm_attachDict,2290params.useRowMatchFinder);2291params.cParams.windowLog = windowLog;2292params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */2293FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize,2294/* loadedDictSize */ 0,2295ZSTDcrp_makeClean, zbuff), "");2296assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);2297}22982299{ const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc2300- cdict->matchState.window.base);2301const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;2302if (cdictLen == 0) {2303/* don't even attach dictionaries with no contents */2304DEBUGLOG(4, "skipping attaching empty dictionary");2305} else {2306DEBUGLOG(4, "attaching dictionary into context");2307cctx->blockState.matchState.dictMatchState = &cdict->matchState;23082309/* prep working match state so dict matches never have negative indices2310* when they are translated to the working context's index space. */2311if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {2312cctx->blockState.matchState.window.nextSrc =2313cctx->blockState.matchState.window.base + cdictEnd;2314ZSTD_window_clear(&cctx->blockState.matchState.window);2315}2316/* loadedDictEnd is expressed within the referential of the active context */2317cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;2318} }23192320cctx->dictID = cdict->dictID;2321cctx->dictContentSize = cdict->dictContentSize;23222323/* copy block state */2324ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));23252326return 0;2327}23282329static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize,2330ZSTD_compressionParameters const* cParams) {2331if (ZSTD_CDictIndicesAreTagged(cParams)){2332/* Remove tags from the CDict table if they are present.2333* See docs on "short cache" in zstd_compress_internal.h for context. */2334size_t i;2335for (i = 0; i < tableSize; i++) {2336U32 const taggedIndex = src[i];2337U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS;2338dst[i] = index;2339}2340} else {2341ZSTD_memcpy(dst, src, tableSize * sizeof(U32));2342}2343}23442345static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,2346const ZSTD_CDict* cdict,2347ZSTD_CCtx_params params,2348U64 pledgedSrcSize,2349ZSTD_buffered_policy_e zbuff)2350{2351const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;23522353assert(!cdict->matchState.dedicatedDictSearch);2354DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",2355(unsigned long long)pledgedSrcSize);23562357{ unsigned const windowLog = params.cParams.windowLog;2358assert(windowLog != 0);2359/* Copy only compression parameters related to tables. */2360params.cParams = *cdict_cParams;2361params.cParams.windowLog = windowLog;2362params.useRowMatchFinder = cdict->useRowMatchFinder;2363FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize,2364/* loadedDictSize */ 0,2365ZSTDcrp_leaveDirty, zbuff), "");2366assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);2367assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);2368assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);2369}23702371ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);2372assert(params.useRowMatchFinder != ZSTD_ps_auto);23732374/* copy tables */2375{ size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)2376? ((size_t)1 << cdict_cParams->chainLog)2377: 0;2378size_t const hSize = (size_t)1 << cdict_cParams->hashLog;23792380ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable,2381cdict->matchState.hashTable,2382hSize, cdict_cParams);23832384/* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */2385if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {2386ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable,2387cdict->matchState.chainTable,2388chainSize, cdict_cParams);2389}2390/* copy tag table */2391if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {2392size_t const tagTableSize = hSize;2393ZSTD_memcpy(cctx->blockState.matchState.tagTable,2394cdict->matchState.tagTable,2395tagTableSize);2396cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt;2397}2398}23992400/* Zero the hashTable3, since the cdict never fills it */2401{ int const h3log = cctx->blockState.matchState.hashLog3;2402size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;2403assert(cdict->matchState.hashLog3 == 0);2404ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));2405}24062407ZSTD_cwksp_mark_tables_clean(&cctx->workspace);24082409/* copy dictionary offsets */2410{ ZSTD_matchState_t const* srcMatchState = &cdict->matchState;2411ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;2412dstMatchState->window = srcMatchState->window;2413dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;2414dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;2415}24162417cctx->dictID = cdict->dictID;2418cctx->dictContentSize = cdict->dictContentSize;24192420/* copy block state */2421ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));24222423return 0;2424}24252426/* We have a choice between copying the dictionary context into the working2427* context, or referencing the dictionary context from the working context2428* in-place. We decide here which strategy to use. */2429static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,2430const ZSTD_CDict* cdict,2431const ZSTD_CCtx_params* params,2432U64 pledgedSrcSize,2433ZSTD_buffered_policy_e zbuff)2434{24352436DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",2437(unsigned)pledgedSrcSize);24382439if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {2440return ZSTD_resetCCtx_byAttachingCDict(2441cctx, cdict, *params, pledgedSrcSize, zbuff);2442} else {2443return ZSTD_resetCCtx_byCopyingCDict(2444cctx, cdict, *params, pledgedSrcSize, zbuff);2445}2446}24472448/*! ZSTD_copyCCtx_internal() :2449* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.2450* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).2451* The "context", in this case, refers to the hash and chain tables,2452* entropy tables, and dictionary references.2453* `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.2454* @return : 0, or an error code */2455static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,2456const ZSTD_CCtx* srcCCtx,2457ZSTD_frameParameters fParams,2458U64 pledgedSrcSize,2459ZSTD_buffered_policy_e zbuff)2460{2461RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,2462"Can't copy a ctx that's not in init stage.");2463DEBUGLOG(5, "ZSTD_copyCCtx_internal");2464ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));2465{ ZSTD_CCtx_params params = dstCCtx->requestedParams;2466/* Copy only compression parameters related to tables. */2467params.cParams = srcCCtx->appliedParams.cParams;2468assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);2469assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);2470assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);2471params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;2472params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;2473params.ldmParams = srcCCtx->appliedParams.ldmParams;2474params.fParams = fParams;2475params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize;2476ZSTD_resetCCtx_internal(dstCCtx, ¶ms, pledgedSrcSize,2477/* loadedDictSize */ 0,2478ZSTDcrp_leaveDirty, zbuff);2479assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);2480assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);2481assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);2482assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);2483assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);2484}24852486ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);24872488/* copy tables */2489{ size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,2490srcCCtx->appliedParams.useRowMatchFinder,24910 /* forDDSDict */)2492? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)2493: 0;2494size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;2495int const h3log = srcCCtx->blockState.matchState.hashLog3;2496size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;24972498ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,2499srcCCtx->blockState.matchState.hashTable,2500hSize * sizeof(U32));2501ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,2502srcCCtx->blockState.matchState.chainTable,2503chainSize * sizeof(U32));2504ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,2505srcCCtx->blockState.matchState.hashTable3,2506h3Size * sizeof(U32));2507}25082509ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);25102511/* copy dictionary offsets */2512{2513const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;2514ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;2515dstMatchState->window = srcMatchState->window;2516dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;2517dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;2518}2519dstCCtx->dictID = srcCCtx->dictID;2520dstCCtx->dictContentSize = srcCCtx->dictContentSize;25212522/* copy block state */2523ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));25242525return 0;2526}25272528/*! ZSTD_copyCCtx() :2529* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.2530* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).2531* pledgedSrcSize==0 means "unknown".2532* @return : 0, or an error code */2533size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)2534{2535ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };2536ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;2537ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);2538if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;2539fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);25402541return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,2542fParams, pledgedSrcSize,2543zbuff);2544}254525462547#define ZSTD_ROWSIZE 162548/*! ZSTD_reduceTable() :2549* reduce table indexes by `reducerValue`, or squash to zero.2550* PreserveMark preserves "unsorted mark" for btlazy2 strategy.2551* It must be set to a clear 0/1 value, to remove branch during inlining.2552* Presume table size is a multiple of ZSTD_ROWSIZE2553* to help auto-vectorization */2554FORCE_INLINE_TEMPLATE void2555ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)2556{2557int const nbRows = (int)size / ZSTD_ROWSIZE;2558int cellNb = 0;2559int rowNb;2560/* Protect special index values < ZSTD_WINDOW_START_INDEX. */2561U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;2562assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */2563assert(size < (1U<<31)); /* can be casted to int */25642565#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)2566/* To validate that the table re-use logic is sound, and that we don't2567* access table space that we haven't cleaned, we re-"poison" the table2568* space every time we mark it dirty.2569*2570* This function however is intended to operate on those dirty tables and2571* re-clean them. So when this function is used correctly, we can unpoison2572* the memory it operated on. This introduces a blind spot though, since2573* if we now try to operate on __actually__ poisoned memory, we will not2574* detect that. */2575__msan_unpoison(table, size * sizeof(U32));2576#endif25772578for (rowNb=0 ; rowNb < nbRows ; rowNb++) {2579int column;2580for (column=0; column<ZSTD_ROWSIZE; column++) {2581U32 newVal;2582if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {2583/* This write is pointless, but is required(?) for the compiler2584* to auto-vectorize the loop. */2585newVal = ZSTD_DUBT_UNSORTED_MARK;2586} else if (table[cellNb] < reducerThreshold) {2587newVal = 0;2588} else {2589newVal = table[cellNb] - reducerValue;2590}2591table[cellNb] = newVal;2592cellNb++;2593} }2594}25952596static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)2597{2598ZSTD_reduceTable_internal(table, size, reducerValue, 0);2599}26002601static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)2602{2603ZSTD_reduceTable_internal(table, size, reducerValue, 1);2604}26052606/*! ZSTD_reduceIndex() :2607* rescale all indexes to avoid future overflow (indexes are U32) */2608static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)2609{2610{ U32 const hSize = (U32)1 << params->cParams.hashLog;2611ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);2612}26132614if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {2615U32 const chainSize = (U32)1 << params->cParams.chainLog;2616if (params->cParams.strategy == ZSTD_btlazy2)2617ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);2618else2619ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);2620}26212622if (ms->hashLog3) {2623U32 const h3Size = (U32)1 << ms->hashLog3;2624ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);2625}2626}262726282629/*-*******************************************************2630* Block entropic compression2631*********************************************************/26322633/* See doc/zstd_compression_format.md for detailed format description */26342635int ZSTD_seqToCodes(const seqStore_t* seqStorePtr)2636{2637const seqDef* const sequences = seqStorePtr->sequencesStart;2638BYTE* const llCodeTable = seqStorePtr->llCode;2639BYTE* const ofCodeTable = seqStorePtr->ofCode;2640BYTE* const mlCodeTable = seqStorePtr->mlCode;2641U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);2642U32 u;2643int longOffsets = 0;2644assert(nbSeq <= seqStorePtr->maxNbSeq);2645for (u=0; u<nbSeq; u++) {2646U32 const llv = sequences[u].litLength;2647U32 const ofCode = ZSTD_highbit32(sequences[u].offBase);2648U32 const mlv = sequences[u].mlBase;2649llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);2650ofCodeTable[u] = (BYTE)ofCode;2651mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);2652assert(!(MEM_64bits() && ofCode >= STREAM_ACCUMULATOR_MIN));2653if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN)2654longOffsets = 1;2655}2656if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)2657llCodeTable[seqStorePtr->longLengthPos] = MaxLL;2658if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)2659mlCodeTable[seqStorePtr->longLengthPos] = MaxML;2660return longOffsets;2661}26622663/* ZSTD_useTargetCBlockSize():2664* Returns if target compressed block size param is being used.2665* If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.2666* Returns 1 if true, 0 otherwise. */2667static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)2668{2669DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);2670return (cctxParams->targetCBlockSize != 0);2671}26722673/* ZSTD_blockSplitterEnabled():2674* Returns if block splitting param is being used2675* If used, compression will do best effort to split a block in order to improve compression ratio.2676* At the time this function is called, the parameter must be finalized.2677* Returns 1 if true, 0 otherwise. */2678static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)2679{2680DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);2681assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);2682return (cctxParams->useBlockSplitter == ZSTD_ps_enable);2683}26842685/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types2686* and size of the sequences statistics2687*/2688typedef struct {2689U32 LLtype;2690U32 Offtype;2691U32 MLtype;2692size_t size;2693size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */2694int longOffsets;2695} ZSTD_symbolEncodingTypeStats_t;26962697/* ZSTD_buildSequencesStatistics():2698* Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.2699* Modifies `nextEntropy` to have the appropriate values as a side effect.2700* nbSeq must be greater than 0.2701*2702* entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)2703*/2704static ZSTD_symbolEncodingTypeStats_t2705ZSTD_buildSequencesStatistics(2706const seqStore_t* seqStorePtr, size_t nbSeq,2707const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,2708BYTE* dst, const BYTE* const dstEnd,2709ZSTD_strategy strategy, unsigned* countWorkspace,2710void* entropyWorkspace, size_t entropyWkspSize)2711{2712BYTE* const ostart = dst;2713const BYTE* const oend = dstEnd;2714BYTE* op = ostart;2715FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;2716FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;2717FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;2718const BYTE* const ofCodeTable = seqStorePtr->ofCode;2719const BYTE* const llCodeTable = seqStorePtr->llCode;2720const BYTE* const mlCodeTable = seqStorePtr->mlCode;2721ZSTD_symbolEncodingTypeStats_t stats;27222723stats.lastCountSize = 0;2724/* convert length/distances into codes */2725stats.longOffsets = ZSTD_seqToCodes(seqStorePtr);2726assert(op <= oend);2727assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */2728/* build CTable for Literal Lengths */2729{ unsigned max = MaxLL;2730size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */2731DEBUGLOG(5, "Building LL table");2732nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;2733stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,2734countWorkspace, max, mostFrequent, nbSeq,2735LLFSELog, prevEntropy->litlengthCTable,2736LL_defaultNorm, LL_defaultNormLog,2737ZSTD_defaultAllowed, strategy);2738assert(set_basic < set_compressed && set_rle < set_compressed);2739assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */2740{ size_t const countSize = ZSTD_buildCTable(2741op, (size_t)(oend - op),2742CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,2743countWorkspace, max, llCodeTable, nbSeq,2744LL_defaultNorm, LL_defaultNormLog, MaxLL,2745prevEntropy->litlengthCTable,2746sizeof(prevEntropy->litlengthCTable),2747entropyWorkspace, entropyWkspSize);2748if (ZSTD_isError(countSize)) {2749DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");2750stats.size = countSize;2751return stats;2752}2753if (stats.LLtype == set_compressed)2754stats.lastCountSize = countSize;2755op += countSize;2756assert(op <= oend);2757} }2758/* build CTable for Offsets */2759{ unsigned max = MaxOff;2760size_t const mostFrequent = HIST_countFast_wksp(2761countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */2762/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */2763ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;2764DEBUGLOG(5, "Building OF table");2765nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;2766stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,2767countWorkspace, max, mostFrequent, nbSeq,2768OffFSELog, prevEntropy->offcodeCTable,2769OF_defaultNorm, OF_defaultNormLog,2770defaultPolicy, strategy);2771assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */2772{ size_t const countSize = ZSTD_buildCTable(2773op, (size_t)(oend - op),2774CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,2775countWorkspace, max, ofCodeTable, nbSeq,2776OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,2777prevEntropy->offcodeCTable,2778sizeof(prevEntropy->offcodeCTable),2779entropyWorkspace, entropyWkspSize);2780if (ZSTD_isError(countSize)) {2781DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");2782stats.size = countSize;2783return stats;2784}2785if (stats.Offtype == set_compressed)2786stats.lastCountSize = countSize;2787op += countSize;2788assert(op <= oend);2789} }2790/* build CTable for MatchLengths */2791{ unsigned max = MaxML;2792size_t const mostFrequent = HIST_countFast_wksp(2793countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */2794DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));2795nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;2796stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,2797countWorkspace, max, mostFrequent, nbSeq,2798MLFSELog, prevEntropy->matchlengthCTable,2799ML_defaultNorm, ML_defaultNormLog,2800ZSTD_defaultAllowed, strategy);2801assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */2802{ size_t const countSize = ZSTD_buildCTable(2803op, (size_t)(oend - op),2804CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,2805countWorkspace, max, mlCodeTable, nbSeq,2806ML_defaultNorm, ML_defaultNormLog, MaxML,2807prevEntropy->matchlengthCTable,2808sizeof(prevEntropy->matchlengthCTable),2809entropyWorkspace, entropyWkspSize);2810if (ZSTD_isError(countSize)) {2811DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");2812stats.size = countSize;2813return stats;2814}2815if (stats.MLtype == set_compressed)2816stats.lastCountSize = countSize;2817op += countSize;2818assert(op <= oend);2819} }2820stats.size = (size_t)(op-ostart);2821return stats;2822}28232824/* ZSTD_entropyCompressSeqStore_internal():2825* compresses both literals and sequences2826* Returns compressed size of block, or a zstd error.2827*/2828#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 202829MEM_STATIC size_t2830ZSTD_entropyCompressSeqStore_internal(2831const seqStore_t* seqStorePtr,2832const ZSTD_entropyCTables_t* prevEntropy,2833ZSTD_entropyCTables_t* nextEntropy,2834const ZSTD_CCtx_params* cctxParams,2835void* dst, size_t dstCapacity,2836void* entropyWorkspace, size_t entropyWkspSize,2837const int bmi2)2838{2839ZSTD_strategy const strategy = cctxParams->cParams.strategy;2840unsigned* count = (unsigned*)entropyWorkspace;2841FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;2842FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;2843FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;2844const seqDef* const sequences = seqStorePtr->sequencesStart;2845const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);2846const BYTE* const ofCodeTable = seqStorePtr->ofCode;2847const BYTE* const llCodeTable = seqStorePtr->llCode;2848const BYTE* const mlCodeTable = seqStorePtr->mlCode;2849BYTE* const ostart = (BYTE*)dst;2850BYTE* const oend = ostart + dstCapacity;2851BYTE* op = ostart;2852size_t lastCountSize;2853int longOffsets = 0;28542855entropyWorkspace = count + (MaxSeq + 1);2856entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);28572858DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity);2859ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));2860assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);28612862/* Compress literals */2863{ const BYTE* const literals = seqStorePtr->litStart;2864size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);2865size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);2866/* Base suspicion of uncompressibility on ratio of literals to sequences */2867unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);2868size_t const litSize = (size_t)(seqStorePtr->lit - literals);28692870size_t const cSize = ZSTD_compressLiterals(2871op, dstCapacity,2872literals, litSize,2873entropyWorkspace, entropyWkspSize,2874&prevEntropy->huf, &nextEntropy->huf,2875cctxParams->cParams.strategy,2876ZSTD_literalsCompressionIsDisabled(cctxParams),2877suspectUncompressible, bmi2);2878FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");2879assert(cSize <= dstCapacity);2880op += cSize;2881}28822883/* Sequences Header */2884RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,2885dstSize_tooSmall, "Can't fit seq hdr in output buf!");2886if (nbSeq < 128) {2887*op++ = (BYTE)nbSeq;2888} else if (nbSeq < LONGNBSEQ) {2889op[0] = (BYTE)((nbSeq>>8) + 0x80);2890op[1] = (BYTE)nbSeq;2891op+=2;2892} else {2893op[0]=0xFF;2894MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));2895op+=3;2896}2897assert(op <= oend);2898if (nbSeq==0) {2899/* Copy the old tables over as if we repeated them */2900ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));2901return (size_t)(op - ostart);2902}2903{ BYTE* const seqHead = op++;2904/* build stats for sequences */2905const ZSTD_symbolEncodingTypeStats_t stats =2906ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,2907&prevEntropy->fse, &nextEntropy->fse,2908op, oend,2909strategy, count,2910entropyWorkspace, entropyWkspSize);2911FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");2912*seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));2913lastCountSize = stats.lastCountSize;2914op += stats.size;2915longOffsets = stats.longOffsets;2916}29172918{ size_t const bitstreamSize = ZSTD_encodeSequences(2919op, (size_t)(oend - op),2920CTable_MatchLength, mlCodeTable,2921CTable_OffsetBits, ofCodeTable,2922CTable_LitLength, llCodeTable,2923sequences, nbSeq,2924longOffsets, bmi2);2925FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");2926op += bitstreamSize;2927assert(op <= oend);2928/* zstd versions <= 1.3.4 mistakenly report corruption when2929* FSE_readNCount() receives a buffer < 4 bytes.2930* Fixed by https://github.com/facebook/zstd/pull/1146.2931* This can happen when the last set_compressed table present is 22932* bytes and the bitstream is only one byte.2933* In this exceedingly rare case, we will simply emit an uncompressed2934* block, since it isn't worth optimizing.2935*/2936if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {2937/* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */2938assert(lastCountSize + bitstreamSize == 3);2939DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "2940"emitting an uncompressed block.");2941return 0;2942}2943}29442945DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));2946return (size_t)(op - ostart);2947}29482949MEM_STATIC size_t2950ZSTD_entropyCompressSeqStore(2951const seqStore_t* seqStorePtr,2952const ZSTD_entropyCTables_t* prevEntropy,2953ZSTD_entropyCTables_t* nextEntropy,2954const ZSTD_CCtx_params* cctxParams,2955void* dst, size_t dstCapacity,2956size_t srcSize,2957void* entropyWorkspace, size_t entropyWkspSize,2958int bmi2)2959{2960size_t const cSize = ZSTD_entropyCompressSeqStore_internal(2961seqStorePtr, prevEntropy, nextEntropy, cctxParams,2962dst, dstCapacity,2963entropyWorkspace, entropyWkspSize, bmi2);2964if (cSize == 0) return 0;2965/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.2966* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.2967*/2968if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) {2969DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity);2970return 0; /* block not compressed */2971}2972FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");29732974/* Check compressibility */2975{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);2976if (cSize >= maxCSize) return 0; /* block not compressed */2977}2978DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);2979/* libzstd decoder before > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly.2980* This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above.2981*/2982assert(cSize < ZSTD_BLOCKSIZE_MAX);2983return cSize;2984}29852986/* ZSTD_selectBlockCompressor() :2987* Not static, but internal use only (used by long distance matcher)2988* assumption : strat is a valid strategy */2989ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode)2990{2991static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {2992{ ZSTD_compressBlock_fast /* default for 0 */,2993ZSTD_compressBlock_fast,2994ZSTD_compressBlock_doubleFast,2995ZSTD_compressBlock_greedy,2996ZSTD_compressBlock_lazy,2997ZSTD_compressBlock_lazy2,2998ZSTD_compressBlock_btlazy2,2999ZSTD_compressBlock_btopt,3000ZSTD_compressBlock_btultra,3001ZSTD_compressBlock_btultra2 },3002{ ZSTD_compressBlock_fast_extDict /* default for 0 */,3003ZSTD_compressBlock_fast_extDict,3004ZSTD_compressBlock_doubleFast_extDict,3005ZSTD_compressBlock_greedy_extDict,3006ZSTD_compressBlock_lazy_extDict,3007ZSTD_compressBlock_lazy2_extDict,3008ZSTD_compressBlock_btlazy2_extDict,3009ZSTD_compressBlock_btopt_extDict,3010ZSTD_compressBlock_btultra_extDict,3011ZSTD_compressBlock_btultra_extDict },3012{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,3013ZSTD_compressBlock_fast_dictMatchState,3014ZSTD_compressBlock_doubleFast_dictMatchState,3015ZSTD_compressBlock_greedy_dictMatchState,3016ZSTD_compressBlock_lazy_dictMatchState,3017ZSTD_compressBlock_lazy2_dictMatchState,3018ZSTD_compressBlock_btlazy2_dictMatchState,3019ZSTD_compressBlock_btopt_dictMatchState,3020ZSTD_compressBlock_btultra_dictMatchState,3021ZSTD_compressBlock_btultra_dictMatchState },3022{ NULL /* default for 0 */,3023NULL,3024NULL,3025ZSTD_compressBlock_greedy_dedicatedDictSearch,3026ZSTD_compressBlock_lazy_dedicatedDictSearch,3027ZSTD_compressBlock_lazy2_dedicatedDictSearch,3028NULL,3029NULL,3030NULL,3031NULL }3032};3033ZSTD_blockCompressor selectedCompressor;3034ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);30353036assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));3037DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);3038if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {3039static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {3040{ ZSTD_compressBlock_greedy_row,3041ZSTD_compressBlock_lazy_row,3042ZSTD_compressBlock_lazy2_row },3043{ ZSTD_compressBlock_greedy_extDict_row,3044ZSTD_compressBlock_lazy_extDict_row,3045ZSTD_compressBlock_lazy2_extDict_row },3046{ ZSTD_compressBlock_greedy_dictMatchState_row,3047ZSTD_compressBlock_lazy_dictMatchState_row,3048ZSTD_compressBlock_lazy2_dictMatchState_row },3049{ ZSTD_compressBlock_greedy_dedicatedDictSearch_row,3050ZSTD_compressBlock_lazy_dedicatedDictSearch_row,3051ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }3052};3053DEBUGLOG(4, "Selecting a row-based matchfinder");3054assert(useRowMatchFinder != ZSTD_ps_auto);3055selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];3056} else {3057selectedCompressor = blockCompressor[(int)dictMode][(int)strat];3058}3059assert(selectedCompressor != NULL);3060return selectedCompressor;3061}30623063static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,3064const BYTE* anchor, size_t lastLLSize)3065{3066ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);3067seqStorePtr->lit += lastLLSize;3068}30693070void ZSTD_resetSeqStore(seqStore_t* ssPtr)3071{3072ssPtr->lit = ssPtr->litStart;3073ssPtr->sequences = ssPtr->sequencesStart;3074ssPtr->longLengthType = ZSTD_llt_none;3075}30763077/* ZSTD_postProcessSequenceProducerResult() :3078* Validates and post-processes sequences obtained through the external matchfinder API:3079* - Checks whether nbExternalSeqs represents an error condition.3080* - Appends a block delimiter to outSeqs if one is not already present.3081* See zstd.h for context regarding block delimiters.3082* Returns the number of sequences after post-processing, or an error code. */3083static size_t ZSTD_postProcessSequenceProducerResult(3084ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize3085) {3086RETURN_ERROR_IF(3087nbExternalSeqs > outSeqsCapacity,3088sequenceProducer_failed,3089"External sequence producer returned error code %lu",3090(unsigned long)nbExternalSeqs3091);30923093RETURN_ERROR_IF(3094nbExternalSeqs == 0 && srcSize > 0,3095sequenceProducer_failed,3096"Got zero sequences from external sequence producer for a non-empty src buffer!"3097);30983099if (srcSize == 0) {3100ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence));3101return 1;3102}31033104{3105ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1];31063107/* We can return early if lastSeq is already a block delimiter. */3108if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {3109return nbExternalSeqs;3110}31113112/* This error condition is only possible if the external matchfinder3113* produced an invalid parse, by definition of ZSTD_sequenceBound(). */3114RETURN_ERROR_IF(3115nbExternalSeqs == outSeqsCapacity,3116sequenceProducer_failed,3117"nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"3118);31193120/* lastSeq is not a block delimiter, so we need to append one. */3121ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence));3122return nbExternalSeqs + 1;3123}3124}31253126/* ZSTD_fastSequenceLengthSum() :3127* Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*.3128* Similar to another function in zstd_compress.c (determine_blockSize),3129* except it doesn't check for a block delimiter to end summation.3130* Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P).3131* This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */3132static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) {3133size_t matchLenSum, litLenSum, i;3134matchLenSum = 0;3135litLenSum = 0;3136for (i = 0; i < seqBufSize; i++) {3137litLenSum += seqBuf[i].litLength;3138matchLenSum += seqBuf[i].matchLength;3139}3140return litLenSum + matchLenSum;3141}31423143typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;31443145static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)3146{3147ZSTD_matchState_t* const ms = &zc->blockState.matchState;3148DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);3149assert(srcSize <= ZSTD_BLOCKSIZE_MAX);3150/* Assert that we have correctly flushed the ctx params into the ms's copy */3151ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);3152/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding3153* additional 1. We need to revisit and change this logic to be more consistent */3154if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {3155if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {3156ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);3157} else {3158ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);3159}3160return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */3161}3162ZSTD_resetSeqStore(&(zc->seqStore));3163/* required for optimal parser to read stats from dictionary */3164ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;3165/* tell the optimal parser how we expect to compress literals */3166ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;3167/* a gap between an attached dict and the current window is not safe,3168* they must remain adjacent,3169* and when that stops being the case, the dict must be unset */3170assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);31713172/* limited update after a very long match */3173{ const BYTE* const base = ms->window.base;3174const BYTE* const istart = (const BYTE*)src;3175const U32 curr = (U32)(istart-base);3176if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */3177if (curr > ms->nextToUpdate + 384)3178ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));3179}31803181/* select and store sequences */3182{ ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);3183size_t lastLLSize;3184{ int i;3185for (i = 0; i < ZSTD_REP_NUM; ++i)3186zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];3187}3188if (zc->externSeqStore.pos < zc->externSeqStore.size) {3189assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);31903191/* External matchfinder + LDM is technically possible, just not implemented yet.3192* We need to revisit soon and implement it. */3193RETURN_ERROR_IF(3194zc->appliedParams.useSequenceProducer,3195parameter_combination_unsupported,3196"Long-distance matching with external sequence producer enabled is not currently supported."3197);31983199/* Updates ldmSeqStore.pos */3200lastLLSize =3201ZSTD_ldm_blockCompress(&zc->externSeqStore,3202ms, &zc->seqStore,3203zc->blockState.nextCBlock->rep,3204zc->appliedParams.useRowMatchFinder,3205src, srcSize);3206assert(zc->externSeqStore.pos <= zc->externSeqStore.size);3207} else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {3208rawSeqStore_t ldmSeqStore = kNullRawSeqStore;32093210/* External matchfinder + LDM is technically possible, just not implemented yet.3211* We need to revisit soon and implement it. */3212RETURN_ERROR_IF(3213zc->appliedParams.useSequenceProducer,3214parameter_combination_unsupported,3215"Long-distance matching with external sequence producer enabled is not currently supported."3216);32173218ldmSeqStore.seq = zc->ldmSequences;3219ldmSeqStore.capacity = zc->maxNbLdmSequences;3220/* Updates ldmSeqStore.size */3221FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,3222&zc->appliedParams.ldmParams,3223src, srcSize), "");3224/* Updates ldmSeqStore.pos */3225lastLLSize =3226ZSTD_ldm_blockCompress(&ldmSeqStore,3227ms, &zc->seqStore,3228zc->blockState.nextCBlock->rep,3229zc->appliedParams.useRowMatchFinder,3230src, srcSize);3231assert(ldmSeqStore.pos == ldmSeqStore.size);3232} else if (zc->appliedParams.useSequenceProducer) {3233assert(3234zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)3235);3236assert(zc->externalMatchCtx.mFinder != NULL);32373238{ U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;32393240size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(3241zc->externalMatchCtx.mState,3242zc->externalMatchCtx.seqBuffer,3243zc->externalMatchCtx.seqBufferCapacity,3244src, srcSize,3245NULL, 0, /* dict and dictSize, currently not supported */3246zc->appliedParams.compressionLevel,3247windowSize3248);32493250size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(3251zc->externalMatchCtx.seqBuffer,3252nbExternalSeqs,3253zc->externalMatchCtx.seqBufferCapacity,3254srcSize3255);32563257/* Return early if there is no error, since we don't need to worry about last literals */3258if (!ZSTD_isError(nbPostProcessedSeqs)) {3259ZSTD_sequencePosition seqPos = {0,0,0};3260size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);3261RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");3262FORWARD_IF_ERROR(3263ZSTD_copySequencesToSeqStoreExplicitBlockDelim(3264zc, &seqPos,3265zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,3266src, srcSize,3267zc->appliedParams.searchForExternalRepcodes3268),3269"Failed to copy external sequences to seqStore!"3270);3271ms->ldmSeqStore = NULL;3272DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs);3273return ZSTDbss_compress;3274}32753276/* Propagate the error if fallback is disabled */3277if (!zc->appliedParams.enableMatchFinderFallback) {3278return nbPostProcessedSeqs;3279}32803281/* Fallback to software matchfinder */3282{ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,3283zc->appliedParams.useRowMatchFinder,3284dictMode);3285ms->ldmSeqStore = NULL;3286DEBUGLOG(32875,3288"External sequence producer returned error code %lu. Falling back to internal parser.",3289(unsigned long)nbExternalSeqs3290);3291lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);3292} }3293} else { /* not long range mode and no external matchfinder */3294ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,3295zc->appliedParams.useRowMatchFinder,3296dictMode);3297ms->ldmSeqStore = NULL;3298lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);3299}3300{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;3301ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);3302} }3303return ZSTDbss_compress;3304}33053306static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)3307{3308const seqStore_t* seqStore = ZSTD_getSeqStore(zc);3309const seqDef* seqStoreSeqs = seqStore->sequencesStart;3310size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;3311size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);3312size_t literalsRead = 0;3313size_t lastLLSize;33143315ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];3316size_t i;3317repcodes_t updatedRepcodes;33183319assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);3320/* Ensure we have enough space for last literals "sequence" */3321assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);3322ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));3323for (i = 0; i < seqStoreSeqSize; ++i) {3324U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;3325outSeqs[i].litLength = seqStoreSeqs[i].litLength;3326outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;3327outSeqs[i].rep = 0;33283329if (i == seqStore->longLengthPos) {3330if (seqStore->longLengthType == ZSTD_llt_literalLength) {3331outSeqs[i].litLength += 0x10000;3332} else if (seqStore->longLengthType == ZSTD_llt_matchLength) {3333outSeqs[i].matchLength += 0x10000;3334}3335}33363337if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {3338/* Derive the correct offset corresponding to a repcode */3339outSeqs[i].rep = seqStoreSeqs[i].offBase;3340if (outSeqs[i].litLength != 0) {3341rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];3342} else {3343if (outSeqs[i].rep == 3) {3344rawOffset = updatedRepcodes.rep[0] - 1;3345} else {3346rawOffset = updatedRepcodes.rep[outSeqs[i].rep];3347}3348}3349}3350outSeqs[i].offset = rawOffset;3351/* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode3352so we provide seqStoreSeqs[i].offset - 1 */3353ZSTD_updateRep(updatedRepcodes.rep,3354seqStoreSeqs[i].offBase,3355seqStoreSeqs[i].litLength == 0);3356literalsRead += outSeqs[i].litLength;3357}3358/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.3359* If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker3360* for the block boundary, according to the API.3361*/3362assert(seqStoreLiteralsSize >= literalsRead);3363lastLLSize = seqStoreLiteralsSize - literalsRead;3364outSeqs[i].litLength = (U32)lastLLSize;3365outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;3366seqStoreSeqSize++;3367zc->seqCollector.seqIndex += seqStoreSeqSize;3368}33693370size_t ZSTD_sequenceBound(size_t srcSize) {3371return (srcSize / ZSTD_MINMATCH_MIN) + 1;3372}33733374size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,3375size_t outSeqsSize, const void* src, size_t srcSize)3376{3377const size_t dstCapacity = ZSTD_compressBound(srcSize);3378void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);3379SeqCollector seqCollector;33803381RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");33823383seqCollector.collectSequences = 1;3384seqCollector.seqStart = outSeqs;3385seqCollector.seqIndex = 0;3386seqCollector.maxSequences = outSeqsSize;3387zc->seqCollector = seqCollector;33883389ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);3390ZSTD_customFree(dst, ZSTD_defaultCMem);3391return zc->seqCollector.seqIndex;3392}33933394size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {3395size_t in = 0;3396size_t out = 0;3397for (; in < seqsSize; ++in) {3398if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {3399if (in != seqsSize - 1) {3400sequences[in+1].litLength += sequences[in].litLength;3401}3402} else {3403sequences[out] = sequences[in];3404++out;3405}3406}3407return out;3408}34093410/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */3411static int ZSTD_isRLE(const BYTE* src, size_t length) {3412const BYTE* ip = src;3413const BYTE value = ip[0];3414const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);3415const size_t unrollSize = sizeof(size_t) * 4;3416const size_t unrollMask = unrollSize - 1;3417const size_t prefixLength = length & unrollMask;3418size_t i;3419if (length == 1) return 1;3420/* Check if prefix is RLE first before using unrolled loop */3421if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {3422return 0;3423}3424for (i = prefixLength; i != length; i += unrollSize) {3425size_t u;3426for (u = 0; u < unrollSize; u += sizeof(size_t)) {3427if (MEM_readST(ip + i + u) != valueST) {3428return 0;3429} } }3430return 1;3431}34323433/* Returns true if the given block may be RLE.3434* This is just a heuristic based on the compressibility.3435* It may return both false positives and false negatives.3436*/3437static int ZSTD_maybeRLE(seqStore_t const* seqStore)3438{3439size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);3440size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);34413442return nbSeqs < 4 && nbLits < 10;3443}34443445static void3446ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)3447{3448ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;3449bs->prevCBlock = bs->nextCBlock;3450bs->nextCBlock = tmp;3451}34523453/* Writes the block header */3454static void3455writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock)3456{3457U32 const cBlockHeader = cSize == 1 ?3458lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :3459lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);3460MEM_writeLE24(op, cBlockHeader);3461DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);3462}34633464/** ZSTD_buildBlockEntropyStats_literals() :3465* Builds entropy for the literals.3466* Stores literals block type (raw, rle, compressed, repeat) and3467* huffman description table to hufMetadata.3468* Requires ENTROPY_WORKSPACE_SIZE workspace3469* @return : size of huffman description table, or an error code3470*/3471static size_t3472ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,3473const ZSTD_hufCTables_t* prevHuf,3474ZSTD_hufCTables_t* nextHuf,3475ZSTD_hufCTablesMetadata_t* hufMetadata,3476const int literalsCompressionIsDisabled,3477void* workspace, size_t wkspSize,3478int hufFlags)3479{3480BYTE* const wkspStart = (BYTE*)workspace;3481BYTE* const wkspEnd = wkspStart + wkspSize;3482BYTE* const countWkspStart = wkspStart;3483unsigned* const countWksp = (unsigned*)workspace;3484const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);3485BYTE* const nodeWksp = countWkspStart + countWkspSize;3486const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp);3487unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;3488unsigned huffLog = LitHufLog;3489HUF_repeat repeat = prevHuf->repeatMode;3490DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);34913492/* Prepare nextEntropy assuming reusing the existing table */3493ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));34943495if (literalsCompressionIsDisabled) {3496DEBUGLOG(5, "set_basic - disabled");3497hufMetadata->hType = set_basic;3498return 0;3499}35003501/* small ? don't even attempt compression (speed opt) */3502#ifndef COMPRESS_LITERALS_SIZE_MIN3503# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */3504#endif3505{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;3506if (srcSize <= minLitSize) {3507DEBUGLOG(5, "set_basic - too small");3508hufMetadata->hType = set_basic;3509return 0;3510} }35113512/* Scan input and build symbol stats */3513{ size_t const largest =3514HIST_count_wksp (countWksp, &maxSymbolValue,3515(const BYTE*)src, srcSize,3516workspace, wkspSize);3517FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");3518if (largest == srcSize) {3519/* only one literal symbol */3520DEBUGLOG(5, "set_rle");3521hufMetadata->hType = set_rle;3522return 0;3523}3524if (largest <= (srcSize >> 7)+4) {3525/* heuristic: likely not compressible */3526DEBUGLOG(5, "set_basic - no gain");3527hufMetadata->hType = set_basic;3528return 0;3529} }35303531/* Validate the previous Huffman table */3532if (repeat == HUF_repeat_check3533&& !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {3534repeat = HUF_repeat_none;3535}35363537/* Build Huffman Tree */3538ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));3539huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags);3540assert(huffLog <= LitHufLog);3541{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,3542maxSymbolValue, huffLog,3543nodeWksp, nodeWkspSize);3544FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");3545huffLog = (U32)maxBits;3546}3547{ /* Build and write the CTable */3548size_t const newCSize = HUF_estimateCompressedSize(3549(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);3550size_t const hSize = HUF_writeCTable_wksp(3551hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),3552(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,3553nodeWksp, nodeWkspSize);3554/* Check against repeating the previous CTable */3555if (repeat != HUF_repeat_none) {3556size_t const oldCSize = HUF_estimateCompressedSize(3557(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);3558if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {3559DEBUGLOG(5, "set_repeat - smaller");3560ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));3561hufMetadata->hType = set_repeat;3562return 0;3563} }3564if (newCSize + hSize >= srcSize) {3565DEBUGLOG(5, "set_basic - no gains");3566ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));3567hufMetadata->hType = set_basic;3568return 0;3569}3570DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);3571hufMetadata->hType = set_compressed;3572nextHuf->repeatMode = HUF_repeat_check;3573return hSize;3574}3575}357635773578/* ZSTD_buildDummySequencesStatistics():3579* Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,3580* and updates nextEntropy to the appropriate repeatMode.3581*/3582static ZSTD_symbolEncodingTypeStats_t3583ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)3584{3585ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0};3586nextEntropy->litlength_repeatMode = FSE_repeat_none;3587nextEntropy->offcode_repeatMode = FSE_repeat_none;3588nextEntropy->matchlength_repeatMode = FSE_repeat_none;3589return stats;3590}35913592/** ZSTD_buildBlockEntropyStats_sequences() :3593* Builds entropy for the sequences.3594* Stores symbol compression modes and fse table to fseMetadata.3595* Requires ENTROPY_WORKSPACE_SIZE wksp.3596* @return : size of fse tables or error code */3597static size_t3598ZSTD_buildBlockEntropyStats_sequences(3599const seqStore_t* seqStorePtr,3600const ZSTD_fseCTables_t* prevEntropy,3601ZSTD_fseCTables_t* nextEntropy,3602const ZSTD_CCtx_params* cctxParams,3603ZSTD_fseCTablesMetadata_t* fseMetadata,3604void* workspace, size_t wkspSize)3605{3606ZSTD_strategy const strategy = cctxParams->cParams.strategy;3607size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);3608BYTE* const ostart = fseMetadata->fseTablesBuffer;3609BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);3610BYTE* op = ostart;3611unsigned* countWorkspace = (unsigned*)workspace;3612unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);3613size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);3614ZSTD_symbolEncodingTypeStats_t stats;36153616DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);3617stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,3618prevEntropy, nextEntropy, op, oend,3619strategy, countWorkspace,3620entropyWorkspace, entropyWorkspaceSize)3621: ZSTD_buildDummySequencesStatistics(nextEntropy);3622FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");3623fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;3624fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;3625fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;3626fseMetadata->lastCountSize = stats.lastCountSize;3627return stats.size;3628}362936303631/** ZSTD_buildBlockEntropyStats() :3632* Builds entropy for the block.3633* Requires workspace size ENTROPY_WORKSPACE_SIZE3634* @return : 0 on success, or an error code3635* Note : also employed in superblock3636*/3637size_t ZSTD_buildBlockEntropyStats(3638const seqStore_t* seqStorePtr,3639const ZSTD_entropyCTables_t* prevEntropy,3640ZSTD_entropyCTables_t* nextEntropy,3641const ZSTD_CCtx_params* cctxParams,3642ZSTD_entropyCTablesMetadata_t* entropyMetadata,3643void* workspace, size_t wkspSize)3644{3645size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);3646int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD);3647int const hufFlags = huf_useOptDepth ? HUF_flags_optimalDepth : 0;36483649entropyMetadata->hufMetadata.hufDesSize =3650ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,3651&prevEntropy->huf, &nextEntropy->huf,3652&entropyMetadata->hufMetadata,3653ZSTD_literalsCompressionIsDisabled(cctxParams),3654workspace, wkspSize, hufFlags);36553656FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");3657entropyMetadata->fseMetadata.fseTablesSize =3658ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,3659&prevEntropy->fse, &nextEntropy->fse,3660cctxParams,3661&entropyMetadata->fseMetadata,3662workspace, wkspSize);3663FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");3664return 0;3665}36663667/* Returns the size estimate for the literals section (header + content) of a block */3668static size_t3669ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,3670const ZSTD_hufCTables_t* huf,3671const ZSTD_hufCTablesMetadata_t* hufMetadata,3672void* workspace, size_t wkspSize,3673int writeEntropy)3674{3675unsigned* const countWksp = (unsigned*)workspace;3676unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;3677size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);3678U32 singleStream = litSize < 256;36793680if (hufMetadata->hType == set_basic) return litSize;3681else if (hufMetadata->hType == set_rle) return 1;3682else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {3683size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);3684if (ZSTD_isError(largest)) return litSize;3685{ size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);3686if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;3687if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */3688return cLitSizeEstimate + literalSectionHeaderSize;3689} }3690assert(0); /* impossible */3691return 0;3692}36933694/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */3695static size_t3696ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,3697const BYTE* codeTable, size_t nbSeq, unsigned maxCode,3698const FSE_CTable* fseCTable,3699const U8* additionalBits,3700short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,3701void* workspace, size_t wkspSize)3702{3703unsigned* const countWksp = (unsigned*)workspace;3704const BYTE* ctp = codeTable;3705const BYTE* const ctStart = ctp;3706const BYTE* const ctEnd = ctStart + nbSeq;3707size_t cSymbolTypeSizeEstimateInBits = 0;3708unsigned max = maxCode;37093710HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */3711if (type == set_basic) {3712/* We selected this encoding type, so it must be valid. */3713assert(max <= defaultMax);3714(void)defaultMax;3715cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);3716} else if (type == set_rle) {3717cSymbolTypeSizeEstimateInBits = 0;3718} else if (type == set_compressed || type == set_repeat) {3719cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);3720}3721if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {3722return nbSeq * 10;3723}3724while (ctp < ctEnd) {3725if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];3726else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */3727ctp++;3728}3729return cSymbolTypeSizeEstimateInBits >> 3;3730}37313732/* Returns the size estimate for the sequences section (header + content) of a block */3733static size_t3734ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,3735const BYTE* llCodeTable,3736const BYTE* mlCodeTable,3737size_t nbSeq,3738const ZSTD_fseCTables_t* fseTables,3739const ZSTD_fseCTablesMetadata_t* fseMetadata,3740void* workspace, size_t wkspSize,3741int writeEntropy)3742{3743size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);3744size_t cSeqSizeEstimate = 0;3745cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,3746fseTables->offcodeCTable, NULL,3747OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,3748workspace, wkspSize);3749cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,3750fseTables->litlengthCTable, LL_bits,3751LL_defaultNorm, LL_defaultNormLog, MaxLL,3752workspace, wkspSize);3753cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,3754fseTables->matchlengthCTable, ML_bits,3755ML_defaultNorm, ML_defaultNormLog, MaxML,3756workspace, wkspSize);3757if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;3758return cSeqSizeEstimate + sequencesSectionHeaderSize;3759}37603761/* Returns the size estimate for a given stream of literals, of, ll, ml */3762static size_t3763ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,3764const BYTE* ofCodeTable,3765const BYTE* llCodeTable,3766const BYTE* mlCodeTable,3767size_t nbSeq,3768const ZSTD_entropyCTables_t* entropy,3769const ZSTD_entropyCTablesMetadata_t* entropyMetadata,3770void* workspace, size_t wkspSize,3771int writeLitEntropy, int writeSeqEntropy)3772{3773size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,3774&entropy->huf, &entropyMetadata->hufMetadata,3775workspace, wkspSize, writeLitEntropy);3776size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,3777nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,3778workspace, wkspSize, writeSeqEntropy);3779return seqSize + literalsSize + ZSTD_blockHeaderSize;3780}37813782/* Builds entropy statistics and uses them for blocksize estimation.3783*3784* @return: estimated compressed size of the seqStore, or a zstd error.3785*/3786static size_t3787ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc)3788{3789ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata;3790DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");3791FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,3792&zc->blockState.prevCBlock->entropy,3793&zc->blockState.nextCBlock->entropy,3794&zc->appliedParams,3795entropyMetadata,3796zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");3797return ZSTD_estimateBlockSize(3798seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),3799seqStore->ofCode, seqStore->llCode, seqStore->mlCode,3800(size_t)(seqStore->sequences - seqStore->sequencesStart),3801&zc->blockState.nextCBlock->entropy,3802entropyMetadata,3803zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,3804(int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);3805}38063807/* Returns literals bytes represented in a seqStore */3808static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)3809{3810size_t literalsBytes = 0;3811size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);3812size_t i;3813for (i = 0; i < nbSeqs; ++i) {3814seqDef const seq = seqStore->sequencesStart[i];3815literalsBytes += seq.litLength;3816if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {3817literalsBytes += 0x10000;3818} }3819return literalsBytes;3820}38213822/* Returns match bytes represented in a seqStore */3823static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)3824{3825size_t matchBytes = 0;3826size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);3827size_t i;3828for (i = 0; i < nbSeqs; ++i) {3829seqDef seq = seqStore->sequencesStart[i];3830matchBytes += seq.mlBase + MINMATCH;3831if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {3832matchBytes += 0x10000;3833} }3834return matchBytes;3835}38363837/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).3838* Stores the result in resultSeqStore.3839*/3840static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,3841const seqStore_t* originalSeqStore,3842size_t startIdx, size_t endIdx)3843{3844*resultSeqStore = *originalSeqStore;3845if (startIdx > 0) {3846resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;3847resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);3848}38493850/* Move longLengthPos into the correct position if necessary */3851if (originalSeqStore->longLengthType != ZSTD_llt_none) {3852if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {3853resultSeqStore->longLengthType = ZSTD_llt_none;3854} else {3855resultSeqStore->longLengthPos -= (U32)startIdx;3856}3857}3858resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;3859resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;3860if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {3861/* This accounts for possible last literals if the derived chunk reaches the end of the block */3862assert(resultSeqStore->lit == originalSeqStore->lit);3863} else {3864size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);3865resultSeqStore->lit = resultSeqStore->litStart + literalsBytes;3866}3867resultSeqStore->llCode += startIdx;3868resultSeqStore->mlCode += startIdx;3869resultSeqStore->ofCode += startIdx;3870}38713872/**3873* Returns the raw offset represented by the combination of offBase, ll0, and repcode history.3874* offBase must represent a repcode in the numeric representation of ZSTD_storeSeq().3875*/3876static U323877ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0)3878{3879U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */3880assert(OFFBASE_IS_REPCODE(offBase));3881if (adjustedRepCode == ZSTD_REP_NUM) {3882assert(ll0);3883/* litlength == 0 and offCode == 2 implies selection of first repcode - 13884* This is only valid if it results in a valid offset value, aka > 0.3885* Note : it may happen that `rep[0]==1` in exceptional circumstances.3886* In which case this function will return 0, which is an invalid offset.3887* It's not an issue though, since this value will be3888* compared and discarded within ZSTD_seqStore_resolveOffCodes().3889*/3890return rep[0] - 1;3891}3892return rep[adjustedRepCode];3893}38943895/**3896* ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise3897* due to emission of RLE/raw blocks that disturb the offset history,3898* and replaces any repcodes within the seqStore that may be invalid.3899*3900* dRepcodes are updated as would be on the decompression side.3901* cRepcodes are updated exactly in accordance with the seqStore.3902*3903* Note : this function assumes seq->offBase respects the following numbering scheme :3904* 0 : invalid3905* 1-3 : repcode 1-33906* 4+ : real_offset+33907*/3908static void3909ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,3910const seqStore_t* const seqStore, U32 const nbSeq)3911{3912U32 idx = 0;3913U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq;3914for (; idx < nbSeq; ++idx) {3915seqDef* const seq = seqStore->sequencesStart + idx;3916U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx);3917U32 const offBase = seq->offBase;3918assert(offBase > 0);3919if (OFFBASE_IS_REPCODE(offBase)) {3920U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0);3921U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0);3922/* Adjust simulated decompression repcode history if we come across a mismatch. Replace3923* the repcode with the offset it actually references, determined by the compression3924* repcode history.3925*/3926if (dRawOffset != cRawOffset) {3927seq->offBase = OFFSET_TO_OFFBASE(cRawOffset);3928}3929}3930/* Compression repcode history is always updated with values directly from the unmodified seqStore.3931* Decompression repcode history may use modified seq->offset value taken from compression repcode history.3932*/3933ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0);3934ZSTD_updateRep(cRepcodes->rep, offBase, ll0);3935}3936}39373938/* ZSTD_compressSeqStore_singleBlock():3939* Compresses a seqStore into a block with a block header, into the buffer dst.3940*3941* Returns the total size of that block (including header) or a ZSTD error code.3942*/3943static size_t3944ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,3945const seqStore_t* const seqStore,3946repcodes_t* const dRep, repcodes_t* const cRep,3947void* dst, size_t dstCapacity,3948const void* src, size_t srcSize,3949U32 lastBlock, U32 isPartition)3950{3951const U32 rleMaxLength = 25;3952BYTE* op = (BYTE*)dst;3953const BYTE* ip = (const BYTE*)src;3954size_t cSize;3955size_t cSeqsSize;39563957/* In case of an RLE or raw block, the simulated decompression repcode history must be reset */3958repcodes_t const dRepOriginal = *dRep;3959DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");3960if (isPartition)3961ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));39623963RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit");3964cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,3965&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,3966&zc->appliedParams,3967op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,3968srcSize,3969zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,3970zc->bmi2);3971FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");39723973if (!zc->isFirstBlock &&3974cSeqsSize < rleMaxLength &&3975ZSTD_isRLE((BYTE const*)src, srcSize)) {3976/* We don't want to emit our first block as a RLE even if it qualifies because3977* doing so will cause the decoder (cli only) to throw a "should consume all input error."3978* This is only an issue for zstd <= v1.4.33979*/3980cSeqsSize = 1;3981}39823983if (zc->seqCollector.collectSequences) {3984ZSTD_copyBlockSequences(zc);3985ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);3986return 0;3987}39883989if (cSeqsSize == 0) {3990cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);3991FORWARD_IF_ERROR(cSize, "Nocompress block failed");3992DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);3993*dRep = dRepOriginal; /* reset simulated decompression repcode history */3994} else if (cSeqsSize == 1) {3995cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);3996FORWARD_IF_ERROR(cSize, "RLE compress block failed");3997DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);3998*dRep = dRepOriginal; /* reset simulated decompression repcode history */3999} else {4000ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);4001writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);4002cSize = ZSTD_blockHeaderSize + cSeqsSize;4003DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);4004}40054006if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)4007zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;40084009return cSize;4010}40114012/* Struct to keep track of where we are in our recursive calls. */4013typedef struct {4014U32* splitLocations; /* Array of split indices */4015size_t idx; /* The current index within splitLocations being worked on */4016} seqStoreSplits;40174018#define MIN_SEQUENCES_BLOCK_SPLITTING 30040194020/* Helper function to perform the recursive search for block splits.4021* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.4022* If advantageous to split, then we recurse down the two sub-blocks.4023* If not, or if an error occurred in estimation, then we do not recurse.4024*4025* Note: The recursion depth is capped by a heuristic minimum number of sequences,4026* defined by MIN_SEQUENCES_BLOCK_SPLITTING.4027* In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).4028* In practice, recursion depth usually doesn't go beyond 4.4029*4030* Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.4031* At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize4032* maximum of 128 KB, this value is actually impossible to reach.4033*/4034static void4035ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,4036ZSTD_CCtx* zc, const seqStore_t* origSeqStore)4037{4038seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;4039seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;4040seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;4041size_t estimatedOriginalSize;4042size_t estimatedFirstHalfSize;4043size_t estimatedSecondHalfSize;4044size_t midIdx = (startIdx + endIdx)/2;40454046DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);4047assert(endIdx >= startIdx);4048if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {4049DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);4050return;4051}4052ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);4053ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);4054ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);4055estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);4056estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);4057estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);4058DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",4059estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);4060if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {4061return;4062}4063if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {4064DEBUGLOG(5, "split decided at seqNb:%zu", midIdx);4065ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);4066splits->splitLocations[splits->idx] = (U32)midIdx;4067splits->idx++;4068ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);4069}4070}40714072/* Base recursive function.4073* Populates a table with intra-block partition indices that can improve compression ratio.4074*4075* @return: number of splits made (which equals the size of the partition table - 1).4076*/4077static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)4078{4079seqStoreSplits splits;4080splits.splitLocations = partitions;4081splits.idx = 0;4082if (nbSeq <= 4) {4083DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq);4084/* Refuse to try and split anything with less than 4 sequences */4085return 0;4086}4087ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);4088splits.splitLocations[splits.idx] = nbSeq;4089DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);4090return splits.idx;4091}40924093/* ZSTD_compressBlock_splitBlock():4094* Attempts to split a given block into multiple blocks to improve compression ratio.4095*4096* Returns combined size of all blocks (which includes headers), or a ZSTD error code.4097*/4098static size_t4099ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,4100void* dst, size_t dstCapacity,4101const void* src, size_t blockSize,4102U32 lastBlock, U32 nbSeq)4103{4104size_t cSize = 0;4105const BYTE* ip = (const BYTE*)src;4106BYTE* op = (BYTE*)dst;4107size_t i = 0;4108size_t srcBytesTotal = 0;4109U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */4110seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore;4111seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore;4112size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);41134114/* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history4115* may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two4116* separate repcode histories that simulate repcode history on compression and decompression side,4117* and use the histories to determine whether we must replace a particular repcode with its raw offset.4118*4119* 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed4120* or RLE. This allows us to retrieve the offset value that an invalid repcode references within4121* a nocompress/RLE block.4122* 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use4123* the replacement offset value rather than the original repcode to update the repcode history.4124* dRep also will be the final repcode history sent to the next block.4125*4126* See ZSTD_seqStore_resolveOffCodes() for more details.4127*/4128repcodes_t dRep;4129repcodes_t cRep;4130ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));4131ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));4132ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));41334134DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",4135(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,4136(unsigned)zc->blockState.matchState.nextToUpdate);41374138if (numSplits == 0) {4139size_t cSizeSingleBlock =4140ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,4141&dRep, &cRep,4142op, dstCapacity,4143ip, blockSize,4144lastBlock, 0 /* isPartition */);4145FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");4146DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");4147assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);4148assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);4149return cSizeSingleBlock;4150}41514152ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);4153for (i = 0; i <= numSplits; ++i) {4154size_t cSizeChunk;4155U32 const lastPartition = (i == numSplits);4156U32 lastBlockEntireSrc = 0;41574158size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);4159srcBytesTotal += srcBytes;4160if (lastPartition) {4161/* This is the final partition, need to account for possible last literals */4162srcBytes += blockSize - srcBytesTotal;4163lastBlockEntireSrc = lastBlock;4164} else {4165ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);4166}41674168cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,4169&dRep, &cRep,4170op, dstCapacity,4171ip, srcBytes,4172lastBlockEntireSrc, 1 /* isPartition */);4173DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size",4174ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);4175FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");41764177ip += srcBytes;4178op += cSizeChunk;4179dstCapacity -= cSizeChunk;4180cSize += cSizeChunk;4181*currSeqStore = *nextSeqStore;4182assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);4183}4184/* cRep and dRep may have diverged during the compression.4185* If so, we use the dRep repcodes for the next block.4186*/4187ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));4188return cSize;4189}41904191static size_t4192ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,4193void* dst, size_t dstCapacity,4194const void* src, size_t srcSize, U32 lastBlock)4195{4196U32 nbSeq;4197size_t cSize;4198DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");4199assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable);42004201{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);4202FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");4203if (bss == ZSTDbss_noCompress) {4204if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)4205zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;4206cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);4207FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");4208DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");4209return cSize;4210}4211nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);4212}42134214cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);4215FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");4216return cSize;4217}42184219static size_t4220ZSTD_compressBlock_internal(ZSTD_CCtx* zc,4221void* dst, size_t dstCapacity,4222const void* src, size_t srcSize, U32 frame)4223{4224/* This is an estimated upper bound for the length of an rle block.4225* This isn't the actual upper bound.4226* Finding the real threshold needs further investigation.4227*/4228const U32 rleMaxLength = 25;4229size_t cSize;4230const BYTE* ip = (const BYTE*)src;4231BYTE* op = (BYTE*)dst;4232DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",4233(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,4234(unsigned)zc->blockState.matchState.nextToUpdate);42354236{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);4237FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");4238if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }4239}42404241if (zc->seqCollector.collectSequences) {4242ZSTD_copyBlockSequences(zc);4243ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);4244return 0;4245}42464247/* encode sequences and literals */4248cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,4249&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,4250&zc->appliedParams,4251dst, dstCapacity,4252srcSize,4253zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,4254zc->bmi2);42554256if (frame &&4257/* We don't want to emit our first block as a RLE even if it qualifies because4258* doing so will cause the decoder (cli only) to throw a "should consume all input error."4259* This is only an issue for zstd <= v1.4.34260*/4261!zc->isFirstBlock &&4262cSize < rleMaxLength &&4263ZSTD_isRLE(ip, srcSize))4264{4265cSize = 1;4266op[0] = ip[0];4267}42684269out:4270if (!ZSTD_isError(cSize) && cSize > 1) {4271ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);4272}4273/* We check that dictionaries have offset codes available for the first4274* block. After the first block, the offcode table might not have large4275* enough codes to represent the offsets in the data.4276*/4277if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)4278zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;42794280return cSize;4281}42824283static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,4284void* dst, size_t dstCapacity,4285const void* src, size_t srcSize,4286const size_t bss, U32 lastBlock)4287{4288DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");4289if (bss == ZSTDbss_compress) {4290if (/* We don't want to emit our first block as a RLE even if it qualifies because4291* doing so will cause the decoder (cli only) to throw a "should consume all input error."4292* This is only an issue for zstd <= v1.4.34293*/4294!zc->isFirstBlock &&4295ZSTD_maybeRLE(&zc->seqStore) &&4296ZSTD_isRLE((BYTE const*)src, srcSize))4297{4298return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);4299}4300/* Attempt superblock compression.4301*4302* Note that compressed size of ZSTD_compressSuperBlock() is not bound by the4303* standard ZSTD_compressBound(). This is a problem, because even if we have4304* space now, taking an extra byte now could cause us to run out of space later4305* and violate ZSTD_compressBound().4306*4307* Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.4308*4309* In order to respect ZSTD_compressBound() we must attempt to emit a raw4310* uncompressed block in these cases:4311* * cSize == 0: Return code for an uncompressed block.4312* * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).4313* ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of4314* output space.4315* * cSize >= blockBound(srcSize): We have expanded the block too much so4316* emit an uncompressed block.4317*/4318{ size_t const cSize =4319ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);4320if (cSize != ERROR(dstSize_tooSmall)) {4321size_t const maxCSize =4322srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);4323FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");4324if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {4325ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);4326return cSize;4327}4328}4329}4330} /* if (bss == ZSTDbss_compress)*/43314332DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");4333/* Superblock compression failed, attempt to emit a single no compress block.4334* The decoder will be able to stream this block since it is uncompressed.4335*/4336return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);4337}43384339static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,4340void* dst, size_t dstCapacity,4341const void* src, size_t srcSize,4342U32 lastBlock)4343{4344size_t cSize = 0;4345const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);4346DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",4347(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);4348FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");43494350cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);4351FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");43524353if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)4354zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;43554356return cSize;4357}43584359static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,4360ZSTD_cwksp* ws,4361ZSTD_CCtx_params const* params,4362void const* ip,4363void const* iend)4364{4365U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);4366U32 const maxDist = (U32)1 << params->cParams.windowLog;4367if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {4368U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);4369ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);4370ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);4371ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);4372ZSTD_cwksp_mark_tables_dirty(ws);4373ZSTD_reduceIndex(ms, params, correction);4374ZSTD_cwksp_mark_tables_clean(ws);4375if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;4376else ms->nextToUpdate -= correction;4377/* invalidate dictionaries on overflow correction */4378ms->loadedDictEnd = 0;4379ms->dictMatchState = NULL;4380}4381}43824383/*! ZSTD_compress_frameChunk() :4384* Compress a chunk of data into one or multiple blocks.4385* All blocks will be terminated, all input will be consumed.4386* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.4387* Frame is supposed already started (header already produced)4388* @return : compressed size, or an error code4389*/4390static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,4391void* dst, size_t dstCapacity,4392const void* src, size_t srcSize,4393U32 lastFrameChunk)4394{4395size_t blockSize = cctx->blockSize;4396size_t remaining = srcSize;4397const BYTE* ip = (const BYTE*)src;4398BYTE* const ostart = (BYTE*)dst;4399BYTE* op = ostart;4400U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;44014402assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);44034404DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);4405if (cctx->appliedParams.fParams.checksumFlag && srcSize)4406XXH64_update(&cctx->xxhState, src, srcSize);44074408while (remaining) {4409ZSTD_matchState_t* const ms = &cctx->blockState.matchState;4410U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);44114412/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding4413* additional 1. We need to revisit and change this logic to be more consistent */4414RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1,4415dstSize_tooSmall,4416"not enough space to store compressed block");4417if (remaining < blockSize) blockSize = remaining;44184419ZSTD_overflowCorrectIfNeeded(4420ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);4421ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);4422ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);44234424/* Ensure hash/chain table insertion resumes no sooner than lowlimit */4425if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;44264427{ size_t cSize;4428if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {4429cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);4430FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");4431assert(cSize > 0);4432assert(cSize <= blockSize + ZSTD_blockHeaderSize);4433} else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {4434cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);4435FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");4436assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);4437} else {4438cSize = ZSTD_compressBlock_internal(cctx,4439op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,4440ip, blockSize, 1 /* frame */);4441FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");44424443if (cSize == 0) { /* block is not compressible */4444cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);4445FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");4446} else {4447U32 const cBlockHeader = cSize == 1 ?4448lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :4449lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);4450MEM_writeLE24(op, cBlockHeader);4451cSize += ZSTD_blockHeaderSize;4452}4453} /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/445444554456ip += blockSize;4457assert(remaining >= blockSize);4458remaining -= blockSize;4459op += cSize;4460assert(dstCapacity >= cSize);4461dstCapacity -= cSize;4462cctx->isFirstBlock = 0;4463DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",4464(unsigned)cSize);4465} }44664467if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;4468return (size_t)(op-ostart);4469}447044714472static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,4473const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)4474{ BYTE* const op = (BYTE*)dst;4475U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */4476U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */4477U32 const checksumFlag = params->fParams.checksumFlag>0;4478U32 const windowSize = (U32)1 << params->cParams.windowLog;4479U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);4480BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);4481U32 const fcsCode = params->fParams.contentSizeFlag ?4482(pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */4483BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );4484size_t pos=0;44854486assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));4487RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,4488"dst buf is too small to fit worst-case frame header size.");4489DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",4490!params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);4491if (params->format == ZSTD_f_zstd1) {4492MEM_writeLE32(dst, ZSTD_MAGICNUMBER);4493pos = 4;4494}4495op[pos++] = frameHeaderDescriptionByte;4496if (!singleSegment) op[pos++] = windowLogByte;4497switch(dictIDSizeCode)4498{4499default:4500assert(0); /* impossible */4501ZSTD_FALLTHROUGH;4502case 0 : break;4503case 1 : op[pos] = (BYTE)(dictID); pos++; break;4504case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;4505case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;4506}4507switch(fcsCode)4508{4509default:4510assert(0); /* impossible */4511ZSTD_FALLTHROUGH;4512case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;4513case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;4514case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;4515case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;4516}4517return pos;4518}45194520/* ZSTD_writeSkippableFrame_advanced() :4521* Writes out a skippable frame with the specified magic number variant (16 are supported),4522* from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.4523*4524* Returns the total number of bytes written, or a ZSTD error code.4525*/4526size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,4527const void* src, size_t srcSize, unsigned magicVariant) {4528BYTE* op = (BYTE*)dst;4529RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,4530dstSize_tooSmall, "Not enough room for skippable frame");4531RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");4532RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");45334534MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));4535MEM_writeLE32(op+4, (U32)srcSize);4536ZSTD_memcpy(op+8, src, srcSize);4537return srcSize + ZSTD_SKIPPABLEHEADERSIZE;4538}45394540/* ZSTD_writeLastEmptyBlock() :4541* output an empty Block with end-of-frame mark to complete a frame4542* @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))4543* or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)4544*/4545size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)4546{4547RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,4548"dst buf is too small to write frame trailer empty block.");4549{ U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */4550MEM_writeLE24(dst, cBlockHeader24);4551return ZSTD_blockHeaderSize;4552}4553}45544555size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)4556{4557RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,4558"wrong cctx stage");4559RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,4560parameter_unsupported,4561"incompatible with ldm");4562cctx->externSeqStore.seq = seq;4563cctx->externSeqStore.size = nbSeq;4564cctx->externSeqStore.capacity = nbSeq;4565cctx->externSeqStore.pos = 0;4566cctx->externSeqStore.posInSequence = 0;4567return 0;4568}456945704571static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,4572void* dst, size_t dstCapacity,4573const void* src, size_t srcSize,4574U32 frame, U32 lastFrameChunk)4575{4576ZSTD_matchState_t* const ms = &cctx->blockState.matchState;4577size_t fhSize = 0;45784579DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",4580cctx->stage, (unsigned)srcSize);4581RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,4582"missing init (ZSTD_compressBegin)");45834584if (frame && (cctx->stage==ZSTDcs_init)) {4585fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,4586cctx->pledgedSrcSizePlusOne-1, cctx->dictID);4587FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");4588assert(fhSize <= dstCapacity);4589dstCapacity -= fhSize;4590dst = (char*)dst + fhSize;4591cctx->stage = ZSTDcs_ongoing;4592}45934594if (!srcSize) return fhSize; /* do not generate an empty block if no input */45954596if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {4597ms->forceNonContiguous = 0;4598ms->nextToUpdate = ms->window.dictLimit;4599}4600if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {4601ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);4602}46034604if (!frame) {4605/* overflow check and correction for block mode */4606ZSTD_overflowCorrectIfNeeded(4607ms, &cctx->workspace, &cctx->appliedParams,4608src, (BYTE const*)src + srcSize);4609}46104611DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);4612{ size_t const cSize = frame ?4613ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :4614ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);4615FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");4616cctx->consumedSrcSize += srcSize;4617cctx->producedCSize += (cSize + fhSize);4618assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));4619if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */4620ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);4621RETURN_ERROR_IF(4622cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,4623srcSize_wrong,4624"error : pledgedSrcSize = %u, while realSrcSize >= %u",4625(unsigned)cctx->pledgedSrcSizePlusOne-1,4626(unsigned)cctx->consumedSrcSize);4627}4628return cSize + fhSize;4629}4630}46314632size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,4633void* dst, size_t dstCapacity,4634const void* src, size_t srcSize)4635{4636DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);4637return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);4638}46394640/* NOTE: Must just wrap ZSTD_compressContinue_public() */4641size_t ZSTD_compressContinue(ZSTD_CCtx* cctx,4642void* dst, size_t dstCapacity,4643const void* src, size_t srcSize)4644{4645return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize);4646}46474648static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx)4649{4650ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;4651assert(!ZSTD_checkCParams(cParams));4652return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);4653}46544655/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */4656size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)4657{4658return ZSTD_getBlockSize_deprecated(cctx);4659}46604661/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */4662size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)4663{4664DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);4665{ size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx);4666RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }46674668return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);4669}46704671/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */4672size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)4673{4674return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize);4675}46764677/*! ZSTD_loadDictionaryContent() :4678* @return : 0, or an error code4679*/4680static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,4681ldmState_t* ls,4682ZSTD_cwksp* ws,4683ZSTD_CCtx_params const* params,4684const void* src, size_t srcSize,4685ZSTD_dictTableLoadMethod_e dtlm,4686ZSTD_tableFillPurpose_e tfp)4687{4688const BYTE* ip = (const BYTE*) src;4689const BYTE* const iend = ip + srcSize;4690int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;46914692/* Assert that the ms params match the params we're being given */4693ZSTD_assertEqualCParams(params->cParams, ms->cParams);46944695{ /* Ensure large dictionaries can't cause index overflow */46964697/* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.4698* Dictionaries right at the edge will immediately trigger overflow4699* correction, but I don't want to insert extra constraints here.4700*/4701U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX;47024703int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(¶ms->cParams);4704if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) {4705/* Some dictionary matchfinders in zstd use "short cache",4706* which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each4707* CDict hashtable entry as a tag rather than as part of an index.4708* When short cache is used, we need to truncate the dictionary4709* so that its indices don't overlap with the tag. */4710U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX;4711maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize);4712assert(!loadLdmDict);4713}47144715/* If the dictionary is too large, only load the suffix of the dictionary. */4716if (srcSize > maxDictSize) {4717ip = iend - maxDictSize;4718src = ip;4719srcSize = maxDictSize;4720}4721}47224723if (srcSize > ZSTD_CHUNKSIZE_MAX) {4724/* We must have cleared our windows when our source is this large. */4725assert(ZSTD_window_isEmpty(ms->window));4726if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));4727}4728ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);47294730DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);47314732if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */4733ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);4734ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);4735ZSTD_ldm_fillHashTable(ls, ip, iend, ¶ms->ldmParams);4736}47374738/* If the dict is larger than we can reasonably index in our tables, only load the suffix. */4739if (params->cParams.strategy < ZSTD_btultra) {4740U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28);4741if (srcSize > maxDictSize) {4742ip = iend - maxDictSize;4743/* src = ip; deadcode.DeadStores */4744srcSize = maxDictSize;4745}4746}47474748ms->nextToUpdate = (U32)(ip - ms->window.base);4749ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);4750ms->forceNonContiguous = params->deterministicRefPrefix;47514752if (srcSize <= HASH_READ_SIZE) return 0;47534754ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);47554756switch(params->cParams.strategy)4757{4758case ZSTD_fast:4759ZSTD_fillHashTable(ms, iend, dtlm, tfp);4760break;4761case ZSTD_dfast:4762ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);4763break;47644765case ZSTD_greedy:4766case ZSTD_lazy:4767case ZSTD_lazy2:4768assert(srcSize >= HASH_READ_SIZE);4769if (ms->dedicatedDictSearch) {4770assert(ms->chainTable != NULL);4771ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);4772} else {4773assert(params->useRowMatchFinder != ZSTD_ps_auto);4774if (params->useRowMatchFinder == ZSTD_ps_enable) {4775size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);4776ZSTD_memset(ms->tagTable, 0, tagTableSize);4777ZSTD_row_update(ms, iend-HASH_READ_SIZE);4778DEBUGLOG(4, "Using row-based hash table for lazy dict");4779} else {4780ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);4781DEBUGLOG(4, "Using chain-based hash table for lazy dict");4782}4783}4784break;47854786case ZSTD_btlazy2: /* we want the dictionary table fully sorted */4787case ZSTD_btopt:4788case ZSTD_btultra:4789case ZSTD_btultra2:4790assert(srcSize >= HASH_READ_SIZE);4791ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);4792break;47934794default:4795assert(0); /* not possible : not a valid strategy id */4796}47974798ms->nextToUpdate = (U32)(iend - ms->window.base);4799return 0;4800}480148024803/* Dictionaries that assign zero probability to symbols that show up causes problems4804* when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check4805* and only dictionaries with 100% valid symbols can be assumed valid.4806*/4807static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)4808{4809U32 s;4810if (dictMaxSymbolValue < maxSymbolValue) {4811return FSE_repeat_check;4812}4813for (s = 0; s <= maxSymbolValue; ++s) {4814if (normalizedCounter[s] == 0) {4815return FSE_repeat_check;4816}4817}4818return FSE_repeat_valid;4819}48204821size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,4822const void* const dict, size_t dictSize)4823{4824short offcodeNCount[MaxOff+1];4825unsigned offcodeMaxValue = MaxOff;4826const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */4827const BYTE* const dictEnd = dictPtr + dictSize;4828dictPtr += 8;4829bs->entropy.huf.repeatMode = HUF_repeat_check;48304831{ unsigned maxSymbolValue = 255;4832unsigned hasZeroWeights = 1;4833size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,4834dictEnd-dictPtr, &hasZeroWeights);48354836/* We only set the loaded table as valid if it contains all non-zero4837* weights. Otherwise, we set it to check */4838if (!hasZeroWeights)4839bs->entropy.huf.repeatMode = HUF_repeat_valid;48404841RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");4842RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");4843dictPtr += hufHeaderSize;4844}48454846{ unsigned offcodeLog;4847size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);4848RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");4849RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");4850/* fill all offset symbols to avoid garbage at end of table */4851RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(4852bs->entropy.fse.offcodeCTable,4853offcodeNCount, MaxOff, offcodeLog,4854workspace, HUF_WORKSPACE_SIZE)),4855dictionary_corrupted, "");4856/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */4857dictPtr += offcodeHeaderSize;4858}48594860{ short matchlengthNCount[MaxML+1];4861unsigned matchlengthMaxValue = MaxML, matchlengthLog;4862size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);4863RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");4864RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");4865RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(4866bs->entropy.fse.matchlengthCTable,4867matchlengthNCount, matchlengthMaxValue, matchlengthLog,4868workspace, HUF_WORKSPACE_SIZE)),4869dictionary_corrupted, "");4870bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);4871dictPtr += matchlengthHeaderSize;4872}48734874{ short litlengthNCount[MaxLL+1];4875unsigned litlengthMaxValue = MaxLL, litlengthLog;4876size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);4877RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");4878RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");4879RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(4880bs->entropy.fse.litlengthCTable,4881litlengthNCount, litlengthMaxValue, litlengthLog,4882workspace, HUF_WORKSPACE_SIZE)),4883dictionary_corrupted, "");4884bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);4885dictPtr += litlengthHeaderSize;4886}48874888RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");4889bs->rep[0] = MEM_readLE32(dictPtr+0);4890bs->rep[1] = MEM_readLE32(dictPtr+4);4891bs->rep[2] = MEM_readLE32(dictPtr+8);4892dictPtr += 12;48934894{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);4895U32 offcodeMax = MaxOff;4896if (dictContentSize <= ((U32)-1) - 128 KB) {4897U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */4898offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */4899}4900/* All offset values <= dictContentSize + 128 KB must be representable for a valid table */4901bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));49024903/* All repCodes must be <= dictContentSize and != 0 */4904{ U32 u;4905for (u=0; u<3; u++) {4906RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");4907RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");4908} } }49094910return dictPtr - (const BYTE*)dict;4911}49124913/* Dictionary format :4914* See :4915* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format4916*/4917/*! ZSTD_loadZstdDictionary() :4918* @return : dictID, or an error code4919* assumptions : magic number supposed already checked4920* dictSize supposed >= 84921*/4922static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,4923ZSTD_matchState_t* ms,4924ZSTD_cwksp* ws,4925ZSTD_CCtx_params const* params,4926const void* dict, size_t dictSize,4927ZSTD_dictTableLoadMethod_e dtlm,4928ZSTD_tableFillPurpose_e tfp,4929void* workspace)4930{4931const BYTE* dictPtr = (const BYTE*)dict;4932const BYTE* const dictEnd = dictPtr + dictSize;4933size_t dictID;4934size_t eSize;4935ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));4936assert(dictSize >= 8);4937assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);49384939dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );4940eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);4941FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");4942dictPtr += eSize;49434944{4945size_t const dictContentSize = (size_t)(dictEnd - dictPtr);4946FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(4947ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");4948}4949return dictID;4950}49514952/** ZSTD_compress_insertDictionary() :4953* @return : dictID, or an error code */4954static size_t4955ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,4956ZSTD_matchState_t* ms,4957ldmState_t* ls,4958ZSTD_cwksp* ws,4959const ZSTD_CCtx_params* params,4960const void* dict, size_t dictSize,4961ZSTD_dictContentType_e dictContentType,4962ZSTD_dictTableLoadMethod_e dtlm,4963ZSTD_tableFillPurpose_e tfp,4964void* workspace)4965{4966DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);4967if ((dict==NULL) || (dictSize<8)) {4968RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");4969return 0;4970}49714972ZSTD_reset_compressedBlockState(bs);49734974/* dict restricted modes */4975if (dictContentType == ZSTD_dct_rawContent)4976return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp);49774978if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {4979if (dictContentType == ZSTD_dct_auto) {4980DEBUGLOG(4, "raw content dictionary detected");4981return ZSTD_loadDictionaryContent(4982ms, ls, ws, params, dict, dictSize, dtlm, tfp);4983}4984RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");4985assert(0); /* impossible */4986}49874988/* dict as full zstd dictionary */4989return ZSTD_loadZstdDictionary(4990bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);4991}49924993#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)4994#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)49954996/*! ZSTD_compressBegin_internal() :4997* Assumption : either @dict OR @cdict (or none) is non-NULL, never both4998* @return : 0, or an error code */4999static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,5000const void* dict, size_t dictSize,5001ZSTD_dictContentType_e dictContentType,5002ZSTD_dictTableLoadMethod_e dtlm,5003const ZSTD_CDict* cdict,5004const ZSTD_CCtx_params* params, U64 pledgedSrcSize,5005ZSTD_buffered_policy_e zbuff)5006{5007size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;5008#if ZSTD_TRACE5009cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;5010#endif5011DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);5012/* params are supposed to be fully validated at this point */5013assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));5014assert(!((dict) && (cdict))); /* either dict or cdict, not both */5015if ( (cdict)5016&& (cdict->dictContentSize > 0)5017&& ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF5018|| pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER5019|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN5020|| cdict->compressionLevel == 0)5021&& (params->attachDictPref != ZSTD_dictForceLoad) ) {5022return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);5023}50245025FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,5026dictContentSize,5027ZSTDcrp_makeClean, zbuff) , "");5028{ size_t const dictID = cdict ?5029ZSTD_compress_insertDictionary(5030cctx->blockState.prevCBlock, &cctx->blockState.matchState,5031&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,5032cdict->dictContentSize, cdict->dictContentType, dtlm,5033ZSTD_tfp_forCCtx, cctx->entropyWorkspace)5034: ZSTD_compress_insertDictionary(5035cctx->blockState.prevCBlock, &cctx->blockState.matchState,5036&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,5037dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);5038FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");5039assert(dictID <= UINT_MAX);5040cctx->dictID = (U32)dictID;5041cctx->dictContentSize = dictContentSize;5042}5043return 0;5044}50455046size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,5047const void* dict, size_t dictSize,5048ZSTD_dictContentType_e dictContentType,5049ZSTD_dictTableLoadMethod_e dtlm,5050const ZSTD_CDict* cdict,5051const ZSTD_CCtx_params* params,5052unsigned long long pledgedSrcSize)5053{5054DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);5055/* compression parameters verification and optimization */5056FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");5057return ZSTD_compressBegin_internal(cctx,5058dict, dictSize, dictContentType, dtlm,5059cdict,5060params, pledgedSrcSize,5061ZSTDb_not_buffered);5062}50635064/*! ZSTD_compressBegin_advanced() :5065* @return : 0, or an error code */5066size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,5067const void* dict, size_t dictSize,5068ZSTD_parameters params, unsigned long long pledgedSrcSize)5069{5070ZSTD_CCtx_params cctxParams;5071ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL);5072return ZSTD_compressBegin_advanced_internal(cctx,5073dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,5074NULL /*cdict*/,5075&cctxParams, pledgedSrcSize);5076}50775078static size_t5079ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)5080{5081ZSTD_CCtx_params cctxParams;5082{ ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);5083ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);5084}5085DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);5086return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,5087&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);5088}50895090size_t5091ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)5092{5093return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel);5094}50955096size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)5097{5098return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel);5099}510051015102/*! ZSTD_writeEpilogue() :5103* Ends a frame.5104* @return : nb of bytes written into dst (or an error code) */5105static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)5106{5107BYTE* const ostart = (BYTE*)dst;5108BYTE* op = ostart;5109size_t fhSize = 0;51105111DEBUGLOG(4, "ZSTD_writeEpilogue");5112RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");51135114/* special case : empty frame */5115if (cctx->stage == ZSTDcs_init) {5116fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);5117FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");5118dstCapacity -= fhSize;5119op += fhSize;5120cctx->stage = ZSTDcs_ongoing;5121}51225123if (cctx->stage != ZSTDcs_ending) {5124/* write one last empty block, make it the "last" block */5125U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;5126RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");5127MEM_writeLE32(op, cBlockHeader24);5128op += ZSTD_blockHeaderSize;5129dstCapacity -= ZSTD_blockHeaderSize;5130}51315132if (cctx->appliedParams.fParams.checksumFlag) {5133U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);5134RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");5135DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);5136MEM_writeLE32(op, checksum);5137op += 4;5138}51395140cctx->stage = ZSTDcs_created; /* return to "created but no init" status */5141return op-ostart;5142}51435144void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)5145{5146#if ZSTD_TRACE5147if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {5148int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;5149ZSTD_Trace trace;5150ZSTD_memset(&trace, 0, sizeof(trace));5151trace.version = ZSTD_VERSION_NUMBER;5152trace.streaming = streaming;5153trace.dictionaryID = cctx->dictID;5154trace.dictionarySize = cctx->dictContentSize;5155trace.uncompressedSize = cctx->consumedSrcSize;5156trace.compressedSize = cctx->producedCSize + extraCSize;5157trace.params = &cctx->appliedParams;5158trace.cctx = cctx;5159ZSTD_trace_compress_end(cctx->traceCtx, &trace);5160}5161cctx->traceCtx = 0;5162#else5163(void)cctx;5164(void)extraCSize;5165#endif5166}51675168size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,5169void* dst, size_t dstCapacity,5170const void* src, size_t srcSize)5171{5172size_t endResult;5173size_t const cSize = ZSTD_compressContinue_internal(cctx,5174dst, dstCapacity, src, srcSize,51751 /* frame mode */, 1 /* last chunk */);5176FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");5177endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);5178FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");5179assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));5180if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */5181ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);5182DEBUGLOG(4, "end of frame : controlling src size");5183RETURN_ERROR_IF(5184cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,5185srcSize_wrong,5186"error : pledgedSrcSize = %u, while realSrcSize = %u",5187(unsigned)cctx->pledgedSrcSizePlusOne-1,5188(unsigned)cctx->consumedSrcSize);5189}5190ZSTD_CCtx_trace(cctx, endResult);5191return cSize + endResult;5192}51935194/* NOTE: Must just wrap ZSTD_compressEnd_public() */5195size_t ZSTD_compressEnd(ZSTD_CCtx* cctx,5196void* dst, size_t dstCapacity,5197const void* src, size_t srcSize)5198{5199return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);5200}52015202size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,5203void* dst, size_t dstCapacity,5204const void* src, size_t srcSize,5205const void* dict,size_t dictSize,5206ZSTD_parameters params)5207{5208DEBUGLOG(4, "ZSTD_compress_advanced");5209FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");5210ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, ZSTD_NO_CLEVEL);5211return ZSTD_compress_advanced_internal(cctx,5212dst, dstCapacity,5213src, srcSize,5214dict, dictSize,5215&cctx->simpleApiParams);5216}52175218/* Internal */5219size_t ZSTD_compress_advanced_internal(5220ZSTD_CCtx* cctx,5221void* dst, size_t dstCapacity,5222const void* src, size_t srcSize,5223const void* dict,size_t dictSize,5224const ZSTD_CCtx_params* params)5225{5226DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);5227FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,5228dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,5229params, srcSize, ZSTDb_not_buffered) , "");5230return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);5231}52325233size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,5234void* dst, size_t dstCapacity,5235const void* src, size_t srcSize,5236const void* dict, size_t dictSize,5237int compressionLevel)5238{5239{5240ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);5241assert(params.fParams.contentSizeFlag == 1);5242ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);5243}5244DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);5245return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);5246}52475248size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,5249void* dst, size_t dstCapacity,5250const void* src, size_t srcSize,5251int compressionLevel)5252{5253DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);5254assert(cctx != NULL);5255return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);5256}52575258size_t ZSTD_compress(void* dst, size_t dstCapacity,5259const void* src, size_t srcSize,5260int compressionLevel)5261{5262size_t result;5263#if ZSTD_COMPRESS_HEAPMODE5264ZSTD_CCtx* cctx = ZSTD_createCCtx();5265RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");5266result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);5267ZSTD_freeCCtx(cctx);5268#else5269ZSTD_CCtx ctxBody;5270ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);5271result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);5272ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */5273#endif5274return result;5275}527652775278/* ===== Dictionary API ===== */52795280/*! ZSTD_estimateCDictSize_advanced() :5281* Estimate amount of memory that will be needed to create a dictionary with following arguments */5282size_t ZSTD_estimateCDictSize_advanced(5283size_t dictSize, ZSTD_compressionParameters cParams,5284ZSTD_dictLoadMethod_e dictLoadMethod)5285{5286DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));5287return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))5288+ ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)5289/* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small5290* in case we are using DDS with row-hash. */5291+ ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),5292/* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)5293+ (dictLoadMethod == ZSTD_dlm_byRef ? 05294: ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));5295}52965297size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)5298{5299ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);5300return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);5301}53025303size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)5304{5305if (cdict==NULL) return 0; /* support sizeof on NULL */5306DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));5307/* cdict may be in the workspace */5308return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))5309+ ZSTD_cwksp_sizeof(&cdict->workspace);5310}53115312static size_t ZSTD_initCDict_internal(5313ZSTD_CDict* cdict,5314const void* dictBuffer, size_t dictSize,5315ZSTD_dictLoadMethod_e dictLoadMethod,5316ZSTD_dictContentType_e dictContentType,5317ZSTD_CCtx_params params)5318{5319DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);5320assert(!ZSTD_checkCParams(params.cParams));5321cdict->matchState.cParams = params.cParams;5322cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;5323if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {5324cdict->dictContent = dictBuffer;5325} else {5326void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));5327RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");5328cdict->dictContent = internalBuffer;5329ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);5330}5331cdict->dictContentSize = dictSize;5332cdict->dictContentType = dictContentType;53335334cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);533553365337/* Reset the state to no dictionary */5338ZSTD_reset_compressedBlockState(&cdict->cBlockState);5339FORWARD_IF_ERROR(ZSTD_reset_matchState(5340&cdict->matchState,5341&cdict->workspace,5342¶ms.cParams,5343params.useRowMatchFinder,5344ZSTDcrp_makeClean,5345ZSTDirp_reset,5346ZSTD_resetTarget_CDict), "");5347/* (Maybe) load the dictionary5348* Skips loading the dictionary if it is < 8 bytes.5349*/5350{ params.compressionLevel = ZSTD_CLEVEL_DEFAULT;5351params.fParams.contentSizeFlag = 1;5352{ size_t const dictID = ZSTD_compress_insertDictionary(5353&cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,5354¶ms, cdict->dictContent, cdict->dictContentSize,5355dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace);5356FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");5357assert(dictID <= (size_t)(U32)-1);5358cdict->dictID = (U32)dictID;5359}5360}53615362return 0;5363}53645365static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,5366ZSTD_dictLoadMethod_e dictLoadMethod,5367ZSTD_compressionParameters cParams,5368ZSTD_paramSwitch_e useRowMatchFinder,5369U32 enableDedicatedDictSearch,5370ZSTD_customMem customMem)5371{5372if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;53735374{ size_t const workspaceSize =5375ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +5376ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +5377ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +5378(dictLoadMethod == ZSTD_dlm_byRef ? 05379: ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));5380void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);5381ZSTD_cwksp ws;5382ZSTD_CDict* cdict;53835384if (!workspace) {5385ZSTD_customFree(workspace, customMem);5386return NULL;5387}53885389ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);53905391cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));5392assert(cdict != NULL);5393ZSTD_cwksp_move(&cdict->workspace, &ws);5394cdict->customMem = customMem;5395cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */5396cdict->useRowMatchFinder = useRowMatchFinder;5397return cdict;5398}5399}54005401ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,5402ZSTD_dictLoadMethod_e dictLoadMethod,5403ZSTD_dictContentType_e dictContentType,5404ZSTD_compressionParameters cParams,5405ZSTD_customMem customMem)5406{5407ZSTD_CCtx_params cctxParams;5408ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));5409ZSTD_CCtxParams_init(&cctxParams, 0);5410cctxParams.cParams = cParams;5411cctxParams.customMem = customMem;5412return ZSTD_createCDict_advanced2(5413dictBuffer, dictSize,5414dictLoadMethod, dictContentType,5415&cctxParams, customMem);5416}54175418ZSTD_CDict* ZSTD_createCDict_advanced2(5419const void* dict, size_t dictSize,5420ZSTD_dictLoadMethod_e dictLoadMethod,5421ZSTD_dictContentType_e dictContentType,5422const ZSTD_CCtx_params* originalCctxParams,5423ZSTD_customMem customMem)5424{5425ZSTD_CCtx_params cctxParams = *originalCctxParams;5426ZSTD_compressionParameters cParams;5427ZSTD_CDict* cdict;54285429DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);5430if (!customMem.customAlloc ^ !customMem.customFree) return NULL;54315432if (cctxParams.enableDedicatedDictSearch) {5433cParams = ZSTD_dedicatedDictSearch_getCParams(5434cctxParams.compressionLevel, dictSize);5435ZSTD_overrideCParams(&cParams, &cctxParams.cParams);5436} else {5437cParams = ZSTD_getCParamsFromCCtxParams(5438&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);5439}54405441if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {5442/* Fall back to non-DDSS params */5443cctxParams.enableDedicatedDictSearch = 0;5444cParams = ZSTD_getCParamsFromCCtxParams(5445&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);5446}54475448DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);5449cctxParams.cParams = cParams;5450cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);54515452cdict = ZSTD_createCDict_advanced_internal(dictSize,5453dictLoadMethod, cctxParams.cParams,5454cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,5455customMem);54565457if (ZSTD_isError( ZSTD_initCDict_internal(cdict,5458dict, dictSize,5459dictLoadMethod, dictContentType,5460cctxParams) )) {5461ZSTD_freeCDict(cdict);5462return NULL;5463}54645465return cdict;5466}54675468ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)5469{5470ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);5471ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,5472ZSTD_dlm_byCopy, ZSTD_dct_auto,5473cParams, ZSTD_defaultCMem);5474if (cdict)5475cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;5476return cdict;5477}54785479ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)5480{5481ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);5482ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,5483ZSTD_dlm_byRef, ZSTD_dct_auto,5484cParams, ZSTD_defaultCMem);5485if (cdict)5486cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;5487return cdict;5488}54895490size_t ZSTD_freeCDict(ZSTD_CDict* cdict)5491{5492if (cdict==NULL) return 0; /* support free on NULL */5493{ ZSTD_customMem const cMem = cdict->customMem;5494int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);5495ZSTD_cwksp_free(&cdict->workspace, cMem);5496if (!cdictInWorkspace) {5497ZSTD_customFree(cdict, cMem);5498}5499return 0;5500}5501}55025503/*! ZSTD_initStaticCDict_advanced() :5504* Generate a digested dictionary in provided memory area.5505* workspace: The memory area to emplace the dictionary into.5506* Provided pointer must 8-bytes aligned.5507* It must outlive dictionary usage.5508* workspaceSize: Use ZSTD_estimateCDictSize()5509* to determine how large workspace must be.5510* cParams : use ZSTD_getCParams() to transform a compression level5511* into its relevants cParams.5512* @return : pointer to ZSTD_CDict*, or NULL if error (size too small)5513* Note : there is no corresponding "free" function.5514* Since workspace was allocated externally, it must be freed externally.5515*/5516const ZSTD_CDict* ZSTD_initStaticCDict(5517void* workspace, size_t workspaceSize,5518const void* dict, size_t dictSize,5519ZSTD_dictLoadMethod_e dictLoadMethod,5520ZSTD_dictContentType_e dictContentType,5521ZSTD_compressionParameters cParams)5522{5523ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);5524/* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */5525size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);5526size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))5527+ (dictLoadMethod == ZSTD_dlm_byRef ? 05528: ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))5529+ ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)5530+ matchStateSize;5531ZSTD_CDict* cdict;5532ZSTD_CCtx_params params;55335534if ((size_t)workspace & 7) return NULL; /* 8-aligned */55355536{5537ZSTD_cwksp ws;5538ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);5539cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));5540if (cdict == NULL) return NULL;5541ZSTD_cwksp_move(&cdict->workspace, &ws);5542}55435544DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",5545(unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));5546if (workspaceSize < neededSize) return NULL;55475548ZSTD_CCtxParams_init(¶ms, 0);5549params.cParams = cParams;5550params.useRowMatchFinder = useRowMatchFinder;5551cdict->useRowMatchFinder = useRowMatchFinder;5552cdict->compressionLevel = ZSTD_NO_CLEVEL;55535554if (ZSTD_isError( ZSTD_initCDict_internal(cdict,5555dict, dictSize,5556dictLoadMethod, dictContentType,5557params) ))5558return NULL;55595560return cdict;5561}55625563ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)5564{5565assert(cdict != NULL);5566return cdict->matchState.cParams;5567}55685569/*! ZSTD_getDictID_fromCDict() :5570* Provides the dictID of the dictionary loaded into `cdict`.5571* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.5572* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */5573unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)5574{5575if (cdict==NULL) return 0;5576return cdict->dictID;5577}55785579/* ZSTD_compressBegin_usingCDict_internal() :5580* Implementation of various ZSTD_compressBegin_usingCDict* functions.5581*/5582static size_t ZSTD_compressBegin_usingCDict_internal(5583ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,5584ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)5585{5586ZSTD_CCtx_params cctxParams;5587DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");5588RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");5589/* Initialize the cctxParams from the cdict */5590{5591ZSTD_parameters params;5592params.fParams = fParams;5593params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF5594|| pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER5595|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN5596|| cdict->compressionLevel == 0 ) ?5597ZSTD_getCParamsFromCDict(cdict)5598: ZSTD_getCParams(cdict->compressionLevel,5599pledgedSrcSize,5600cdict->dictContentSize);5601ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel);5602}5603/* Increase window log to fit the entire dictionary and source if the5604* source size is known. Limit the increase to 19, which is the5605* window log for compression level 1 with the largest source size.5606*/5607if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {5608U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);5609U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;5610cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);5611}5612return ZSTD_compressBegin_internal(cctx,5613NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,5614cdict,5615&cctxParams, pledgedSrcSize,5616ZSTDb_not_buffered);5617}561856195620/* ZSTD_compressBegin_usingCDict_advanced() :5621* This function is DEPRECATED.5622* cdict must be != NULL */5623size_t ZSTD_compressBegin_usingCDict_advanced(5624ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,5625ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)5626{5627return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);5628}56295630/* ZSTD_compressBegin_usingCDict() :5631* cdict must be != NULL */5632size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)5633{5634ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };5635return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);5636}56375638size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)5639{5640return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict);5641}56425643/*! ZSTD_compress_usingCDict_internal():5644* Implementation of various ZSTD_compress_usingCDict* functions.5645*/5646static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,5647void* dst, size_t dstCapacity,5648const void* src, size_t srcSize,5649const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)5650{5651FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */5652return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);5653}56545655/*! ZSTD_compress_usingCDict_advanced():5656* This function is DEPRECATED.5657*/5658size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,5659void* dst, size_t dstCapacity,5660const void* src, size_t srcSize,5661const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)5662{5663return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);5664}56655666/*! ZSTD_compress_usingCDict() :5667* Compression using a digested Dictionary.5668* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.5669* Note that compression parameters are decided at CDict creation time5670* while frame parameters are hardcoded */5671size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,5672void* dst, size_t dstCapacity,5673const void* src, size_t srcSize,5674const ZSTD_CDict* cdict)5675{5676ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };5677return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);5678}5679568056815682/* ******************************************************************5683* Streaming5684********************************************************************/56855686ZSTD_CStream* ZSTD_createCStream(void)5687{5688DEBUGLOG(3, "ZSTD_createCStream");5689return ZSTD_createCStream_advanced(ZSTD_defaultCMem);5690}56915692ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)5693{5694return ZSTD_initStaticCCtx(workspace, workspaceSize);5695}56965697ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)5698{ /* CStream and CCtx are now same object */5699return ZSTD_createCCtx_advanced(customMem);5700}57015702size_t ZSTD_freeCStream(ZSTD_CStream* zcs)5703{5704return ZSTD_freeCCtx(zcs); /* same object */5705}5706570757085709/*====== Initialization ======*/57105711size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; }57125713size_t ZSTD_CStreamOutSize(void)5714{5715return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;5716}57175718static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)5719{5720if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))5721return ZSTD_cpm_attachDict;5722else5723return ZSTD_cpm_noAttachDict;5724}57255726/* ZSTD_resetCStream():5727* pledgedSrcSize == 0 means "unknown" */5728size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)5729{5730/* temporary : 0 interpreted as "unknown" during transition period.5731* Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.5732* 0 will be interpreted as "empty" in the future.5733*/5734U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;5735DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);5736FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");5737FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");5738return 0;5739}57405741/*! ZSTD_initCStream_internal() :5742* Note : for lib/compress only. Used by zstdmt_compress.c.5743* Assumption 1 : params are valid5744* Assumption 2 : either dict, or cdict, is defined, not both */5745size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,5746const void* dict, size_t dictSize, const ZSTD_CDict* cdict,5747const ZSTD_CCtx_params* params,5748unsigned long long pledgedSrcSize)5749{5750DEBUGLOG(4, "ZSTD_initCStream_internal");5751FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");5752FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");5753assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));5754zcs->requestedParams = *params;5755assert(!((dict) && (cdict))); /* either dict or cdict, not both */5756if (dict) {5757FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");5758} else {5759/* Dictionary is cleared if !cdict */5760FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");5761}5762return 0;5763}57645765/* ZSTD_initCStream_usingCDict_advanced() :5766* same as ZSTD_initCStream_usingCDict(), with control over frame parameters */5767size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,5768const ZSTD_CDict* cdict,5769ZSTD_frameParameters fParams,5770unsigned long long pledgedSrcSize)5771{5772DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");5773FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");5774FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");5775zcs->requestedParams.fParams = fParams;5776FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");5777return 0;5778}57795780/* note : cdict must outlive compression session */5781size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)5782{5783DEBUGLOG(4, "ZSTD_initCStream_usingCDict");5784FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");5785FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");5786return 0;5787}578857895790/* ZSTD_initCStream_advanced() :5791* pledgedSrcSize must be exact.5792* if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.5793* dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */5794size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,5795const void* dict, size_t dictSize,5796ZSTD_parameters params, unsigned long long pss)5797{5798/* for compatibility with older programs relying on this behavior.5799* Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.5800* This line will be removed in the future.5801*/5802U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;5803DEBUGLOG(4, "ZSTD_initCStream_advanced");5804FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");5805FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");5806FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");5807ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, ¶ms);5808FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");5809return 0;5810}58115812size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)5813{5814DEBUGLOG(4, "ZSTD_initCStream_usingDict");5815FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");5816FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");5817FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");5818return 0;5819}58205821size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)5822{5823/* temporary : 0 interpreted as "unknown" during transition period.5824* Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.5825* 0 will be interpreted as "empty" in the future.5826*/5827U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;5828DEBUGLOG(4, "ZSTD_initCStream_srcSize");5829FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");5830FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");5831FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");5832FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");5833return 0;5834}58355836size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)5837{5838DEBUGLOG(4, "ZSTD_initCStream");5839FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");5840FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");5841FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");5842return 0;5843}58445845/*====== Compression ======*/58465847static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)5848{5849if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {5850return cctx->blockSize - cctx->stableIn_notConsumed;5851}5852assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered);5853{ size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;5854if (hintInSize==0) hintInSize = cctx->blockSize;5855return hintInSize;5856}5857}58585859/** ZSTD_compressStream_generic():5860* internal function for all *compressStream*() variants5861* @return : hint size for next input to complete ongoing block */5862static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,5863ZSTD_outBuffer* output,5864ZSTD_inBuffer* input,5865ZSTD_EndDirective const flushMode)5866{5867const char* const istart = (assert(input != NULL), (const char*)input->src);5868const char* const iend = (istart != NULL) ? istart + input->size : istart;5869const char* ip = (istart != NULL) ? istart + input->pos : istart;5870char* const ostart = (assert(output != NULL), (char*)output->dst);5871char* const oend = (ostart != NULL) ? ostart + output->size : ostart;5872char* op = (ostart != NULL) ? ostart + output->pos : ostart;5873U32 someMoreWork = 1;58745875/* check expectations */5876DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos);5877assert(zcs != NULL);5878if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {5879assert(input->pos >= zcs->stableIn_notConsumed);5880input->pos -= zcs->stableIn_notConsumed;5881ip -= zcs->stableIn_notConsumed;5882zcs->stableIn_notConsumed = 0;5883}5884if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {5885assert(zcs->inBuff != NULL);5886assert(zcs->inBuffSize > 0);5887}5888if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {5889assert(zcs->outBuff != NULL);5890assert(zcs->outBuffSize > 0);5891}5892if (input->src == NULL) assert(input->size == 0);5893assert(input->pos <= input->size);5894if (output->dst == NULL) assert(output->size == 0);5895assert(output->pos <= output->size);5896assert((U32)flushMode <= (U32)ZSTD_e_end);58975898while (someMoreWork) {5899switch(zcs->streamStage)5900{5901case zcss_init:5902RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");59035904case zcss_load:5905if ( (flushMode == ZSTD_e_end)5906&& ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */5907|| zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */5908&& (zcs->inBuffPos == 0) ) {5909/* shortcut to compression pass directly into output buffer */5910size_t const cSize = ZSTD_compressEnd_public(zcs,5911op, oend-op, ip, iend-ip);5912DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);5913FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");5914ip = iend;5915op += cSize;5916zcs->frameEnded = 1;5917ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);5918someMoreWork = 0; break;5919}5920/* complete loading into inBuffer in buffered mode */5921if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {5922size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;5923size_t const loaded = ZSTD_limitCopy(5924zcs->inBuff + zcs->inBuffPos, toLoad,5925ip, iend-ip);5926zcs->inBuffPos += loaded;5927if (ip) ip += loaded;5928if ( (flushMode == ZSTD_e_continue)5929&& (zcs->inBuffPos < zcs->inBuffTarget) ) {5930/* not enough input to fill full block : stop here */5931someMoreWork = 0; break;5932}5933if ( (flushMode == ZSTD_e_flush)5934&& (zcs->inBuffPos == zcs->inToCompress) ) {5935/* empty */5936someMoreWork = 0; break;5937}5938} else {5939assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable);5940if ( (flushMode == ZSTD_e_continue)5941&& ( (size_t)(iend - ip) < zcs->blockSize) ) {5942/* can't compress a full block : stop here */5943zcs->stableIn_notConsumed = (size_t)(iend - ip);5944ip = iend; /* pretend to have consumed input */5945someMoreWork = 0; break;5946}5947if ( (flushMode == ZSTD_e_flush)5948&& (ip == iend) ) {5949/* empty */5950someMoreWork = 0; break;5951}5952}5953/* compress current block (note : this stage cannot be stopped in the middle) */5954DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);5955{ int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);5956void* cDst;5957size_t cSize;5958size_t oSize = oend-op;5959size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress5960: MIN((size_t)(iend - ip), zcs->blockSize);5961if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)5962cDst = op; /* compress into output buffer, to skip flush stage */5963else5964cDst = zcs->outBuff, oSize = zcs->outBuffSize;5965if (inputBuffered) {5966unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);5967cSize = lastBlock ?5968ZSTD_compressEnd_public(zcs, cDst, oSize,5969zcs->inBuff + zcs->inToCompress, iSize) :5970ZSTD_compressContinue_public(zcs, cDst, oSize,5971zcs->inBuff + zcs->inToCompress, iSize);5972FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");5973zcs->frameEnded = lastBlock;5974/* prepare next block */5975zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;5976if (zcs->inBuffTarget > zcs->inBuffSize)5977zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;5978DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",5979(unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);5980if (!lastBlock)5981assert(zcs->inBuffTarget <= zcs->inBuffSize);5982zcs->inToCompress = zcs->inBuffPos;5983} else { /* !inputBuffered, hence ZSTD_bm_stable */5984unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);5985cSize = lastBlock ?5986ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) :5987ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize);5988/* Consume the input prior to error checking to mirror buffered mode. */5989if (ip) ip += iSize;5990FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");5991zcs->frameEnded = lastBlock;5992if (lastBlock) assert(ip == iend);5993}5994if (cDst == op) { /* no need to flush */5995op += cSize;5996if (zcs->frameEnded) {5997DEBUGLOG(5, "Frame completed directly in outBuffer");5998someMoreWork = 0;5999ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);6000}6001break;6002}6003zcs->outBuffContentSize = cSize;6004zcs->outBuffFlushedSize = 0;6005zcs->streamStage = zcss_flush; /* pass-through to flush stage */6006}6007ZSTD_FALLTHROUGH;6008case zcss_flush:6009DEBUGLOG(5, "flush stage");6010assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);6011{ size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;6012size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),6013zcs->outBuff + zcs->outBuffFlushedSize, toFlush);6014DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",6015(unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);6016if (flushed)6017op += flushed;6018zcs->outBuffFlushedSize += flushed;6019if (toFlush!=flushed) {6020/* flush not fully completed, presumably because dst is too small */6021assert(op==oend);6022someMoreWork = 0;6023break;6024}6025zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;6026if (zcs->frameEnded) {6027DEBUGLOG(5, "Frame completed on flush");6028someMoreWork = 0;6029ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);6030break;6031}6032zcs->streamStage = zcss_load;6033break;6034}60356036default: /* impossible */6037assert(0);6038}6039}60406041input->pos = ip - istart;6042output->pos = op - ostart;6043if (zcs->frameEnded) return 0;6044return ZSTD_nextInputSizeHint(zcs);6045}60466047static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)6048{6049#ifdef ZSTD_MULTITHREAD6050if (cctx->appliedParams.nbWorkers >= 1) {6051assert(cctx->mtctx != NULL);6052return ZSTDMT_nextInputSizeHint(cctx->mtctx);6053}6054#endif6055return ZSTD_nextInputSizeHint(cctx);60566057}60586059size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)6060{6061FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");6062return ZSTD_nextInputSizeHint_MTorST(zcs);6063}60646065/* After a compression call set the expected input/output buffer.6066* This is validated at the start of the next compression call.6067*/6068static void6069ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input)6070{6071DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)");6072if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {6073cctx->expectedInBuffer = *input;6074}6075if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {6076cctx->expectedOutBufferSize = output->size - output->pos;6077}6078}60796080/* Validate that the input/output buffers match the expectations set by6081* ZSTD_setBufferExpectations.6082*/6083static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,6084ZSTD_outBuffer const* output,6085ZSTD_inBuffer const* input,6086ZSTD_EndDirective endOp)6087{6088if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {6089ZSTD_inBuffer const expect = cctx->expectedInBuffer;6090if (expect.src != input->src || expect.pos != input->pos)6091RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!");6092}6093(void)endOp;6094if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {6095size_t const outBufferSize = output->size - output->pos;6096if (cctx->expectedOutBufferSize != outBufferSize)6097RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!");6098}6099return 0;6100}61016102static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,6103ZSTD_EndDirective endOp,6104size_t inSize)6105{6106ZSTD_CCtx_params params = cctx->requestedParams;6107ZSTD_prefixDict const prefixDict = cctx->prefixDict;6108FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */6109ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */6110assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */6111if (cctx->cdict && !cctx->localDict.cdict) {6112/* Let the cdict's compression level take priority over the requested params.6113* But do not take the cdict's compression level if the "cdict" is actually a localDict6114* generated from ZSTD_initLocalDict().6115*/6116params.compressionLevel = cctx->cdict->compressionLevel;6117}6118DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");6119if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-determine pledgedSrcSize */61206121{ size_t const dictSize = prefixDict.dict6122? prefixDict.dictSize6123: (cctx->cdict ? cctx->cdict->dictContentSize : 0);6124ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1);6125params.cParams = ZSTD_getCParamsFromCCtxParams(6126¶ms, cctx->pledgedSrcSizePlusOne-1,6127dictSize, mode);6128}61296130params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams);6131params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams);6132params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams);6133params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);6134params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);6135params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel);61366137#ifdef ZSTD_MULTITHREAD6138/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */6139RETURN_ERROR_IF(6140params.useSequenceProducer == 1 && params.nbWorkers >= 1,6141parameter_combination_unsupported,6142"External sequence producer isn't supported with nbWorkers >= 1"6143);61446145if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {6146params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */6147}6148if (params.nbWorkers > 0) {6149#if ZSTD_TRACE6150cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;6151#endif6152/* mt context creation */6153if (cctx->mtctx == NULL) {6154DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",6155params.nbWorkers);6156cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);6157RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");6158}6159/* mt compression */6160DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);6161FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(6162cctx->mtctx,6163prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,6164cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");6165cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0;6166cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize;6167cctx->consumedSrcSize = 0;6168cctx->producedCSize = 0;6169cctx->streamStage = zcss_load;6170cctx->appliedParams = params;6171} else6172#endif /* ZSTD_MULTITHREAD */6173{ U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;6174assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));6175FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,6176prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,6177cctx->cdict,6178¶ms, pledgedSrcSize,6179ZSTDb_buffered) , "");6180assert(cctx->appliedParams.nbWorkers == 0);6181cctx->inToCompress = 0;6182cctx->inBuffPos = 0;6183if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {6184/* for small input: avoid automatic flush on reaching end of block, since6185* it would require to add a 3-bytes null block to end frame6186*/6187cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);6188} else {6189cctx->inBuffTarget = 0;6190}6191cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;6192cctx->streamStage = zcss_load;6193cctx->frameEnded = 0;6194}6195return 0;6196}61976198/* @return provides a minimum amount of data remaining to be flushed from internal buffers6199*/6200size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,6201ZSTD_outBuffer* output,6202ZSTD_inBuffer* input,6203ZSTD_EndDirective endOp)6204{6205DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);6206/* check conditions */6207RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");6208RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer");6209RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");6210assert(cctx != NULL);62116212/* transparent initialization stage */6213if (cctx->streamStage == zcss_init) {6214size_t const inputSize = input->size - input->pos; /* no obligation to start from pos==0 */6215size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed;6216if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */6217&& (endOp == ZSTD_e_continue) /* no flush requested, more input to come */6218&& (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) { /* not even reached one block yet */6219if (cctx->stableIn_notConsumed) { /* not the first time */6220/* check stable source guarantees */6221RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer");6222RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos");6223}6224/* pretend input was consumed, to give a sense forward progress */6225input->pos = input->size;6226/* save stable inBuffer, for later control, and flush/end */6227cctx->expectedInBuffer = *input;6228/* but actually input wasn't consumed, so keep track of position from where compression shall resume */6229cctx->stableIn_notConsumed += inputSize;6230/* don't initialize yet, wait for the first block of flush() order, for better parameters adaptation */6231return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format); /* at least some header to produce */6232}6233FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed");6234ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */6235}6236/* end of transparent initialization stage */62376238FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");6239/* compression stage */6240#ifdef ZSTD_MULTITHREAD6241if (cctx->appliedParams.nbWorkers > 0) {6242size_t flushMin;6243if (cctx->cParamsChanged) {6244ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);6245cctx->cParamsChanged = 0;6246}6247if (cctx->stableIn_notConsumed) {6248assert(cctx->appliedParams.inBufferMode == ZSTD_bm_stable);6249/* some early data was skipped - make it available for consumption */6250assert(input->pos >= cctx->stableIn_notConsumed);6251input->pos -= cctx->stableIn_notConsumed;6252cctx->stableIn_notConsumed = 0;6253}6254for (;;) {6255size_t const ipos = input->pos;6256size_t const opos = output->pos;6257flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);6258cctx->consumedSrcSize += (U64)(input->pos - ipos);6259cctx->producedCSize += (U64)(output->pos - opos);6260if ( ZSTD_isError(flushMin)6261|| (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */6262if (flushMin == 0)6263ZSTD_CCtx_trace(cctx, 0);6264ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);6265}6266FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");62676268if (endOp == ZSTD_e_continue) {6269/* We only require some progress with ZSTD_e_continue, not maximal progress.6270* We're done if we've consumed or produced any bytes, or either buffer is6271* full.6272*/6273if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size)6274break;6275} else {6276assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end);6277/* We require maximal progress. We're done when the flush is complete or the6278* output buffer is full.6279*/6280if (flushMin == 0 || output->pos == output->size)6281break;6282}6283}6284DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");6285/* Either we don't require maximum forward progress, we've finished the6286* flush, or we are out of output space.6287*/6288assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size);6289ZSTD_setBufferExpectations(cctx, output, input);6290return flushMin;6291}6292#endif /* ZSTD_MULTITHREAD */6293FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");6294DEBUGLOG(5, "completed ZSTD_compressStream2");6295ZSTD_setBufferExpectations(cctx, output, input);6296return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */6297}62986299size_t ZSTD_compressStream2_simpleArgs (6300ZSTD_CCtx* cctx,6301void* dst, size_t dstCapacity, size_t* dstPos,6302const void* src, size_t srcSize, size_t* srcPos,6303ZSTD_EndDirective endOp)6304{6305ZSTD_outBuffer output;6306ZSTD_inBuffer input;6307output.dst = dst;6308output.size = dstCapacity;6309output.pos = *dstPos;6310input.src = src;6311input.size = srcSize;6312input.pos = *srcPos;6313/* ZSTD_compressStream2() will check validity of dstPos and srcPos */6314{ size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);6315*dstPos = output.pos;6316*srcPos = input.pos;6317return cErr;6318}6319}63206321size_t ZSTD_compress2(ZSTD_CCtx* cctx,6322void* dst, size_t dstCapacity,6323const void* src, size_t srcSize)6324{6325ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;6326ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;6327DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);6328ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);6329/* Enable stable input/output buffers. */6330cctx->requestedParams.inBufferMode = ZSTD_bm_stable;6331cctx->requestedParams.outBufferMode = ZSTD_bm_stable;6332{ size_t oPos = 0;6333size_t iPos = 0;6334size_t const result = ZSTD_compressStream2_simpleArgs(cctx,6335dst, dstCapacity, &oPos,6336src, srcSize, &iPos,6337ZSTD_e_end);6338/* Reset to the original values. */6339cctx->requestedParams.inBufferMode = originalInBufferMode;6340cctx->requestedParams.outBufferMode = originalOutBufferMode;63416342FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");6343if (result != 0) { /* compression not completed, due to lack of output space */6344assert(oPos == dstCapacity);6345RETURN_ERROR(dstSize_tooSmall, "");6346}6347assert(iPos == srcSize); /* all input is expected consumed */6348return oPos;6349}6350}63516352/* ZSTD_validateSequence() :6353* @offCode : is presumed to follow format required by ZSTD_storeSeq()6354* @returns a ZSTD error code if sequence is not valid6355*/6356static size_t6357ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,6358size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)6359{6360U32 const windowSize = 1u << windowLog;6361/* posInSrc represents the amount of data the decoder would decode up to this point.6362* As long as the amount of data decoded is less than or equal to window size, offsets may be6363* larger than the total length of output decoded in order to reference the dict, even larger than6364* window size. After output surpasses windowSize, we're limited to windowSize offsets again.6365*/6366size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;6367size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;6368RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!");6369/* Validate maxNbSeq is large enough for the given matchLength and minMatch */6370RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch");6371return 0;6372}63736374/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */6375static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)6376{6377U32 offBase = OFFSET_TO_OFFBASE(rawOffset);63786379if (!ll0 && rawOffset == rep[0]) {6380offBase = REPCODE1_TO_OFFBASE;6381} else if (rawOffset == rep[1]) {6382offBase = REPCODE_TO_OFFBASE(2 - ll0);6383} else if (rawOffset == rep[2]) {6384offBase = REPCODE_TO_OFFBASE(3 - ll0);6385} else if (ll0 && rawOffset == rep[0] - 1) {6386offBase = REPCODE3_TO_OFFBASE;6387}6388return offBase;6389}63906391size_t6392ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,6393ZSTD_sequencePosition* seqPos,6394const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,6395const void* src, size_t blockSize,6396ZSTD_paramSwitch_e externalRepSearch)6397{6398U32 idx = seqPos->idx;6399U32 const startIdx = idx;6400BYTE const* ip = (BYTE const*)(src);6401const BYTE* const iend = ip + blockSize;6402repcodes_t updatedRepcodes;6403U32 dictSize;64046405DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize);64066407if (cctx->cdict) {6408dictSize = (U32)cctx->cdict->dictContentSize;6409} else if (cctx->prefixDict.dict) {6410dictSize = (U32)cctx->prefixDict.dictSize;6411} else {6412dictSize = 0;6413}6414ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));6415for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {6416U32 const litLength = inSeqs[idx].litLength;6417U32 const matchLength = inSeqs[idx].matchLength;6418U32 offBase;64196420if (externalRepSearch == ZSTD_ps_disable) {6421offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);6422} else {6423U32 const ll0 = (litLength == 0);6424offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);6425ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);6426}64276428DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);6429if (cctx->appliedParams.validateSequences) {6430seqPos->posInSrc += litLength + matchLength;6431FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,6432cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),6433"Sequence validation failed");6434}6435RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,6436"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");6437ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);6438ip += matchLength + litLength;6439}64406441/* If we skipped repcode search while parsing, we need to update repcodes now */6442assert(externalRepSearch != ZSTD_ps_auto);6443assert(idx >= startIdx);6444if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) {6445U32* const rep = updatedRepcodes.rep;6446U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */64476448if (lastSeqIdx >= startIdx + 2) {6449rep[2] = inSeqs[lastSeqIdx - 2].offset;6450rep[1] = inSeqs[lastSeqIdx - 1].offset;6451rep[0] = inSeqs[lastSeqIdx].offset;6452} else if (lastSeqIdx == startIdx + 1) {6453rep[2] = rep[0];6454rep[1] = inSeqs[lastSeqIdx - 1].offset;6455rep[0] = inSeqs[lastSeqIdx].offset;6456} else {6457assert(lastSeqIdx == startIdx);6458rep[2] = rep[1];6459rep[1] = rep[0];6460rep[0] = inSeqs[lastSeqIdx].offset;6461}6462}64636464ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));64656466if (inSeqs[idx].litLength) {6467DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);6468ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);6469ip += inSeqs[idx].litLength;6470seqPos->posInSrc += inSeqs[idx].litLength;6471}6472RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");6473seqPos->idx = idx+1;6474return 0;6475}64766477size_t6478ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,6479const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,6480const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch)6481{6482U32 idx = seqPos->idx;6483U32 startPosInSequence = seqPos->posInSequence;6484U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;6485size_t dictSize;6486BYTE const* ip = (BYTE const*)(src);6487BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */6488repcodes_t updatedRepcodes;6489U32 bytesAdjustment = 0;6490U32 finalMatchSplit = 0;64916492/* TODO(embg) support fast parsing mode in noBlockDelim mode */6493(void)externalRepSearch;64946495if (cctx->cdict) {6496dictSize = cctx->cdict->dictContentSize;6497} else if (cctx->prefixDict.dict) {6498dictSize = cctx->prefixDict.dictSize;6499} else {6500dictSize = 0;6501}6502DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);6503DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);6504ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));6505while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {6506const ZSTD_Sequence currSeq = inSeqs[idx];6507U32 litLength = currSeq.litLength;6508U32 matchLength = currSeq.matchLength;6509U32 const rawOffset = currSeq.offset;6510U32 offBase;65116512/* Modify the sequence depending on where endPosInSequence lies */6513if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {6514if (startPosInSequence >= litLength) {6515startPosInSequence -= litLength;6516litLength = 0;6517matchLength -= startPosInSequence;6518} else {6519litLength -= startPosInSequence;6520}6521/* Move to the next sequence */6522endPosInSequence -= currSeq.litLength + currSeq.matchLength;6523startPosInSequence = 0;6524} else {6525/* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence6526does not reach the end of the match. So, we have to split the sequence */6527DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",6528currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);6529if (endPosInSequence > litLength) {6530U32 firstHalfMatchLength;6531litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;6532firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;6533if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {6534/* Only ever split the match if it is larger than the block size */6535U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;6536if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {6537/* Move the endPosInSequence backward so that it creates match of minMatch length */6538endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;6539bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;6540firstHalfMatchLength -= bytesAdjustment;6541}6542matchLength = firstHalfMatchLength;6543/* Flag that we split the last match - after storing the sequence, exit the loop,6544but keep the value of endPosInSequence */6545finalMatchSplit = 1;6546} else {6547/* Move the position in sequence backwards so that we don't split match, and break to store6548* the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence6549* should go. We prefer to do this whenever it is not necessary to split the match, or if doing so6550* would cause the first half of the match to be too small6551*/6552bytesAdjustment = endPosInSequence - currSeq.litLength;6553endPosInSequence = currSeq.litLength;6554break;6555}6556} else {6557/* This sequence ends inside the literals, break to store the last literals */6558break;6559}6560}6561/* Check if this offset can be represented with a repcode */6562{ U32 const ll0 = (litLength == 0);6563offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0);6564ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);6565}65666567if (cctx->appliedParams.validateSequences) {6568seqPos->posInSrc += litLength + matchLength;6569FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,6570cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),6571"Sequence validation failed");6572}6573DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);6574RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,6575"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");6576ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);6577ip += matchLength + litLength;6578if (!finalMatchSplit)6579idx++; /* Next Sequence */6580}6581DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);6582assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);6583seqPos->idx = idx;6584seqPos->posInSequence = endPosInSequence;6585ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));65866587iend -= bytesAdjustment;6588if (ip != iend) {6589/* Store any last literals */6590U32 lastLLSize = (U32)(iend - ip);6591assert(ip <= iend);6592DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);6593ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);6594seqPos->posInSrc += lastLLSize;6595}65966597return bytesAdjustment;6598}65996600typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,6601const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,6602const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);6603static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)6604{6605ZSTD_sequenceCopier sequenceCopier = NULL;6606assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));6607if (mode == ZSTD_sf_explicitBlockDelimiters) {6608return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;6609} else if (mode == ZSTD_sf_noBlockDelimiters) {6610return ZSTD_copySequencesToSeqStoreNoBlockDelim;6611}6612assert(sequenceCopier != NULL);6613return sequenceCopier;6614}66156616/* Discover the size of next block by searching for the delimiter.6617* Note that a block delimiter **must** exist in this mode,6618* otherwise it's an input error.6619* The block size retrieved will be later compared to ensure it remains within bounds */6620static size_t6621blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)6622{6623int end = 0;6624size_t blockSize = 0;6625size_t spos = seqPos.idx;6626DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize);6627assert(spos <= inSeqsSize);6628while (spos < inSeqsSize) {6629end = (inSeqs[spos].offset == 0);6630blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;6631if (end) {6632if (inSeqs[spos].matchLength != 0)6633RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");6634break;6635}6636spos++;6637}6638if (!end)6639RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");6640return blockSize;6641}66426643/* More a "target" block size */6644static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining)6645{6646int const lastBlock = (remaining <= blockSize);6647return lastBlock ? remaining : blockSize;6648}66496650static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,6651size_t blockSize, size_t remaining,6652const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)6653{6654DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining);6655if (mode == ZSTD_sf_noBlockDelimiters)6656return blockSize_noDelimiter(blockSize, remaining);6657{ size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);6658FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");6659if (explicitBlockSize > blockSize)6660RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");6661if (explicitBlockSize > remaining)6662RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");6663return explicitBlockSize;6664}6665}66666667/* Compress, block-by-block, all of the sequences given.6668*6669* Returns the cumulative size of all compressed blocks (including their headers),6670* otherwise a ZSTD error.6671*/6672static size_t6673ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,6674void* dst, size_t dstCapacity,6675const ZSTD_Sequence* inSeqs, size_t inSeqsSize,6676const void* src, size_t srcSize)6677{6678size_t cSize = 0;6679size_t remaining = srcSize;6680ZSTD_sequencePosition seqPos = {0, 0, 0};66816682BYTE const* ip = (BYTE const*)src;6683BYTE* op = (BYTE*)dst;6684ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);66856686DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);6687/* Special case: empty frame */6688if (remaining == 0) {6689U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);6690RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");6691MEM_writeLE32(op, cBlockHeader24);6692op += ZSTD_blockHeaderSize;6693dstCapacity -= ZSTD_blockHeaderSize;6694cSize += ZSTD_blockHeaderSize;6695}66966697while (remaining) {6698size_t compressedSeqsSize;6699size_t cBlockSize;6700size_t additionalByteAdjustment;6701size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,6702cctx->blockSize, remaining,6703inSeqs, inSeqsSize, seqPos);6704U32 const lastBlock = (blockSize == remaining);6705FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");6706assert(blockSize <= remaining);6707ZSTD_resetSeqStore(&cctx->seqStore);6708DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);67096710additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes);6711FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");6712blockSize -= additionalByteAdjustment;67136714/* If blocks are too small, emit as a nocompress block */6715/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding6716* additional 1. We need to revisit and change this logic to be more consistent */6717if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {6718cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);6719FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");6720DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);6721cSize += cBlockSize;6722ip += blockSize;6723op += cBlockSize;6724remaining -= blockSize;6725dstCapacity -= cBlockSize;6726continue;6727}67286729RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");6730compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,6731&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,6732&cctx->appliedParams,6733op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,6734blockSize,6735cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,6736cctx->bmi2);6737FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");6738DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);67396740if (!cctx->isFirstBlock &&6741ZSTD_maybeRLE(&cctx->seqStore) &&6742ZSTD_isRLE(ip, blockSize)) {6743/* We don't want to emit our first block as a RLE even if it qualifies because6744* doing so will cause the decoder (cli only) to throw a "should consume all input error."6745* This is only an issue for zstd <= v1.4.36746*/6747compressedSeqsSize = 1;6748}67496750if (compressedSeqsSize == 0) {6751/* ZSTD_noCompressBlock writes the block header as well */6752cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);6753FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed");6754DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize);6755} else if (compressedSeqsSize == 1) {6756cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);6757FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed");6758DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize);6759} else {6760U32 cBlockHeader;6761/* Error checking and repcodes update */6762ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);6763if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)6764cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;67656766/* Write block header into beginning of block*/6767cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);6768MEM_writeLE24(op, cBlockHeader);6769cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;6770DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize);6771}67726773cSize += cBlockSize;67746775if (lastBlock) {6776break;6777} else {6778ip += blockSize;6779op += cBlockSize;6780remaining -= blockSize;6781dstCapacity -= cBlockSize;6782cctx->isFirstBlock = 0;6783}6784DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);6785}67866787DEBUGLOG(4, "cSize final total: %zu", cSize);6788return cSize;6789}67906791size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,6792void* dst, size_t dstCapacity,6793const ZSTD_Sequence* inSeqs, size_t inSeqsSize,6794const void* src, size_t srcSize)6795{6796BYTE* op = (BYTE*)dst;6797size_t cSize = 0;6798size_t compressedBlocksSize = 0;6799size_t frameHeaderSize = 0;68006801/* Transparent initialization stage, same as compressStream2() */6802DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity);6803assert(cctx != NULL);6804FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");6805/* Begin writing output, starting with frame header */6806frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);6807op += frameHeaderSize;6808dstCapacity -= frameHeaderSize;6809cSize += frameHeaderSize;6810if (cctx->appliedParams.fParams.checksumFlag && srcSize) {6811XXH64_update(&cctx->xxhState, src, srcSize);6812}6813/* cSize includes block header size and compressed sequences size */6814compressedBlocksSize = ZSTD_compressSequences_internal(cctx,6815op, dstCapacity,6816inSeqs, inSeqsSize,6817src, srcSize);6818FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");6819cSize += compressedBlocksSize;6820dstCapacity -= compressedBlocksSize;68216822if (cctx->appliedParams.fParams.checksumFlag) {6823U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);6824RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");6825DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);6826MEM_writeLE32((char*)dst + cSize, checksum);6827cSize += 4;6828}68296830DEBUGLOG(4, "Final compressed size: %zu", cSize);6831return cSize;6832}68336834/*====== Finalize ======*/68356836static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs)6837{6838const ZSTD_inBuffer nullInput = { NULL, 0, 0 };6839const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable);6840return stableInput ? zcs->expectedInBuffer : nullInput;6841}68426843/*! ZSTD_flushStream() :6844* @return : amount of data remaining to flush */6845size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)6846{6847ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);6848input.size = input.pos; /* do not ingest more input during flush */6849return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);6850}685168526853size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)6854{6855ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);6856size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);6857FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed");6858if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */6859/* single thread mode : attempt to calculate remaining to flush more precisely */6860{ size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;6861size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);6862size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;6863DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);6864return toFlush;6865}6866}686768686869/*-===== Pre-defined compression levels =====-*/6870#include "clevels.h"68716872int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }6873int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }6874int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }68756876static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)6877{6878ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);6879switch (cParams.strategy) {6880case ZSTD_fast:6881case ZSTD_dfast:6882break;6883case ZSTD_greedy:6884case ZSTD_lazy:6885case ZSTD_lazy2:6886cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;6887break;6888case ZSTD_btlazy2:6889case ZSTD_btopt:6890case ZSTD_btultra:6891case ZSTD_btultra2:6892break;6893}6894return cParams;6895}68966897static int ZSTD_dedicatedDictSearch_isSupported(6898ZSTD_compressionParameters const* cParams)6899{6900return (cParams->strategy >= ZSTD_greedy)6901&& (cParams->strategy <= ZSTD_lazy2)6902&& (cParams->hashLog > cParams->chainLog)6903&& (cParams->chainLog <= 24);6904}69056906/**6907* Reverses the adjustment applied to cparams when enabling dedicated dict6908* search. This is used to recover the params set to be used in the working6909* context. (Otherwise, those tables would also grow.)6910*/6911static void ZSTD_dedicatedDictSearch_revertCParams(6912ZSTD_compressionParameters* cParams) {6913switch (cParams->strategy) {6914case ZSTD_fast:6915case ZSTD_dfast:6916break;6917case ZSTD_greedy:6918case ZSTD_lazy:6919case ZSTD_lazy2:6920cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;6921if (cParams->hashLog < ZSTD_HASHLOG_MIN) {6922cParams->hashLog = ZSTD_HASHLOG_MIN;6923}6924break;6925case ZSTD_btlazy2:6926case ZSTD_btopt:6927case ZSTD_btultra:6928case ZSTD_btultra2:6929break;6930}6931}69326933static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)6934{6935switch (mode) {6936case ZSTD_cpm_unknown:6937case ZSTD_cpm_noAttachDict:6938case ZSTD_cpm_createCDict:6939break;6940case ZSTD_cpm_attachDict:6941dictSize = 0;6942break;6943default:6944assert(0);6945break;6946}6947{ int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;6948size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;6949return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;6950}6951}69526953/*! ZSTD_getCParams_internal() :6954* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.6955* Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.6956* Use dictSize == 0 for unknown or unused.6957* Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */6958static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)6959{6960U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);6961U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);6962int row;6963DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);69646965/* row */6966if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */6967else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */6968else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;6969else row = compressionLevel;69706971{ ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];6972DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);6973/* acceleration factor */6974if (compressionLevel < 0) {6975int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);6976cp.targetLength = (unsigned)(-clampedCompressionLevel);6977}6978/* refine parameters based on srcSize & dictSize */6979return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);6980}6981}69826983/*! ZSTD_getCParams() :6984* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.6985* Size values are optional, provide 0 if not known or unused */6986ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)6987{6988if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;6989return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);6990}69916992/*! ZSTD_getParams() :6993* same idea as ZSTD_getCParams()6994* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).6995* Fields of `ZSTD_frameParameters` are set to default values */6996static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {6997ZSTD_parameters params;6998ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);6999DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);7000ZSTD_memset(¶ms, 0, sizeof(params));7001params.cParams = cParams;7002params.fParams.contentSizeFlag = 1;7003return params;7004}70057006/*! ZSTD_getParams() :7007* same idea as ZSTD_getCParams()7008* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).7009* Fields of `ZSTD_frameParameters` are set to default values */7010ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {7011if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;7012return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);7013}70147015void ZSTD_registerSequenceProducer(7016ZSTD_CCtx* zc, void* mState,7017ZSTD_sequenceProducer_F* mFinder7018) {7019if (mFinder != NULL) {7020ZSTD_externalMatchCtx emctx;7021emctx.mState = mState;7022emctx.mFinder = mFinder;7023emctx.seqBuffer = NULL;7024emctx.seqBufferCapacity = 0;7025zc->externalMatchCtx = emctx;7026zc->requestedParams.useSequenceProducer = 1;7027} else {7028ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));7029zc->requestedParams.useSequenceProducer = 0;7030}7031}703270337034