/* Path: sys/contrib/zstd/lib/compress/zstd_compress.c */
/*1* Copyright (c) Yann Collet, Facebook, Inc.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/910/*-*************************************11* Dependencies12***************************************/13#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */14#include "../common/mem.h"15#include "hist.h" /* HIST_countFast_wksp */16#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */17#include "../common/fse.h"18#define HUF_STATIC_LINKING_ONLY19#include "../common/huf.h"20#include "zstd_compress_internal.h"21#include "zstd_compress_sequences.h"22#include "zstd_compress_literals.h"23#include "zstd_fast.h"24#include "zstd_double_fast.h"25#include "zstd_lazy.h"26#include "zstd_opt.h"27#include "zstd_ldm.h"28#include "zstd_compress_superblock.h"2930/* ***************************************************************31* Tuning parameters32*****************************************************************/33/*!34* COMPRESS_HEAPMODE :35* Select how default decompression function ZSTD_compress() allocates its context,36* on stack (0, default), or into heap (1).37* Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.38*/39#ifndef ZSTD_COMPRESS_HEAPMODE40# define ZSTD_COMPRESS_HEAPMODE 041#endif4243/*!44* ZSTD_HASHLOG3_MAX :45* Maximum size of the hash table dedicated to find 3-bytes matches,46* in log format, aka 17 => 1 << 17 == 128Ki positions.47* This structure is only used in zstd_opt.48* Since allocation is centralized for all strategies, it has to be known here.49* The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,50* so that zstd_opt.c doesn't need to know about this constant.51*/52#ifndef ZSTD_HASHLOG3_MAX53# define 
ZSTD_HASHLOG3_MAX 1754#endif5556/*-*************************************57* Helper functions58***************************************/59/* ZSTD_compressBound()60* Note that the result from this function is only compatible with the "normal"61* full-block strategy.62* When there are a lot of small blocks due to frequent flush in streaming mode63* the overhead of headers can make the compressed data to be larger than the64* return value of ZSTD_compressBound().65*/66size_t ZSTD_compressBound(size_t srcSize) {67return ZSTD_COMPRESSBOUND(srcSize);68}697071/*-*************************************72* Context memory management73***************************************/74struct ZSTD_CDict_s {75const void* dictContent;76size_t dictContentSize;77ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */78U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */79ZSTD_cwksp workspace;80ZSTD_matchState_t matchState;81ZSTD_compressedBlockState_t cBlockState;82ZSTD_customMem customMem;83U32 dictID;84int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */85ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use86* row-based matchfinder. 
Unless the cdict is reloaded, we will use87* the same greedy/lazy matchfinder at compression time.88*/89}; /* typedef'd to ZSTD_CDict within "zstd.h" */9091ZSTD_CCtx* ZSTD_createCCtx(void)92{93return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);94}9596static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)97{98assert(cctx != NULL);99ZSTD_memset(cctx, 0, sizeof(*cctx));100cctx->customMem = memManager;101cctx->bmi2 = ZSTD_cpuSupportsBmi2();102{ size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);103assert(!ZSTD_isError(err));104(void)err;105}106}107108ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)109{110ZSTD_STATIC_ASSERT(zcss_init==0);111ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));112if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;113{ ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);114if (!cctx) return NULL;115ZSTD_initCCtx(cctx, customMem);116return cctx;117}118}119120ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)121{122ZSTD_cwksp ws;123ZSTD_CCtx* cctx;124if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */125if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */126ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);127128cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));129if (cctx == NULL) return NULL;130131ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));132ZSTD_cwksp_move(&cctx->workspace, &ws);133cctx->staticSize = workspaceSize;134135/* statically sized space. 
entropyWorkspace never moves (but prev/next block swap places) */136if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;137cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));138cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));139cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);140cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());141return cctx;142}143144/**145* Clears and frees all of the dictionaries in the CCtx.146*/147static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)148{149ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);150ZSTD_freeCDict(cctx->localDict.cdict);151ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));152ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));153cctx->cdict = NULL;154}155156static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)157{158size_t const bufferSize = dict.dictBuffer != NULL ? 
dict.dictSize : 0;159size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);160return bufferSize + cdictSize;161}162163static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)164{165assert(cctx != NULL);166assert(cctx->staticSize == 0);167ZSTD_clearAllDicts(cctx);168#ifdef ZSTD_MULTITHREAD169ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;170#endif171ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);172}173174size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)175{176if (cctx==NULL) return 0; /* support free on NULL */177RETURN_ERROR_IF(cctx->staticSize, memory_allocation,178"not compatible with static CCtx");179{180int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);181ZSTD_freeCCtxContent(cctx);182if (!cctxInWorkspace) {183ZSTD_customFree(cctx, cctx->customMem);184}185}186return 0;187}188189190static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)191{192#ifdef ZSTD_MULTITHREAD193return ZSTDMT_sizeof_CCtx(cctx->mtctx);194#else195(void)cctx;196return 0;197#endif198}199200201size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)202{203if (cctx==NULL) return 0; /* support sizeof on NULL */204/* cctx may be in the workspace */205return (cctx->workspace.workspace == cctx ? 
0 : sizeof(*cctx))206+ ZSTD_cwksp_sizeof(&cctx->workspace)207+ ZSTD_sizeof_localDict(cctx->localDict)208+ ZSTD_sizeof_mtctx(cctx);209}210211size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)212{213return ZSTD_sizeof_CCtx(zcs); /* same object */214}215216/* private API call, for dictBuilder only */217const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }218219/* Returns true if the strategy supports using a row based matchfinder */220static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {221return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);222}223224/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder225* for this compression.226*/227static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {228assert(mode != ZSTD_ps_auto);229return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);230}231232/* Returns row matchfinder usage given an initial mode and cParams */233static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,234const ZSTD_compressionParameters* const cParams) {235#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)236int const kHasSIMD128 = 1;237#else238int const kHasSIMD128 = 0;239#endif240if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */241mode = ZSTD_ps_disable;242if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;243if (kHasSIMD128) {244if (cParams->windowLog > 14) mode = ZSTD_ps_enable;245} else {246if (cParams->windowLog > 17) mode = ZSTD_ps_enable;247}248return mode;249}250251/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */252static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,253const ZSTD_compressionParameters* const cParams) {254if (mode != ZSTD_ps_auto) return mode;255return 
(cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;256}257258/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */259static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,260const ZSTD_paramSwitch_e useRowMatchFinder,261const U32 forDDSDict) {262assert(useRowMatchFinder != ZSTD_ps_auto);263/* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.264* We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.265*/266return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));267}268269/* Returns 1 if compression parameters are such that we should270* enable long distance matching (wlog >= 27, strategy >= btopt).271* Returns 0 otherwise.272*/273static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,274const ZSTD_compressionParameters* const cParams) {275if (mode != ZSTD_ps_auto) return mode;276return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? 
ZSTD_ps_enable : ZSTD_ps_disable;277}278279static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(280ZSTD_compressionParameters cParams)281{282ZSTD_CCtx_params cctxParams;283/* should not matter, as all cParams are presumed properly defined */284ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);285cctxParams.cParams = cParams;286287/* Adjust advanced params according to cParams */288cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);289if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {290ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);291assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);292assert(cctxParams.ldmParams.hashRateLog < 32);293}294cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);295cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);296assert(!ZSTD_checkCParams(cParams));297return cctxParams;298}299300static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(301ZSTD_customMem customMem)302{303ZSTD_CCtx_params* params;304if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;305params = (ZSTD_CCtx_params*)ZSTD_customCalloc(306sizeof(ZSTD_CCtx_params), customMem);307if (!params) { return NULL; }308ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);309params->customMem = customMem;310return params;311}312313ZSTD_CCtx_params* ZSTD_createCCtxParams(void)314{315return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);316}317318size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)319{320if (params == NULL) { return 0; }321ZSTD_customFree(params, params->customMem);322return 0;323}324325size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)326{327return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);328}329330size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {331RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL 
pointer!");332ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));333cctxParams->compressionLevel = compressionLevel;334cctxParams->fParams.contentSizeFlag = 1;335return 0;336}337338#define ZSTD_NO_CLEVEL 0339340/**341* Initializes the cctxParams from params and compressionLevel.342* @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.343*/344static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)345{346assert(!ZSTD_checkCParams(params->cParams));347ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));348cctxParams->cParams = params->cParams;349cctxParams->fParams = params->fParams;350/* Should not matter, as all cParams are presumed properly defined.351* But, set it for tracing anyway.352*/353cctxParams->compressionLevel = compressionLevel;354cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams);355cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, ¶ms->cParams);356cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams);357DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",358cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);359}360361size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)362{363RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");364FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");365ZSTD_CCtxParams_init_internal(cctxParams, ¶ms, ZSTD_NO_CLEVEL);366return 0;367}368369/**370* Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.371* @param param Validated zstd parameters.372*/373static void ZSTD_CCtxParams_setZstdParams(374ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* 
params)375{376assert(!ZSTD_checkCParams(params->cParams));377cctxParams->cParams = params->cParams;378cctxParams->fParams = params->fParams;379/* Should not matter, as all cParams are presumed properly defined.380* But, set it for tracing anyway.381*/382cctxParams->compressionLevel = ZSTD_NO_CLEVEL;383}384385ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)386{387ZSTD_bounds bounds = { 0, 0, 0 };388389switch(param)390{391case ZSTD_c_compressionLevel:392bounds.lowerBound = ZSTD_minCLevel();393bounds.upperBound = ZSTD_maxCLevel();394return bounds;395396case ZSTD_c_windowLog:397bounds.lowerBound = ZSTD_WINDOWLOG_MIN;398bounds.upperBound = ZSTD_WINDOWLOG_MAX;399return bounds;400401case ZSTD_c_hashLog:402bounds.lowerBound = ZSTD_HASHLOG_MIN;403bounds.upperBound = ZSTD_HASHLOG_MAX;404return bounds;405406case ZSTD_c_chainLog:407bounds.lowerBound = ZSTD_CHAINLOG_MIN;408bounds.upperBound = ZSTD_CHAINLOG_MAX;409return bounds;410411case ZSTD_c_searchLog:412bounds.lowerBound = ZSTD_SEARCHLOG_MIN;413bounds.upperBound = ZSTD_SEARCHLOG_MAX;414return bounds;415416case ZSTD_c_minMatch:417bounds.lowerBound = ZSTD_MINMATCH_MIN;418bounds.upperBound = ZSTD_MINMATCH_MAX;419return bounds;420421case ZSTD_c_targetLength:422bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;423bounds.upperBound = ZSTD_TARGETLENGTH_MAX;424return bounds;425426case ZSTD_c_strategy:427bounds.lowerBound = ZSTD_STRATEGY_MIN;428bounds.upperBound = ZSTD_STRATEGY_MAX;429return bounds;430431case ZSTD_c_contentSizeFlag:432bounds.lowerBound = 0;433bounds.upperBound = 1;434return bounds;435436case ZSTD_c_checksumFlag:437bounds.lowerBound = 0;438bounds.upperBound = 1;439return bounds;440441case ZSTD_c_dictIDFlag:442bounds.lowerBound = 0;443bounds.upperBound = 1;444return bounds;445446case ZSTD_c_nbWorkers:447bounds.lowerBound = 0;448#ifdef ZSTD_MULTITHREAD449bounds.upperBound = ZSTDMT_NBWORKERS_MAX;450#else451bounds.upperBound = 0;452#endif453return bounds;454455case ZSTD_c_jobSize:456bounds.lowerBound = 0;457#ifdef 
ZSTD_MULTITHREAD458bounds.upperBound = ZSTDMT_JOBSIZE_MAX;459#else460bounds.upperBound = 0;461#endif462return bounds;463464case ZSTD_c_overlapLog:465#ifdef ZSTD_MULTITHREAD466bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;467bounds.upperBound = ZSTD_OVERLAPLOG_MAX;468#else469bounds.lowerBound = 0;470bounds.upperBound = 0;471#endif472return bounds;473474case ZSTD_c_enableDedicatedDictSearch:475bounds.lowerBound = 0;476bounds.upperBound = 1;477return bounds;478479case ZSTD_c_enableLongDistanceMatching:480bounds.lowerBound = 0;481bounds.upperBound = 1;482return bounds;483484case ZSTD_c_ldmHashLog:485bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;486bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;487return bounds;488489case ZSTD_c_ldmMinMatch:490bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;491bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;492return bounds;493494case ZSTD_c_ldmBucketSizeLog:495bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;496bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;497return bounds;498499case ZSTD_c_ldmHashRateLog:500bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;501bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;502return bounds;503504/* experimental parameters */505case ZSTD_c_rsyncable:506bounds.lowerBound = 0;507bounds.upperBound = 1;508return bounds;509510case ZSTD_c_forceMaxWindow :511bounds.lowerBound = 0;512bounds.upperBound = 1;513return bounds;514515case ZSTD_c_format:516ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);517bounds.lowerBound = ZSTD_f_zstd1;518bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */519return bounds;520521case ZSTD_c_forceAttachDict:522ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);523bounds.lowerBound = ZSTD_dictDefaultAttach;524bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? 
*/525return bounds;526527case ZSTD_c_literalCompressionMode:528ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);529bounds.lowerBound = (int)ZSTD_ps_auto;530bounds.upperBound = (int)ZSTD_ps_disable;531return bounds;532533case ZSTD_c_targetCBlockSize:534bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;535bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;536return bounds;537538case ZSTD_c_srcSizeHint:539bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;540bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;541return bounds;542543case ZSTD_c_stableInBuffer:544case ZSTD_c_stableOutBuffer:545bounds.lowerBound = (int)ZSTD_bm_buffered;546bounds.upperBound = (int)ZSTD_bm_stable;547return bounds;548549case ZSTD_c_blockDelimiters:550bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;551bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;552return bounds;553554case ZSTD_c_validateSequences:555bounds.lowerBound = 0;556bounds.upperBound = 1;557return bounds;558559case ZSTD_c_useBlockSplitter:560bounds.lowerBound = (int)ZSTD_ps_auto;561bounds.upperBound = (int)ZSTD_ps_disable;562return bounds;563564case ZSTD_c_useRowMatchFinder:565bounds.lowerBound = (int)ZSTD_ps_auto;566bounds.upperBound = (int)ZSTD_ps_disable;567return bounds;568569case ZSTD_c_deterministicRefPrefix:570bounds.lowerBound = 0;571bounds.upperBound = 1;572return bounds;573574default:575bounds.error = ERROR(parameter_unsupported);576return bounds;577}578}579580/* ZSTD_cParam_clampBounds:581* Clamps the value into the bounded range.582*/583static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)584{585ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);586if (ZSTD_isError(bounds.error)) return bounds.error;587if (*value < bounds.lowerBound) *value = bounds.lowerBound;588if (*value > bounds.upperBound) *value = bounds.upperBound;589return 0;590}591592#define BOUNDCHECK(cParam, val) { \593RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \594parameter_outOfBound, "Param out of 
bounds"); \595}596597598static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)599{600switch(param)601{602case ZSTD_c_compressionLevel:603case ZSTD_c_hashLog:604case ZSTD_c_chainLog:605case ZSTD_c_searchLog:606case ZSTD_c_minMatch:607case ZSTD_c_targetLength:608case ZSTD_c_strategy:609return 1;610611case ZSTD_c_format:612case ZSTD_c_windowLog:613case ZSTD_c_contentSizeFlag:614case ZSTD_c_checksumFlag:615case ZSTD_c_dictIDFlag:616case ZSTD_c_forceMaxWindow :617case ZSTD_c_nbWorkers:618case ZSTD_c_jobSize:619case ZSTD_c_overlapLog:620case ZSTD_c_rsyncable:621case ZSTD_c_enableDedicatedDictSearch:622case ZSTD_c_enableLongDistanceMatching:623case ZSTD_c_ldmHashLog:624case ZSTD_c_ldmMinMatch:625case ZSTD_c_ldmBucketSizeLog:626case ZSTD_c_ldmHashRateLog:627case ZSTD_c_forceAttachDict:628case ZSTD_c_literalCompressionMode:629case ZSTD_c_targetCBlockSize:630case ZSTD_c_srcSizeHint:631case ZSTD_c_stableInBuffer:632case ZSTD_c_stableOutBuffer:633case ZSTD_c_blockDelimiters:634case ZSTD_c_validateSequences:635case ZSTD_c_useBlockSplitter:636case ZSTD_c_useRowMatchFinder:637case ZSTD_c_deterministicRefPrefix:638default:639return 0;640}641}642643size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)644{645DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);646if (cctx->streamStage != zcss_init) {647if (ZSTD_isUpdateAuthorized(param)) {648cctx->cParamsChanged = 1;649} else {650RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");651} }652653switch(param)654{655case ZSTD_c_nbWorkers:656RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,657"MT not compatible with static alloc");658break;659660case ZSTD_c_compressionLevel:661case ZSTD_c_windowLog:662case ZSTD_c_hashLog:663case ZSTD_c_chainLog:664case ZSTD_c_searchLog:665case ZSTD_c_minMatch:666case ZSTD_c_targetLength:667case ZSTD_c_strategy:668case ZSTD_c_ldmHashRateLog:669case ZSTD_c_format:670case ZSTD_c_contentSizeFlag:671case ZSTD_c_checksumFlag:672case 
ZSTD_c_dictIDFlag:673case ZSTD_c_forceMaxWindow:674case ZSTD_c_forceAttachDict:675case ZSTD_c_literalCompressionMode:676case ZSTD_c_jobSize:677case ZSTD_c_overlapLog:678case ZSTD_c_rsyncable:679case ZSTD_c_enableDedicatedDictSearch:680case ZSTD_c_enableLongDistanceMatching:681case ZSTD_c_ldmHashLog:682case ZSTD_c_ldmMinMatch:683case ZSTD_c_ldmBucketSizeLog:684case ZSTD_c_targetCBlockSize:685case ZSTD_c_srcSizeHint:686case ZSTD_c_stableInBuffer:687case ZSTD_c_stableOutBuffer:688case ZSTD_c_blockDelimiters:689case ZSTD_c_validateSequences:690case ZSTD_c_useBlockSplitter:691case ZSTD_c_useRowMatchFinder:692case ZSTD_c_deterministicRefPrefix:693break;694695default: RETURN_ERROR(parameter_unsupported, "unknown parameter");696}697return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);698}699700size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,701ZSTD_cParameter param, int value)702{703DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);704switch(param)705{706case ZSTD_c_format :707BOUNDCHECK(ZSTD_c_format, value);708CCtxParams->format = (ZSTD_format_e)value;709return (size_t)CCtxParams->format;710711case ZSTD_c_compressionLevel : {712FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");713if (value == 0)714CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */715else716CCtxParams->compressionLevel = value;717if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;718return 0; /* return type (size_t) cannot represent negative values */719}720721case ZSTD_c_windowLog :722if (value!=0) /* 0 => use default */723BOUNDCHECK(ZSTD_c_windowLog, value);724CCtxParams->cParams.windowLog = (U32)value;725return CCtxParams->cParams.windowLog;726727case ZSTD_c_hashLog :728if (value!=0) /* 0 => use default */729BOUNDCHECK(ZSTD_c_hashLog, value);730CCtxParams->cParams.hashLog = (U32)value;731return CCtxParams->cParams.hashLog;732733case ZSTD_c_chainLog :734if (value!=0) /* 0 => use 
default */735BOUNDCHECK(ZSTD_c_chainLog, value);736CCtxParams->cParams.chainLog = (U32)value;737return CCtxParams->cParams.chainLog;738739case ZSTD_c_searchLog :740if (value!=0) /* 0 => use default */741BOUNDCHECK(ZSTD_c_searchLog, value);742CCtxParams->cParams.searchLog = (U32)value;743return (size_t)value;744745case ZSTD_c_minMatch :746if (value!=0) /* 0 => use default */747BOUNDCHECK(ZSTD_c_minMatch, value);748CCtxParams->cParams.minMatch = value;749return CCtxParams->cParams.minMatch;750751case ZSTD_c_targetLength :752BOUNDCHECK(ZSTD_c_targetLength, value);753CCtxParams->cParams.targetLength = value;754return CCtxParams->cParams.targetLength;755756case ZSTD_c_strategy :757if (value!=0) /* 0 => use default */758BOUNDCHECK(ZSTD_c_strategy, value);759CCtxParams->cParams.strategy = (ZSTD_strategy)value;760return (size_t)CCtxParams->cParams.strategy;761762case ZSTD_c_contentSizeFlag :763/* Content size written in frame header _when known_ (default:1) */764DEBUGLOG(4, "set content size flag = %u", (value!=0));765CCtxParams->fParams.contentSizeFlag = value != 0;766return CCtxParams->fParams.contentSizeFlag;767768case ZSTD_c_checksumFlag :769/* A 32-bits content checksum will be calculated and written at end of frame (default:0) */770CCtxParams->fParams.checksumFlag = value != 0;771return CCtxParams->fParams.checksumFlag;772773case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */774DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));775CCtxParams->fParams.noDictIDFlag = !value;776return !CCtxParams->fParams.noDictIDFlag;777778case ZSTD_c_forceMaxWindow :779CCtxParams->forceWindow = (value != 0);780return CCtxParams->forceWindow;781782case ZSTD_c_forceAttachDict : {783const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;784BOUNDCHECK(ZSTD_c_forceAttachDict, pref);785CCtxParams->attachDictPref = pref;786return CCtxParams->attachDictPref;787}788789case ZSTD_c_literalCompressionMode : {790const ZSTD_paramSwitch_e 
lcm = (ZSTD_paramSwitch_e)value;791BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);792CCtxParams->literalCompressionMode = lcm;793return CCtxParams->literalCompressionMode;794}795796case ZSTD_c_nbWorkers :797#ifndef ZSTD_MULTITHREAD798RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");799return 0;800#else801FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");802CCtxParams->nbWorkers = value;803return CCtxParams->nbWorkers;804#endif805806case ZSTD_c_jobSize :807#ifndef ZSTD_MULTITHREAD808RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");809return 0;810#else811/* Adjust to the minimum non-default value. */812if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)813value = ZSTDMT_JOBSIZE_MIN;814FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");815assert(value >= 0);816CCtxParams->jobSize = value;817return CCtxParams->jobSize;818#endif819820case ZSTD_c_overlapLog :821#ifndef ZSTD_MULTITHREAD822RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");823return 0;824#else825FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");826CCtxParams->overlapLog = value;827return CCtxParams->overlapLog;828#endif829830case ZSTD_c_rsyncable :831#ifndef ZSTD_MULTITHREAD832RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");833return 0;834#else835FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");836CCtxParams->rsyncable = value;837return CCtxParams->rsyncable;838#endif839840case ZSTD_c_enableDedicatedDictSearch :841CCtxParams->enableDedicatedDictSearch = (value!=0);842return CCtxParams->enableDedicatedDictSearch;843844case ZSTD_c_enableLongDistanceMatching :845CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;846return CCtxParams->ldmParams.enableLdm;847848case ZSTD_c_ldmHashLog :849if (value!=0) /* 0 ==> auto */850BOUNDCHECK(ZSTD_c_ldmHashLog, value);851CCtxParams->ldmParams.hashLog = 
value;852return CCtxParams->ldmParams.hashLog;853854case ZSTD_c_ldmMinMatch :855if (value!=0) /* 0 ==> default */856BOUNDCHECK(ZSTD_c_ldmMinMatch, value);857CCtxParams->ldmParams.minMatchLength = value;858return CCtxParams->ldmParams.minMatchLength;859860case ZSTD_c_ldmBucketSizeLog :861if (value!=0) /* 0 ==> default */862BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);863CCtxParams->ldmParams.bucketSizeLog = value;864return CCtxParams->ldmParams.bucketSizeLog;865866case ZSTD_c_ldmHashRateLog :867if (value!=0) /* 0 ==> default */868BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);869CCtxParams->ldmParams.hashRateLog = value;870return CCtxParams->ldmParams.hashRateLog;871872case ZSTD_c_targetCBlockSize :873if (value!=0) /* 0 ==> default */874BOUNDCHECK(ZSTD_c_targetCBlockSize, value);875CCtxParams->targetCBlockSize = value;876return CCtxParams->targetCBlockSize;877878case ZSTD_c_srcSizeHint :879if (value!=0) /* 0 ==> default */880BOUNDCHECK(ZSTD_c_srcSizeHint, value);881CCtxParams->srcSizeHint = value;882return CCtxParams->srcSizeHint;883884case ZSTD_c_stableInBuffer:885BOUNDCHECK(ZSTD_c_stableInBuffer, value);886CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;887return CCtxParams->inBufferMode;888889case ZSTD_c_stableOutBuffer:890BOUNDCHECK(ZSTD_c_stableOutBuffer, value);891CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;892return CCtxParams->outBufferMode;893894case ZSTD_c_blockDelimiters:895BOUNDCHECK(ZSTD_c_blockDelimiters, value);896CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;897return CCtxParams->blockDelimiters;898899case ZSTD_c_validateSequences:900BOUNDCHECK(ZSTD_c_validateSequences, value);901CCtxParams->validateSequences = value;902return CCtxParams->validateSequences;903904case ZSTD_c_useBlockSplitter:905BOUNDCHECK(ZSTD_c_useBlockSplitter, value);906CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;907return CCtxParams->useBlockSplitter;908909case ZSTD_c_useRowMatchFinder:910BOUNDCHECK(ZSTD_c_useRowMatchFinder, 
value);911CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;912return CCtxParams->useRowMatchFinder;913914case ZSTD_c_deterministicRefPrefix:915BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);916CCtxParams->deterministicRefPrefix = !!value;917return CCtxParams->deterministicRefPrefix;918919default: RETURN_ERROR(parameter_unsupported, "unknown parameter");920}921}922923size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)924{925return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);926}927928size_t ZSTD_CCtxParams_getParameter(929ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)930{931switch(param)932{933case ZSTD_c_format :934*value = CCtxParams->format;935break;936case ZSTD_c_compressionLevel :937*value = CCtxParams->compressionLevel;938break;939case ZSTD_c_windowLog :940*value = (int)CCtxParams->cParams.windowLog;941break;942case ZSTD_c_hashLog :943*value = (int)CCtxParams->cParams.hashLog;944break;945case ZSTD_c_chainLog :946*value = (int)CCtxParams->cParams.chainLog;947break;948case ZSTD_c_searchLog :949*value = CCtxParams->cParams.searchLog;950break;951case ZSTD_c_minMatch :952*value = CCtxParams->cParams.minMatch;953break;954case ZSTD_c_targetLength :955*value = CCtxParams->cParams.targetLength;956break;957case ZSTD_c_strategy :958*value = (unsigned)CCtxParams->cParams.strategy;959break;960case ZSTD_c_contentSizeFlag :961*value = CCtxParams->fParams.contentSizeFlag;962break;963case ZSTD_c_checksumFlag :964*value = CCtxParams->fParams.checksumFlag;965break;966case ZSTD_c_dictIDFlag :967*value = !CCtxParams->fParams.noDictIDFlag;968break;969case ZSTD_c_forceMaxWindow :970*value = CCtxParams->forceWindow;971break;972case ZSTD_c_forceAttachDict :973*value = CCtxParams->attachDictPref;974break;975case ZSTD_c_literalCompressionMode :976*value = CCtxParams->literalCompressionMode;977break;978case ZSTD_c_nbWorkers :979#ifndef ZSTD_MULTITHREAD980assert(CCtxParams->nbWorkers == 
0);981#endif982*value = CCtxParams->nbWorkers;983break;984case ZSTD_c_jobSize :985#ifndef ZSTD_MULTITHREAD986RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");987#else988assert(CCtxParams->jobSize <= INT_MAX);989*value = (int)CCtxParams->jobSize;990break;991#endif992case ZSTD_c_overlapLog :993#ifndef ZSTD_MULTITHREAD994RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");995#else996*value = CCtxParams->overlapLog;997break;998#endif999case ZSTD_c_rsyncable :1000#ifndef ZSTD_MULTITHREAD1001RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");1002#else1003*value = CCtxParams->rsyncable;1004break;1005#endif1006case ZSTD_c_enableDedicatedDictSearch :1007*value = CCtxParams->enableDedicatedDictSearch;1008break;1009case ZSTD_c_enableLongDistanceMatching :1010*value = CCtxParams->ldmParams.enableLdm;1011break;1012case ZSTD_c_ldmHashLog :1013*value = CCtxParams->ldmParams.hashLog;1014break;1015case ZSTD_c_ldmMinMatch :1016*value = CCtxParams->ldmParams.minMatchLength;1017break;1018case ZSTD_c_ldmBucketSizeLog :1019*value = CCtxParams->ldmParams.bucketSizeLog;1020break;1021case ZSTD_c_ldmHashRateLog :1022*value = CCtxParams->ldmParams.hashRateLog;1023break;1024case ZSTD_c_targetCBlockSize :1025*value = (int)CCtxParams->targetCBlockSize;1026break;1027case ZSTD_c_srcSizeHint :1028*value = (int)CCtxParams->srcSizeHint;1029break;1030case ZSTD_c_stableInBuffer :1031*value = (int)CCtxParams->inBufferMode;1032break;1033case ZSTD_c_stableOutBuffer :1034*value = (int)CCtxParams->outBufferMode;1035break;1036case ZSTD_c_blockDelimiters :1037*value = (int)CCtxParams->blockDelimiters;1038break;1039case ZSTD_c_validateSequences :1040*value = (int)CCtxParams->validateSequences;1041break;1042case ZSTD_c_useBlockSplitter :1043*value = (int)CCtxParams->useBlockSplitter;1044break;1045case ZSTD_c_useRowMatchFinder :1046*value = (int)CCtxParams->useRowMatchFinder;1047break;1048case ZSTD_c_deterministicRefPrefix:1049*value = 
(int)CCtxParams->deterministicRefPrefix;1050break;1051default: RETURN_ERROR(parameter_unsupported, "unknown parameter");1052}1053return 0;1054}10551056/** ZSTD_CCtx_setParametersUsingCCtxParams() :1057* just applies `params` into `cctx`1058* no action is performed, parameters are merely stored.1059* If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.1060* This is possible even if a compression is ongoing.1061* In which case, new parameters will be applied on the fly, starting with next compression job.1062*/1063size_t ZSTD_CCtx_setParametersUsingCCtxParams(1064ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)1065{1066DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");1067RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1068"The context is in the wrong stage!");1069RETURN_ERROR_IF(cctx->cdict, stage_wrong,1070"Can't override parameters with cdict attached (some must "1071"be inherited from the cdict).");10721073cctx->requestedParams = *params;1074return 0;1075}10761077size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)1078{1079DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);1080RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1081"Can't set pledgedSrcSize when not in init stage.");1082cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;1083return 0;1084}10851086static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(1087int const compressionLevel,1088size_t const dictSize);1089static int ZSTD_dedicatedDictSearch_isSupported(1090const ZSTD_compressionParameters* cParams);1091static void ZSTD_dedicatedDictSearch_revertCParams(1092ZSTD_compressionParameters* cParams);10931094/**1095* Initializes the local dict using the requested parameters.1096* NOTE: This does not use the pledged src size, because it may be used for more1097* than one compression.1098*/1099static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)1100{1101ZSTD_localDict* const dl = &cctx->localDict;1102if 
(dl->dict == NULL) {1103/* No local dictionary. */1104assert(dl->dictBuffer == NULL);1105assert(dl->cdict == NULL);1106assert(dl->dictSize == 0);1107return 0;1108}1109if (dl->cdict != NULL) {1110assert(cctx->cdict == dl->cdict);1111/* Local dictionary already initialized. */1112return 0;1113}1114assert(dl->dictSize > 0);1115assert(cctx->cdict == NULL);1116assert(cctx->prefixDict.dict == NULL);11171118dl->cdict = ZSTD_createCDict_advanced2(1119dl->dict,1120dl->dictSize,1121ZSTD_dlm_byRef,1122dl->dictContentType,1123&cctx->requestedParams,1124cctx->customMem);1125RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");1126cctx->cdict = dl->cdict;1127return 0;1128}11291130size_t ZSTD_CCtx_loadDictionary_advanced(1131ZSTD_CCtx* cctx, const void* dict, size_t dictSize,1132ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)1133{1134RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1135"Can't load a dictionary when ctx is not in init stage.");1136DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);1137ZSTD_clearAllDicts(cctx); /* in case one already exists */1138if (dict == NULL || dictSize == 0) /* no dictionary mode */1139return 0;1140if (dictLoadMethod == ZSTD_dlm_byRef) {1141cctx->localDict.dict = dict;1142} else {1143void* dictBuffer;1144RETURN_ERROR_IF(cctx->staticSize, memory_allocation,1145"no malloc for static CCtx");1146dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);1147RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");1148ZSTD_memcpy(dictBuffer, dict, dictSize);1149cctx->localDict.dictBuffer = dictBuffer;1150cctx->localDict.dict = dictBuffer;1151}1152cctx->localDict.dictSize = dictSize;1153cctx->localDict.dictContentType = dictContentType;1154return 0;1155}11561157size_t ZSTD_CCtx_loadDictionary_byReference(1158ZSTD_CCtx* cctx, const void* dict, size_t dictSize)1159{1160return ZSTD_CCtx_loadDictionary_advanced(1161cctx, dict, dictSize, ZSTD_dlm_byRef, 
ZSTD_dct_auto);1162}11631164size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)1165{1166return ZSTD_CCtx_loadDictionary_advanced(1167cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);1168}116911701171size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)1172{1173RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1174"Can't ref a dict when ctx not in init stage.");1175/* Free the existing local cdict (if any) to save memory. */1176ZSTD_clearAllDicts(cctx);1177cctx->cdict = cdict;1178return 0;1179}11801181size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)1182{1183RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1184"Can't ref a pool when ctx not in init stage.");1185cctx->pool = pool;1186return 0;1187}11881189size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)1190{1191return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);1192}11931194size_t ZSTD_CCtx_refPrefix_advanced(1195ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)1196{1197RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1198"Can't ref a prefix when ctx not in init stage.");1199ZSTD_clearAllDicts(cctx);1200if (prefix != NULL && prefixSize > 0) {1201cctx->prefixDict.dict = prefix;1202cctx->prefixDict.dictSize = prefixSize;1203cctx->prefixDict.dictContentType = dictContentType;1204}1205return 0;1206}12071208/*! 
ZSTD_CCtx_reset() :1209* Also dumps dictionary */1210size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)1211{1212if ( (reset == ZSTD_reset_session_only)1213|| (reset == ZSTD_reset_session_and_parameters) ) {1214cctx->streamStage = zcss_init;1215cctx->pledgedSrcSizePlusOne = 0;1216}1217if ( (reset == ZSTD_reset_parameters)1218|| (reset == ZSTD_reset_session_and_parameters) ) {1219RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,1220"Can't reset parameters only when not in init stage.");1221ZSTD_clearAllDicts(cctx);1222return ZSTD_CCtxParams_reset(&cctx->requestedParams);1223}1224return 0;1225}122612271228/** ZSTD_checkCParams() :1229control CParam values remain within authorized range.1230@return : 0, or an error code if one value is beyond authorized range */1231size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)1232{1233BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);1234BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog);1235BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog);1236BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);1237BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch);1238BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);1239BOUNDCHECK(ZSTD_c_strategy, cParams.strategy);1240return 0;1241}12421243/** ZSTD_clampCParams() :1244* make CParam values within valid range.1245* @return : valid CParams */1246static ZSTD_compressionParameters1247ZSTD_clampCParams(ZSTD_compressionParameters cParams)1248{1249# define CLAMP_TYPE(cParam, val, type) { \1250ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \1251if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \1252else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \1253}1254# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)1255CLAMP(ZSTD_c_windowLog, cParams.windowLog);1256CLAMP(ZSTD_c_chainLog, cParams.chainLog);1257CLAMP(ZSTD_c_hashLog, cParams.hashLog);1258CLAMP(ZSTD_c_searchLog, 
cParams.searchLog);1259CLAMP(ZSTD_c_minMatch, cParams.minMatch);1260CLAMP(ZSTD_c_targetLength,cParams.targetLength);1261CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);1262return cParams;1263}12641265/** ZSTD_cycleLog() :1266* condition for correct operation : hashLog > 1 */1267U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)1268{1269U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);1270return hashLog - btScale;1271}12721273/** ZSTD_dictAndWindowLog() :1274* Returns an adjusted window log that is large enough to fit the source and the dictionary.1275* The zstd format says that the entire dictionary is valid if one byte of the dictionary1276* is within the window. So the hashLog and chainLog should be large enough to reference both1277* the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing1278* the hashLog and windowLog.1279* NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.1280*/1281static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)1282{1283const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;1284/* No dictionary ==> No change */1285if (dictSize == 0) {1286return windowLog;1287}1288assert(windowLog <= ZSTD_WINDOWLOG_MAX);1289assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */1290{1291U64 const windowSize = 1ULL << windowLog;1292U64 const dictAndWindowSize = dictSize + windowSize;1293/* If the window size is already large enough to fit both the source and the dictionary1294* then just use the window size. 
Otherwise adjust so that it fits the dictionary and1295* the window.1296*/1297if (windowSize >= dictSize + srcSize) {1298return windowLog; /* Window size large enough already */1299} else if (dictAndWindowSize >= maxWindowSize) {1300return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */1301} else {1302return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;1303}1304}1305}13061307/** ZSTD_adjustCParams_internal() :1308* optimize `cPar` for a specified input (`srcSize` and `dictSize`).1309* mostly downsize to reduce memory consumption and initialization latency.1310* `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.1311* `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.1312* note : `srcSize==0` means 0!1313* condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */1314static ZSTD_compressionParameters1315ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,1316unsigned long long srcSize,1317size_t dictSize,1318ZSTD_cParamMode_e mode)1319{1320const U64 minSrcSize = 513; /* (1<<9) + 1 */1321const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);1322assert(ZSTD_checkCParams(cPar)==0);13231324switch (mode) {1325case ZSTD_cpm_unknown:1326case ZSTD_cpm_noAttachDict:1327/* If we don't know the source size, don't make any1328* assumptions about it. We will already have selected1329* smaller parameters if a dictionary is in use.1330*/1331break;1332case ZSTD_cpm_createCDict:1333/* Assume a small source size when creating a dictionary1334* with an unknown source size.1335*/1336if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)1337srcSize = minSrcSize;1338break;1339case ZSTD_cpm_attachDict:1340/* Dictionary has its own dedicated parameters which have1341* already been selected. 
We are selecting parameters1342* for only the source.1343*/1344dictSize = 0;1345break;1346default:1347assert(0);1348break;1349}13501351/* resize windowLog if input is small enough, to use less memory */1352if ( (srcSize < maxWindowResize)1353&& (dictSize < maxWindowResize) ) {1354U32 const tSize = (U32)(srcSize + dictSize);1355static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;1356U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :1357ZSTD_highbit32(tSize-1) + 1;1358if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;1359}1360if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {1361U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);1362U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);1363if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;1364if (cycleLog > dictAndWindowLog)1365cPar.chainLog -= (cycleLog - dictAndWindowLog);1366}13671368if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)1369cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */13701371return cPar;1372}13731374ZSTD_compressionParameters1375ZSTD_adjustCParams(ZSTD_compressionParameters cPar,1376unsigned long long srcSize,1377size_t dictSize)1378{1379cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */1380if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;1381return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);1382}13831384static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);1385static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);13861387static void ZSTD_overrideCParams(1388ZSTD_compressionParameters* cParams,1389const ZSTD_compressionParameters* overrides)1390{1391if (overrides->windowLog) cParams->windowLog = 
overrides->windowLog;1392if (overrides->hashLog) cParams->hashLog = overrides->hashLog;1393if (overrides->chainLog) cParams->chainLog = overrides->chainLog;1394if (overrides->searchLog) cParams->searchLog = overrides->searchLog;1395if (overrides->minMatch) cParams->minMatch = overrides->minMatch;1396if (overrides->targetLength) cParams->targetLength = overrides->targetLength;1397if (overrides->strategy) cParams->strategy = overrides->strategy;1398}13991400ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(1401const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)1402{1403ZSTD_compressionParameters cParams;1404if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {1405srcSizeHint = CCtxParams->srcSizeHint;1406}1407cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);1408if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;1409ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);1410assert(!ZSTD_checkCParams(cParams));1411/* srcSizeHint == 0 means 0 */1412return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);1413}14141415static size_t1416ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,1417const ZSTD_paramSwitch_e useRowMatchFinder,1418const U32 enableDedicatedDictSearch,1419const U32 forCCtx)1420{1421/* chain table size should be 0 for fast or row-hash strategies */1422size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)1423? ((size_t)1 << cParams->chainLog)1424: 0;1425size_t const hSize = ((size_t)1) << cParams->hashLog;1426U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;1427size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0;1428/* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't1429* surrounded by redzones in ASAN. */1430size_t const tableSpace = chainSize * sizeof(U32)1431+ hSize * sizeof(U32)1432+ h3Size * sizeof(U32);1433size_t const optPotentialSpace =1434ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))1435+ ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))1436+ ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))1437+ ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))1438+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))1439+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));1440size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)1441? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))1442: 0;1443size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))1444? optPotentialSpace1445: 0;1446size_t const slackSpace = ZSTD_cwksp_slack_space_required();14471448/* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */1449ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);1450assert(useRowMatchFinder != ZSTD_ps_auto);14511452DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",1453(U32)chainSize, (U32)hSize, (U32)h3Size);1454return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;1455}14561457static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(1458const ZSTD_compressionParameters* cParams,1459const ldmParams_t* ldmParams,1460const int isStatic,1461const ZSTD_paramSwitch_e useRowMatchFinder,1462const size_t buffInSize,1463const size_t buffOutSize,1464const U64 pledgedSrcSize)1465{1466size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);1467size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);1468U32 const divider = (cParams->minMatch==3) ? 
3 : 4;1469size_t const maxNbSeq = blockSize / divider;1470size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)1471+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))1472+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));1473size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);1474size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));1475size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);14761477size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);1478size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);1479size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?1480ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;148114821483size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)1484+ ZSTD_cwksp_alloc_size(buffOutSize);14851486size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;14871488size_t const neededSpace =1489cctxSpace +1490entropySpace +1491blockStateSpace +1492ldmSpace +1493ldmSeqSpace +1494matchStateSize +1495tokenSpace +1496bufferSpace;14971498DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);1499return neededSpace;1500}15011502size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)1503{1504ZSTD_compressionParameters const cParams =1505ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);1506ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,1507&cParams);15081509RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");1510/* estimateCCtxSize is for one-shot compression. So no buffers should1511* be needed. However, we still allocate two 0-sized buffers, which can1512* take space under ASAN. 
*/1513return ZSTD_estimateCCtxSize_usingCCtxParams_internal(1514&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);1515}15161517size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)1518{1519ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);1520if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {1521/* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */1522size_t noRowCCtxSize;1523size_t rowCCtxSize;1524initialParams.useRowMatchFinder = ZSTD_ps_disable;1525noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);1526initialParams.useRowMatchFinder = ZSTD_ps_enable;1527rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);1528return MAX(noRowCCtxSize, rowCCtxSize);1529} else {1530return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);1531}1532}15331534static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)1535{1536int tier = 0;1537size_t largestSize = 0;1538static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};1539for (; tier < 4; ++tier) {1540/* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */1541ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);1542largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);1543}1544return largestSize;1545}15461547size_t ZSTD_estimateCCtxSize(int compressionLevel)1548{1549int level;1550size_t memBudget = 0;1551for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {1552/* Ensure monotonically increasing memory usage as compression level increases */1553size_t const newMB = ZSTD_estimateCCtxSize_internal(level);1554if (newMB > memBudget) memBudget = newMB;1555}1556return memBudget;1557}15581559size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* 
params)1560{1561RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");1562{ ZSTD_compressionParameters const cParams =1563ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);1564size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);1565size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)1566? ((size_t)1 << cParams.windowLog) + blockSize1567: 0;1568size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)1569? ZSTD_compressBound(blockSize) + 11570: 0;1571ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, ¶ms->cParams);15721573return ZSTD_estimateCCtxSize_usingCCtxParams_internal(1574&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,1575ZSTD_CONTENTSIZE_UNKNOWN);1576}1577}15781579size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)1580{1581ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);1582if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {1583/* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */1584size_t noRowCCtxSize;1585size_t rowCCtxSize;1586initialParams.useRowMatchFinder = ZSTD_ps_disable;1587noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);1588initialParams.useRowMatchFinder = ZSTD_ps_enable;1589rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);1590return MAX(noRowCCtxSize, rowCCtxSize);1591} else {1592return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);1593}1594}15951596static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)1597{1598ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);1599return ZSTD_estimateCStreamSize_usingCParams(cParams);1600}16011602size_t 
ZSTD_estimateCStreamSize(int compressionLevel)1603{1604int level;1605size_t memBudget = 0;1606for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {1607size_t const newMB = ZSTD_estimateCStreamSize_internal(level);1608if (newMB > memBudget) memBudget = newMB;1609}1610return memBudget;1611}16121613/* ZSTD_getFrameProgression():1614* tells how much data has been consumed (input) and produced (output) for current frame.1615* able to count progression inside worker threads (non-blocking mode).1616*/1617ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)1618{1619#ifdef ZSTD_MULTITHREAD1620if (cctx->appliedParams.nbWorkers > 0) {1621return ZSTDMT_getFrameProgression(cctx->mtctx);1622}1623#endif1624{ ZSTD_frameProgression fp;1625size_t const buffered = (cctx->inBuff == NULL) ? 0 :1626cctx->inBuffPos - cctx->inToCompress;1627if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);1628assert(buffered <= ZSTD_BLOCKSIZE_MAX);1629fp.ingested = cctx->consumedSrcSize + buffered;1630fp.consumed = cctx->consumedSrcSize;1631fp.produced = cctx->producedCSize;1632fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */1633fp.currentJobID = 0;1634fp.nbActiveWorkers = 0;1635return fp;1636} }16371638/*! 
ZSTD_toFlushNow()1639* Only useful for multithreading scenarios currently (nbWorkers >= 1).1640*/1641size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)1642{1643#ifdef ZSTD_MULTITHREAD1644if (cctx->appliedParams.nbWorkers > 0) {1645return ZSTDMT_toFlushNow(cctx->mtctx);1646}1647#endif1648(void)cctx;1649return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */1650}16511652static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,1653ZSTD_compressionParameters cParams2)1654{1655(void)cParams1;1656(void)cParams2;1657assert(cParams1.windowLog == cParams2.windowLog);1658assert(cParams1.chainLog == cParams2.chainLog);1659assert(cParams1.hashLog == cParams2.hashLog);1660assert(cParams1.searchLog == cParams2.searchLog);1661assert(cParams1.minMatch == cParams2.minMatch);1662assert(cParams1.targetLength == cParams2.targetLength);1663assert(cParams1.strategy == cParams2.strategy);1664}16651666void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)1667{1668int i;1669for (i = 0; i < ZSTD_REP_NUM; ++i)1670bs->rep[i] = repStartValue[i];1671bs->entropy.huf.repeatMode = HUF_repeat_none;1672bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;1673bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;1674bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;1675}16761677/*! 
ZSTD_invalidateMatchState()1678* Invalidate all the matches in the match finder tables.1679* Requires nextSrc and base to be set (can be NULL).1680*/1681static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)1682{1683ZSTD_window_clear(&ms->window);16841685ms->nextToUpdate = ms->window.dictLimit;1686ms->loadedDictEnd = 0;1687ms->opt.litLengthSum = 0; /* force reset of btopt stats */1688ms->dictMatchState = NULL;1689}16901691/**1692* Controls, for this matchState reset, whether the tables need to be cleared /1693* prepared for the coming compression (ZSTDcrp_makeClean), or whether the1694* tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a1695* subsequent operation will overwrite the table space anyways (e.g., copying1696* the matchState contents in from a CDict).1697*/1698typedef enum {1699ZSTDcrp_makeClean,1700ZSTDcrp_leaveDirty1701} ZSTD_compResetPolicy_e;17021703/**1704* Controls, for this matchState reset, whether indexing can continue where it1705* left off (ZSTDirp_continue), or whether it needs to be restarted from zero1706* (ZSTDirp_reset).1707*/1708typedef enum {1709ZSTDirp_continue,1710ZSTDirp_reset1711} ZSTD_indexResetPolicy_e;17121713typedef enum {1714ZSTD_resetTarget_CDict,1715ZSTD_resetTarget_CCtx1716} ZSTD_resetTarget_e;171717181719static size_t1720ZSTD_reset_matchState(ZSTD_matchState_t* ms,1721ZSTD_cwksp* ws,1722const ZSTD_compressionParameters* cParams,1723const ZSTD_paramSwitch_e useRowMatchFinder,1724const ZSTD_compResetPolicy_e crp,1725const ZSTD_indexResetPolicy_e forceResetIndex,1726const ZSTD_resetTarget_e forWho)1727{1728/* disable chain table allocation for fast or row-based strategies */1729size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,1730ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))1731? 
((size_t)1 << cParams->chainLog)1732: 0;1733size_t const hSize = ((size_t)1) << cParams->hashLog;1734U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;1735size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;17361737DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);1738assert(useRowMatchFinder != ZSTD_ps_auto);1739if (forceResetIndex == ZSTDirp_reset) {1740ZSTD_window_init(&ms->window);1741ZSTD_cwksp_mark_tables_dirty(ws);1742}17431744ms->hashLog3 = hashLog3;17451746ZSTD_invalidateMatchState(ms);17471748assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */17491750ZSTD_cwksp_clear_tables(ws);17511752DEBUGLOG(5, "reserving table space");1753/* table Space */1754ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));1755ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));1756ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));1757RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,1758"failed a workspace allocation in ZSTD_reset_matchState");17591760DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);1761if (crp!=ZSTDcrp_leaveDirty) {1762/* reset tables only */1763ZSTD_cwksp_clean_tables(ws);1764}17651766/* opt parser space */1767if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {1768DEBUGLOG(4, "reserving optimal parser space");1769ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));1770ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));1771ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));1772ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));1773ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * 
sizeof(ZSTD_match_t));1774ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));1775}17761777if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {1778{ /* Row match finder needs an additional table of hashes ("tags") */1779size_t const tagTableSize = hSize*sizeof(U16);1780ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);1781if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);1782}1783{ /* Switch to 32-entry rows if searchLog is 5 (or more) */1784U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);1785assert(cParams->hashLog >= rowLog);1786ms->rowHashLog = cParams->hashLog - rowLog;1787}1788}17891790ms->cParams = *cParams;17911792RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,1793"failed a workspace allocation in ZSTD_reset_matchState");1794return 0;1795}17961797/* ZSTD_indexTooCloseToMax() :1798* minor optimization : prefer memset() rather than reduceIndex()1799* which is measurably slow in some circumstances (reported for Visual Studio).1800* Works when re-using a context for a lot of smallish inputs :1801* if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,1802* memset() will be triggered before reduceIndex().1803*/1804#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)1805static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)1806{1807return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);1808}18091810/** ZSTD_dictTooBig():1811* When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in1812* one go generically. So we ensure that in that case we reset the tables to zero,1813* so that we can load as much of the dictionary as possible.1814*/1815static int ZSTD_dictTooBig(size_t const loadedDictSize)1816{1817return loadedDictSize > ZSTD_CHUNKSIZE_MAX;1818}18191820/*! ZSTD_resetCCtx_internal() :1821* @param loadedDictSize The size of the dictionary to be loaded1822* into the context, if any. 
/*! ZSTD_resetCCtx_internal() :
 * Prepares `zc` for a new frame : sizes (or resizes) the workspace, then
 * carves all buffers and tables out of it in a fixed order.
 * @param loadedDictSize The size of the dictionary to be loaded
 * into the context, if any. If no dictionary is used, or the
 * dictionary is being attached / copied, then pass 0.
 * note : `params` are assumed fully validated at this stage.
 * @return 0 on success, or a zstd error code (memory_allocation on workspace failure).
 */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                      ZSTD_CCtx_params const* params,
                                      U64 const pledgedSrcSize,
                                      size_t const loadedDictSize,
                                      ZSTD_compResetPolicy_e const crp,
                                      ZSTD_buffered_policy_e const zbuff)
{
    ZSTD_cwksp* const ws = &zc->workspace;
    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));

    zc->isFirstBlock = 1;

    /* Set applied params early so we can modify them for LDM,
     * and point params at the applied params. */
    zc->appliedParams = *params;
    params = &zc->appliedParams;

    /* all ps_auto values must have been resolved by the caller */
    assert(params->useRowMatchFinder != ZSTD_ps_auto);
    assert(params->useBlockSplitter != ZSTD_ps_auto);
    assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
        /* Adjust long distance matching parameters */
        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
        assert(params->ldmParams.hashRateLog < 32);
    }

    {   /* window never needs to exceed pledgedSrcSize, but must be at least 1 */
        size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
        /* minMatch==3 allows up to one sequence every 3 bytes; otherwise every 4 */
        U32 const divider = (params->cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                ? ZSTD_compressBound(blockSize) + 1
                : 0;
        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
                ? windowSize + blockSize
                : 0;
        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);

        /* prefer a full table reset (memset) when indices are close to overflow,
         * when the dictionary can't be loaded in one chunk,
         * or on first use of this context */
        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
        ZSTD_indexResetPolicy_e needsIndexReset =
            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;

        size_t const neededSpace =
            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                buffInSize, buffOutSize, pledgedSrcSize);
        int resizeWorkspace;

        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");

        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);

        {   /* Check if workspace is large enough, alloc a new one if needed */
            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);

            if (resizeWorkspace) {
                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                            ZSTD_cwksp_sizeof(ws) >> 10,
                            neededSpace >> 10);

                /* static contexts cannot reallocate their workspace */
                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");

                needsIndexReset = ZSTDirp_reset;

                ZSTD_cwksp_free(ws, zc->customMem);
                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");

                DEBUGLOG(5, "reserving object space");
                /* Statically sized space.
                 * entropyWorkspace never moves,
                 * though prev/next block swap places */
                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
                RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
        }   }

        ZSTD_cwksp_clear(ws);

        /* init params */
        zc->blockState.matchState.cParams = params->cParams;
        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;  /* +1 so that 0 can mean "unknown" */
        zc->consumedSrcSize = 0;
        zc->producedCSize = 0;
        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            zc->appliedParams.fParams.contentSizeFlag = 0;
        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
        zc->blockSize = blockSize;

        XXH64_reset(&zc->xxhState, 0);
        zc->stage = ZSTDcs_init;
        zc->dictID = 0;
        zc->dictContentSize = 0;

        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);

        /* ZSTD_wildcopy() is used to copy into the literals buffer,
         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
         */
        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
        zc->seqStore.maxNbLit = blockSize;

        /* buffers */
        zc->bufferedPolicy = zbuff;
        zc->inBuffSize = buffInSize;
        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
        zc->outBuffSize = buffOutSize;
        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);

        /* ldm bucketOffsets table */
        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
            /* TODO: avoid memset? */
            size_t const numBuckets =
                  ((size_t)1) << (params->ldmParams.hashLog -
                                  params->ldmParams.bucketSizeLog);
            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
        }

        /* sequences storage */
        ZSTD_referenceExternalSequences(zc, NULL, 0);
        zc->seqStore.maxNbSeq = maxNbSeq;
        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));

        FORWARD_IF_ERROR(ZSTD_reset_matchState(
            &zc->blockState.matchState,
            ws,
            &params->cParams,
            params->useRowMatchFinder,
            crp,
            needsIndexReset,
            ZSTD_resetTarget_CCtx), "");

        /* ldm hash table */
        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
            /* TODO: avoid memset? */
            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
            zc->maxNbLdmSequences = maxNbLdmSeq;

            ZSTD_window_init(&zc->ldmState.window);
            zc->ldmState.loadedDictEnd = 0;
        }

        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));

        zc->initialized = 1;

        return 0;
    }
}
/* ZSTD_invalidateRepCodes() :
 * ensures next compression will not use repcodes from previous block.
 * Note : only works with regular variant;
 *        do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
    int i;
    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}

/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place.
 */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};

/* Decide whether `cdict` should be referenced in place (attached) or copied :
 * attach when the input is small relative to the strategy's cutoff,
 * unless the caller forces a copy, or forceWindow is set. */
static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
                                 const ZSTD_CCtx_params* params,
                                 U64 pledgedSrcSize)
{
    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
    int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
    return dedicatedDictSearch
        || ( ( pledgedSrcSize <= cutoff
            || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
            || params->attachDictPref == ZSTD_dictForceAttach )
          && params->attachDictPref != ZSTD_dictForceCopy
          && !params->forceWindow ); /* dictMatchState isn't correctly
                                      * handled in _enforceMaxDist */
}

/* Reset `cctx` for a new frame, referencing (not copying) the tables of `cdict`.
 * The cdict's match state is linked via dictMatchState; only the working
 * context's own tables are (re)allocated. */
static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                        const ZSTD_CDict* cdict,
                        ZSTD_CCtx_params params,
                        U64 pledgedSrcSize,
                        ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);
    {
        ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
        unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Resize working context table params for input only, since the dict
         * has its own tables. */
        /* pledgedSrcSize == 0 means 0! */

        if (cdict->matchState.dedicatedDictSearch) {
            /* DDS cParams were tweaked for dictionary search; undo before adjusting */
            ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
        }

        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_makeClean, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
    }

    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                  - cdict->matchState.window.base);
        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
        if (cdictLen == 0) {
            /* don't even attach dictionaries with no contents */
            DEBUGLOG(4, "skipping attaching empty dictionary");
        } else {
            DEBUGLOG(4, "attaching dictionary into context");
            cctx->blockState.matchState.dictMatchState = &cdict->matchState;

            /* prep working match state so dict matches never have negative indices
             * when they are translated to the working context's index space. */
            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
                cctx->blockState.matchState.window.nextSrc =
                    cctx->blockState.matchState.window.base + cdictEnd;
                ZSTD_window_clear(&cctx->blockState.matchState.window);
            }
            /* loadedDictEnd is expressed within the referential of the active context */
            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
    }   }

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
/* Reset `cctx` for a new frame, copying the cdict's hash/chain/tag tables
 * into the working context so the cdict is no longer referenced afterwards.
 * Requires the cdict's table-related cParams to be used verbatim (asserted below). */
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            ZSTD_CCtx_params params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;

    /* DDS dictionaries must be attached, never copied (see ZSTD_shouldAttachDict) */
    assert(!cdict->matchState.dedicatedDictSearch);
    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);

    {   unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Copy only compression parameters related to tables. */
        params.cParams = *cdict_cParams;
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_leaveDirty, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
    }

    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
    assert(params.useRowMatchFinder != ZSTD_ps_auto);

    /* copy tables */
    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
                                    ? ((size_t)1 << cdict_cParams->chainLog)
                                    : 0;
        size_t const hSize = (size_t)1 << cdict_cParams->hashLog;

        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
                    cdict->matchState.hashTable,
                    hSize * sizeof(U32));
        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
                        cdict->matchState.chainTable,
                        chainSize * sizeof(U32));
        }
        /* copy tag table */
        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
            size_t const tagTableSize = hSize*sizeof(U16);
            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
                        cdict->matchState.tagTable,
                        tagTableSize);
        }
    }

    /* Zero the hashTable3, since the cdict never fills it */
    {   int const h3log = cctx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
        assert(cdict->matchState.hashLog3 == 0);
        ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);

    /* copy dictionary offsets */
    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
((size_t)1 << h3log) : 0;2156assert(cdict->matchState.hashLog3 == 0);2157ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));2158}21592160ZSTD_cwksp_mark_tables_clean(&cctx->workspace);21612162/* copy dictionary offsets */2163{ ZSTD_matchState_t const* srcMatchState = &cdict->matchState;2164ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;2165dstMatchState->window = srcMatchState->window;2166dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;2167dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;2168}21692170cctx->dictID = cdict->dictID;2171cctx->dictContentSize = cdict->dictContentSize;21722173/* copy block state */2174ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));21752176return 0;2177}21782179/* We have a choice between copying the dictionary context into the working2180* context, or referencing the dictionary context from the working context2181* in-place. We decide here which strategy to use. */2182static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,2183const ZSTD_CDict* cdict,2184const ZSTD_CCtx_params* params,2185U64 pledgedSrcSize,2186ZSTD_buffered_policy_e zbuff)2187{21882189DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",2190(unsigned)pledgedSrcSize);21912192if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {2193return ZSTD_resetCCtx_byAttachingCDict(2194cctx, cdict, *params, pledgedSrcSize, zbuff);2195} else {2196return ZSTD_resetCCtx_byCopyingCDict(2197cctx, cdict, *params, pledgedSrcSize, zbuff);2198}2199}22002201/*! ZSTD_copyCCtx_internal() :2202* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.2203* Only works during stage ZSTDcs_init (i.e. 
/*! ZSTD_copyCCtx_internal() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  The "context", in this case, refers to the hash and chain tables,
 *  entropy tables, and dictionary references.
 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
 * @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
                            const ZSTD_CCtx* srcCCtx,
                            ZSTD_frameParameters fParams,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
                    "Can't copy a ctx that's not in init stage.");
    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
    ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
        /* Copy only compression parameters related to tables. */
        params.cParams = srcCCtx->appliedParams.cParams;
        assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
        assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
        assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
        params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
        params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
        params.ldmParams = srcCCtx->appliedParams.ldmParams;
        params.fParams = fParams;
        ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
                                /* loadedDictSize */ 0,
                                ZSTDcrp_leaveDirty, zbuff);
        /* table geometry must match so the raw table copies below are valid */
        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
    }

    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);

    /* copy tables */
    {   size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
                                                         srcCCtx->appliedParams.useRowMatchFinder,
                                                         0 /* forDDSDict */)
                                    ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
                                    : 0;
        size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
        int const h3log = srcCCtx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;

        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
                    srcCCtx->blockState.matchState.hashTable,
                    hSize * sizeof(U32));
        ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
                    srcCCtx->blockState.matchState.chainTable,
                    chainSize * sizeof(U32));
        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
                    srcCCtx->blockState.matchState.hashTable3,
                    h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);

    /* copy dictionary offsets */
    {
        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }
    dstCCtx->dictID = srcCCtx->dictID;
    dstCCtx->dictContentSize = srcCCtx->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));

    return 0;
}
after creation, but before first call to ZSTD_compressContinue()).2283* pledgedSrcSize==0 means "unknown".2284* @return : 0, or an error code */2285size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)2286{2287ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };2288ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;2289ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);2290if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;2291fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);22922293return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,2294fParams, pledgedSrcSize,2295zbuff);2296}229722982299#define ZSTD_ROWSIZE 162300/*! ZSTD_reduceTable() :2301* reduce table indexes by `reducerValue`, or squash to zero.2302* PreserveMark preserves "unsorted mark" for btlazy2 strategy.2303* It must be set to a clear 0/1 value, to remove branch during inlining.2304* Presume table size is a multiple of ZSTD_ROWSIZE2305* to help auto-vectorization */2306FORCE_INLINE_TEMPLATE void2307ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)2308{2309int const nbRows = (int)size / ZSTD_ROWSIZE;2310int cellNb = 0;2311int rowNb;2312/* Protect special index values < ZSTD_WINDOW_START_INDEX. */2313U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;2314assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */2315assert(size < (1U<<31)); /* can be casted to int */23162317#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)2318/* To validate that the table re-use logic is sound, and that we don't2319* access table space that we haven't cleaned, we re-"poison" the table2320* space every time we mark it dirty.2321*2322* This function however is intended to operate on those dirty tables and2323* re-clean them. 
#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    /* Protect special index values < ZSTD_WINDOW_START_INDEX. */
    U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));                 /* can be casted to int */

#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
    /* To validate that the table re-use logic is sound, and that we don't
     * access table space that we haven't cleaned, we re-"poison" the table
     * space every time we mark it dirty.
     *
     * This function however is intended to operate on those dirty tables and
     * re-clean them. So when this function is used correctly, we can unpoison
     * the memory it operated on. This introduces a blind spot though, since
     * if we now try to operate on __actually__ poisoned memory, we will not
     * detect that. */
    __msan_unpoison(table, size * sizeof(U32));
#endif

    /* branchless-friendly row/column loop, shaped for auto-vectorization */
    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            U32 newVal;
            if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {
                /* This write is pointless, but is required(?) for the compiler
                 * to auto-vectorize the loop. */
                newVal = ZSTD_DUBT_UNSORTED_MARK;
            } else if (table[cellNb] < reducerThreshold) {
                /* index too old (or reserved) : squash to zero */
                newVal = 0;
            } else {
                newVal = table[cellNb] - reducerValue;
            }
            table[cellNb] = newVal;
            cellNb++;
    }   }
}

/* Non-mark-preserving variant : used for all strategies except btlazy2. */
static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}

/* Mark-preserving variant : keeps ZSTD_DUBT_UNSORTED_MARK intact for btlazy2. */
static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
/*! ZSTD_reduceIndex() :
 *   rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
{
    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
    }

    /* chain table only exists for some strategy / matchfinder combinations */
    if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
        U32 const chainSize = (U32)1 << params->cParams.chainLog;
        if (params->cParams.strategy == ZSTD_btlazy2)
            /* btlazy2 must preserve its "unsorted mark" sentinel */
            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
        else
            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
    }

    if (ms->hashLog3) {
        U32 const h3Size = (U32)1 << ms->hashLog3;
        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
    }
}


/*-*******************************************************
*  Block entropic compression
*********************************************************/

/* See doc/zstd_compression_format.md for detailed format description */

/* Translate each sequence's raw litLength / matchLength / offBase into
 * its symbol code, writing into the seqStore's llCode/mlCode/ofCode arrays.
 * Long lengths (beyond the code range) are patched afterwards using
 * longLengthType/longLengthPos. */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    BYTE* const llCodeTable = seqStorePtr->llCode;
    BYTE* const ofCodeTable = seqStorePtr->ofCode;
    BYTE* const mlCodeTable = seqStorePtr->mlCode;
    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    U32 u;
    assert(nbSeq <= seqStorePtr->maxNbSeq);
    for (u=0; u<nbSeq; u++) {
        U32 const llv = sequences[u].litLength;
        U32 const mlv = sequences[u].mlBase;
        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
        /* offset code is simply the position of the highest set bit of offBase */
        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
    }
    if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
    if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}

/* ZSTD_useTargetCBlockSize():
 * Returns if target compressed block size param is being used.
 * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
 * Returns 1 if true, 0 otherwise. */
static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
{
    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
    return (cctxParams->targetCBlockSize != 0);
}

/* ZSTD_blockSplitterEnabled():
 * Returns if block splitting param is being used
 * If used, compression will do best effort to split a block in order to improve compression ratio.
 * At the time this function is called, the parameter must be finalized.
 * Returns 1 if true, 0 otherwise. */
static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
{
    DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);
    assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);  /* must be resolved by now */
    return (cctxParams->useBlockSplitter == ZSTD_ps_enable);
}

/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
 * and size of the sequences statistics
 */
typedef struct {
    U32 LLtype;             /* symbolEncodingType_e for literal lengths */
    U32 Offtype;            /* symbolEncodingType_e for offsets */
    U32 MLtype;             /* symbolEncodingType_e for match lengths */
    size_t size;            /* bytes written, or a zstd error code */
    size_t lastCountSize;   /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_symbolEncodingTypeStats_t;
/* ZSTD_buildSequencesStatistics():
 * Builds (and serializes into `dst`) the FSE CTables for the three sequence
 * symbol streams (literal lengths, offsets, match lengths).
 * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
 * Modifies `nextEntropy` to have the appropriate values as a side effect.
 * nbSeq must be greater than 0.
 *
 * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
 */
static ZSTD_symbolEncodingTypeStats_t
ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
                              BYTE* dst, const BYTE* const dstEnd,
                              ZSTD_strategy strategy, unsigned* countWorkspace,
                              void* entropyWorkspace, size_t entropyWkspSize) {
    BYTE* const ostart = dst;
    const BYTE* const oend = dstEnd;
    BYTE* op = ostart;
    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    ZSTD_symbolEncodingTypeStats_t stats;

    stats.lastCountSize = 0;
    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    assert(op <= oend);
    assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
    /* build CTable for Literal Lengths */
    {   unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
        stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
                countWorkspace, max, llCodeTable, nbSeq,
                LL_defaultNorm, LL_defaultNormLog, MaxLL,
                prevEntropy->litlengthCTable,
                sizeof(prevEntropy->litlengthCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
                stats.size = countSize;  /* propagate error through `size` */
                return stats;
            }
            if (stats.LLtype == set_compressed)
                stats.lastCountSize = countSize;  /* track for the 1.3.4 workaround */
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    {   unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(
            countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
        stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
                countWorkspace, max, ofCodeTable, nbSeq,
                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                prevEntropy->offcodeCTable,
                sizeof(prevEntropy->offcodeCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.Offtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    {   unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(
            countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
        stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
                countWorkspace, max, mlCodeTable, nbSeq,
                ML_defaultNorm, ML_defaultNormLog, MaxML,
                prevEntropy->matchlengthCTable,
                sizeof(prevEntropy->matchlengthCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.MLtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    stats.size = (size_t)(op-ostart);
    return stats;
}
/* ZSTD_entropyCompressSeqStore_internal():
 * compresses both literals and sequences
 * Returns compressed size of block, or a zstd error.
 * Layout written to `dst` : [compressed literals][nbSeq header][seqHead byte]
 * [FSE table descriptions][sequences bitstream].
 * Returning 0 signals the caller to emit an uncompressed block instead.
 */
#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
MEM_STATIC size_t
ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
                          const ZSTD_entropyCTables_t* prevEntropy,
                                ZSTD_entropyCTables_t* nextEntropy,
                          const ZSTD_CCtx_params* cctxParams,
                                void* dst, size_t dstCapacity,
                                void* entropyWorkspace, size_t entropyWkspSize,
                          const int bmi2)
{
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    unsigned* count = (unsigned*)entropyWorkspace;
    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t lastCountSize;

    /* carve the count[] array out of the front of the entropy workspace */
    entropyWorkspace = count + (MaxSeq + 1);
    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);

    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);

    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
        size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
        /* Base suspicion of uncompressibility on ratio of literals to sequences */
        unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
        size_t const cSize = ZSTD_compressLiterals(
                                    &prevEntropy->huf, &nextEntropy->huf,
                                    cctxParams->cParams.strategy,
                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
                                    op, dstCapacity,
                                    literals, litSize,
                                    entropyWorkspace, entropyWkspSize,
                                    bmi2, suspectUncompressible);
        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
        assert(cSize <= dstCapacity);
        op += cSize;
    }

    /* Sequences Header */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
    /* nbSeq is encoded in 1, 2 or 3 bytes depending on magnitude */
    if (nbSeq < 128) {
        *op++ = (BYTE)nbSeq;
    } else if (nbSeq < LONGNBSEQ) {
        op[0] = (BYTE)((nbSeq>>8) + 0x80);
        op[1] = (BYTE)nbSeq;
        op+=2;
    } else {
        op[0]=0xFF;
        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
        op+=3;
    }
    assert(op <= oend);
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
        return (size_t)(op - ostart);
    }
    {
        ZSTD_symbolEncodingTypeStats_t stats;
        BYTE* seqHead = op++;  /* reserved; filled once encoding types are known */
        /* build stats for sequences */
        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
                                             &prevEntropy->fse, &nextEntropy->fse,
                                              op, oend,
                                              strategy, count,
                                              entropyWorkspace, entropyWkspSize);
        FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
        *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
        lastCountSize = stats.lastCountSize;
        op += stats.size;
    }

    {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                        op, (size_t)(oend - op),
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
        op += bitstreamSize;
        assert(op <= oend);
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
         * This can happen when the last set_compressed table present is 2
         * bytes and the bitstream is only one byte.
         * In this exceedingly rare case, we will simply emit an uncompressed
         * block, since it isn't worth optimizing.
         */
        if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
            /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
            assert(lastCountSize + bitstreamSize == 3);
            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                        "emitting an uncompressed block.");
            return 0;  /* caller interprets 0 as "store block uncompressed" */
        }
    }

    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
    return (size_t)(op - ostart);
}
entropyWkspSize, bmi2);2704if (cSize == 0) return 0;2705/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.2706* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.2707*/2708if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))2709return 0; /* block not compressed */2710FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");27112712/* Check compressibility */2713{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);2714if (cSize >= maxCSize) return 0; /* block not compressed */2715}2716DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);2717return cSize;2718}27192720/* ZSTD_selectBlockCompressor() :2721* Not static, but internal use only (used by long distance matcher)2722* assumption : strat is a valid strategy */2723ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode)2724{2725static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {2726{ ZSTD_compressBlock_fast /* default for 0 */,2727ZSTD_compressBlock_fast,2728ZSTD_compressBlock_doubleFast,2729ZSTD_compressBlock_greedy,2730ZSTD_compressBlock_lazy,2731ZSTD_compressBlock_lazy2,2732ZSTD_compressBlock_btlazy2,2733ZSTD_compressBlock_btopt,2734ZSTD_compressBlock_btultra,2735ZSTD_compressBlock_btultra2 },2736{ ZSTD_compressBlock_fast_extDict /* default for 0 */,2737ZSTD_compressBlock_fast_extDict,2738ZSTD_compressBlock_doubleFast_extDict,2739ZSTD_compressBlock_greedy_extDict,2740ZSTD_compressBlock_lazy_extDict,2741ZSTD_compressBlock_lazy2_extDict,2742ZSTD_compressBlock_btlazy2_extDict,2743ZSTD_compressBlock_btopt_extDict,2744ZSTD_compressBlock_btultra_extDict,2745ZSTD_compressBlock_btultra_extDict },2746{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 
*/,2747ZSTD_compressBlock_fast_dictMatchState,2748ZSTD_compressBlock_doubleFast_dictMatchState,2749ZSTD_compressBlock_greedy_dictMatchState,2750ZSTD_compressBlock_lazy_dictMatchState,2751ZSTD_compressBlock_lazy2_dictMatchState,2752ZSTD_compressBlock_btlazy2_dictMatchState,2753ZSTD_compressBlock_btopt_dictMatchState,2754ZSTD_compressBlock_btultra_dictMatchState,2755ZSTD_compressBlock_btultra_dictMatchState },2756{ NULL /* default for 0 */,2757NULL,2758NULL,2759ZSTD_compressBlock_greedy_dedicatedDictSearch,2760ZSTD_compressBlock_lazy_dedicatedDictSearch,2761ZSTD_compressBlock_lazy2_dedicatedDictSearch,2762NULL,2763NULL,2764NULL,2765NULL }2766};2767ZSTD_blockCompressor selectedCompressor;2768ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);27692770assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));2771DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);2772if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {2773static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {2774{ ZSTD_compressBlock_greedy_row,2775ZSTD_compressBlock_lazy_row,2776ZSTD_compressBlock_lazy2_row },2777{ ZSTD_compressBlock_greedy_extDict_row,2778ZSTD_compressBlock_lazy_extDict_row,2779ZSTD_compressBlock_lazy2_extDict_row },2780{ ZSTD_compressBlock_greedy_dictMatchState_row,2781ZSTD_compressBlock_lazy_dictMatchState_row,2782ZSTD_compressBlock_lazy2_dictMatchState_row },2783{ ZSTD_compressBlock_greedy_dedicatedDictSearch_row,2784ZSTD_compressBlock_lazy_dedicatedDictSearch_row,2785ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }2786};2787DEBUGLOG(4, "Selecting a row-based matchfinder");2788assert(useRowMatchFinder != ZSTD_ps_auto);2789selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];2790} else {2791selectedCompressor = blockCompressor[(int)dictMode][(int)strat];2792}2793assert(selectedCompressor != NULL);2794return selectedCompressor;2795}27962797static void 
ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,2798const BYTE* anchor, size_t lastLLSize)2799{2800ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);2801seqStorePtr->lit += lastLLSize;2802}28032804void ZSTD_resetSeqStore(seqStore_t* ssPtr)2805{2806ssPtr->lit = ssPtr->litStart;2807ssPtr->sequences = ssPtr->sequencesStart;2808ssPtr->longLengthType = ZSTD_llt_none;2809}28102811typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;28122813static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)2814{2815ZSTD_matchState_t* const ms = &zc->blockState.matchState;2816DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);2817assert(srcSize <= ZSTD_BLOCKSIZE_MAX);2818/* Assert that we have correctly flushed the ctx params into the ms's copy */2819ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);2820if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {2821if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {2822ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);2823} else {2824ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);2825}2826return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */2827}2828ZSTD_resetSeqStore(&(zc->seqStore));2829/* required for optimal parser to read stats from dictionary */2830ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;2831/* tell the optimal parser how we expect to compress literals */2832ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;2833/* a gap between an attached dict and the current window is not safe,2834* they must remain adjacent,2835* and when that stops being the case, the dict must be unset */2836assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);28372838/* limited update after a very long match */2839{ const BYTE* const base = ms->window.base;2840const BYTE* const istart = (const BYTE*)src;2841const U32 curr = 
(U32)(istart-base);2842if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */2843if (curr > ms->nextToUpdate + 384)2844ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));2845}28462847/* select and store sequences */2848{ ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);2849size_t lastLLSize;2850{ int i;2851for (i = 0; i < ZSTD_REP_NUM; ++i)2852zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];2853}2854if (zc->externSeqStore.pos < zc->externSeqStore.size) {2855assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);2856/* Updates ldmSeqStore.pos */2857lastLLSize =2858ZSTD_ldm_blockCompress(&zc->externSeqStore,2859ms, &zc->seqStore,2860zc->blockState.nextCBlock->rep,2861zc->appliedParams.useRowMatchFinder,2862src, srcSize);2863assert(zc->externSeqStore.pos <= zc->externSeqStore.size);2864} else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {2865rawSeqStore_t ldmSeqStore = kNullRawSeqStore;28662867ldmSeqStore.seq = zc->ldmSequences;2868ldmSeqStore.capacity = zc->maxNbLdmSequences;2869/* Updates ldmSeqStore.size */2870FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,2871&zc->appliedParams.ldmParams,2872src, srcSize), "");2873/* Updates ldmSeqStore.pos */2874lastLLSize =2875ZSTD_ldm_blockCompress(&ldmSeqStore,2876ms, &zc->seqStore,2877zc->blockState.nextCBlock->rep,2878zc->appliedParams.useRowMatchFinder,2879src, srcSize);2880assert(ldmSeqStore.pos == ldmSeqStore.size);2881} else { /* not long range mode */2882ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,2883zc->appliedParams.useRowMatchFinder,2884dictMode);2885ms->ldmSeqStore = NULL;2886lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);2887}2888{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;2889ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, 
lastLLSize);2890} }2891return ZSTDbss_compress;2892}28932894static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)2895{2896const seqStore_t* seqStore = ZSTD_getSeqStore(zc);2897const seqDef* seqStoreSeqs = seqStore->sequencesStart;2898size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;2899size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);2900size_t literalsRead = 0;2901size_t lastLLSize;29022903ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];2904size_t i;2905repcodes_t updatedRepcodes;29062907assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);2908/* Ensure we have enough space for last literals "sequence" */2909assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);2910ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));2911for (i = 0; i < seqStoreSeqSize; ++i) {2912U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;2913outSeqs[i].litLength = seqStoreSeqs[i].litLength;2914outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;2915outSeqs[i].rep = 0;29162917if (i == seqStore->longLengthPos) {2918if (seqStore->longLengthType == ZSTD_llt_literalLength) {2919outSeqs[i].litLength += 0x10000;2920} else if (seqStore->longLengthType == ZSTD_llt_matchLength) {2921outSeqs[i].matchLength += 0x10000;2922}2923}29242925if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {2926/* Derive the correct offset corresponding to a repcode */2927outSeqs[i].rep = seqStoreSeqs[i].offBase;2928if (outSeqs[i].litLength != 0) {2929rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];2930} else {2931if (outSeqs[i].rep == 3) {2932rawOffset = updatedRepcodes.rep[0] - 1;2933} else {2934rawOffset = updatedRepcodes.rep[outSeqs[i].rep];2935}2936}2937}2938outSeqs[i].offset = rawOffset;2939/* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode2940so we provide seqStoreSeqs[i].offset - 1 */2941ZSTD_updateRep(updatedRepcodes.rep,2942seqStoreSeqs[i].offBase - 
1,2943seqStoreSeqs[i].litLength == 0);2944literalsRead += outSeqs[i].litLength;2945}2946/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.2947* If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker2948* for the block boundary, according to the API.2949*/2950assert(seqStoreLiteralsSize >= literalsRead);2951lastLLSize = seqStoreLiteralsSize - literalsRead;2952outSeqs[i].litLength = (U32)lastLLSize;2953outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;2954seqStoreSeqSize++;2955zc->seqCollector.seqIndex += seqStoreSeqSize;2956}29572958size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,2959size_t outSeqsSize, const void* src, size_t srcSize)2960{2961const size_t dstCapacity = ZSTD_compressBound(srcSize);2962void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);2963SeqCollector seqCollector;29642965RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");29662967seqCollector.collectSequences = 1;2968seqCollector.seqStart = outSeqs;2969seqCollector.seqIndex = 0;2970seqCollector.maxSequences = outSeqsSize;2971zc->seqCollector = seqCollector;29722973ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);2974ZSTD_customFree(dst, ZSTD_defaultCMem);2975return zc->seqCollector.seqIndex;2976}29772978size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {2979size_t in = 0;2980size_t out = 0;2981for (; in < seqsSize; ++in) {2982if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {2983if (in != seqsSize - 1) {2984sequences[in+1].litLength += sequences[in].litLength;2985}2986} else {2987sequences[out] = sequences[in];2988++out;2989}2990}2991return out;2992}29932994/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. 
*/2995static int ZSTD_isRLE(const BYTE* src, size_t length) {2996const BYTE* ip = src;2997const BYTE value = ip[0];2998const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);2999const size_t unrollSize = sizeof(size_t) * 4;3000const size_t unrollMask = unrollSize - 1;3001const size_t prefixLength = length & unrollMask;3002size_t i;3003size_t u;3004if (length == 1) return 1;3005/* Check if prefix is RLE first before using unrolled loop */3006if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {3007return 0;3008}3009for (i = prefixLength; i != length; i += unrollSize) {3010for (u = 0; u < unrollSize; u += sizeof(size_t)) {3011if (MEM_readST(ip + i + u) != valueST) {3012return 0;3013}3014}3015}3016return 1;3017}30183019/* Returns true if the given block may be RLE.3020* This is just a heuristic based on the compressibility.3021* It may return both false positives and false negatives.3022*/3023static int ZSTD_maybeRLE(seqStore_t const* seqStore)3024{3025size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);3026size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);30273028return nbSeqs < 4 && nbLits < 10;3029}30303031static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)3032{3033ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;3034bs->prevCBlock = bs->nextCBlock;3035bs->nextCBlock = tmp;3036}30373038/* Writes the block header */3039static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {3040U32 const cBlockHeader = cSize == 1 ?3041lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :3042lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);3043MEM_writeLE24(op, cBlockHeader);3044DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);3045}30463047/** ZSTD_buildBlockEntropyStats_literals() :3048* Builds entropy for the literals.3049* Stores literals block type (raw, rle, 
compressed, repeat) and3050* huffman description table to hufMetadata.3051* Requires ENTROPY_WORKSPACE_SIZE workspace3052* @return : size of huffman description table or error code */3053static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,3054const ZSTD_hufCTables_t* prevHuf,3055ZSTD_hufCTables_t* nextHuf,3056ZSTD_hufCTablesMetadata_t* hufMetadata,3057const int literalsCompressionIsDisabled,3058void* workspace, size_t wkspSize)3059{3060BYTE* const wkspStart = (BYTE*)workspace;3061BYTE* const wkspEnd = wkspStart + wkspSize;3062BYTE* const countWkspStart = wkspStart;3063unsigned* const countWksp = (unsigned*)workspace;3064const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);3065BYTE* const nodeWksp = countWkspStart + countWkspSize;3066const size_t nodeWkspSize = wkspEnd-nodeWksp;3067unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;3068unsigned huffLog = HUF_TABLELOG_DEFAULT;3069HUF_repeat repeat = prevHuf->repeatMode;3070DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);30713072/* Prepare nextEntropy assuming reusing the existing table */3073ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));30743075if (literalsCompressionIsDisabled) {3076DEBUGLOG(5, "set_basic - disabled");3077hufMetadata->hType = set_basic;3078return 0;3079}30803081/* small ? don't even attempt compression (speed opt) */3082#ifndef COMPRESS_LITERALS_SIZE_MIN3083#define COMPRESS_LITERALS_SIZE_MIN 633084#endif3085{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN;3086if (srcSize <= minLitSize) {3087DEBUGLOG(5, "set_basic - too small");3088hufMetadata->hType = set_basic;3089return 0;3090}3091}30923093/* Scan input and build symbol stats */3094{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);3095FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");3096if (largest == srcSize) {3097DEBUGLOG(5, "set_rle");3098hufMetadata->hType = set_rle;3099return 0;3100}3101if (largest <= (srcSize >> 7)+4) {3102DEBUGLOG(5, "set_basic - no gain");3103hufMetadata->hType = set_basic;3104return 0;3105}3106}31073108/* Validate the previous Huffman table */3109if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {3110repeat = HUF_repeat_none;3111}31123113/* Build Huffman Tree */3114ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));3115huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);3116{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,3117maxSymbolValue, huffLog,3118nodeWksp, nodeWkspSize);3119FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");3120huffLog = (U32)maxBits;3121{ /* Build and write the CTable */3122size_t const newCSize = HUF_estimateCompressedSize(3123(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);3124size_t const hSize = HUF_writeCTable_wksp(3125hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),3126(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,3127nodeWksp, nodeWkspSize);3128/* Check against repeating the previous CTable */3129if (repeat != HUF_repeat_none) {3130size_t const oldCSize = HUF_estimateCompressedSize(3131(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);3132if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {3133DEBUGLOG(5, "set_repeat - smaller");3134ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));3135hufMetadata->hType = 
set_repeat;3136return 0;3137}3138}3139if (newCSize + hSize >= srcSize) {3140DEBUGLOG(5, "set_basic - no gains");3141ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));3142hufMetadata->hType = set_basic;3143return 0;3144}3145DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);3146hufMetadata->hType = set_compressed;3147nextHuf->repeatMode = HUF_repeat_check;3148return hSize;3149}3150}3151}315231533154/* ZSTD_buildDummySequencesStatistics():3155* Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,3156* and updates nextEntropy to the appropriate repeatMode.3157*/3158static ZSTD_symbolEncodingTypeStats_t3159ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {3160ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};3161nextEntropy->litlength_repeatMode = FSE_repeat_none;3162nextEntropy->offcode_repeatMode = FSE_repeat_none;3163nextEntropy->matchlength_repeatMode = FSE_repeat_none;3164return stats;3165}31663167/** ZSTD_buildBlockEntropyStats_sequences() :3168* Builds entropy for the sequences.3169* Stores symbol compression modes and fse table to fseMetadata.3170* Requires ENTROPY_WORKSPACE_SIZE wksp.3171* @return : size of fse tables or error code */3172static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,3173const ZSTD_fseCTables_t* prevEntropy,3174ZSTD_fseCTables_t* nextEntropy,3175const ZSTD_CCtx_params* cctxParams,3176ZSTD_fseCTablesMetadata_t* fseMetadata,3177void* workspace, size_t wkspSize)3178{3179ZSTD_strategy const strategy = cctxParams->cParams.strategy;3180size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;3181BYTE* const ostart = fseMetadata->fseTablesBuffer;3182BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);3183BYTE* op = ostart;3184unsigned* countWorkspace = (unsigned*)workspace;3185unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);3186size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * 
sizeof(*countWorkspace);3187ZSTD_symbolEncodingTypeStats_t stats;31883189DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);3190stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,3191prevEntropy, nextEntropy, op, oend,3192strategy, countWorkspace,3193entropyWorkspace, entropyWorkspaceSize)3194: ZSTD_buildDummySequencesStatistics(nextEntropy);3195FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");3196fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;3197fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;3198fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;3199fseMetadata->lastCountSize = stats.lastCountSize;3200return stats.size;3201}320232033204/** ZSTD_buildBlockEntropyStats() :3205* Builds entropy for the block.3206* Requires workspace size ENTROPY_WORKSPACE_SIZE3207*3208* @return : 0 on success or error code3209*/3210size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,3211const ZSTD_entropyCTables_t* prevEntropy,3212ZSTD_entropyCTables_t* nextEntropy,3213const ZSTD_CCtx_params* cctxParams,3214ZSTD_entropyCTablesMetadata_t* entropyMetadata,3215void* workspace, size_t wkspSize)3216{3217size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;3218entropyMetadata->hufMetadata.hufDesSize =3219ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,3220&prevEntropy->huf, &nextEntropy->huf,3221&entropyMetadata->hufMetadata,3222ZSTD_literalsCompressionIsDisabled(cctxParams),3223workspace, wkspSize);3224FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");3225entropyMetadata->fseMetadata.fseTablesSize =3226ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,3227&prevEntropy->fse, &nextEntropy->fse,3228cctxParams,3229&entropyMetadata->fseMetadata,3230workspace, wkspSize);3231FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");3232return 0;3233}32343235/* 
Returns the size estimate for the literals section (header + content) of a block */3236static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,3237const ZSTD_hufCTables_t* huf,3238const ZSTD_hufCTablesMetadata_t* hufMetadata,3239void* workspace, size_t wkspSize,3240int writeEntropy)3241{3242unsigned* const countWksp = (unsigned*)workspace;3243unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;3244size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);3245U32 singleStream = litSize < 256;32463247if (hufMetadata->hType == set_basic) return litSize;3248else if (hufMetadata->hType == set_rle) return 1;3249else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {3250size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);3251if (ZSTD_isError(largest)) return litSize;3252{ size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);3253if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;3254if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */3255return cLitSizeEstimate + literalSectionHeaderSize;3256} }3257assert(0); /* impossible */3258return 0;3259}32603261/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */3262static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,3263const BYTE* codeTable, size_t nbSeq, unsigned maxCode,3264const FSE_CTable* fseCTable,3265const U8* additionalBits,3266short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,3267void* workspace, size_t wkspSize)3268{3269unsigned* const countWksp = (unsigned*)workspace;3270const BYTE* ctp = codeTable;3271const BYTE* const ctStart = ctp;3272const BYTE* const ctEnd = ctStart + nbSeq;3273size_t cSymbolTypeSizeEstimateInBits = 0;3274unsigned max = maxCode;32753276HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, 
workspace, wkspSize); /* can't fail */3277if (type == set_basic) {3278/* We selected this encoding type, so it must be valid. */3279assert(max <= defaultMax);3280(void)defaultMax;3281cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);3282} else if (type == set_rle) {3283cSymbolTypeSizeEstimateInBits = 0;3284} else if (type == set_compressed || type == set_repeat) {3285cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);3286}3287if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {3288return nbSeq * 10;3289}3290while (ctp < ctEnd) {3291if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];3292else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */3293ctp++;3294}3295return cSymbolTypeSizeEstimateInBits >> 3;3296}32973298/* Returns the size estimate for the sequences section (header + content) of a block */3299static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,3300const BYTE* llCodeTable,3301const BYTE* mlCodeTable,3302size_t nbSeq,3303const ZSTD_fseCTables_t* fseTables,3304const ZSTD_fseCTablesMetadata_t* fseMetadata,3305void* workspace, size_t wkspSize,3306int writeEntropy)3307{3308size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);3309size_t cSeqSizeEstimate = 0;3310cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,3311fseTables->offcodeCTable, NULL,3312OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,3313workspace, wkspSize);3314cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,3315fseTables->litlengthCTable, LL_bits,3316LL_defaultNorm, LL_defaultNormLog, MaxLL,3317workspace, wkspSize);3318cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,3319fseTables->matchlengthCTable, 
                                                          ML_bits, ML_defaultNorm, ML_defaultNormLog, MaxML,
                                                          workspace, wkspSize);
    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
    return cSeqSizeEstimate + sequencesSectionHeaderSize;
}

/* Returns the size estimate for a given stream of literals, of, ll, ml */
static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
                                     const BYTE* ofCodeTable,
                                     const BYTE* llCodeTable,
                                     const BYTE* mlCodeTable,
                                     size_t nbSeq,
                                     const ZSTD_entropyCTables_t* entropy,
                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                     void* workspace, size_t wkspSize,
                                     int writeLitEntropy, int writeSeqEntropy) {
    /* Total estimate = literals section + sequences section + block header */
    size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
                                                         &entropy->huf, &entropyMetadata->hufMetadata,
                                                         workspace, wkspSize, writeLitEntropy);
    size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
                                                         workspace, wkspSize, writeSeqEntropy);
    return seqSize + literalsSize + ZSTD_blockHeaderSize;
}

/* Builds entropy statistics and uses them for blocksize estimation.
 *
 * Returns the estimated compressed size of the seqStore, or a zstd error.
 */
static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
    ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
    DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
                    &zc->blockState.prevCBlock->entropy,
                    &zc->blockState.nextCBlock->entropy,
                    &zc->appliedParams,
                    entropyMetadata,
                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
                    seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
                    (size_t)(seqStore->sequences - seqStore->sequencesStart),
                    &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
                    (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
}

/* Returns literals bytes represented in a seqStore */
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
    size_t literalsBytes = 0;
    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
    size_t i;
    for (i = 0; i < nbSeqs; ++i) {
        seqDef seq = seqStore->sequencesStart[i];
        literalsBytes += seq.litLength;
        /* The long-length sequence stores its length minus 64 KB; add the base back */
        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
            literalsBytes += 0x10000;
        }
    }
    return literalsBytes;
}

/* Returns match bytes represented in a seqStore */
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
    size_t matchBytes = 0;
    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
    size_t i;
    for (i = 0; i < nbSeqs; ++i) {
        seqDef seq = seqStore->sequencesStart[i];
        matchBytes += seq.mlBase + MINMATCH;
        /* The long-length sequence stores its length minus 64 KB; add the base back */
        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
            matchBytes += 0x10000;
        }
    }
    return matchBytes;
}

/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
 * Stores the result in resultSeqStore.
 */
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
                               const seqStore_t* originalSeqStore,
                                     size_t startIdx, size_t endIdx) {
    BYTE* const litEnd = originalSeqStore->lit;
    size_t literalsBytes;
    size_t literalsBytesPreceding = 0;

    *resultSeqStore = *originalSeqStore;
    if (startIdx > 0) {
        /* Temporarily truncate to [0, startIdx) to count the literals consumed before the chunk */
        resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
    }

    /* Move longLengthPos into the correct position if necessary */
    if (originalSeqStore->longLengthType != ZSTD_llt_none) {
        if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
            resultSeqStore->longLengthType = ZSTD_llt_none;
        } else {
            resultSeqStore->longLengthPos -= (U32)startIdx;
        }
    }
    resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
    resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
    resultSeqStore->litStart += literalsBytesPreceding;
    if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
        /* This accounts for possible last literals if the derived chunk reaches the end of the block */
        resultSeqStore->lit = litEnd;
    } else {
        resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
    }
    resultSeqStore->llCode += startIdx;
    resultSeqStore->mlCode += startIdx;
    resultSeqStore->ofCode += startIdx;
}

/**
 * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
 * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
 */
static U32
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
{
    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
    assert(STORED_IS_REPCODE(offCode));
    if (adjustedOffCode == ZSTD_REP_NUM) {
        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
        assert(rep[0] > 0);
        return rep[0] - 1;
    }
    return rep[adjustedOffCode];
}

/**
 * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
 * due to emission of RLE/raw blocks that disturb the offset history,
 * and replaces any repcodes within the seqStore that may be invalid.
 *
 * dRepcodes are updated as would be on the decompression side.
 * cRepcodes are updated exactly
 * in accordance with the seqStore.
 *
 * Note : this function assumes seq->offBase respects the following numbering scheme :
 *        0 : invalid
 *        1-3 : repcode 1-3
 *        4+ : real_offset+3
 */
static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
                                          seqStore_t* const seqStore, U32 const nbSeq) {
    U32 idx = 0;
    for (; idx < nbSeq; ++idx) {
        seqDef* const seq = seqStore->sequencesStart + idx;
        U32 const ll0 = (seq->litLength == 0);
        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
        assert(seq->offBase > 0);
        if (STORED_IS_REPCODE(offCode)) {
            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
            /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
             * the repcode with the offset it actually references, determined by the compression
             * repcode history.
             */
            if (dRawOffset != cRawOffset) {
                seq->offBase = cRawOffset + ZSTD_REP_NUM;
            }
        }
        /* Compression repcode history is always updated with values directly from the unmodified seqStore.
         * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
         */
        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
        ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
    }
}

/* ZSTD_compressSeqStore_singleBlock():
 * Compresses a seqStore into a block with a block header, into the buffer dst.
 *
 * Returns the total size of that block (including header) or a ZSTD error code.
 */
static size_t
ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
                                  repcodes_t* const dRep, repcodes_t* const cRep,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  U32 lastBlock, U32 isPartition)
{
    /* Upper bound on compressed size below which an RLE block may be considered */
    const U32 rleMaxLength = 25;
    BYTE* op = (BYTE*)dst;
    const BYTE* ip = (const BYTE*)src;
    size_t cSize;
    size_t cSeqsSize;

    /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
    repcodes_t const dRepOriginal = *dRep;
    DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
    if (isPartition)
        ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));

    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit");
    cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
                &zc->appliedParams,
                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
                srcSize,
                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
                zc->bmi2);
    FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");

    if (!zc->isFirstBlock &&
        cSeqsSize < rleMaxLength &&
        ZSTD_isRLE((BYTE const*)src, srcSize)) {
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        cSeqsSize = 1;
    }

    if (zc->seqCollector.collectSequences) {
        ZSTD_copyBlockSequences(zc);
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        return 0;
    }

    if (cSeqsSize == 0) {
        /* Not compressible : emit the block uncompressed */
        cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
        FORWARD_IF_ERROR(cSize, "Nocompress block failed");
        DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
    } else if (cSeqsSize == 1) {
        cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
        FORWARD_IF_ERROR(cSize, "RLE compress block failed");
        DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
    } else {
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
        cSize = ZSTD_blockHeaderSize + cSeqsSize;
        DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
    }

    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}

/* Struct to keep track of where we are in our recursive calls. */
typedef struct {
    U32* splitLocations;    /* Array of split indices */
    size_t idx;             /* The current index within splitLocations being worked on */
} seqStoreSplits;

#define MIN_SEQUENCES_BLOCK_SPLITTING 300

/* Helper function to perform the recursive search for block splits.
 * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
 * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
 * we do not recurse.
 *
 * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
 *       In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
 *       In practice, recursion depth usually doesn't go beyond 4.
 *
 * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
 * At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
 * maximum of 128 KB, this value is actually impossible to reach.
 */
static void
ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
                             ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
{
    /* Scratch seqStores live in the CCtx so recursion does not grow the stack */
    seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
    seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
    seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
    size_t estimatedOriginalSize;
    size_t estimatedFirstHalfSize;
    size_t estimatedSecondHalfSize;
    size_t midIdx = (startIdx + endIdx)/2;

    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
        return;
    }
    DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
    ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
    ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
    ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
    DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
             estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
    if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
        return;
    }
    if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
        /* Recurse left first so split indices are recorded in ascending order */
        ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
        splits->splitLocations[splits->idx] = (U32)midIdx;
        splits->idx++;
        ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
    }
}

/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
 *
 * Returns the number of splits made (which equals the size of the partition table - 1).
 */
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
    seqStoreSplits splits = {partitions, 0};
    if (nbSeq <= 4) {
        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
        /* Refuse to try and split anything with less than 4 sequences */
        return 0;
    }
    ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
    /* Terminal entry : the end of the block is always the last partition boundary */
    splits.splitLocations[splits.idx] = nbSeq;
    DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
    return splits.idx;
}

/* ZSTD_compressBlock_splitBlock():
 * Attempts to split a given block into multiple blocks to improve compression ratio.
 *
 * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
 */
static size_t
ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
{
    size_t cSize = 0;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    size_t i = 0;
    size_t srcBytesTotal = 0;
    U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
    seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
    seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);

    /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
     * may become invalid.
     * In order to reconcile potentially invalid repcodes, we keep track of two
     * separate repcode histories that simulate repcode history on compression and decompression side,
     * and use the histories to determine whether we must replace a particular repcode with its raw offset.
     *
     * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
     *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
     *    a nocompress/RLE block.
     * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
     *    the replacement offset value rather than the original repcode to update the repcode history.
     *    dRep also will be the final repcode history sent to the next block.
     *
     * See ZSTD_seqStore_resolveOffCodes() for more details.
     */
    repcodes_t dRep;
    repcodes_t cRep;
    ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));

    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    if (numSplits == 0) {
        /* Splitting judged unprofitable : compress the whole seqStore as one block */
        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
                                                                   &dRep, &cRep,
                                                                    op, dstCapacity,
                                                                    ip, blockSize,
                                                                    lastBlock, 0 /* isPartition */);
        FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
        DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
        return cSizeSingleBlock;
    }

    ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
    for (i = 0; i <= numSplits; ++i) {
        size_t srcBytes;
        size_t cSizeChunk;
        U32 const lastPartition = (i == numSplits);
        U32 lastBlockEntireSrc = 0;

        srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
        srcBytesTotal += srcBytes;
        if (lastPartition) {
            /* This is the final partition, need to account for possible last literals */
            srcBytes += blockSize - srcBytesTotal;
            lastBlockEntireSrc = lastBlock;
        } else {
            ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
        }

        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,
                                                      &dRep, &cRep,
                                                       op, dstCapacity,
                                                       ip, srcBytes,
                                                       lastBlockEntireSrc, 1 /* isPartition */);
        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
        FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");

        ip += srcBytes;
        op += cSizeChunk;
        dstCapacity -= cSizeChunk;
        cSize += cSizeChunk;
        *currSeqStore = *nextSeqStore;
        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
    }
    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
     * for the next block.
     */
    ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
    return cSize;
}

static size_t
ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize, U32 lastBlock)
{
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    U32 nbSeq;
    size_t cSize;
    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
    assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
        if (bss == ZSTDbss_noCompress) {
            /* Block judged incompressible : emit it raw, downgrade offcode table trust */
            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
            FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
            DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
            return cSize;
        }
        nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
    }

    cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
    FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
    return cSize;
}

static size_t
ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                            void* dst, size_t dstCapacity,
                            const void* src, size_t srcSize, U32 frame)
{
    /* This the upper bound for the length of an rle block.
     * This isn't the actual upper bound.
     * Finding the real threshold
     * needs further investigation.
     */
    const U32 rleMaxLength = 25;
    size_t cSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
        /* cSize == 0 signals "not compressible" to the caller, which emits the block raw */
        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    }

    if (zc->seqCollector.collectSequences) {
        ZSTD_copyBlockSequences(zc);
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        return 0;
    }

    /* encode sequences and literals */
    cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
            &zc->appliedParams,
            dst, dstCapacity,
            srcSize,
            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
            zc->bmi2);

    if (frame &&
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        !zc->isFirstBlock &&
        cSize < rleMaxLength &&
        ZSTD_isRLE(ip, srcSize))
    {
        /* cSize == 1 signals an RLE block : caller writes the bt_rle header */
        cSize = 1;
        op[0] = ip[0];
    }

out:
    if (!ZSTD_isError(cSize) && cSize > 1) {
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
    }
    /* We check that dictionaries have offset codes available for the first
     * block. After the first block, the offcode table might not have large
     * enough codes to represent the offsets in the data.
     */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}

/* Compresses one block while honoring the targetCBlockSize constraint,
 * attempting superblock compression when the block carries sequences.
 * @return : total compressed size (headers included), or a ZSTD error code.
 */
static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const size_t bss, U32 lastBlock)
{
    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
    if (bss == ZSTDbss_compress) {
        if (/* We don't want to emit our first block as a RLE even if it qualifies because
             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
             * This is only an issue for zstd <= v1.4.3
             */
            !zc->isFirstBlock &&
            ZSTD_maybeRLE(&zc->seqStore) &&
            ZSTD_isRLE((BYTE const*)src, srcSize))
        {
            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
        }
        /* Attempt superblock compression.
         *
         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
         * standard ZSTD_compressBound(). This is a problem, because even if we have
This is a problem, because even if we have3846* space now, taking an extra byte now could cause us to run out of space later3847* and violate ZSTD_compressBound().3848*3849* Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.3850*3851* In order to respect ZSTD_compressBound() we must attempt to emit a raw3852* uncompressed block in these cases:3853* * cSize == 0: Return code for an uncompressed block.3854* * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).3855* ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of3856* output space.3857* * cSize >= blockBound(srcSize): We have expanded the block too much so3858* emit an uncompressed block.3859*/3860{3861size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);3862if (cSize != ERROR(dstSize_tooSmall)) {3863size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);3864FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");3865if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {3866ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);3867return cSize;3868}3869}3870}3871}38723873DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");3874/* Superblock compression failed, attempt to emit a single no compress block.3875* The decoder will be able to stream this block since it is uncompressed.3876*/3877return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);3878}38793880static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,3881void* dst, size_t dstCapacity,3882const void* src, size_t srcSize,3883U32 lastBlock)3884{3885size_t cSize = 0;3886const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);3887DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",3888(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, 
srcSize);3889FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");38903891cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);3892FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");38933894if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)3895zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;38963897return cSize;3898}38993900static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,3901ZSTD_cwksp* ws,3902ZSTD_CCtx_params const* params,3903void const* ip,3904void const* iend)3905{3906U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);3907U32 const maxDist = (U32)1 << params->cParams.windowLog;3908if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {3909U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);3910ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);3911ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);3912ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);3913ZSTD_cwksp_mark_tables_dirty(ws);3914ZSTD_reduceIndex(ms, params, correction);3915ZSTD_cwksp_mark_tables_clean(ws);3916if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;3917else ms->nextToUpdate -= correction;3918/* invalidate dictionaries on overflow correction */3919ms->loadedDictEnd = 0;3920ms->dictMatchState = NULL;3921}3922}39233924/*! 
 * ZSTD_compress_frameChunk() :
 * Compress a chunk of data into one or multiple blocks.
 * All blocks will be terminated, all input will be consumed.
 * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 * Frame is supposed already started (header already produced)
 * @return : compressed size, or an error code
 */
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                       U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;

    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);

        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
                        dstSize_tooSmall,
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;

        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
        ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

        {   size_t cSize;
            /* Dispatch on compression mode: target block size, block splitting, or plain */
            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
                assert(cSize > 0);
                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
            } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
                cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
                assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
            } else {
                cSize = ZSTD_compressBlock_internal(cctx,
                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                        ip, blockSize, 1 /* frame */);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");

                if (cSize == 0) {  /* block is not compressible */
                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
                } else {
                    /* cSize == 1 means RLE (see ZSTD_compressBlock_internal) */
                    U32 const cBlockHeader = cSize == 1 ?
                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
                    MEM_writeLE24(op, cBlockHeader);
                    cSize += ZSTD_blockHeaderSize;
                }
            }

            ip += blockSize;
            assert(remaining >= blockSize);
            remaining -= blockSize;
            op += cSize;
            assert(dstCapacity >= cSize);
            dstCapacity -= cSize;
            cctx->isFirstBlock = 0;
            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
                        (unsigned)cSize);
    }   }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return (size_t)(op-ostart);
}

/* Writes the zstd frame header into dst.
 * @return : number of header bytes written, or an error code if dstCapacity
 *           cannot hold the worst-case header size.
 */
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{   BYTE* const op = (BYTE*)dst;
    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ?
                                 0 : dictIDSizeCodeLength;   /* 0-3 */
    U32   const checksumFlag = params->fParams.checksumFlag>0;
    U32   const windowSize = (U32)1 << params->cParams.windowLog;
    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32   const fcsCode = params->fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
    size_t pos=0;

    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
                    "dst buf is too small to fit worst-case frame header size.");
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
    if (params->format == ZSTD_f_zstd1) {
        /* magicless format (ZSTD_f_zstd1_magicless) skips these 4 bytes */
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        pos = 4;
    }
    op[pos++] = frameHeaderDescriptionByte;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
    {
        default:
            assert(0); /* impossible */
            ZSTD_FALLTHROUGH;
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    switch(fcsCode)
    {
        default:
            assert(0); /* impossible */
            ZSTD_FALLTHROUGH;
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}

/* ZSTD_writeSkippableFrame() :
 * Writes out a skippable frame with the specified magic number variant (16 are supported),
 * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
 *
 * Returns the total number of bytes written, or a ZSTD error code.
 */
size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize, unsigned magicVariant) {
    BYTE* op = (BYTE*)dst;
    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
                    dstSize_tooSmall, "Not enough room for skippable frame");
    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");

    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
    MEM_writeLE32(op+4, (U32)srcSize);
    ZSTD_memcpy(op+8, src, srcSize);
    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
}

/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
{
    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
                    "dst buf is too small to write frame trailer empty block.");
    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
        MEM_writeLE24(dst, cBlockHeader24);
        return ZSTD_blockHeaderSize;
    }
}

/* Registers an externally-provided sequence buffer to be consumed during compression.
 * Only legal before compression starts, and incompatible with long-distance matching.
 * @return : 0 on success, or a ZSTD error code.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
                    "wrong cctx stage");
    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm ==
                    ZSTD_ps_enable,
                    parameter_unsupported,
                    "incompatible with ldm");
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.size = nbSeq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.pos = 0;
    cctx->externSeqStore.posInSequence = 0;
    return 0;
}


/* Shared worker for streaming and single-block compression :
 * writes the frame header on first call (frame mode), updates the window,
 * then compresses srcSize bytes as a frame chunk or as a single block.
 * @return : number of compressed bytes written to dst (header included), or an error code.
 */
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
    size_t fhSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    if (frame && (cctx->stage==ZSTDcs_init)) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */

    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
        ms->forceNonContiguous = 0;
        ms->nextToUpdate = ms->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams,
            src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t const cSize = frame ?
                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
        cctx->consumedSrcSize += srcSize;
        cctx->producedCSize += (cSize + fhSize);
        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            RETURN_ERROR_IF(
                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }
        return cSize + fhSize;
    }
}

size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}


/* Returns the maximum block size usable with this cctx's applied parameters */
size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
{
    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
    assert(!ZSTD_checkCParams(cParams));
    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
}

/* Compresses srcSize bytes as a single raw block (no frame header/footer).
 * srcSize must not exceed ZSTD_getBlockSize(cctx).
 */
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
      RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }

    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}

/*!
ZSTD_loadDictionaryContent() :
 * Feeds dictionary content into the match state (and LDM state, when active),
 * filling the search structures appropriate for the selected strategy.
 * @return : 0, or an error code
 */
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                         ldmState_t* ls,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         const void* src, size_t srcSize,
                                         ZSTD_dictTableLoadMethod_e dtlm)
{
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;

    /* Assert that we the ms params match the params we're being given */
    ZSTD_assertEqualCParams(params->cParams, ms->cParams);

    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
         * Dictionaries right at the edge will immediately trigger overflow
         * correction, but I don't want to insert extra constraints here.
         */
        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
        /* We must have cleared our windows when our source is this large. */
        assert(ZSTD_window_isEmpty(ms->window));
        if (loadLdmDict)
            assert(ZSTD_window_isEmpty(ls->window));
        /* If the dictionary is too large, only load the suffix of the dictionary. */
        if (srcSize > maxDictSize) {
            ip = iend - maxDictSize;
            src = ip;
            srcSize = maxDictSize;
        }
    }

    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
    ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
    ms->forceNonContiguous = params->deterministicRefPrefix;

    if (loadLdmDict) {
        ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
    }

    /* content smaller than the minimum hashable length : nothing to index */
    if (srcSize <= HASH_READ_SIZE) return 0;

    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);

    if (loadLdmDict)
        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);

    /* Fill the search structure matching the selected strategy.
     * The last HASH_READ_SIZE bytes are left un-indexed : hashing them
     * would read past iend. */
    switch(params->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, dtlm);
        break;
    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
        assert(srcSize >= HASH_READ_SIZE);
        if (ms->dedicatedDictSearch) {
            assert(ms->chainTable != NULL);
            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
        } else {
            assert(params->useRowMatchFinder != ZSTD_ps_auto);
            if (params->useRowMatchFinder == ZSTD_ps_enable) {
                /* tag table must be zeroed before reuse with new content */
                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
                ZSTD_memset(ms->tagTable, 0, tagTableSize);
                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
                DEBUGLOG(4, "Using row-based hash table for lazy dict");
            } else {
                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
            }
        }
        break;

    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        assert(srcSize >= HASH_READ_SIZE);
        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
        break;

    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    ms->nextToUpdate = (U32)(iend - ms->window.base);
    return 0;
}


/* Dictionaries that assign zero probability to symbols that show up causes problems
 * when FSE encoding.
Mark dictionaries with zero probability symbols as FSE_repeat_check
 * and only dictionaries with 100% valid symbols can be assumed valid.
 */
static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
{
    U32 s;
    if (dictMaxSymbolValue < maxSymbolValue) {
        /* table doesn't cover the full alphabet : must be re-checked per block */
        return FSE_repeat_check;
    }
    for (s = 0; s <= maxSymbolValue; ++s) {
        if (normalizedCounter[s] == 0) {
            return FSE_repeat_check;
        }
    }
    return FSE_repeat_valid;
}

/* ZSTD_loadCEntropy() :
 * Parses the entropy tables (Huffman + 3 FSE tables + rep codes) from a
 * zstd-format dictionary into `bs`, validating each as it goes.
 * `dict` is expected to start with the magic number and dict ID (8 bytes),
 * which are skipped here (already validated by the caller).
 * @return : number of header bytes consumed, or an error code */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize)
{
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
    const BYTE* const dictEnd = dictPtr + dictSize;
    dictPtr += 8;
    bs->entropy.huf.repeatMode = HUF_repeat_check;

    {   unsigned maxSymbolValue = 255;
        unsigned hasZeroWeights = 1;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
            dictEnd-dictPtr, &hasZeroWeights);

        /* We only set the loaded table as valid if it contains all non-zero
         * weights. Otherwise, we set it to check */
        if (!hasZeroWeights)
            bs->entropy.huf.repeatMode = HUF_repeat_valid;

        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
        dictPtr += hufHeaderSize;
    }

    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        dictPtr += offcodeHeaderSize;
    }

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
        dictPtr += matchlengthHeaderSize;
    }

    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.litlengthCTable,
                litlengthNCount, litlengthMaxValue, litlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
        dictPtr += litlengthHeaderSize;
    }

    /* 3 repeat offsets, 4 bytes each */
    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
    bs->rep[0] = MEM_readLE32(dictPtr+0);
    bs->rep[1] = MEM_readLE32(dictPtr+4);
    bs->rep[2] = MEM_readLE32(dictPtr+8);
    dictPtr += 12;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        U32 offcodeMax = MaxOff;
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
        }
        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));

        /* All repCodes must be <= dictContentSize and != 0 */
        {   U32 u;
            for (u=0; u<3; u++) {
                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
    }   }   }

    return dictPtr - (const BYTE*)dict;
}

/* Dictionary format :
 * See :
 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
 */
/*!
ZSTD_loadZstdDictionary() :
 * @return : dictID, or an error code
 *  assumptions : magic number supposed already checked
 *                dictSize supposed >= 8
 */
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
                                      ZSTD_matchState_t* ms,
                                      ZSTD_cwksp* ws,
                                      ZSTD_CCtx_params const* params,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictTableLoadMethod_e dtlm,
                                      void* workspace)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    size_t dictID;
    size_t eSize;
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(dictSize >= 8);
    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);

    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
    /* load entropy tables first, they precede the content in the dictionary */
    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
    dictPtr += eSize;

    {
        size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
    }
    return dictID;
}

/** ZSTD_compress_insertDictionary() :
*   Dispatches dictionary loading according to its declared (or detected) content type.
*   @return : dictID, or an error code */
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
                               ZSTD_matchState_t* ms,
                               ldmState_t* ls,
                               ZSTD_cwksp* ws,
                         const ZSTD_CCtx_params* params,
                         const void* dict, size_t dictSize,
                               ZSTD_dictContentType_e dictContentType,
                               ZSTD_dictTableLoadMethod_e dtlm,
                               void* workspace)
{
    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
    if ((dict==NULL) || (dictSize<8)) {
        /* too small to be a zstd dictionary : error only if one was required */
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
        return 0;
    }

    ZSTD_reset_compressedBlockState(bs);

    /* dict restricted modes */
    if (dictContentType == ZSTD_dct_rawContent)
        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);

    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
        if (dictContentType == ZSTD_dct_auto) {
            DEBUGLOG(4, "raw content dictionary detected");
            return ZSTD_loadDictionaryContent(
                ms, ls, ws, params, dict, dictSize, dtlm);
        }
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
        assert(0);   /* impossible */
    }

    /* dict as full zstd dictionary */
    return ZSTD_loadZstdDictionary(
        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
}

/* Below this size, CDict-attached parameters are preferred over pledgedSrcSize-derived ones. */
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)

/*! ZSTD_compressBegin_internal() :
 * Initializes the cctx for a new frame : resets state, applies params,
 * and loads the dictionary (either `dict` or `cdict`, never both).
 * @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                    ZSTD_buffered_policy_e zbuff)
{
    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
#if ZSTD_TRACE
    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
#endif
    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    if ( (cdict)
      && (cdict->dictContentSize > 0)
      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
        || cdict->compressionLevel == 0)
      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
        /* fast path : reuse the CDict's pre-digested tables directly */
        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
    }

    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                     dictContentSize,
                                     ZSTDcrp_makeClean, zbuff) , "");
    {   size_t const dictID = cdict ?
                ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
                        cdict->dictContentSize, cdict->dictContentType, dtlm,
                        cctx->entropyWorkspace)
              : ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
                        dictContentType, dtlm, cctx->entropyWorkspace);
        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
        assert(dictID <= UINT_MAX);
        cctx->dictID = (U32)dictID;
        cctx->dictContentSize = dictContentSize;
    }
    return 0;
}

size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
    /* compression parameters verification and optimization */
    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, dictContentType, dtlm,
                                       cdict,
                                       params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}

/*! ZSTD_compressBegin_advanced() :
*   @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                             const void* dict, size_t dictSize,
                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compressBegin_advanced_internal(cctx,
                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                            NULL /*cdict*/,
                                            &cctxParams, pledgedSrcSize);
}

size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_CCtx_params cctxParams;
    {
        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}

size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
}


/*!
ZSTD_writeEpilogue() :
 * Ends a frame.
 * Emits the frame header if nothing was written yet (empty frame),
 * a final empty block if the last block wasn't marked "last",
 * and the xxhash checksum when checksumFlag is set.
 * @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    DEBUGLOG(4, "ZSTD_writeEpilogue");
    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}

/* ZSTD_CCtx_trace() :
 * Reports frame-completion statistics through the tracing hook, when enabled.
 * No-op when ZSTD_TRACE is disabled at build time. */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
{
#if ZSTD_TRACE
    if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
        int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;
        ZSTD_Trace trace;
        ZSTD_memset(&trace, 0, sizeof(trace));
        trace.version = ZSTD_VERSION_NUMBER;
        trace.streaming = streaming;
        trace.dictionaryID = cctx->dictID;
        trace.dictionarySize = cctx->dictContentSize;
        trace.uncompressedSize = cctx->consumedSrcSize;
        trace.compressedSize = cctx->producedCSize + extraCSize;
        trace.params = &cctx->appliedParams;
        trace.cctx = cctx;
        ZSTD_trace_compress_end(cctx->traceCtx, &trace);
    }
    cctx->traceCtx = 0;
#else
    (void)cctx;
    (void)extraCSize;
#endif
}

/* ZSTD_compressEnd() :
 * Compresses the final chunk and writes the frame epilogue.
 * Verifies that the total consumed size matches pledgedSrcSize, when one was given. */
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t endResult;
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
        DEBUGLOG(4, "end of frame : controlling src size");
        RETURN_ERROR_IF(
            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
            srcSize_wrong,
             "error : pledgedSrcSize = %u, while realSrcSize = %u",
            (unsigned)cctx->pledgedSrcSizePlusOne-1,
            (unsigned)cctx->consumedSrcSize);
    }
    ZSTD_CCtx_trace(cctx, endResult);
    return cSize + endResult;
}

size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced");
    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           &cctx->simpleApiParams);
}

/* Internal */
size_t
ZSTD_compress_advanced_internal(
        ZSTD_CCtx* cctx,
        void* dst, size_t dstCapacity,
        const void* src, size_t srcSize,
        const void* dict,size_t dictSize,
        const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                         params, srcSize, ZSTDb_not_buffered) , "");
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}

size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               int compressionLevel)
{
    {
        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
        assert(params.fParams.contentSizeFlag == 1);
        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
}

size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize,
                         int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
    assert(cctx != NULL);
    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
}

/* ZSTD_compress() :
 * One-shot compression with a transient context : heap-allocated when
 * ZSTD_COMPRESS_HEAPMODE is set, stack-allocated otherwise. */
size_t ZSTD_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize,
                     int compressionLevel)
{
    size_t result;
#if ZSTD_COMPRESS_HEAPMODE
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtx(cctx);
#else
    ZSTD_CCtx ctxBody;
    ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtxContent(&ctxBody);   /* can't free ctxBody itself, as it's on stack; free only heap content */
#endif
    return result;
}


/* =====  Dictionary API  ===== */

/*! ZSTD_estimateCDictSize_advanced() :
 *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
size_t ZSTD_estimateCDictSize_advanced(
        size_t dictSize, ZSTD_compressionParameters cParams,
        ZSTD_dictLoadMethod_e dictLoadMethod)
{
    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
          * in case we are using DDS with row-hash. */
         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}

size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
}

size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support sizeof on NULL */
    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
    /* cdict may be in the workspace */
    return (cdict->workspace.workspace == cdict ?
0 : sizeof(*cdict))
           + ZSTD_cwksp_sizeof(&cdict->workspace);
}

/* ZSTD_initCDict_internal() :
 * Fills an already-allocated CDict : copies (or references) the dictionary
 * content, resets the match state, and digests the dictionary.
 * @return : 0, or an error code */
static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_CCtx_params params)
{
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(params.cParams));
    cdict->matchState.cParams = params.cParams;
    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
        /* by-reference : keep pointing at caller's buffer (caller keeps it alive) */
        cdict->dictContent = dictBuffer;
    } else {
        /* by-copy : duplicate the dictionary into the CDict's own workspace */
         void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
        cdict->dictContent = internalBuffer;
        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
    }
    cdict->dictContentSize = dictSize;
    cdict->dictContentType = dictContentType;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);


    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    FORWARD_IF_ERROR(ZSTD_reset_matchState(
        &cdict->matchState,
        &cdict->workspace,
        &params.cParams,
        params.useRowMatchFinder,
        ZSTDcrp_makeClean,
        ZSTDirp_reset,
        ZSTD_resetTarget_CDict), "");
    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is < 8 bytes.
     */
    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
        params.fParams.contentSizeFlag = 1;
        {   size_t const dictID = ZSTD_compress_insertDictionary(
                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
                    &params, cdict->dictContent, cdict->dictContentSize,
                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
            assert(dictID <= (size_t)(U32)-1);
            cdict->dictID = (U32)dictID;
        }
    }

    return 0;
}

/* ZSTD_createCDict_advanced_internal() :
 * Allocates a single workspace large enough for the CDict struct, its entropy
 * workspace, match state, and (optionally) a copy of the dictionary content.
 * The CDict itself lives inside that workspace.
 * @return : the (partially initialized) CDict, or NULL on allocation failure */
static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_paramSwitch_e useRowMatchFinder,
                                      U32 enableDedicatedDictSearch,
                                      ZSTD_customMem customMem)
{
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;

    {   size_t const workspaceSize =
            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
            (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
        ZSTD_cwksp ws;
        ZSTD_CDict* cdict;

        if (!workspace) {
            ZSTD_customFree(workspace, customMem);
            return NULL;
        }

        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);

        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        assert(cdict != NULL);
        ZSTD_cwksp_move(&cdict->workspace, &ws);
        cdict->customMem = customMem;
        cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
        cdict->useRowMatchFinder = useRowMatchFinder;
        return cdict;
    }
}

ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_customMem customMem)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
    ZSTD_CCtxParams_init(&cctxParams, 0);
    cctxParams.cParams = cParams;
    cctxParams.customMem = customMem;
    return ZSTD_createCDict_advanced2(
        dictBuffer, dictSize,
        dictLoadMethod, dictContentType,
        &cctxParams, customMem);
}

ZSTD_CDict*
ZSTD_createCDict_advanced2(4894const void* dict, size_t dictSize,4895ZSTD_dictLoadMethod_e dictLoadMethod,4896ZSTD_dictContentType_e dictContentType,4897const ZSTD_CCtx_params* originalCctxParams,4898ZSTD_customMem customMem)4899{4900ZSTD_CCtx_params cctxParams = *originalCctxParams;4901ZSTD_compressionParameters cParams;4902ZSTD_CDict* cdict;49034904DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);4905if (!customMem.customAlloc ^ !customMem.customFree) return NULL;49064907if (cctxParams.enableDedicatedDictSearch) {4908cParams = ZSTD_dedicatedDictSearch_getCParams(4909cctxParams.compressionLevel, dictSize);4910ZSTD_overrideCParams(&cParams, &cctxParams.cParams);4911} else {4912cParams = ZSTD_getCParamsFromCCtxParams(4913&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);4914}49154916if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {4917/* Fall back to non-DDSS params */4918cctxParams.enableDedicatedDictSearch = 0;4919cParams = ZSTD_getCParamsFromCCtxParams(4920&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);4921}49224923DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);4924cctxParams.cParams = cParams;4925cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);49264927cdict = ZSTD_createCDict_advanced_internal(dictSize,4928dictLoadMethod, cctxParams.cParams,4929cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,4930customMem);49314932if (ZSTD_isError( ZSTD_initCDict_internal(cdict,4933dict, dictSize,4934dictLoadMethod, dictContentType,4935cctxParams) )) {4936ZSTD_freeCDict(cdict);4937return NULL;4938}49394940return cdict;4941}49424943ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)4944{4945ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);4946ZSTD_CDict* const cdict = 
ZSTD_createCDict_advanced(dict, dictSize,4947ZSTD_dlm_byCopy, ZSTD_dct_auto,4948cParams, ZSTD_defaultCMem);4949if (cdict)4950cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;4951return cdict;4952}49534954ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)4955{4956ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);4957ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,4958ZSTD_dlm_byRef, ZSTD_dct_auto,4959cParams, ZSTD_defaultCMem);4960if (cdict)4961cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;4962return cdict;4963}49644965size_t ZSTD_freeCDict(ZSTD_CDict* cdict)4966{4967if (cdict==NULL) return 0; /* support free on NULL */4968{ ZSTD_customMem const cMem = cdict->customMem;4969int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);4970ZSTD_cwksp_free(&cdict->workspace, cMem);4971if (!cdictInWorkspace) {4972ZSTD_customFree(cdict, cMem);4973}4974return 0;4975}4976}49774978/*! 
ZSTD_initStaticCDict_advanced() :
 *  Generate a digested dictionary in provided memory area.
 *  workspace: The memory area to emplace the dictionary into.
 *             Provided pointer must 8-bytes aligned.
 *             It must outlive dictionary usage.
 *  workspaceSize: Use ZSTD_estimateCDictSize()
 *                 to determine how large workspace must be.
 *  cParams : use ZSTD_getCParams() to transform a compression level
 *            into its relevants cParams.
 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
 *  Note : there is no corresponding "free" function.
 *         Since workspace was allocated externally, it must be freed externally.
 */
const ZSTD_CDict* ZSTD_initStaticCDict(
                                 void* workspace, size_t workspaceSize,
                           const void* dict, size_t dictSize,
                                 ZSTD_dictLoadMethod_e dictLoadMethod,
                                 ZSTD_dictContentType_e dictContentType,
                                 ZSTD_compressionParameters cParams)
{
    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
                            + matchStateSize;
    ZSTD_CDict* cdict;
    ZSTD_CCtx_params params;

    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */

    {
        /* carve the CDict struct out of the front of the caller's workspace */
        ZSTD_cwksp ws;
        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        if (cdict == NULL) return NULL;
        ZSTD_cwksp_move(&cdict->workspace, &ws);
    }

    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
    if (workspaceSize < neededSize) return NULL;

    ZSTD_CCtxParams_init(&params, 0);
    params.cParams = cParams;
    params.useRowMatchFinder = useRowMatchFinder;
    cdict->useRowMatchFinder = useRowMatchFinder;

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                              dict, dictSize,
                                              dictLoadMethod, dictContentType,
                                              params) ))
        return NULL;

    return cdict;
}

ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
{
    assert(cdict != NULL);
    return cdict->matchState.cParams;
}

/*! ZSTD_getDictID_fromCDict() :
 *  Provides the dictID of the dictionary loaded into `cdict`.
 *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
 *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries.
*/5047unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)5048{5049if (cdict==NULL) return 0;5050return cdict->dictID;5051}50525053/* ZSTD_compressBegin_usingCDict_internal() :5054* Implementation of various ZSTD_compressBegin_usingCDict* functions.5055*/5056static size_t ZSTD_compressBegin_usingCDict_internal(5057ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,5058ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)5059{5060ZSTD_CCtx_params cctxParams;5061DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");5062RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");5063/* Initialize the cctxParams from the cdict */5064{5065ZSTD_parameters params;5066params.fParams = fParams;5067params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF5068|| pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER5069|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN5070|| cdict->compressionLevel == 0 ) ?5071ZSTD_getCParamsFromCDict(cdict)5072: ZSTD_getCParams(cdict->compressionLevel,5073pledgedSrcSize,5074cdict->dictContentSize);5075ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel);5076}5077/* Increase window log to fit the entire dictionary and source if the5078* source size is known. Limit the increase to 19, which is the5079* window log for compression level 1 with the largest source size.5080*/5081if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {5082U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);5083U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;5084cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);5085}5086return ZSTD_compressBegin_internal(cctx,5087NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,5088cdict,5089&cctxParams, pledgedSrcSize,5090ZSTDb_not_buffered);5091}509250935094/* ZSTD_compressBegin_usingCDict_advanced() :5095* This function is DEPRECATED.5096* cdict must be != NULL */5097size_t ZSTD_compressBegin_usingCDict_advanced(5098ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,5099ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)5100{5101return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);5102}51035104/* ZSTD_compressBegin_usingCDict() :5105* cdict must be != NULL */5106size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)5107{5108ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };5109return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);5110}51115112/*! ZSTD_compress_usingCDict_internal():5113* Implementation of various ZSTD_compress_usingCDict* functions.5114*/5115static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,5116void* dst, size_t dstCapacity,5117const void* src, size_t srcSize,5118const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)5119{5120FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */5121return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);5122}51235124/*! ZSTD_compress_usingCDict_advanced():5125* This function is DEPRECATED.5126*/5127size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,5128void* dst, size_t dstCapacity,5129const void* src, size_t srcSize,5130const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)5131{5132return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);5133}51345135/*! 
ZSTD_compress_usingCDict() :5136* Compression using a digested Dictionary.5137* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.5138* Note that compression parameters are decided at CDict creation time5139* while frame parameters are hardcoded */5140size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,5141void* dst, size_t dstCapacity,5142const void* src, size_t srcSize,5143const ZSTD_CDict* cdict)5144{5145ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };5146return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);5147}5148514951505151/* ******************************************************************5152* Streaming5153********************************************************************/51545155ZSTD_CStream* ZSTD_createCStream(void)5156{5157DEBUGLOG(3, "ZSTD_createCStream");5158return ZSTD_createCStream_advanced(ZSTD_defaultCMem);5159}51605161ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)5162{5163return ZSTD_initStaticCCtx(workspace, workspaceSize);5164}51655166ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)5167{ /* CStream and CCtx are now same object */5168return ZSTD_createCCtx_advanced(customMem);5169}51705171size_t ZSTD_freeCStream(ZSTD_CStream* zcs)5172{5173return ZSTD_freeCCtx(zcs); /* same object */5174}5175517651775178/*====== Initialization ======*/51795180size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; }51815182size_t ZSTD_CStreamOutSize(void)5183{5184return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;5185}51865187static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)5188{5189if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))5190return ZSTD_cpm_attachDict;5191else5192return ZSTD_cpm_noAttachDict;5193}51945195/* ZSTD_resetCStream():5196* 
ostart + output->pos : ostart;5338U32 someMoreWork = 1;53395340/* check expectations */5341DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);5342if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {5343assert(zcs->inBuff != NULL);5344assert(zcs->inBuffSize > 0);5345}5346if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {5347assert(zcs->outBuff != NULL);5348assert(zcs->outBuffSize > 0);5349}5350assert(output->pos <= output->size);5351assert(input->pos <= input->size);5352assert((U32)flushMode <= (U32)ZSTD_e_end);53535354while (someMoreWork) {5355switch(zcs->streamStage)5356{5357case zcss_init:5358RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");53595360case zcss_load:5361if ( (flushMode == ZSTD_e_end)5362&& ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */5363|| zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */5364&& (zcs->inBuffPos == 0) ) {5365/* shortcut to compression pass directly into output buffer */5366size_t const cSize = ZSTD_compressEnd(zcs,5367op, oend-op, ip, iend-ip);5368DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);5369FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");5370ip = iend;5371op += cSize;5372zcs->frameEnded = 1;5373ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);5374someMoreWork = 0; break;5375}5376/* complete loading into inBuffer in buffered mode */5377if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {5378size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;5379size_t const loaded = ZSTD_limitCopy(5380zcs->inBuff + zcs->inBuffPos, toLoad,5381ip, iend-ip);5382zcs->inBuffPos += loaded;5383if (loaded != 0)5384ip += loaded;5385if ( (flushMode == ZSTD_e_continue)5386&& (zcs->inBuffPos < zcs->inBuffTarget) ) {5387/* not enough input to fill full block : stop here */5388someMoreWork = 0; break;5389}5390if ( (flushMode == ZSTD_e_flush)5391&& (zcs->inBuffPos == zcs->inToCompress) ) {5392/* empty 
*/5393someMoreWork = 0; break;5394}5395}5396/* compress current block (note : this stage cannot be stopped in the middle) */5397DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);5398{ int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);5399void* cDst;5400size_t cSize;5401size_t oSize = oend-op;5402size_t const iSize = inputBuffered5403? zcs->inBuffPos - zcs->inToCompress5404: MIN((size_t)(iend - ip), zcs->blockSize);5405if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)5406cDst = op; /* compress into output buffer, to skip flush stage */5407else5408cDst = zcs->outBuff, oSize = zcs->outBuffSize;5409if (inputBuffered) {5410unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);5411cSize = lastBlock ?5412ZSTD_compressEnd(zcs, cDst, oSize,5413zcs->inBuff + zcs->inToCompress, iSize) :5414ZSTD_compressContinue(zcs, cDst, oSize,5415zcs->inBuff + zcs->inToCompress, iSize);5416FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");5417zcs->frameEnded = lastBlock;5418/* prepare next block */5419zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;5420if (zcs->inBuffTarget > zcs->inBuffSize)5421zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;5422DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",5423(unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);5424if (!lastBlock)5425assert(zcs->inBuffTarget <= zcs->inBuffSize);5426zcs->inToCompress = zcs->inBuffPos;5427} else {5428unsigned const lastBlock = (ip + iSize == iend);5429assert(flushMode == ZSTD_e_end /* Already validated */);5430cSize = lastBlock ?5431ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :5432ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);5433/* Consume the input prior to error checking to mirror buffered mode. */5434if (iSize > 0)5435ip += iSize;5436FORWARD_IF_ERROR(cSize, "%s", lastBlock ? 
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");5437zcs->frameEnded = lastBlock;5438if (lastBlock)5439assert(ip == iend);5440}5441if (cDst == op) { /* no need to flush */5442op += cSize;5443if (zcs->frameEnded) {5444DEBUGLOG(5, "Frame completed directly in outBuffer");5445someMoreWork = 0;5446ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);5447}5448break;5449}5450zcs->outBuffContentSize = cSize;5451zcs->outBuffFlushedSize = 0;5452zcs->streamStage = zcss_flush; /* pass-through to flush stage */5453}5454ZSTD_FALLTHROUGH;5455case zcss_flush:5456DEBUGLOG(5, "flush stage");5457assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);5458{ size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;5459size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),5460zcs->outBuff + zcs->outBuffFlushedSize, toFlush);5461DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",5462(unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);5463if (flushed)5464op += flushed;5465zcs->outBuffFlushedSize += flushed;5466if (toFlush!=flushed) {5467/* flush not fully completed, presumably because dst is too small */5468assert(op==oend);5469someMoreWork = 0;5470break;5471}5472zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;5473if (zcs->frameEnded) {5474DEBUGLOG(5, "Frame completed on flush");5475someMoreWork = 0;5476ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);5477break;5478}5479zcs->streamStage = zcss_load;5480break;5481}54825483default: /* impossible */5484assert(0);5485}5486}54875488input->pos = ip - istart;5489output->pos = op - ostart;5490if (zcs->frameEnded) return 0;5491return ZSTD_nextInputSizeHint(zcs);5492}54935494static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)5495{5496#ifdef ZSTD_MULTITHREAD5497if (cctx->appliedParams.nbWorkers >= 1) {5498assert(cctx->mtctx != NULL);5499return ZSTDMT_nextInputSizeHint(cctx->mtctx);5500}5501#endif5502return ZSTD_nextInputSizeHint(cctx);55035504}55055506size_t ZSTD_compressStream(ZSTD_CStream* 
zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)5507{5508FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");5509return ZSTD_nextInputSizeHint_MTorST(zcs);5510}55115512/* After a compression call set the expected input/output buffer.5513* This is validated at the start of the next compression call.5514*/5515static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)5516{5517if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {5518cctx->expectedInBuffer = *input;5519}5520if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {5521cctx->expectedOutBufferSize = output->size - output->pos;5522}5523}55245525/* Validate that the input/output buffers match the expectations set by5526* ZSTD_setBufferExpectations.5527*/5528static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,5529ZSTD_outBuffer const* output,5530ZSTD_inBuffer const* input,5531ZSTD_EndDirective endOp)5532{5533if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {5534ZSTD_inBuffer const expect = cctx->expectedInBuffer;5535if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)5536RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");5537if (endOp != ZSTD_e_end)5538RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");5539}5540if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {5541size_t const outBufferSize = output->size - output->pos;5542if (cctx->expectedOutBufferSize != outBufferSize)5543RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");5544}5545return 0;5546}55475548static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,5549ZSTD_EndDirective endOp,5550size_t inSize) {5551ZSTD_CCtx_params params = cctx->requestedParams;5552ZSTD_prefixDict const prefixDict = cctx->prefixDict;5553FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. 
*/5554ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */5555assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */5556if (cctx->cdict && !cctx->localDict.cdict) {5557/* Let the cdict's compression level take priority over the requested params.5558* But do not take the cdict's compression level if the "cdict" is actually a localDict5559* generated from ZSTD_initLocalDict().5560*/5561params.compressionLevel = cctx->cdict->compressionLevel;5562}5563DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");5564if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */5565{5566size_t const dictSize = prefixDict.dict5567? prefixDict.dictSize5568: (cctx->cdict ? cctx->cdict->dictContentSize : 0);5569ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1);5570params.cParams = ZSTD_getCParamsFromCCtxParams(5571¶ms, cctx->pledgedSrcSizePlusOne-1,5572dictSize, mode);5573}55745575params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams);5576params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams);5577params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams);55785579#ifdef ZSTD_MULTITHREAD5580if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {5581params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */5582}5583if (params.nbWorkers > 0) {5584#if ZSTD_TRACE5585cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? 
ZSTD_trace_compress_begin(cctx) : 0;5586#endif5587/* mt context creation */5588if (cctx->mtctx == NULL) {5589DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",5590params.nbWorkers);5591cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);5592RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");5593}5594/* mt compression */5595DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);5596FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(5597cctx->mtctx,5598prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,5599cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");5600cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0;5601cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize;5602cctx->consumedSrcSize = 0;5603cctx->producedCSize = 0;5604cctx->streamStage = zcss_load;5605cctx->appliedParams = params;5606} else5607#endif5608{ U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;5609assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));5610FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,5611prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,5612cctx->cdict,5613¶ms, pledgedSrcSize,5614ZSTDb_buffered) , "");5615assert(cctx->appliedParams.nbWorkers == 0);5616cctx->inToCompress = 0;5617cctx->inBuffPos = 0;5618if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {5619/* for small input: avoid automatic flush on reaching end of block, since5620* it would require to add a 3-bytes null block to end frame5621*/5622cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);5623} else {5624cctx->inBuffTarget = 0;5625}5626cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;5627cctx->streamStage = zcss_load;5628cctx->frameEnded = 0;5629}5630return 0;5631}56325633size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,5634ZSTD_outBuffer* output,5635ZSTD_inBuffer* 
input,5636ZSTD_EndDirective endOp)5637{5638DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);5639/* check conditions */5640RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");5641RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer");5642RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");5643assert(cctx != NULL);56445645/* transparent initialization stage */5646if (cctx->streamStage == zcss_init) {5647FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");5648ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */5649}5650/* end of transparent initialization stage */56515652FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");5653/* compression stage */5654#ifdef ZSTD_MULTITHREAD5655if (cctx->appliedParams.nbWorkers > 0) {5656size_t flushMin;5657if (cctx->cParamsChanged) {5658ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);5659cctx->cParamsChanged = 0;5660}5661for (;;) {5662size_t const ipos = input->pos;5663size_t const opos = output->pos;5664flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);5665cctx->consumedSrcSize += (U64)(input->pos - ipos);5666cctx->producedCSize += (U64)(output->pos - opos);5667if ( ZSTD_isError(flushMin)5668|| (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */5669if (flushMin == 0)5670ZSTD_CCtx_trace(cctx, 0);5671ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);5672}5673FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");56745675if (endOp == ZSTD_e_continue) {5676/* We only require some progress with ZSTD_e_continue, not maximal progress.5677* We're done if we've consumed or produced any bytes, or either buffer is5678* full.5679*/5680if (input->pos != ipos || output->pos != opos || 
input->pos == input->size || output->pos == output->size)5681break;5682} else {5683assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end);5684/* We require maximal progress. We're done when the flush is complete or the5685* output buffer is full.5686*/5687if (flushMin == 0 || output->pos == output->size)5688break;5689}5690}5691DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");5692/* Either we don't require maximum forward progress, we've finished the5693* flush, or we are out of output space.5694*/5695assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size);5696ZSTD_setBufferExpectations(cctx, output, input);5697return flushMin;5698}5699#endif5700FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");5701DEBUGLOG(5, "completed ZSTD_compressStream2");5702ZSTD_setBufferExpectations(cctx, output, input);5703return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */5704}57055706size_t ZSTD_compressStream2_simpleArgs (5707ZSTD_CCtx* cctx,5708void* dst, size_t dstCapacity, size_t* dstPos,5709const void* src, size_t srcSize, size_t* srcPos,5710ZSTD_EndDirective endOp)5711{5712ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };5713ZSTD_inBuffer input = { src, srcSize, *srcPos };5714/* ZSTD_compressStream2() will check validity of dstPos and srcPos */5715size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);5716*dstPos = output.pos;5717*srcPos = input.pos;5718return cErr;5719}57205721size_t ZSTD_compress2(ZSTD_CCtx* cctx,5722void* dst, size_t dstCapacity,5723const void* src, size_t srcSize)5724{5725ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;5726ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;5727DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);5728ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);5729/* Enable stable input/output buffers. 
*/5730cctx->requestedParams.inBufferMode = ZSTD_bm_stable;5731cctx->requestedParams.outBufferMode = ZSTD_bm_stable;5732{ size_t oPos = 0;5733size_t iPos = 0;5734size_t const result = ZSTD_compressStream2_simpleArgs(cctx,5735dst, dstCapacity, &oPos,5736src, srcSize, &iPos,5737ZSTD_e_end);5738/* Reset to the original values. */5739cctx->requestedParams.inBufferMode = originalInBufferMode;5740cctx->requestedParams.outBufferMode = originalOutBufferMode;5741FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");5742if (result != 0) { /* compression not completed, due to lack of output space */5743assert(oPos == dstCapacity);5744RETURN_ERROR(dstSize_tooSmall, "");5745}5746assert(iPos == srcSize); /* all input is expected consumed */5747return oPos;5748}5749}57505751typedef struct {5752U32 idx; /* Index in array of ZSTD_Sequence */5753U32 posInSequence; /* Position within sequence at idx */5754size_t posInSrc; /* Number of bytes given by sequences provided so far */5755} ZSTD_sequencePosition;57565757/* ZSTD_validateSequence() :5758* @offCode : is presumed to follow format required by ZSTD_storeSeq()5759* @returns a ZSTD error code if sequence is not valid5760*/5761static size_t5762ZSTD_validateSequence(U32 offCode, U32 matchLength,5763size_t posInSrc, U32 windowLog, size_t dictSize)5764{5765U32 const windowSize = 1 << windowLog;5766/* posInSrc represents the amount of data the the decoder would decode up to this point.5767* As long as the amount of data decoded is less than or equal to window size, offsets may be5768* larger than the total length of output decoded in order to reference the dict, even larger than5769* window size. After output surpasses windowSize, we're limited to windowSize offsets again.5770*/5771size_t const offsetBound = posInSrc > windowSize ? 
(size_t)windowSize : posInSrc + (size_t)dictSize;5772RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");5773RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");5774return 0;5775}57765777/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */5778static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)5779{5780U32 offCode = STORE_OFFSET(rawOffset);57815782if (!ll0 && rawOffset == rep[0]) {5783offCode = STORE_REPCODE_1;5784} else if (rawOffset == rep[1]) {5785offCode = STORE_REPCODE(2 - ll0);5786} else if (rawOffset == rep[2]) {5787offCode = STORE_REPCODE(3 - ll0);5788} else if (ll0 && rawOffset == rep[0] - 1) {5789offCode = STORE_REPCODE_3;5790}5791return offCode;5792}57935794/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of5795* ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.5796*/5797static size_t5798ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,5799ZSTD_sequencePosition* seqPos,5800const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,5801const void* src, size_t blockSize)5802{5803U32 idx = seqPos->idx;5804BYTE const* ip = (BYTE const*)(src);5805const BYTE* const iend = ip + blockSize;5806repcodes_t updatedRepcodes;5807U32 dictSize;58085809if (cctx->cdict) {5810dictSize = (U32)cctx->cdict->dictContentSize;5811} else if (cctx->prefixDict.dict) {5812dictSize = (U32)cctx->prefixDict.dictSize;5813} else {5814dictSize = 0;5815}5816ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));5817for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {5818U32 const litLength = inSeqs[idx].litLength;5819U32 const ll0 = (litLength == 0);5820U32 const matchLength = inSeqs[idx].matchLength;5821U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, 
updatedRepcodes.rep, ll0);5822ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);58235824DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);5825if (cctx->appliedParams.validateSequences) {5826seqPos->posInSrc += litLength + matchLength;5827FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,5828cctx->appliedParams.cParams.windowLog, dictSize),5829"Sequence validation failed");5830}5831RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,5832"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");5833ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);5834ip += matchLength + litLength;5835}5836ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));58375838if (inSeqs[idx].litLength) {5839DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);5840ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);5841ip += inSeqs[idx].litLength;5842seqPos->posInSrc += inSeqs[idx].litLength;5843}5844RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");5845seqPos->idx = idx+1;5846return 0;5847}58485849/* Returns the number of bytes to move the current read position back by. Only non-zero5850* if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something5851* went wrong.5852*5853* This function will attempt to scan through blockSize bytes represented by the sequences5854* in inSeqs, storing any (partial) sequences.5855*5856* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to5857* avoid splitting a match, or to avoid splitting a match such that it would produce a match5858* smaller than MINMATCH. 
In this case, we return the number of bytes that we didn't read from this block.5859*/5860static size_t5861ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,5862const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,5863const void* src, size_t blockSize)5864{5865U32 idx = seqPos->idx;5866U32 startPosInSequence = seqPos->posInSequence;5867U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;5868size_t dictSize;5869BYTE const* ip = (BYTE const*)(src);5870BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */5871repcodes_t updatedRepcodes;5872U32 bytesAdjustment = 0;5873U32 finalMatchSplit = 0;58745875if (cctx->cdict) {5876dictSize = cctx->cdict->dictContentSize;5877} else if (cctx->prefixDict.dict) {5878dictSize = cctx->prefixDict.dictSize;5879} else {5880dictSize = 0;5881}5882DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);5883DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);5884ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));5885while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {5886const ZSTD_Sequence currSeq = inSeqs[idx];5887U32 litLength = currSeq.litLength;5888U32 matchLength = currSeq.matchLength;5889U32 const rawOffset = currSeq.offset;5890U32 offCode;58915892/* Modify the sequence depending on where endPosInSequence lies */5893if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {5894if (startPosInSequence >= litLength) {5895startPosInSequence -= litLength;5896litLength = 0;5897matchLength -= startPosInSequence;5898} else {5899litLength -= startPosInSequence;5900}5901/* Move to the next sequence */5902endPosInSequence -= currSeq.litLength + currSeq.matchLength;5903startPosInSequence = 0;5904idx++;5905} else {5906/* This is the final (partial) sequence 
we're adding from inSeqs, and endPosInSequence5907does not reach the end of the match. So, we have to split the sequence */5908DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",5909currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);5910if (endPosInSequence > litLength) {5911U32 firstHalfMatchLength;5912litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;5913firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;5914if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {5915/* Only ever split the match if it is larger than the block size */5916U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;5917if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {5918/* Move the endPosInSequence backward so that it creates match of minMatch length */5919endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;5920bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;5921firstHalfMatchLength -= bytesAdjustment;5922}5923matchLength = firstHalfMatchLength;5924/* Flag that we split the last match - after storing the sequence, exit the loop,5925but keep the value of endPosInSequence */5926finalMatchSplit = 1;5927} else {5928/* Move the position in sequence backwards so that we don't split match, and break to store5929* the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence5930* should go. 
We prefer to do this whenever it is not necessary to split the match, or if doing so5931* would cause the first half of the match to be too small5932*/5933bytesAdjustment = endPosInSequence - currSeq.litLength;5934endPosInSequence = currSeq.litLength;5935break;5936}5937} else {5938/* This sequence ends inside the literals, break to store the last literals */5939break;5940}5941}5942/* Check if this offset can be represented with a repcode */5943{ U32 const ll0 = (litLength == 0);5944offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);5945ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);5946}59475948if (cctx->appliedParams.validateSequences) {5949seqPos->posInSrc += litLength + matchLength;5950FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,5951cctx->appliedParams.cParams.windowLog, dictSize),5952"Sequence validation failed");5953}5954DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);5955RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,5956"Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch.");5957ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);5958ip += matchLength + litLength;5959}5960DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);5961assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);5962seqPos->idx = idx;5963seqPos->posInSequence = endPosInSequence;5964ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));59655966iend -= bytesAdjustment;5967if (ip != iend) {5968/* Store any last literals */5969U32 lastLLSize = (U32)(iend - ip);5970assert(ip <= iend);5971DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);5972ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);5973seqPos->posInSrc += lastLLSize;5974}59755976return bytesAdjustment;5977}59785979typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,5980const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,5981const void* src, size_t blockSize);5982static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)5983{5984ZSTD_sequenceCopier sequenceCopier = NULL;5985assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));5986if (mode == ZSTD_sf_explicitBlockDelimiters) {5987return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;5988} else if (mode == ZSTD_sf_noBlockDelimiters) {5989return ZSTD_copySequencesToSeqStoreNoBlockDelim;5990}5991assert(sequenceCopier != NULL);5992return sequenceCopier;5993}59945995/* Compress, block-by-block, all of the sequences given.5996*5997* Returns the cumulative size of all compressed blocks (including their headers),5998* otherwise a ZSTD error.5999*/6000static size_t6001ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,6002void* dst, size_t dstCapacity,6003const ZSTD_Sequence* inSeqs, size_t inSeqsSize,6004const void* src, size_t srcSize)6005{6006size_t cSize = 0;6007U32 lastBlock;6008size_t 
blockSize;6009size_t compressedSeqsSize;6010size_t remaining = srcSize;6011ZSTD_sequencePosition seqPos = {0, 0, 0};60126013BYTE const* ip = (BYTE const*)src;6014BYTE* op = (BYTE*)dst;6015ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);60166017DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);6018/* Special case: empty frame */6019if (remaining == 0) {6020U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);6021RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");6022MEM_writeLE32(op, cBlockHeader24);6023op += ZSTD_blockHeaderSize;6024dstCapacity -= ZSTD_blockHeaderSize;6025cSize += ZSTD_blockHeaderSize;6026}60276028while (remaining) {6029size_t cBlockSize;6030size_t additionalByteAdjustment;6031lastBlock = remaining <= cctx->blockSize;6032blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;6033ZSTD_resetSeqStore(&cctx->seqStore);6034DEBUGLOG(4, "Working on new block. 
Blocksize: %zu", blockSize);60356036additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);6037FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");6038blockSize -= additionalByteAdjustment;60396040/* If blocks are too small, emit as a nocompress block */6041if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {6042cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);6043FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");6044DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);6045cSize += cBlockSize;6046ip += blockSize;6047op += cBlockSize;6048remaining -= blockSize;6049dstCapacity -= cBlockSize;6050continue;6051}60526053compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,6054&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,6055&cctx->appliedParams,6056op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,6057blockSize,6058cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,6059cctx->bmi2);6060FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");6061DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);60626063if (!cctx->isFirstBlock &&6064ZSTD_maybeRLE(&cctx->seqStore) &&6065ZSTD_isRLE((BYTE const*)src, srcSize)) {6066/* We don't want to emit our first block as a RLE even if it qualifies because6067* doing so will cause the decoder (cli only) to throw a "should consume all input error."6068* This is only an issue for zstd <= v1.4.36069*/6070compressedSeqsSize = 1;6071}60726073if (compressedSeqsSize == 0) {6074/* ZSTD_noCompressBlock writes the block header as well */6075cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);6076FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");6077DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);6078} else if 
(compressedSeqsSize == 1) {6079cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);6080FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");6081DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);6082} else {6083U32 cBlockHeader;6084/* Error checking and repcodes update */6085ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);6086if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)6087cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;60886089/* Write block header into beginning of block*/6090cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);6091MEM_writeLE24(op, cBlockHeader);6092cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;6093DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);6094}60956096cSize += cBlockSize;6097DEBUGLOG(4, "cSize running total: %zu", cSize);60986099if (lastBlock) {6100break;6101} else {6102ip += blockSize;6103op += cBlockSize;6104remaining -= blockSize;6105dstCapacity -= cBlockSize;6106cctx->isFirstBlock = 0;6107}6108}61096110return cSize;6111}61126113size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,6114const ZSTD_Sequence* inSeqs, size_t inSeqsSize,6115const void* src, size_t srcSize)6116{6117BYTE* op = (BYTE*)dst;6118size_t cSize = 0;6119size_t compressedBlocksSize = 0;6120size_t frameHeaderSize = 0;61216122/* Transparent initialization stage, same as compressStream2() */6123DEBUGLOG(3, "ZSTD_compressSequences()");6124assert(cctx != NULL);6125FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");6126/* Begin writing output, starting with frame header */6127frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);6128op += frameHeaderSize;6129dstCapacity -= frameHeaderSize;6130cSize += frameHeaderSize;6131if 
(cctx->appliedParams.fParams.checksumFlag && srcSize) {6132XXH64_update(&cctx->xxhState, src, srcSize);6133}6134/* cSize includes block header size and compressed sequences size */6135compressedBlocksSize = ZSTD_compressSequences_internal(cctx,6136op, dstCapacity,6137inSeqs, inSeqsSize,6138src, srcSize);6139FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");6140cSize += compressedBlocksSize;6141dstCapacity -= compressedBlocksSize;61426143if (cctx->appliedParams.fParams.checksumFlag) {6144U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);6145RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");6146DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);6147MEM_writeLE32((char*)dst + cSize, checksum);6148cSize += 4;6149}61506151DEBUGLOG(3, "Final compressed size: %zu", cSize);6152return cSize;6153}61546155/*====== Finalize ======*/61566157/*! ZSTD_flushStream() :6158* @return : amount of data remaining to flush */6159size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)6160{6161ZSTD_inBuffer input = { NULL, 0, 0 };6162return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);6163}616461656166size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)6167{6168ZSTD_inBuffer input = { NULL, 0, 0 };6169size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);6170FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");6171if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */6172/* single thread mode : attempt to calculate remaining to flush more precisely */6173{ size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;6174size_t const checksumSize = (size_t)(zcs->frameEnded ? 
0 : zcs->appliedParams.fParams.checksumFlag * 4);6175size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;6176DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);6177return toFlush;6178}6179}618061816182/*-===== Pre-defined compression levels =====-*/6183#include "clevels.h"61846185int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }6186int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }6187int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }61886189static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)6190{6191ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);6192switch (cParams.strategy) {6193case ZSTD_fast:6194case ZSTD_dfast:6195break;6196case ZSTD_greedy:6197case ZSTD_lazy:6198case ZSTD_lazy2:6199cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;6200break;6201case ZSTD_btlazy2:6202case ZSTD_btopt:6203case ZSTD_btultra:6204case ZSTD_btultra2:6205break;6206}6207return cParams;6208}62096210static int ZSTD_dedicatedDictSearch_isSupported(6211ZSTD_compressionParameters const* cParams)6212{6213return (cParams->strategy >= ZSTD_greedy)6214&& (cParams->strategy <= ZSTD_lazy2)6215&& (cParams->hashLog > cParams->chainLog)6216&& (cParams->chainLog <= 24);6217}62186219/**6220* Reverses the adjustment applied to cparams when enabling dedicated dict6221* search. This is used to recover the params set to be used in the working6222* context. 
(Otherwise, those tables would also grow.)6223*/6224static void ZSTD_dedicatedDictSearch_revertCParams(6225ZSTD_compressionParameters* cParams) {6226switch (cParams->strategy) {6227case ZSTD_fast:6228case ZSTD_dfast:6229break;6230case ZSTD_greedy:6231case ZSTD_lazy:6232case ZSTD_lazy2:6233cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;6234if (cParams->hashLog < ZSTD_HASHLOG_MIN) {6235cParams->hashLog = ZSTD_HASHLOG_MIN;6236}6237break;6238case ZSTD_btlazy2:6239case ZSTD_btopt:6240case ZSTD_btultra:6241case ZSTD_btultra2:6242break;6243}6244}62456246static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)6247{6248switch (mode) {6249case ZSTD_cpm_unknown:6250case ZSTD_cpm_noAttachDict:6251case ZSTD_cpm_createCDict:6252break;6253case ZSTD_cpm_attachDict:6254dictSize = 0;6255break;6256default:6257assert(0);6258break;6259}6260{ int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;6261size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;6262return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;6263}6264}62656266/*! ZSTD_getCParams_internal() :6267* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.6268* Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.6269* Use dictSize == 0 for unknown or unused.6270* Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. 
*/6271static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)6272{6273U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);6274U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);6275int row;6276DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);62776278/* row */6279if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */6280else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */6281else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;6282else row = compressionLevel;62836284{ ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];6285DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);6286/* acceleration factor */6287if (compressionLevel < 0) {6288int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);6289cp.targetLength = (unsigned)(-clampedCompressionLevel);6290}6291/* refine parameters based on srcSize & dictSize */6292return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);6293}6294}62956296/*! ZSTD_getCParams() :6297* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.6298* Size values are optional, provide 0 if not known or unused */6299ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)6300{6301if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;6302return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);6303}63046305/*! 
ZSTD_getParams() :6306* same idea as ZSTD_getCParams()6307* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).6308* Fields of `ZSTD_frameParameters` are set to default values */6309static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {6310ZSTD_parameters params;6311ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);6312DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);6313ZSTD_memset(¶ms, 0, sizeof(params));6314params.cParams = cParams;6315params.fParams.contentSizeFlag = 1;6316return params;6317}63186319/*! ZSTD_getParams() :6320* same idea as ZSTD_getCParams()6321* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).6322* Fields of `ZSTD_frameParameters` are set to default values */6323ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {6324if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;6325return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);6326}632763286329