Path: blob/main/sys/contrib/zstd/lib/decompress/zstd_decompress_block.c
48378 views
/*1* Copyright (c) Yann Collet, Facebook, Inc.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/910/* zstd_decompress_block :11* this module takes care of decompressing _compressed_ block */1213/*-*******************************************************14* Dependencies15*********************************************************/16#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */17#include "../common/compiler.h" /* prefetch */18#include "../common/cpu.h" /* bmi2 */19#include "../common/mem.h" /* low level memory routines */20#define FSE_STATIC_LINKING_ONLY21#include "../common/fse.h"22#define HUF_STATIC_LINKING_ONLY23#include "../common/huf.h"24#include "../common/zstd_internal.h"25#include "zstd_decompress_internal.h" /* ZSTD_DCtx */26#include "zstd_ddict.h" /* ZSTD_DDictDictContent */27#include "zstd_decompress_block.h"2829/*_*******************************************************30* Macros31**********************************************************/3233/* These two optional macros force the use one way or another of the two34* ZSTD_decompressSequences implementations. You can't force in both directions35* at the same time.36*/37#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \38defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)39#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"40#endif414243/*_*******************************************************44* Memory operations45**********************************************************/46static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }474849/*-*************************************************************50* Block decoding51***************************************************************/5253/*! ZSTD_getcBlockSize() :54* Provides the size of compressed block from block header `src` */55size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,56blockProperties_t* bpPtr)57{58RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");5960{ U32 const cBlockHeader = MEM_readLE24(src);61U32 const cSize = cBlockHeader >> 3;62bpPtr->lastBlock = cBlockHeader & 1;63bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);64bpPtr->origSize = cSize; /* only useful for RLE */65if (bpPtr->blockType == bt_rle) return 1;66RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");67return cSize;68}69}7071/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */72static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,73const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)74{75if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)76{77/* room for litbuffer to fit without read faulting */78dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;79dctx->litBufferEnd = dctx->litBuffer + litSize;80dctx->litBufferLocation = ZSTD_in_dst;81}82else if (litSize > ZSTD_LITBUFFEREXTRASIZE)83{84/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */85if (splitImmediately) {86/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */87dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;88dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;89}90else {91/* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */92dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;93dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;94}95dctx->litBufferLocation = ZSTD_split;96}97else98{99/* fits entirely within litExtraBuffer, so no split is necessary */100dctx->litBuffer = dctx->litExtraBuffer;101dctx->litBufferEnd = dctx->litBuffer + litSize;102dctx->litBufferLocation = ZSTD_not_in_dst;103}104}105106/* Hidden declaration for fullbench */107size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,108const void* src, size_t srcSize,109void* dst, size_t dstCapacity, const streaming_operation streaming);110/*! ZSTD_decodeLiteralsBlock() :111* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored112* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current113* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being114* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.115*116* @return : nb of bytes read from src (< srcSize )117* note : symbol not declared but exposed for fullbench */118size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,119const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */120void* dst, size_t dstCapacity, const streaming_operation streaming)121{122DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");123RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");124125{ const BYTE* const istart = (const BYTE*) src;126symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);127128switch(litEncType)129{130case set_repeat:131DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");132RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");133ZSTD_FALLTHROUGH;134135case set_compressed:136RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");137{ size_t lhSize, litSize, litCSize;138U32 singleStream=0;139U32 const lhlCode = (istart[0] >> 2) & 3;140U32 const lhc = MEM_readLE32(istart);141size_t hufSuccess;142size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);143switch(lhlCode)144{145case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */146/* 2 - 2 - 10 - 10 */147singleStream = !lhlCode;148lhSize = 3;149litSize = (lhc >> 4) & 0x3FF;150litCSize = (lhc >> 14) & 0x3FF;151break;152case 2:153/* 2 - 2 - 14 - 14 */154lhSize = 4;155litSize = (lhc >> 4) & 0x3FFF;156litCSize = lhc >> 18;157break;158case 3:159/* 2 - 2 - 18 - 18 */160lhSize = 5;161litSize = (lhc >> 4) & 0x3FFFF;162litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);163break;164}165RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");166RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");167RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");168RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");169ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);170171/* prefetch huffman table if cold */172if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {173PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));174}175176if (litEncType==set_repeat) {177if (singleStream) {178hufSuccess = HUF_decompress1X_usingDTable_bmi2(179dctx->litBuffer, litSize, istart+lhSize, litCSize,180dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));181} else {182hufSuccess = HUF_decompress4X_usingDTable_bmi2(183dctx->litBuffer, litSize, istart+lhSize, litCSize,184dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));185}186} else {187if (singleStream) {188#if defined(HUF_FORCE_DECOMPRESS_X2)189hufSuccess = HUF_decompress1X_DCtx_wksp(190dctx->entropy.hufTable, dctx->litBuffer, litSize,191istart+lhSize, litCSize, dctx->workspace,192sizeof(dctx->workspace));193#else194hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(195dctx->entropy.hufTable, dctx->litBuffer, litSize,196istart+lhSize, litCSize, dctx->workspace,197sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));198#endif199} else {200hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(201dctx->entropy.hufTable, dctx->litBuffer, litSize,202istart+lhSize, litCSize, dctx->workspace,203sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));204}205}206if (dctx->litBufferLocation == ZSTD_split)207{208ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);209ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);210dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;211dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;212}213214RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");215216dctx->litPtr = dctx->litBuffer;217dctx->litSize = litSize;218dctx->litEntropy = 1;219if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;220return litCSize + lhSize;221}222223case set_basic:224{ size_t litSize, lhSize;225U32 const lhlCode = ((istart[0]) >> 2) & 3;226size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);227switch(lhlCode)228{229case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */230lhSize = 1;231litSize = istart[0] >> 3;232break;233case 1:234lhSize = 2;235litSize = MEM_readLE16(istart) >> 4;236break;237case 3:238lhSize = 3;239litSize = MEM_readLE24(istart) >> 4;240break;241}242243RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");244RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");245ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);246if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */247RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");248if (dctx->litBufferLocation == ZSTD_split)249{250ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);251ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);252}253else254{255ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);256}257dctx->litPtr = dctx->litBuffer;258dctx->litSize = litSize;259return lhSize+litSize;260}261/* direct reference into compressed stream */262dctx->litPtr = istart+lhSize;263dctx->litSize = litSize;264dctx->litBufferEnd = dctx->litPtr + litSize;265dctx->litBufferLocation = ZSTD_not_in_dst;266return lhSize+litSize;267}268269case set_rle:270{ U32 const lhlCode = ((istart[0]) >> 2) & 3;271size_t litSize, lhSize;272size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);273switch(lhlCode)274{275case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */276lhSize = 1;277litSize = istart[0] >> 3;278break;279case 1:280lhSize = 2;281litSize = MEM_readLE16(istart) >> 4;282break;283case 3:284lhSize = 3;285litSize = MEM_readLE24(istart) >> 4;286RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");287break;288}289RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");290RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");291RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");292ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);293if (dctx->litBufferLocation == ZSTD_split)294{295ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);296ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);297}298else299{300ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);301}302dctx->litPtr = dctx->litBuffer;303dctx->litSize = litSize;304return lhSize+1;305}306default:307RETURN_ERROR(corruption_detected, "impossible");308}309}310}311312/* Default FSE distribution tables.313* These are pre-calculated FSE decoding tables using default distributions as defined in specification :314* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions315* They were generated programmatically with following method :316* - start from default distributions, present in /lib/common/zstd_internal.h317* - generate tables normally, using ZSTD_buildFSETable()318* - printout the content of tables319* - pretify output, report below, test with fuzzer to ensure it's correct */320321/* Default FSE distribution table for Literal Lengths */322static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {323{ 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */324/* nextState, nbAddBits, nbBits, baseVal */325{ 0, 0, 4, 0}, { 16, 0, 4, 0},326{ 32, 0, 5, 1}, { 0, 0, 5, 3},327{ 0, 0, 5, 4}, { 0, 0, 5, 6},328{ 0, 0, 5, 7}, { 0, 0, 5, 9},329{ 0, 0, 5, 10}, { 0, 0, 5, 12},330{ 0, 0, 6, 14}, { 0, 1, 5, 16},331{ 0, 1, 5, 20}, { 0, 1, 5, 22},332{ 0, 2, 5, 28}, { 0, 3, 5, 32},333{ 0, 4, 5, 48}, { 32, 6, 5, 64},334{ 0, 7, 5, 128}, { 0, 8, 6, 256},335{ 0, 10, 6, 1024}, { 0, 12, 6, 4096},336{ 32, 0, 4, 0}, { 0, 0, 4, 1},337{ 0, 0, 5, 2}, { 32, 0, 5, 4},338{ 0, 0, 5, 5}, { 32, 0, 5, 7},339{ 0, 0, 5, 8}, { 32, 0, 5, 10},340{ 0, 0, 5, 11}, { 0, 0, 6, 13},341{ 32, 1, 5, 16}, { 0, 1, 5, 18},342{ 32, 1, 5, 22}, { 0, 2, 5, 24},343{ 32, 3, 5, 32}, { 0, 3, 5, 40},344{ 0, 6, 4, 64}, { 16, 6, 4, 64},345{ 32, 7, 5, 128}, { 0, 9, 6, 512},346{ 0, 11, 6, 2048}, { 48, 0, 4, 0},347{ 16, 0, 4, 1}, { 32, 0, 5, 2},348{ 32, 0, 5, 3}, { 32, 0, 5, 5},349{ 32, 0, 5, 6}, { 32, 0, 5, 8},350{ 32, 0, 5, 9}, { 32, 0, 5, 11},351{ 32, 0, 5, 12}, { 0, 0, 6, 15},352{ 32, 1, 5, 18}, { 32, 1, 5, 20},353{ 32, 2, 5, 24}, { 32, 2, 5, 28},354{ 32, 3, 5, 40}, { 32, 4, 5, 48},355{ 0, 16, 6,65536}, { 0, 15, 6,32768},356{ 0, 14, 6,16384}, { 0, 13, 6, 8192},357}; /* LL_defaultDTable */358359/* Default FSE distribution table for Offset Codes */360static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {361{ 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */362/* nextState, nbAddBits, nbBits, baseVal */363{ 0, 0, 5, 0}, { 0, 6, 4, 61},364{ 0, 9, 5, 509}, { 0, 15, 5,32765},365{ 0, 21, 5,2097149}, { 0, 3, 5, 5},366{ 0, 7, 4, 125}, { 0, 12, 5, 4093},367{ 0, 18, 5,262141}, { 0, 23, 5,8388605},368{ 0, 5, 5, 29}, { 0, 8, 4, 253},369{ 0, 14, 5,16381}, { 0, 20, 5,1048573},370{ 0, 2, 5, 1}, { 16, 7, 4, 125},371{ 0, 11, 5, 2045}, { 0, 17, 5,131069},372{ 0, 22, 5,4194301}, { 0, 4, 5, 13},373{ 16, 8, 4, 253}, { 0, 13, 5, 8189},374{ 0, 19, 5,524285}, { 0, 1, 5, 1},375{ 16, 6, 4, 61}, { 0, 10, 5, 1021},376{ 0, 16, 5,65533}, { 0, 28, 5,268435453},377{ 0, 27, 5,134217725}, { 0, 26, 5,67108861},378{ 0, 25, 5,33554429}, { 0, 24, 5,16777213},379}; /* OF_defaultDTable */380381382/* Default FSE distribution table for Match Lengths */383static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {384{ 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */385/* nextState, nbAddBits, nbBits, baseVal */386{ 0, 0, 6, 3}, { 0, 0, 4, 4},387{ 32, 0, 5, 5}, { 0, 0, 5, 6},388{ 0, 0, 5, 8}, { 0, 0, 5, 9},389{ 0, 0, 5, 11}, { 0, 0, 6, 13},390{ 0, 0, 6, 16}, { 0, 0, 6, 19},391{ 0, 0, 6, 22}, { 0, 0, 6, 25},392{ 0, 0, 6, 28}, { 0, 0, 6, 31},393{ 0, 0, 6, 34}, { 0, 1, 6, 37},394{ 0, 1, 6, 41}, { 0, 2, 6, 47},395{ 0, 3, 6, 59}, { 0, 4, 6, 83},396{ 0, 7, 6, 131}, { 0, 9, 6, 515},397{ 16, 0, 4, 4}, { 0, 0, 4, 5},398{ 32, 0, 5, 6}, { 0, 0, 5, 7},399{ 32, 0, 5, 9}, { 0, 0, 5, 10},400{ 0, 0, 6, 12}, { 0, 0, 6, 15},401{ 0, 0, 6, 18}, { 0, 0, 6, 21},402{ 0, 0, 6, 24}, { 0, 0, 6, 27},403{ 0, 0, 6, 30}, { 0, 0, 6, 33},404{ 0, 1, 6, 35}, { 0, 1, 6, 39},405{ 0, 2, 6, 43}, { 0, 3, 6, 51},406{ 0, 4, 6, 67}, { 0, 5, 6, 99},407{ 0, 8, 6, 259}, { 32, 0, 4, 4},408{ 48, 0, 4, 4}, { 16, 0, 4, 5},409{ 32, 0, 5, 7}, { 32, 0, 5, 8},410{ 32, 0, 5, 10}, { 32, 0, 5, 11},411{ 0, 0, 6, 14}, { 0, 0, 6, 17},412{ 0, 0, 6, 20}, { 0, 0, 6, 23},413{ 0, 0, 6, 26}, { 0, 0, 6, 29},414{ 0, 0, 6, 32}, { 0, 16, 6,65539},415{ 0, 15, 6,32771}, { 0, 14, 6,16387},416{ 0, 13, 6, 8195}, { 0, 12, 6, 4099},417{ 0, 11, 6, 2051}, { 0, 10, 6, 1027},418}; /* ML_defaultDTable */419420421static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)422{423void* ptr = dt;424ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;425ZSTD_seqSymbol* const cell = dt + 1;426427DTableH->tableLog = 0;428DTableH->fastMode = 0;429430cell->nbBits = 0;431cell->nextState = 0;432assert(nbAddBits < 255);433cell->nbAdditionalBits = nbAddBits;434cell->baseValue = baseValue;435}436437438/* ZSTD_buildFSETable() :439* generate FSE decoding table for one symbol (ll, ml or off)440* cannot fail if input is valid =>441* all inputs are presumed validated at this stage */442FORCE_INLINE_TEMPLATE443void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,444const short* normalizedCounter, unsigned maxSymbolValue,445const U32* baseValue, const U8* nbAdditionalBits,446unsigned tableLog, void* wksp, size_t wkspSize)447{448ZSTD_seqSymbol* const tableDecode = dt+1;449U32 const maxSV1 = maxSymbolValue + 1;450U32 const tableSize = 1 << tableLog;451452U16* symbolNext = (U16*)wksp;453BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);454U32 highThreshold = tableSize - 1;455456457/* Sanity Checks */458assert(maxSymbolValue <= MaxSeq);459assert(tableLog <= MaxFSELog);460assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);461(void)wkspSize;462/* Init, lay down lowprob symbols */463{ ZSTD_seqSymbol_header DTableH;464DTableH.tableLog = tableLog;465DTableH.fastMode = 1;466{ S16 const largeLimit= (S16)(1 << (tableLog-1));467U32 s;468for (s=0; s<maxSV1; s++) {469if (normalizedCounter[s]==-1) {470tableDecode[highThreshold--].baseValue = s;471symbolNext[s] = 1;472} else {473if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;474assert(normalizedCounter[s]>=0);475symbolNext[s] = (U16)normalizedCounter[s];476} } }477ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));478}479480/* Spread symbols */481assert(tableSize <= 512);482/* Specialized symbol spreading for the case when there are483* no low probability (-1 count) symbols. When compressing484* small blocks we avoid low probability symbols to hit this485* case, since header decoding speed matters more.486*/487if (highThreshold == tableSize - 1) {488size_t const tableMask = tableSize-1;489size_t const step = FSE_TABLESTEP(tableSize);490/* First lay down the symbols in order.491* We use a uint64_t to lay down 8 bytes at a time. This reduces branch492* misses since small blocks generally have small table logs, so nearly493* all symbols have counts <= 8. We ensure we have 8 bytes at the end of494* our buffer to handle the over-write.495*/496{497U64 const add = 0x0101010101010101ull;498size_t pos = 0;499U64 sv = 0;500U32 s;501for (s=0; s<maxSV1; ++s, sv += add) {502int i;503int const n = normalizedCounter[s];504MEM_write64(spread + pos, sv);505for (i = 8; i < n; i += 8) {506MEM_write64(spread + pos + i, sv);507}508pos += n;509}510}511/* Now we spread those positions across the table.512* The benefit of doing it in two stages is that we avoid the the513* variable size inner loop, which caused lots of branch misses.514* Now we can run through all the positions without any branch misses.515* We unroll the loop twice, since that is what emperically worked best.516*/517{518size_t position = 0;519size_t s;520size_t const unroll = 2;521assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */522for (s = 0; s < (size_t)tableSize; s += unroll) {523size_t u;524for (u = 0; u < unroll; ++u) {525size_t const uPosition = (position + (u * step)) & tableMask;526tableDecode[uPosition].baseValue = spread[s + u];527}528position = (position + (unroll * step)) & tableMask;529}530assert(position == 0);531}532} else {533U32 const tableMask = tableSize-1;534U32 const step = FSE_TABLESTEP(tableSize);535U32 s, position = 0;536for (s=0; s<maxSV1; s++) {537int i;538int const n = normalizedCounter[s];539for (i=0; i<n; i++) {540tableDecode[position].baseValue = s;541position = (position + step) & tableMask;542while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */543} }544assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */545}546547/* Build Decoding table */548{549U32 u;550for (u=0; u<tableSize; u++) {551U32 const symbol = tableDecode[u].baseValue;552U32 const nextState = symbolNext[symbol]++;553tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );554tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);555assert(nbAdditionalBits[symbol] < 255);556tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];557tableDecode[u].baseValue = baseValue[symbol];558}559}560}561562/* Avoids the FORCE_INLINE of the _body() function. */563static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,564const short* normalizedCounter, unsigned maxSymbolValue,565const U32* baseValue, const U8* nbAdditionalBits,566unsigned tableLog, void* wksp, size_t wkspSize)567{568ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,569baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);570}571572#if DYNAMIC_BMI2573BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,574const short* normalizedCounter, unsigned maxSymbolValue,575const U32* baseValue, const U8* nbAdditionalBits,576unsigned tableLog, void* wksp, size_t wkspSize)577{578ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,579baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);580}581#endif582583void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,584const short* normalizedCounter, unsigned maxSymbolValue,585const U32* baseValue, const U8* nbAdditionalBits,586unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)587{588#if DYNAMIC_BMI2589if (bmi2) {590ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,591baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);592return;593}594#endif595(void)bmi2;596ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,597baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);598}599600601/*! ZSTD_buildSeqTable() :602* @return : nb bytes read from src,603* or an error code if it fails */604static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,605symbolEncodingType_e type, unsigned max, U32 maxLog,606const void* src, size_t srcSize,607const U32* baseValue, const U8* nbAdditionalBits,608const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,609int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,610int bmi2)611{612switch(type)613{614case set_rle :615RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");616RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");617{ U32 const symbol = *(const BYTE*)src;618U32 const baseline = baseValue[symbol];619U8 const nbBits = nbAdditionalBits[symbol];620ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);621}622*DTablePtr = DTableSpace;623return 1;624case set_basic :625*DTablePtr = defaultTable;626return 0;627case set_repeat:628RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");629/* prefetch FSE table if used */630if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {631const void* const pStart = *DTablePtr;632size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));633PREFETCH_AREA(pStart, pSize);634}635return 0;636case set_compressed :637{ unsigned tableLog;638S16 norm[MaxSeq+1];639size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);640RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");641RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");642ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);643*DTablePtr = DTableSpace;644return headerSize;645}646default :647assert(0);648RETURN_ERROR(GENERIC, "impossible");649}650}651652size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,653const void* src, size_t srcSize)654{655const BYTE* const istart = (const BYTE*)src;656const BYTE* const iend = istart + srcSize;657const BYTE* ip = istart;658int nbSeq;659DEBUGLOG(5, "ZSTD_decodeSeqHeaders");660661/* check */662RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");663664/* SeqHead */665nbSeq = *ip++;666if (!nbSeq) {667*nbSeqPtr=0;668RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");669return 1;670}671if (nbSeq > 0x7F) {672if (nbSeq == 0xFF) {673RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");674nbSeq = MEM_readLE16(ip) + LONGNBSEQ;675ip+=2;676} else {677RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");678nbSeq = ((nbSeq-0x80)<<8) + *ip++;679}680}681*nbSeqPtr = nbSeq;682683/* FSE table descriptors */684RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */685{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);686symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);687symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);688ip++;689690/* Build DTables */691{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,692LLtype, MaxLL, LLFSELog,693ip, iend-ip,694LL_base, LL_bits,695LL_defaultDTable, dctx->fseEntropy,696dctx->ddictIsCold, nbSeq,697dctx->workspace, sizeof(dctx->workspace),698ZSTD_DCtx_get_bmi2(dctx));699RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");700ip += llhSize;701}702703{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,704OFtype, MaxOff, OffFSELog,705ip, iend-ip,706OF_base, OF_bits,707OF_defaultDTable, dctx->fseEntropy,708dctx->ddictIsCold, nbSeq,709dctx->workspace, sizeof(dctx->workspace),710ZSTD_DCtx_get_bmi2(dctx));711RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");712ip += ofhSize;713}714715{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,716MLtype, MaxML, MLFSELog,717ip, iend-ip,718ML_base, ML_bits,719ML_defaultDTable, dctx->fseEntropy,720dctx->ddictIsCold, nbSeq,721dctx->workspace, sizeof(dctx->workspace),722ZSTD_DCtx_get_bmi2(dctx));723RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");724ip += mlhSize;725}726}727728return ip-istart;729}730731732typedef struct {733size_t litLength;734size_t matchLength;735size_t offset;736} seq_t;737738typedef struct {739size_t state;740const ZSTD_seqSymbol* table;741} ZSTD_fseState;742743typedef struct {744BIT_DStream_t DStream;745ZSTD_fseState stateLL;746ZSTD_fseState stateOffb;747ZSTD_fseState stateML;748size_t prevOffset[ZSTD_REP_NUM];749} seqState_t;750751/*! ZSTD_overlapCopy8() :752* Copies 8 bytes from ip to op and updates op and ip where ip <= op.753* If the offset is < 8 then the offset is spread to at least 8 bytes.754*755* Precondition: *ip <= *op756* Postcondition: *op - *op >= 8757*/758HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {759assert(*ip <= *op);760if (offset < 8) {761/* close range match, overlap */762static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */763static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */764int const sub2 = dec64table[offset];765(*op)[0] = (*ip)[0];766(*op)[1] = (*ip)[1];767(*op)[2] = (*ip)[2];768(*op)[3] = (*ip)[3];769*ip += dec32table[offset];770ZSTD_copy4(*op+4, *ip);771*ip -= sub2;772} else {773ZSTD_copy8(*op, *ip);774}775*ip += 8;776*op += 8;777assert(*op - *ip >= 8);778}779780/*! ZSTD_safecopy() :781* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer782* and write up to 16 bytes past oend_w (op >= oend_w is allowed).783* This function is only called in the uncommon case where the sequence is near the end of the block. It784* should be fast for a single long sequence, but can be slow for several short sequences.785*786* @param ovtype controls the overlap detection787* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.788* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.789* The src buffer must be before the dst buffer.790*/791static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {792ptrdiff_t const diff = op - ip;793BYTE* const oend = op + length;794795assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||796(ovtype == ZSTD_overlap_src_before_dst && diff >= 0));797798if (length < 8) {799/* Handle short lengths. */800while (op < oend) *op++ = *ip++;801return;802}803if (ovtype == ZSTD_overlap_src_before_dst) {804/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */805assert(length >= 8);806ZSTD_overlapCopy8(&op, &ip, diff);807length -= 8;808assert(op - ip >= 8);809assert(op <= oend);810}811812if (oend <= oend_w) {813/* No risk of overwrite. */814ZSTD_wildcopy(op, ip, length, ovtype);815return;816}817if (op <= oend_w) {818/* Wildcopy until we get close to the end. */819assert(oend > oend_w);820ZSTD_wildcopy(op, ip, oend_w - op, ovtype);821ip += oend_w - op;822op += oend_w - op;823}824/* Handle the leftovers. */825while (op < oend) *op++ = *ip++;826}827828/* ZSTD_safecopyDstBeforeSrc():829* This version allows overlap with dst before src, or handles the non-overlap case with dst after src830* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */831static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {832ptrdiff_t const diff = op - ip;833BYTE* const oend = op + length;834835if (length < 8 || diff > -8) {836/* Handle short lengths, close overlaps, and dst not before src. */837while (op < oend) *op++ = *ip++;838return;839}840841if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {842ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);843ip += oend - WILDCOPY_OVERLENGTH - op;844op += oend - WILDCOPY_OVERLENGTH - op;845}846847/* Handle the leftovers. */848while (op < oend) *op++ = *ip++;849}850851/* ZSTD_execSequenceEnd():852* This version handles cases that are near the end of the output buffer. It requires853* more careful checks to make sure there is no overflow. By separating out these hard854* and unlikely cases, we can speed up the common cases.855*856* NOTE: This function needs to be fast for a single long sequence, but doesn't need857* to be optimized for many small sequences, since those fall into ZSTD_execSequence().858*/859FORCE_NOINLINE860size_t ZSTD_execSequenceEnd(BYTE* op,861BYTE* const oend, seq_t sequence,862const BYTE** litPtr, const BYTE* const litLimit,863const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)864{865BYTE* const oLitEnd = op + sequence.litLength;866size_t const sequenceLength = sequence.litLength + sequence.matchLength;867const BYTE* const iLitEnd = *litPtr + sequence.litLength;868const BYTE* match = oLitEnd - sequence.offset;869BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;870871/* bounds checks : careful of address space overflow in 32-bit mode */872RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");873RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");874assert(op < op + sequenceLength);875assert(oLitEnd < op + sequenceLength);876877/* copy literals */878ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);879op = oLitEnd;880*litPtr = iLitEnd;881882/* copy Match */883if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {884/* offset beyond prefix */885RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");886match = dictEnd - (prefixStart - match);887if (match + sequence.matchLength <= dictEnd) {888ZSTD_memmove(oLitEnd, match, sequence.matchLength);889return sequenceLength;890}891/* span extDict & currentPrefixSegment */892{ size_t const length1 = dictEnd - match;893ZSTD_memmove(oLitEnd, match, length1);894op = oLitEnd + length1;895sequence.matchLength -= length1;896match = prefixStart;897}898}899ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);900return sequenceLength;901}902903/* ZSTD_execSequenceEndSplitLitBuffer():904* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.905*/906FORCE_NOINLINE907size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,908BYTE* const oend, const BYTE* const oend_w, seq_t sequence,909const BYTE** litPtr, const BYTE* const litLimit,910const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)911{912BYTE* const oLitEnd = op + sequence.litLength;913size_t const sequenceLength = sequence.litLength + sequence.matchLength;914const BYTE* const iLitEnd = *litPtr + sequence.litLength;915const BYTE* match = oLitEnd - sequence.offset;916917918/* bounds checks : careful of address space overflow in 32-bit mode */919RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");920RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");921assert(op < op + sequenceLength);922assert(oLitEnd < op + sequenceLength);923924/* copy literals */925RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");926ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);927op = oLitEnd;928*litPtr = iLitEnd;929930/* copy Match */931if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {932/* offset beyond prefix */933RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");934match = dictEnd - (prefixStart - match);935if (match + sequence.matchLength <= dictEnd) {936ZSTD_memmove(oLitEnd, match, sequence.matchLength);937return sequenceLength;938}939/* span extDict & currentPrefixSegment */940{ size_t const length1 = dictEnd - match;941ZSTD_memmove(oLitEnd, match, length1);942op = oLitEnd + length1;943sequence.matchLength -= length1;944match = prefixStart;945}946}947ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);948return sequenceLength;949}950951HINT_INLINE952size_t ZSTD_execSequence(BYTE* op,953BYTE* const oend, seq_t sequence,954const BYTE** litPtr, const BYTE* const litLimit,955const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)956{957BYTE* const oLitEnd = op + sequence.litLength;958size_t const sequenceLength = sequence.litLength + sequence.matchLength;959BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */960BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */961const BYTE* const iLitEnd = *litPtr + sequence.litLength;962const BYTE* match = oLitEnd - sequence.offset;963964assert(op != NULL /* Precondition */);965assert(oend_w < oend /* No underflow */);966/* Handle edge cases in a slow path:967* - Read beyond end of literals968* - Match end is within WILDCOPY_OVERLIMIT of oend969* - 32-bit mode and the match length overflows970*/971if (UNLIKELY(972iLitEnd > litLimit ||973oMatchEnd > oend_w ||974(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))975return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);976977/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */978assert(op <= oLitEnd /* No overflow */);979assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);980assert(oMatchEnd <= oend /* No underflow */);981assert(iLitEnd <= litLimit /* Literal length is in bounds */);982assert(oLitEnd <= oend_w /* Can wildcopy literals */);983assert(oMatchEnd <= oend_w /* Can wildcopy matches */);984985/* Copy Literals:986* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.987* We likely don't need the full 32-byte wildcopy.988*/989assert(WILDCOPY_OVERLENGTH >= 16);990ZSTD_copy16(op, (*litPtr));991if (UNLIKELY(sequence.litLength > 16)) {992ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);993}994op = oLitEnd;995*litPtr = iLitEnd; /* update for next sequence */996997/* Copy Match */998if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {999/* offset beyond prefix -> go into extDict */1000RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");1001match = dictEnd + (match - prefixStart);1002if (match + sequence.matchLength <= dictEnd) {1003ZSTD_memmove(oLitEnd, match, sequence.matchLength);1004return sequenceLength;1005}1006/* span extDict & currentPrefixSegment */1007{ size_t const length1 = dictEnd - match;1008ZSTD_memmove(oLitEnd, match, length1);1009op = oLitEnd + length1;1010sequence.matchLength -= length1;1011match = prefixStart;1012}1013}1014/* Match within prefix of 1 or more bytes */1015assert(op <= oMatchEnd);1016assert(oMatchEnd <= oend_w);1017assert(match >= prefixStart);1018assert(sequence.matchLength >= 1);10191020/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy1021* without overlap checking.1022*/1023if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {1024/* We bet on a full wildcopy for matches, since we expect matches to be1025* longer than literals (in general). In silesia, ~10% of matches are longer1026* than 16 bytes.1027*/1028ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);1029return sequenceLength;1030}1031assert(sequence.offset < WILDCOPY_VECLEN);10321033/* Copy 8 bytes and spread the offset to be >= 8. */1034ZSTD_overlapCopy8(&op, &match, sequence.offset);10351036/* If the match length is > 8 bytes, then continue with the wildcopy. */1037if (sequence.matchLength > 8) {1038assert(op < oMatchEnd);1039ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);1040}1041return sequenceLength;1042}10431044HINT_INLINE1045size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,1046BYTE* const oend, const BYTE* const oend_w, seq_t sequence,1047const BYTE** litPtr, const BYTE* const litLimit,1048const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)1049{1050BYTE* const oLitEnd = op + sequence.litLength;1051size_t const sequenceLength = sequence.litLength + sequence.matchLength;1052BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */1053const BYTE* const iLitEnd = *litPtr + sequence.litLength;1054const BYTE* match = oLitEnd - sequence.offset;10551056assert(op != NULL /* Precondition */);1057assert(oend_w < oend /* No underflow */);1058/* Handle edge cases in a slow path:1059* - Read beyond end of literals1060* - Match end is within WILDCOPY_OVERLIMIT of oend1061* - 32-bit mode and the match length overflows1062*/1063if (UNLIKELY(1064iLitEnd > litLimit ||1065oMatchEnd > oend_w ||1066(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))1067return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);10681069/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */1070assert(op <= oLitEnd /* No overflow */);1071assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);1072assert(oMatchEnd <= oend /* No underflow */);1073assert(iLitEnd <= litLimit /* Literal length is in bounds */);1074assert(oLitEnd <= oend_w /* Can wildcopy literals */);1075assert(oMatchEnd <= oend_w /* Can wildcopy matches */);10761077/* Copy Literals:1078* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.1079* We likely don't need the full 32-byte wildcopy.1080*/1081assert(WILDCOPY_OVERLENGTH >= 16);1082ZSTD_copy16(op, (*litPtr));1083if (UNLIKELY(sequence.litLength > 16)) {1084ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);1085}1086op = oLitEnd;1087*litPtr = iLitEnd; /* update for next sequence */10881089/* Copy Match */1090if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {1091/* offset beyond prefix -> go into extDict */1092RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");1093match = dictEnd + (match - prefixStart);1094if (match + sequence.matchLength <= dictEnd) {1095ZSTD_memmove(oLitEnd, match, sequence.matchLength);1096return sequenceLength;1097}1098/* span extDict & currentPrefixSegment */1099{ size_t const length1 = dictEnd - match;1100ZSTD_memmove(oLitEnd, match, length1);1101op = oLitEnd + length1;1102sequence.matchLength -= length1;1103match = prefixStart;1104} }1105/* Match within prefix of 1 or more bytes */1106assert(op <= oMatchEnd);1107assert(oMatchEnd <= oend_w);1108assert(match >= prefixStart);1109assert(sequence.matchLength >= 1);11101111/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy1112* without overlap checking.1113*/1114if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {1115/* We bet on a full wildcopy for matches, since we expect matches to be1116* longer than literals (in general). In silesia, ~10% of matches are longer1117* than 16 bytes.1118*/1119ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);1120return sequenceLength;1121}1122assert(sequence.offset < WILDCOPY_VECLEN);11231124/* Copy 8 bytes and spread the offset to be >= 8. */1125ZSTD_overlapCopy8(&op, &match, sequence.offset);11261127/* If the match length is > 8 bytes, then continue with the wildcopy. */1128if (sequence.matchLength > 8) {1129assert(op < oMatchEnd);1130ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);1131}1132return sequenceLength;1133}113411351136static void1137ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)1138{1139const void* ptr = dt;1140const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;1141DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);1142DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",1143(U32)DStatePtr->state, DTableH->tableLog);1144BIT_reloadDStream(bitD);1145DStatePtr->table = dt + 1;1146}11471148FORCE_INLINE_TEMPLATE void1149ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)1150{1151size_t const lowBits = BIT_readBits(bitD, nbBits);1152DStatePtr->state = nextState + lowBits;1153}11541155/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum1156* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)1157* bits before reloading. This value is the maximum number of bytes we read1158* after reloading when we are decoding long offsets.1159*/1160#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \1161(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \1162? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \1163: 0)11641165typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;11661167FORCE_INLINE_TEMPLATE seq_t1168ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)1169{1170seq_t seq;1171const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;1172const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;1173const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;1174seq.matchLength = mlDInfo->baseValue;1175seq.litLength = llDInfo->baseValue;1176{ U32 const ofBase = ofDInfo->baseValue;1177BYTE const llBits = llDInfo->nbAdditionalBits;1178BYTE const mlBits = mlDInfo->nbAdditionalBits;1179BYTE const ofBits = ofDInfo->nbAdditionalBits;1180BYTE const totalBits = llBits+mlBits+ofBits;11811182U16 const llNext = llDInfo->nextState;1183U16 const mlNext = mlDInfo->nextState;1184U16 const ofNext = ofDInfo->nextState;1185U32 const llnbBits = llDInfo->nbBits;1186U32 const mlnbBits = mlDInfo->nbBits;1187U32 const ofnbBits = ofDInfo->nbBits;1188/*1189* As gcc has better branch and block analyzers, sometimes it is only1190* valuable to mark likelyness for clang, it gives around 3-4% of1191* performance.1192*/11931194/* sequence */1195{ size_t offset;1196#if defined(__clang__)1197if (LIKELY(ofBits > 1)) {1198#else1199if (ofBits > 1) {1200#endif1201ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);1202ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);1203assert(ofBits <= MaxOff);1204if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {1205U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);1206offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);1207BIT_reloadDStream(&seqState->DStream);1208if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);1209assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */1210} else {1211offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */1212if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);1213}1214seqState->prevOffset[2] = seqState->prevOffset[1];1215seqState->prevOffset[1] = seqState->prevOffset[0];1216seqState->prevOffset[0] = offset;1217} else {1218U32 const ll0 = (llDInfo->baseValue == 0);1219if (LIKELY((ofBits == 0))) {1220offset = seqState->prevOffset[ll0];1221seqState->prevOffset[1] = seqState->prevOffset[!ll0];1222seqState->prevOffset[0] = offset;1223} else {1224offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);1225{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];1226temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */1227if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];1228seqState->prevOffset[1] = seqState->prevOffset[0];1229seqState->prevOffset[0] = offset = temp;1230} } }1231seq.offset = offset;1232}12331234#if defined(__clang__)1235if (UNLIKELY(mlBits > 0))1236#else1237if (mlBits > 0)1238#endif1239seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);12401241if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))1242BIT_reloadDStream(&seqState->DStream);1243if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))1244BIT_reloadDStream(&seqState->DStream);1245/* Ensure there are enough bits to read the rest of data in 64-bit mode. */1246ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);12471248#if defined(__clang__)1249if (UNLIKELY(llBits > 0))1250#else1251if (llBits > 0)1252#endif1253seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);12541255if (MEM_32bits())1256BIT_reloadDStream(&seqState->DStream);12571258DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",1259(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);12601261ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */1262ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */1263if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */1264ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */1265}12661267return seq;1268}12691270#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION1271MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)1272{1273size_t const windowSize = dctx->fParams.windowSize;1274/* No dictionary used. */1275if (dctx->dictContentEndForFuzzing == NULL) return 0;1276/* Dictionary is our prefix. */1277if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;1278/* Dictionary is not our ext-dict. */1279if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;1280/* Dictionary is not within our window size. */1281if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;1282/* Dictionary is active. */1283return 1;1284}12851286MEM_STATIC void ZSTD_assertValidSequence(1287ZSTD_DCtx const* dctx,1288BYTE const* op, BYTE const* oend,1289seq_t const seq,1290BYTE const* prefixStart, BYTE const* virtualStart)1291{1292#if DEBUGLEVEL >= 11293size_t const windowSize = dctx->fParams.windowSize;1294size_t const sequenceSize = seq.litLength + seq.matchLength;1295BYTE const* const oLitEnd = op + seq.litLength;1296DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",1297(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);1298assert(op <= oend);1299assert((size_t)(oend - op) >= sequenceSize);1300assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);1301if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {1302size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);1303/* Offset must be within the dictionary. */1304assert(seq.offset <= (size_t)(oLitEnd - virtualStart));1305assert(seq.offset <= windowSize + dictSize);1306} else {1307/* Offset must be within our window. */1308assert(seq.offset <= windowSize);1309}1310#else1311(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;1312#endif1313}1314#endif13151316#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG131713181319FORCE_INLINE_TEMPLATE size_t1320DONT_VECTORIZE1321ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,1322void* dst, size_t maxDstSize,1323const void* seqStart, size_t seqSize, int nbSeq,1324const ZSTD_longOffset_e isLongOffset,1325const int frame)1326{1327const BYTE* ip = (const BYTE*)seqStart;1328const BYTE* const iend = ip + seqSize;1329BYTE* const ostart = (BYTE*)dst;1330BYTE* const oend = ostart + maxDstSize;1331BYTE* op = ostart;1332const BYTE* litPtr = dctx->litPtr;1333const BYTE* litBufferEnd = dctx->litBufferEnd;1334const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);1335const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);1336const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);1337DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");1338(void)frame;13391340/* Regen sequences */1341if (nbSeq) {1342seqState_t seqState;1343dctx->fseEntropy = 1;1344{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }1345RETURN_ERROR_IF(1346ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),1347corruption_detected, "");1348ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);1349ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);1350ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);1351assert(dst != NULL);13521353ZSTD_STATIC_ASSERT(1354BIT_DStream_unfinished < BIT_DStream_completed &&1355BIT_DStream_endOfBuffer < BIT_DStream_completed &&1356BIT_DStream_completed < BIT_DStream_overflow);13571358/* decompress without overrunning litPtr begins */1359{1360seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);1361/* Align the decompression loop to 32 + 16 bytes.1362*1363* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression1364* speed swings based on the alignment of the decompression loop. This1365* performance swing is caused by parts of the decompression loop falling1366* out of the DSB. The entire decompression loop should fit in the DSB,1367* when it can't we get much worse performance. You can measure if you've1368* hit the good case or the bad case with this perf command for some1369* compressed file test.zst:1370*1371* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \1372* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst1373*1374* If you see most cycles served out of the MITE you've hit the bad case.1375* If you see most cycles served out of the DSB you've hit the good case.1376* If it is pretty even then you may be in an okay case.1377*1378* This issue has been reproduced on the following CPUs:1379* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i91380* Use Instruments->Counters to get DSB/MITE cycles.1381* I never got performance swings, but I was able to1382* go from the good case of mostly DSB to half of the1383* cycles served from MITE.1384* - Coffeelake: Intel i9-9900k1385* - Coffeelake: Intel i7-9700k1386*1387* I haven't been able to reproduce the instability or DSB misses on any1388* of the following CPUS:1389* - Haswell1390* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH1391* - Skylake1392*1393* Alignment is done for each of the three major decompression loops:1394* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer1395* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer1396* - ZSTD_decompressSequences_body1397* Alignment choices are made to minimize large swings on bad cases and influence on performance1398* from changes external to this code, rather than to overoptimize on the current commit.1399*1400* If you are seeing performance stability this script can help test.1401* It tests on 4 commits in zstd where I saw performance change.1402*1403* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f41404*/1405#if defined(__GNUC__) && defined(__x86_64__)1406__asm__(".p2align 6");1407# if __GNUC__ >= 71408/* good for gcc-7, gcc-9, and gcc-11 */1409__asm__("nop");1410__asm__(".p2align 5");1411__asm__("nop");1412__asm__(".p2align 4");1413# if __GNUC__ == 8 || __GNUC__ == 101414/* good for gcc-8 and gcc-10 */1415__asm__("nop");1416__asm__(".p2align 3");1417# endif1418# endif1419#endif14201421/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */1422for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {1423size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);1424#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)1425assert(!ZSTD_isError(oneSeqSize));1426if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);1427#endif1428if (UNLIKELY(ZSTD_isError(oneSeqSize)))1429return oneSeqSize;1430DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);1431op += oneSeqSize;1432if (UNLIKELY(!--nbSeq))1433break;1434BIT_reloadDStream(&(seqState.DStream));1435sequence = ZSTD_decodeSequence(&seqState, isLongOffset);1436}14371438/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */1439if (nbSeq > 0) {1440const size_t leftoverLit = dctx->litBufferEnd - litPtr;1441if (leftoverLit)1442{1443RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");1444ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);1445sequence.litLength -= leftoverLit;1446op += leftoverLit;1447}1448litPtr = dctx->litExtraBuffer;1449litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;1450dctx->litBufferLocation = ZSTD_not_in_dst;1451{1452size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);1453#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)1454assert(!ZSTD_isError(oneSeqSize));1455if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);1456#endif1457if (UNLIKELY(ZSTD_isError(oneSeqSize)))1458return oneSeqSize;1459DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);1460op += oneSeqSize;1461if (--nbSeq)1462BIT_reloadDStream(&(seqState.DStream));1463}1464}1465}14661467if (nbSeq > 0) /* there is remaining lit from extra buffer */1468{14691470#if defined(__GNUC__) && defined(__x86_64__)1471__asm__(".p2align 6");1472__asm__("nop");1473# if __GNUC__ != 71474/* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */1475__asm__(".p2align 4");1476__asm__("nop");1477__asm__(".p2align 3");1478# elif __GNUC__ >= 111479__asm__(".p2align 3");1480# else1481__asm__(".p2align 5");1482__asm__("nop");1483__asm__(".p2align 3");1484# endif1485#endif14861487for (; ; ) {1488seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);1489size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);1490#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)1491assert(!ZSTD_isError(oneSeqSize));1492if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);1493#endif1494if (UNLIKELY(ZSTD_isError(oneSeqSize)))1495return oneSeqSize;1496DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);1497op += oneSeqSize;1498if (UNLIKELY(!--nbSeq))1499break;1500BIT_reloadDStream(&(seqState.DStream));1501}1502}15031504/* check if reached exact end */1505DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);1506RETURN_ERROR_IF(nbSeq, corruption_detected, "");1507RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");1508/* save reps for next block */1509{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }1510}15111512/* last literal segment */1513if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */1514{1515size_t const lastLLSize = litBufferEnd - litPtr;1516RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");1517if (op != NULL) {1518ZSTD_memmove(op, litPtr, lastLLSize);1519op += lastLLSize;1520}1521litPtr = dctx->litExtraBuffer;1522litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;1523dctx->litBufferLocation = ZSTD_not_in_dst;1524}1525{ size_t const lastLLSize = litBufferEnd - litPtr;1526RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");1527if (op != NULL) {1528ZSTD_memcpy(op, litPtr, lastLLSize);1529op += lastLLSize;1530}1531}15321533return op-ostart;1534}15351536FORCE_INLINE_TEMPLATE size_t1537DONT_VECTORIZE1538ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,1539void* dst, size_t maxDstSize,1540const void* seqStart, size_t seqSize, int nbSeq,1541const ZSTD_longOffset_e isLongOffset,1542const int frame)1543{1544const BYTE* ip = (const BYTE*)seqStart;1545const BYTE* const iend = ip + seqSize;1546BYTE* const ostart = (BYTE*)dst;1547BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;1548BYTE* op = ostart;1549const BYTE* litPtr = dctx->litPtr;1550const BYTE* const litEnd = litPtr + dctx->litSize;1551const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);1552const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);1553const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);1554DEBUGLOG(5, "ZSTD_decompressSequences_body");1555(void)frame;15561557/* Regen sequences */1558if (nbSeq) {1559seqState_t seqState;1560dctx->fseEntropy = 1;1561{ U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }1562RETURN_ERROR_IF(1563ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),1564corruption_detected, "");1565ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);1566ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);1567ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);1568assert(dst != NULL);15691570ZSTD_STATIC_ASSERT(1571BIT_DStream_unfinished < BIT_DStream_completed &&1572BIT_DStream_endOfBuffer < BIT_DStream_completed &&1573BIT_DStream_completed < BIT_DStream_overflow);15741575#if defined(__GNUC__) && defined(__x86_64__)1576__asm__(".p2align 6");1577__asm__("nop");1578# if __GNUC__ >= 71579__asm__(".p2align 5");1580__asm__("nop");1581__asm__(".p2align 3");1582# else1583__asm__(".p2align 4");1584__asm__("nop");1585__asm__(".p2align 3");1586# endif1587#endif15881589for ( ; ; ) {1590seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);1591size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);1592#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)1593assert(!ZSTD_isError(oneSeqSize));1594if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);1595#endif1596if (UNLIKELY(ZSTD_isError(oneSeqSize)))1597return oneSeqSize;1598DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);1599op += oneSeqSize;1600if (UNLIKELY(!--nbSeq))1601break;1602BIT_reloadDStream(&(seqState.DStream));1603}16041605/* check if reached exact end */1606DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);1607RETURN_ERROR_IF(nbSeq, corruption_detected, "");1608RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");1609/* save reps for next block */1610{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }1611}16121613/* last literal segment */1614{ size_t const lastLLSize = litEnd - litPtr;1615RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");1616if (op != NULL) {1617ZSTD_memcpy(op, litPtr, lastLLSize);1618op += lastLLSize;1619}1620}16211622return op-ostart;1623}16241625static size_t1626ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,1627void* dst, size_t maxDstSize,1628const void* seqStart, size_t seqSize, int nbSeq,1629const ZSTD_longOffset_e isLongOffset,1630const int frame)1631{1632return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1633}16341635static size_t1636ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,1637void* dst, size_t maxDstSize,1638const void* seqStart, size_t seqSize, int nbSeq,1639const ZSTD_longOffset_e isLongOffset,1640const int frame)1641{1642return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1643}1644#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */16451646#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT16471648FORCE_INLINE_TEMPLATE size_t1649ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,1650const BYTE* const prefixStart, const BYTE* const dictEnd)1651{1652prefetchPos += sequence.litLength;1653{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;1654const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.1655* No consequence though : memory address is only used for prefetching, not for dereferencing */1656PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */1657}1658return prefetchPos + sequence.matchLength;1659}16601661/* This decoding function employs prefetching1662* to reduce latency impact of cache misses.1663* It's generally employed when block contains a significant portion of long-distance matches1664* or when coupled with a "cold" dictionary */1665FORCE_INLINE_TEMPLATE size_t1666ZSTD_decompressSequencesLong_body(1667ZSTD_DCtx* dctx,1668void* dst, size_t maxDstSize,1669const void* seqStart, size_t seqSize, int nbSeq,1670const ZSTD_longOffset_e isLongOffset,1671const int frame)1672{1673const BYTE* ip = (const BYTE*)seqStart;1674const BYTE* const iend = ip + seqSize;1675BYTE* const ostart = (BYTE*)dst;1676BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;1677BYTE* op = ostart;1678const BYTE* litPtr = dctx->litPtr;1679const BYTE* litBufferEnd = dctx->litBufferEnd;1680const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);1681const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);1682const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);1683(void)frame;16841685/* Regen sequences */1686if (nbSeq) {1687#define STORED_SEQS 81688#define STORED_SEQS_MASK (STORED_SEQS-1)1689#define ADVANCED_SEQS STORED_SEQS1690seq_t sequences[STORED_SEQS];1691int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);1692seqState_t seqState;1693int seqNb;1694size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */16951696dctx->fseEntropy = 1;1697{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }1698assert(dst != NULL);1699assert(iend >= ip);1700RETURN_ERROR_IF(1701ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),1702corruption_detected, "");1703ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);1704ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);1705ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);17061707/* prepare in advance */1708for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {1709seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);1710prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);1711sequences[seqNb] = sequence;1712}1713RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");17141715/* decompress without stomping litBuffer */1716for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {1717seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);1718size_t oneSeqSize;17191720if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)1721{1722/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */1723const size_t leftoverLit = dctx->litBufferEnd - litPtr;1724if (leftoverLit)1725{1726RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");1727ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);1728sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;1729op += leftoverLit;1730}1731litPtr = dctx->litExtraBuffer;1732litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;1733dctx->litBufferLocation = ZSTD_not_in_dst;1734oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);1735#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)1736assert(!ZSTD_isError(oneSeqSize));1737if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);1738#endif1739if (ZSTD_isError(oneSeqSize)) return oneSeqSize;17401741prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);1742sequences[seqNb & STORED_SEQS_MASK] = sequence;1743op += oneSeqSize;1744}1745else1746{1747/* lit buffer is either wholly contained in first or second split, or not split at all*/1748oneSeqSize = dctx->litBufferLocation == ZSTD_split ?1749ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :1750ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);1751#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)1752assert(!ZSTD_isError(oneSeqSize));1753if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);1754#endif1755if (ZSTD_isError(oneSeqSize)) return oneSeqSize;17561757prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);1758sequences[seqNb & STORED_SEQS_MASK] = sequence;1759op += oneSeqSize;1760}1761}1762RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");17631764/* finish queue */1765seqNb -= seqAdvance;1766for ( ; seqNb<nbSeq ; seqNb++) {1767seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);1768if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)1769{1770const size_t leftoverLit = dctx->litBufferEnd - litPtr;1771if (leftoverLit)1772{1773RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");1774ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);1775sequence->litLength -= leftoverLit;1776op += leftoverLit;1777}1778litPtr = dctx->litExtraBuffer;1779litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;1780dctx->litBufferLocation = ZSTD_not_in_dst;1781{1782size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);1783#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)1784assert(!ZSTD_isError(oneSeqSize));1785if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);1786#endif1787if (ZSTD_isError(oneSeqSize)) return oneSeqSize;1788op += oneSeqSize;1789}1790}1791else1792{1793size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?1794ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :1795ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);1796#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)1797assert(!ZSTD_isError(oneSeqSize));1798if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);1799#endif1800if (ZSTD_isError(oneSeqSize)) return oneSeqSize;1801op += oneSeqSize;1802}1803}18041805/* save reps for next block */1806{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }1807}18081809/* last literal segment */1810if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */1811{1812size_t const lastLLSize = litBufferEnd - litPtr;1813RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");1814if (op != NULL) {1815ZSTD_memmove(op, litPtr, lastLLSize);1816op += lastLLSize;1817}1818litPtr = dctx->litExtraBuffer;1819litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;1820}1821{ size_t const lastLLSize = litBufferEnd - litPtr;1822RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");1823if (op != NULL) {1824ZSTD_memmove(op, litPtr, lastLLSize);1825op += lastLLSize;1826}1827}18281829return op-ostart;1830}18311832static size_t1833ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,1834void* dst, size_t maxDstSize,1835const void* seqStart, size_t seqSize, int nbSeq,1836const ZSTD_longOffset_e isLongOffset,1837const int frame)1838{1839return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1840}1841#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */1842184318441845#if DYNAMIC_BMI218461847#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG1848static BMI2_TARGET_ATTRIBUTE size_t1849DONT_VECTORIZE1850ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,1851void* dst, size_t maxDstSize,1852const void* seqStart, size_t seqSize, int nbSeq,1853const ZSTD_longOffset_e isLongOffset,1854const int frame)1855{1856return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1857}1858static BMI2_TARGET_ATTRIBUTE size_t1859DONT_VECTORIZE1860ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,1861void* dst, size_t maxDstSize,1862const void* seqStart, size_t seqSize, int nbSeq,1863const ZSTD_longOffset_e isLongOffset,1864const int frame)1865{1866return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1867}1868#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */18691870#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT1871static BMI2_TARGET_ATTRIBUTE size_t1872ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,1873void* dst, size_t maxDstSize,1874const void* seqStart, size_t seqSize, int nbSeq,1875const ZSTD_longOffset_e isLongOffset,1876const int frame)1877{1878return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1879}1880#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */18811882#endif /* DYNAMIC_BMI2 */18831884typedef size_t (*ZSTD_decompressSequences_t)(1885ZSTD_DCtx* dctx,1886void* dst, size_t maxDstSize,1887const void* seqStart, size_t seqSize, int nbSeq,1888const ZSTD_longOffset_e isLongOffset,1889const int frame);18901891#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG1892static size_t1893ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,1894const void* seqStart, size_t seqSize, int nbSeq,1895const ZSTD_longOffset_e isLongOffset,1896const int frame)1897{1898DEBUGLOG(5, "ZSTD_decompressSequences");1899#if DYNAMIC_BMI21900if (ZSTD_DCtx_get_bmi2(dctx)) {1901return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1902}1903#endif1904return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1905}1906static size_t1907ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,1908const void* seqStart, size_t seqSize, int nbSeq,1909const ZSTD_longOffset_e isLongOffset,1910const int frame)1911{1912DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");1913#if DYNAMIC_BMI21914if (ZSTD_DCtx_get_bmi2(dctx)) {1915return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1916}1917#endif1918return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1919}1920#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */192119221923#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT1924/* ZSTD_decompressSequencesLong() :1925* decompression function triggered when a minimum share of offsets is considered "long",1926* aka out of cache.1927* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".1928* This function will try to mitigate main memory latency through the use of prefetching */1929static size_t1930ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,1931void* dst, size_t maxDstSize,1932const void* seqStart, size_t seqSize, int nbSeq,1933const ZSTD_longOffset_e isLongOffset,1934const int frame)1935{1936DEBUGLOG(5, "ZSTD_decompressSequencesLong");1937#if DYNAMIC_BMI21938if (ZSTD_DCtx_get_bmi2(dctx)) {1939return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1940}1941#endif1942return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);1943}1944#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */1945194619471948#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \1949!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)1950/* ZSTD_getLongOffsetsShare() :1951* condition : offTable must be valid1952* @return : "share" of long offsets (arbitrarily defined as > (1<<23))1953* compared to maximum possible of (1<<OffFSELog) */1954static unsigned1955ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)1956{1957const void* ptr = offTable;1958U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;1959const ZSTD_seqSymbol* table = offTable + 1;1960U32 const max = 1 << tableLog;1961U32 u, total = 0;1962DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);19631964assert(max <= (1 << OffFSELog)); /* max not too large */1965for (u=0; u<max; u++) {1966if (table[u].nbAdditionalBits > 22) total += 1;1967}19681969assert(tableLog <= OffFSELog);1970total <<= (OffFSELog - tableLog); /* scale to OffFSELog */19711972return total;1973}1974#endif19751976size_t1977ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,1978void* dst, size_t dstCapacity,1979const void* src, size_t srcSize, const int frame, const streaming_operation streaming)1980{ /* blockType == blockCompressed */1981const BYTE* ip = (const BYTE*)src;1982/* isLongOffset must be true if there are long offsets.1983* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.1984* We don't expect that to be the case in 64-bit mode.1985* In block mode, window size is not known, so we have to be conservative.1986* (note: but it could be evaluated from current-lowLimit)1987*/1988ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));1989DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);19901991RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");19921993/* Decode literals section */1994{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);1995DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);1996if (ZSTD_isError(litCSize)) return litCSize;1997ip += litCSize;1998srcSize -= litCSize;1999}20002001/* Build Decoding Tables */2002{2003/* These macros control at build-time which decompressor implementation2004* we use. If neither is defined, we do some inspection and dispatch at2005* runtime.2006*/2007#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \2008!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)2009int usePrefetchDecoder = dctx->ddictIsCold;2010#endif2011int nbSeq;2012size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);2013if (ZSTD_isError(seqHSize)) return seqHSize;2014ip += seqHSize;2015srcSize -= seqHSize;20162017RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");20182019#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \2020!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)2021if ( !usePrefetchDecoder2022&& (!frame || (dctx->fParams.windowSize > (1<<24)))2023&& (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */2024U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);2025U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */2026usePrefetchDecoder = (shareLongOffsets >= minShare);2027}2028#endif20292030dctx->ddictIsCold = 0;20312032#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \2033!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)2034if (usePrefetchDecoder)2035#endif2036#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT2037return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);2038#endif20392040#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG2041/* else */2042if (dctx->litBufferLocation == ZSTD_split)2043return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);2044else2045return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);2046#endif2047}2048}204920502051void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)2052{2053if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */2054dctx->dictEnd = dctx->previousDstEnd;2055dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));2056dctx->prefixStart = dst;2057dctx->previousDstEnd = dst;2058}2059}206020612062size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,2063void* dst, size_t dstCapacity,2064const void* src, size_t srcSize)2065{2066size_t dSize;2067ZSTD_checkContinuity(dctx, dst, dstCapacity);2068dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);2069dctx->previousDstEnd = (char*)dst + dSize;2070return dSize;2071}207220732074