Path: blob/master/Utilities/cmzstd/lib/compress/zstd_ldm.c
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_ldm.h"

#include "../common/debug.h"
#include "../common/xxhash.h"
#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
#include "zstd_ldm_geartab.h"

#define LDM_BUCKET_SIZE_LOG 3
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7

typedef struct {
    U64 rolling;
    U64 stopMask;
} ldmRollingHashState_t;

/** ZSTD_ldm_gear_init():
 *
 * Initializes the rolling hash state such that it will honor the
 * settings in params. */
static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
{
    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
    unsigned hashRateLog = params->hashRateLog;

    state->rolling = ~(U32)0;

    /* The choice of the splitting criterion is subject to two conditions:
     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
     *   2. ideally, it has to depend on a window of minMatchLength bytes.
     *
     * In the gear hash algorithm, bit n depends on the last n bytes;
     * so in order to obtain a good quality splitting criterion it is
     * preferable to use bits with high weight.
     *
     * To match condition 1 we use a mask with hashRateLog bits set
     * and, because of the previous remark, we make sure these bits
     * have the highest possible weight while still respecting
     * condition 2.
     */
    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
    } else {
        /* In this degenerate case we simply honor the hash rate. */
        state->stopMask = ((U64)1 << hashRateLog) - 1;
    }
}
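
/* A worked example of the stopMask computation above (illustrative values,
 * not fixed by this file): with minMatchLength = 64, maxBitsInMask = 64,
 * and a typical hashRateLog of 7:
 *
 *     stopMask = (((U64)1 << 7) - 1) << (64 - 7)
 *              = 0x7F << 57
 *              = 0xFE00000000000000
 *
 * A split then triggers whenever the 7 highest-weight bits of the rolling
 * hash are all zero, which happens on average once every 2^7 = 128 bytes. */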

/** ZSTD_ldm_gear_reset()
 * Feeds [data, data + minMatchLength) into the hash without registering any
 * splits. This effectively resets the hash state. This is used when skipping
 * over data, either at the beginning of a block, or skipping sections.
 */
static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
                                BYTE const* data, size_t minMatchLength)
{
    U64 hash = state->rolling;
    size_t n = 0;

#define GEAR_ITER_ONCE() do {                                  \
        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
        n += 1;                                                \
    } while (0)
    while (n + 3 < minMatchLength) {
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
    }
    while (n < minMatchLength) {
        GEAR_ITER_ONCE();
    }
#undef GEAR_ITER_ONCE
}

/** ZSTD_ldm_gear_feed():
 *
 * Registers in the splits array all the split points found in the first
 * size bytes following the data pointer. This function terminates when
 * either all the data has been processed or LDM_BATCH_SIZE splits are
 * present in the splits array.
 *
 * Precondition: The splits array must not be full.
 * Returns: The number of bytes processed. */
static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
                                 BYTE const* data, size_t size,
                                 size_t* splits, unsigned* numSplits)
{
    size_t n;
    U64 hash, mask;

    hash = state->rolling;
    mask = state->stopMask;
    n = 0;

#define GEAR_ITER_ONCE() do {                                  \
        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
        n += 1;                                                \
        if (UNLIKELY((hash & mask) == 0)) {                    \
            splits[*numSplits] = n;                            \
            *numSplits += 1;                                   \
            if (*numSplits == LDM_BATCH_SIZE)                  \
                goto done;                                     \
        }                                                      \
    } while (0)

    while (n + 3 < size) {
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
    }
    while (n < size) {
        GEAR_ITER_ONCE();
    }

#undef GEAR_ITER_ONCE

done:
    state->rolling = hash;
    return n;
}

void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                               ZSTD_compressionParameters const* cParams)
{
    params->windowLog = cParams->windowLog;
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    if (params->hashLog == 0) {
        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    }
    if (params->hashRateLog == 0) {
        params->hashRateLog = params->windowLog < params->hashLog
                            ? 0
                            : params->windowLog - params->hashLog;
    }
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
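
/* A worked example of the defaulting above, assuming windowLog = 27 and all
 * LDM parameters left at 0 (hypothetical values, chosen for illustration):
 *
 *     bucketSizeLog  = LDM_BUCKET_SIZE_LOG           = 3
 *     minMatchLength = LDM_MIN_MATCH_LENGTH          = 64
 *     hashLog        = MAX(ZSTD_HASHLOG_MIN, 27 - 7) = 20
 *     hashRateLog    = 27 - 20                       = 7
 *
 * i.e. one split point is expected roughly every 2^7 = 128 bytes. */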

size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
    size_t const ldmHSize = ((size_t)1) << params.hashLog;
    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
    return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
}

size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
{
    return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
}

/** ZSTD_ldm_getBucket() :
 *  Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
{
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
}

/** ZSTD_ldm_insertEntry() :
 *  Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                 size_t const hash, const ldmEntry_t entry,
                                 ldmParams_t const ldmParams)
{
    BYTE* const pOffset = ldmState->bucketOffsets + hash;
    unsigned const offset = *pOffset;

    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
}
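
/* Table layout implied by the two helpers above, assuming hashLog = 20 and
 * bucketSizeLog = 3 (the defaults derived in ZSTD_ldm_adjustParameters): the
 * hash table holds 2^20 ldmEntry_t slots viewed as 2^17 buckets of 8 entries.
 * ZSTD_ldm_getBucket() returns &hashTable[hash << 3], and
 * ZSTD_ldm_insertEntry() writes round-robin within the bucket: since
 * bucketOffsets[hash] cycles through 0..7, each insertion overwrites the
 * oldest of the 8 entries. */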

/** ZSTD_ldm_countBackwardsMatch() :
 *  Returns the number of bytes that match backwards before pIn and pMatch.
 *
 *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
        const BYTE* pIn, const BYTE* pAnchor,
        const BYTE* pMatch, const BYTE* pMatchBase)
{
    size_t matchLength = 0;
    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
        pIn--;
        pMatch--;
        matchLength++;
    }
    return matchLength;
}

/** ZSTD_ldm_countBackwardsMatch_2segments() :
 *  Returns the number of bytes that match backwards from pMatch,
 *  even with the backwards match spanning 2 different segments.
 *
 *  On reaching `pMatchBase`, start counting from mEnd */
static size_t ZSTD_ldm_countBackwardsMatch_2segments(
        const BYTE* pIn, const BYTE* pAnchor,
        const BYTE* pMatch, const BYTE* pMatchBase,
        const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
{
    size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
    if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
        /* If backwards match is entirely in the extDict or prefix, immediately return */
        return matchLength;
    }
    DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
    matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
    DEBUGLOG(7, "final backwards match length = %zu", matchLength);
    return matchLength;
}

/** ZSTD_ldm_fillFastTables() :
 *
 *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
 *  This is similar to ZSTD_loadDictionaryContent.
 *
 *  The tables for the other strategies are filled within their
 *  block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
                                      void const* end)
{
    const BYTE* const iend = (const BYTE*)end;

    switch(ms->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        break;
    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    return 0;
}

void ZSTD_ldm_fillHashTable(
        ldmState_t* ldmState, const BYTE* ip,
        const BYTE* iend, ldmParams_t const* params)
{
    U32 const minMatchLength = params->minMatchLength;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const istart = ip;
    ldmRollingHashState_t hashState;
    size_t* const splits = ldmState->splitIndices;
    unsigned numSplits;

    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");

    ZSTD_ldm_gear_init(&hashState, params);
    while (ip < iend) {
        size_t hashed;
        unsigned n;

        numSplits = 0;
        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);

        for (n = 0; n < numSplits; n++) {
            if (ip + splits[n] >= istart + minMatchLength) {
                BYTE const* const split = ip + splits[n] - minMatchLength;
                U64 const xxhash = XXH64(split, minMatchLength, 0);
                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
                ldmEntry_t entry;

                entry.offset = (U32)(split - base);
                entry.checksum = (U32)(xxhash >> 32);
                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
            }
        }

        ip += hashed;
    }
}

/** ZSTD_ldm_limitTableUpdate() :
 *
 *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
 *  if it is far away
 *  (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
    U32 const curr = (U32)(anchor - ms->window.base);
    if (curr > ms->nextToUpdate + 1024) {
        ms->nextToUpdate =
            curr - MIN(512, curr - ms->nextToUpdate - 1024);
    }
}
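
/* A worked example of the limiter above (made-up positions): with
 * ms->nextToUpdate = 2000 and the anchor at index 10000, we have
 * curr - nextToUpdate = 8000 > 1024, so nextToUpdate is pulled forward to
 * 10000 - MIN(512, 8000 - 1024) = 9488. The subsequent table fill then
 * covers only the last 512 bytes before the anchor instead of the whole
 * 8000-byte gap, bounding the work done after a long match. */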

static size_t ZSTD_ldm_generateSequences_internal(
        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    /* LDM parameters */
    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
    U32 const minMatchLength = params->minMatchLength;
    U32 const entsPerBucket = 1U << params->bucketSizeLog;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    /* Prefix and extDict parameters */
    U32 const dictLimit = ldmState->window.dictLimit;
    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
    BYTE const* const lowPrefixPtr = base + dictLimit;
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    BYTE const* const ilimit = iend - HASH_READ_SIZE;
    /* Input positions */
    BYTE const* anchor = istart;
    BYTE const* ip = istart;
    /* Rolling hash state */
    ldmRollingHashState_t hashState;
    /* Arrays for staged-processing */
    size_t* const splits = ldmState->splitIndices;
    ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
    unsigned numSplits;

    if (srcSize < minMatchLength)
        return iend - anchor;

    /* Initialize the rolling hash state with the first minMatchLength bytes */
    ZSTD_ldm_gear_init(&hashState, params);
    ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
    ip += minMatchLength;

    while (ip < ilimit) {
        size_t hashed;
        unsigned n;

        numSplits = 0;
        hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
                                    splits, &numSplits);

        for (n = 0; n < numSplits; n++) {
            BYTE const* const split = ip + splits[n] - minMatchLength;
            U64 const xxhash = XXH64(split, minMatchLength, 0);
            U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));

            candidates[n].split = split;
            candidates[n].hash = hash;
            candidates[n].checksum = (U32)(xxhash >> 32);
            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
            PREFETCH_L1(candidates[n].bucket);
        }

        for (n = 0; n < numSplits; n++) {
            size_t forwardMatchLength = 0, backwardMatchLength = 0,
                   bestMatchLength = 0, mLength;
            U32 offset;
            BYTE const* const split = candidates[n].split;
            U32 const checksum = candidates[n].checksum;
            U32 const hash = candidates[n].hash;
            ldmEntry_t* const bucket = candidates[n].bucket;
            ldmEntry_t const* cur;
            ldmEntry_t const* bestEntry = NULL;
            ldmEntry_t newEntry;

            newEntry.offset = (U32)(split - base);
            newEntry.checksum = checksum;

            /* If a split point would generate a sequence overlapping with
             * the previous one, we merely register it in the hash table and
             * move on */
            if (split < anchor) {
                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
                continue;
            }

            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
                if (extDict) {
                    BYTE const* const curMatchBase =
                        cur->offset < dictLimit ? dictBase : base;
                    BYTE const* const pMatch = curMatchBase + cur->offset;
                    BYTE const* const matchEnd =
                        cur->offset < dictLimit ? dictEnd : iend;
                    BYTE const* const lowMatchPtr =
                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
                    curForwardMatchLength =
                        ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
                            split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
                } else { /* !extDict */
                    BYTE const* const pMatch = base + cur->offset;
                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
                }
                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }

            /* No match found -- insert an entry into the hash table
             * and process the next candidate match */
            if (bestEntry == NULL) {
                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
                continue;
            }

            /* Match found */
            offset = (U32)(split - base) - bestEntry->offset;
            mLength = forwardMatchLength + backwardMatchLength;
            {
                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;

                /* Out of sequence storage */
                if (rawSeqStore->size == rawSeqStore->capacity)
                    return ERROR(dstSize_tooSmall);
                seq->litLength = (U32)(split - backwardMatchLength - anchor);
                seq->matchLength = (U32)mLength;
                seq->offset = offset;
                rawSeqStore->size++;
            }

            /* Insert the current entry into the hash table --- it must be
             * done after the previous block to avoid clobbering bestEntry */
            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);

            anchor = split + forwardMatchLength;

            /* If we find a match that ends after the data that we've hashed
             * then we have a repeating, overlapping, pattern. E.g. all zeros.
             * If one repetition of the pattern matches our `stopMask` then all
             * repetitions will. We don't need to insert them all into our table,
             * only the first one. So skip over overlapping matches.
             * This is a major speed boost (20x) for compressing a single byte
             * repeated, when that byte ends up in the table.
             */
            if (anchor > ip + hashed) {
                ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
                /* Continue the outer loop at anchor (ip + hashed == anchor). */
                ip = anchor - hashed;
                break;
            }
        }

        ip += hashed;
    }

    return iend - anchor;
}
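
/* A sketch of the sequence bookkeeping above, with made-up positions: say
 * anchor sits at index 1000, a split lands at index 1100, the best entry's
 * offset is 300, and the match extends 500 bytes forward and 20 bytes
 * backward from the split. The emitted rawSeq is then
 *
 *     litLength   = (1100 - 20) - 1000 = 80
 *     matchLength = 500 + 20           = 520
 *     offset      = 1100 - 300         = 800
 *
 * and anchor advances to 1100 + 500 = 1600, so any later split before 1600
 * is only registered in the hash table rather than emitted as a sequence. */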

/*! ZSTD_ldm_reduceTable() :
 *  reduce table indexes by `reducerValue` */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
                                 U32 const reducerValue)
{
    U32 u;
    for (u = 0; u < size; u++) {
        if (table[u].offset < reducerValue) table[u].offset = 0;
        else table[u].offset -= reducerValue;
    }
}
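
/* Note on the clamping above: entries whose offset falls below reducerValue
 * are set to 0 rather than rebased. Since the candidate scan in
 * ZSTD_ldm_generateSequences_internal() skips entries with
 * cur->offset <= lowestIndex, such entries are effectively evicted. For
 * example, with reducerValue = 0x10000000, an entry at offset 0x10000040 is
 * rebased to 0x40, while one at 0x0FFFFFFF (now outside the window) is
 * cleared. */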

size_t ZSTD_ldm_generateSequences(
        ldmState_t* ldmState, rawSeqStore_t* sequences,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    U32 const maxDist = 1U << params->windowLog;
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    size_t const kMaxChunkSize = 1 << 20;
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
    size_t chunk;
    size_t leftoverSize = 0;

    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
    /* Check that ZSTD_window_update() has been called for this chunk prior
     * to passing it to this function.
     */
    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    /* The input could be very large (in zstdmt), so it must be broken up into
     * chunks to enforce the maximum distance and handle overflow correction.
     */
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);
    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
        size_t const remaining = (size_t)(iend - chunkStart);
        BYTE const* const chunkEnd =
            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
        size_t const chunkSize = chunkEnd - chunkStart;
        size_t newLeftoverSize;
        size_t const prevSize = sequences->size;

        assert(chunkStart < iend);
        /* 1. Perform overflow correction if necessary. */
        if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
            /* invalidate dictionaries on overflow correction */
            ldmState->loadedDictEnd = 0;
        }
        /* 2. We enforce the maximum offset allowed.
         *
         * kMaxChunkSize should be small enough that we don't lose too much of
         * the window through early invalidation.
         * TODO: * Test the chunk size.
         *       * Try invalidation after the sequence generation and test the
         *         offset against maxDist directly.
         *
         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
         * that any offset used is valid at the END of the sequence, since it may
         * be split into two sequences. This condition holds when using
         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
         * against maxDist directly, we'll have to carefully handle that case.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
        if (ZSTD_isError(newLeftoverSize))
            return newLeftoverSize;
        /* 4. We add the leftover literals from previous iterations to the first
         *    newly generated sequence, or add the `newLeftoverSize` if none are
         *    generated.
         */
        /* Prepend the leftover literals from the last call */
        if (prevSize < sequences->size) {
            sequences->seq[prevSize].litLength += (U32)leftoverSize;
            leftoverSize = newLeftoverSize;
        } else {
            assert(newLeftoverSize == chunkSize);
            leftoverSize += chunkSize;
        }
    }
    return 0;
}

void
ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
{
    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
        if (srcSize <= seq->litLength) {
            /* Skip past srcSize literals */
            seq->litLength -= (U32)srcSize;
            return;
        }
        srcSize -= seq->litLength;
        seq->litLength = 0;
        if (srcSize < seq->matchLength) {
            /* Skip past the first srcSize of the match */
            seq->matchLength -= (U32)srcSize;
            if (seq->matchLength < minMatch) {
                /* The match is too short, omit it */
                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                    seq[1].litLength += seq[0].matchLength;
                }
                rawSeqStore->pos++;
            }
            return;
        }
        srcSize -= seq->matchLength;
        seq->matchLength = 0;
        rawSeqStore->pos++;
    }
}

/**
 * If the sequence length is longer than remaining then the sequence is split
 * between this block and the next.
 *
 * Returns the current sequence to handle, or if the rest of the block should
 * be literals, it returns a sequence with offset == 0.
 */
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
                                 U32 const remaining, U32 const minMatch)
{
    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
    assert(sequence.offset > 0);
    /* Likely: No partial sequence */
    if (remaining >= sequence.litLength + sequence.matchLength) {
        rawSeqStore->pos++;
        return sequence;
    }
    /* Cut the sequence short (offset == 0 ==> rest is literals). */
    if (remaining <= sequence.litLength) {
        sequence.offset = 0;
    } else if (remaining < sequence.litLength + sequence.matchLength) {
        sequence.matchLength = remaining - sequence.litLength;
        if (sequence.matchLength < minMatch) {
            sequence.offset = 0;
        }
    }
    /* Skip past `remaining` bytes for the future sequences. */
    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    return sequence;
}
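
/* A worked example of the split above (hypothetical numbers): with
 * remaining = 100 and a stored sequence {litLength = 40, matchLength = 90},
 * the block boundary falls inside the match, so the caller receives
 * {litLength = 40, matchLength = 60} and, provided 30 >= minMatch,
 * ZSTD_ldm_skipSequences() leaves {litLength = 0, matchLength = 30} behind
 * for the next block; otherwise the 30-byte remainder is folded into the
 * following sequence's literals. */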

void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}

size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        ZSTD_paramSwitch_e useRowMatchFinder,
        void const* src, size_t srcSize)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    unsigned const minMatch = cParams->minMatch;
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    /* Input positions */
    BYTE const* ip = istart;

    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
    /* If using opt parser, use LDMs only as candidates rather than always accepting them */
    if (cParams->strategy >= ZSTD_btopt) {
        size_t lastLLSize;
        ms->ldmSeqStore = rawSeqStore;
        lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
        return lastLLSize;
    }

    assert(rawSeqStore->pos <= rawSeqStore->size);
    assert(rawSeqStore->size <= rawSeqStore->capacity);
    /* Loop through each sequence and apply the block compressor to the literals */
    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
        /* maybeSplitSequence updates rawSeqStore->pos */
        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                   (U32)(iend - ip), minMatch);
        int i;
        /* End signal */
        if (sequence.offset == 0)
            break;

        assert(ip + sequence.litLength + sequence.matchLength <= iend);

        /* Fill tables for block compressor */
        ZSTD_ldm_limitTableUpdate(ms, ip);
        ZSTD_ldm_fillFastTables(ms, ip);
        /* Run the block compressor */
        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
        {
            size_t const newLitLength =
                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
            ip += sequence.litLength;
            /* Update the repcodes */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                          OFFSET_TO_OFFBASE(sequence.offset),
                          sequence.matchLength);
            ip += sequence.matchLength;
        }
    }
    /* Fill the tables for the block compressor */
    ZSTD_ldm_limitTableUpdate(ms, ip);
    ZSTD_ldm_fillFastTables(ms, ip);
    /* Compress the last literals */
    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
}
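
/* Overall flow, as a rough sketch of how callers use this file (see
 * zstd_compress.c for the exact wiring): for each block the compressor first
 * runs ZSTD_ldm_generateSequences() to fill a rawSeqStore_t with
 * long-distance matches, then hands that store to ZSTD_ldm_blockCompress(),
 * which emits each LDM sequence and lets the regular block compressor handle
 * the literal stretches in between; for ZSTD_btopt and stronger strategies
 * the LDM sequences are instead merely offered to the optimal parser as
 * candidates. */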