Path: blob/main/sys/contrib/openzfs/module/zstd/lib/compress/zstd_ldm.c
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_ldm.h"

#include "../common/debug.h"
#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */

#define LDM_BUCKET_SIZE_LOG 3
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10

void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                               ZSTD_compressionParameters const* cParams)
{
    params->windowLog = cParams->windowLog;
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    if (cParams->strategy >= ZSTD_btopt) {
        /* Get out of the way of the optimal parser */
        U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
        assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
        assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
        params->minMatchLength = minMatch;
    }
    if (params->hashLog == 0) {
        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    }
    if (params->hashRateLog == 0) {
        params->hashRateLog = params->windowLog < params->hashLog
                                  ? 0
                                  : params->windowLog - params->hashLog;
    }
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}

size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
    size_t const ldmHSize = ((size_t)1) << params.hashLog;
    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
    return params.enableLdm ? totalSize : 0;
}

size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
{
    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}

/** ZSTD_ldm_getSmallHash() :
 *  numBits should be <= 32
 *  If numBits==0, returns 0.
 *  @return : the most significant numBits of value. */
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
{
    assert(numBits <= 32);
    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
}

/** ZSTD_ldm_getChecksum() :
 *  numBitsToDiscard should be <= 32
 *  @return : the next most significant 32 bits after numBitsToDiscard */
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
{
    assert(numBitsToDiscard <= 32);
    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
}

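/* Illustrative layout of the 64-bit rolling hash (example values, not from
 * the original source): with hBits == 20 and hashRateLog == 7 it is consumed
 * as
 *
 *   [ 20-bit small hash | 32-bit checksum | 7-bit tag | 5 unused bits ]
 *
 * ZSTD_ldm_getSmallHash() reads the first field, ZSTD_ldm_getChecksum() the
 * second, and ZSTD_ldm_getTag() below the third; the fields never overlap as
 * long as hBits + 32 + hashRateLog <= 64. */
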
/** ZSTD_ldm_getTag() :
 *  Given the hash, returns the most significant numTagBits bits
 *  after (32 + hbits) bits.
 *
 *  If there are not enough bits remaining, return the last
 *  numTagBits bits. */
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
{
    assert(numTagBits < 32 && hbits <= 32);
    if (32 - hbits < numTagBits) {
        return hash & (((U32)1 << numTagBits) - 1);
    } else {
        return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
    }
}

/** ZSTD_ldm_getBucket() :
 *  Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
{
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
}

/** ZSTD_ldm_insertEntry() :
 *  Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                 size_t const hash, const ldmEntry_t entry,
                                 ldmParams_t const ldmParams)
{
    BYTE* const bucketOffsets = ldmState->bucketOffsets;
    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
    bucketOffsets[hash]++;
    bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
}

/** ZSTD_ldm_makeEntryAndInsertByTag() :
 *
 *  Gets the small hash, checksum, and tag from the rollingHash.
 *
 *  If the tag matches (1 << ldmParams.hashRateLog)-1, then
 *  creates an ldmEntry from the offset, and inserts it into the hash table.
 *
 *  hBits is the length of the small hash, which is the most significant hBits
 *  of rollingHash. The checksum is the next 32 most significant bits, followed
 *  by ldmParams.hashRateLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                             U64 const rollingHash,
                                             U32 const hBits,
                                             U32 const offset,
                                             ldmParams_t const ldmParams)
{
    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
    U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
    if (tag == tagMask) {
        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
        ldmEntry_t entry;
        entry.offset = offset;
        entry.checksum = checksum;
        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
    }
}

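/* Note: because an entry is only inserted when the tag equals tagMask, on
 * average just one position in every (1 << hashRateLog) is added to the
 * table. With the default hashRateLog == windowLog - hashLog (see
 * ZSTD_ldm_adjustParameters()), the insertion rate scales with the hash
 * table size rather than with the window size. */
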
/** ZSTD_ldm_countBackwardsMatch() :
 *  Returns the number of bytes that match backwards before pIn and pMatch.
 *
 *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
        const BYTE* pIn, const BYTE* pAnchor,
        const BYTE* pMatch, const BYTE* pBase)
{
    size_t matchLength = 0;
    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
        pIn--;
        pMatch--;
        matchLength++;
    }
    return matchLength;
}

/** ZSTD_ldm_fillFastTables() :
 *
 *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
 *  This is similar to ZSTD_loadDictionaryContent.
 *
 *  The tables for the other strategies are filled within their
 *  block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
                                      void const* end)
{
    const BYTE* const iend = (const BYTE*)end;

    switch(ms->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        break;
    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    return 0;
}

/** ZSTD_ldm_fillLdmHashTable() :
 *
 *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
 *  lastHash is the rolling hash that corresponds to lastHashed.
 *
 *  Returns the rolling hash corresponding to position iend-1. */
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
                                     U64 lastHash, const BYTE* lastHashed,
                                     const BYTE* iend, const BYTE* base,
                                     U32 hBits, ldmParams_t const ldmParams)
{
    U64 rollingHash = lastHash;
    const BYTE* cur = lastHashed + 1;

    while (cur < iend) {
        rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
                                              cur[ldmParams.minMatchLength-1],
                                              state->hashPower);
        ZSTD_ldm_makeEntryAndInsertByTag(state,
                                         rollingHash, hBits,
                                         (U32)(cur - base), ldmParams);
        ++cur;
    }
    return rollingHash;
}

void ZSTD_ldm_fillHashTable(
            ldmState_t* state, const BYTE* ip,
            const BYTE* iend, ldmParams_t const* params)
{
    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
    if ((size_t)(iend - ip) >= params->minMatchLength) {
        U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
        ZSTD_ldm_fillLdmHashTable(
            state, startingHash, ip, iend - params->minMatchLength, state->window.base,
            params->hashLog - params->bucketSizeLog,
            *params);
    }
}

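/* Note: unlike ZSTD_ldm_fillLdmHashTable() above, this entry point derives
 * hBits and the starting hash itself and stops minMatchLength bytes before
 * iend, so every rolling-hash window it inserts lies entirely inside
 * [ip, iend). Callers can use it to index already-present content (for
 * example, dictionary data) before sequence generation starts. */
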
/** ZSTD_ldm_limitTableUpdate() :
 *
 *  Sets ms->nextToUpdate to a position closer to anchor if it has fallen
 *  far behind
 *  (after a long match, only update the tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
    U32 const current = (U32)(anchor - ms->window.base);
    if (current > ms->nextToUpdate + 1024) {
        ms->nextToUpdate =
            current - MIN(512, current - ms->nextToUpdate - 1024);
    }
}

static size_t ZSTD_ldm_generateSequences_internal(
        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    /* LDM parameters */
    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
    U32 const minMatchLength = params->minMatchLength;
    U64 const hashPower = ldmState->hashPower;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
    U32 const hashRateLog = params->hashRateLog;
    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
    /* Prefix and extDict parameters */
    U32 const dictLimit = ldmState->window.dictLimit;
    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
    BYTE const* const lowPrefixPtr = base + dictLimit;
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
    /* Input positions */
    BYTE const* anchor = istart;
    BYTE const* ip = istart;
    /* Rolling hash */
    BYTE const* lastHashed = NULL;
    U64 rollingHash = 0;

    while (ip <= ilimit) {
        size_t mLength;
        U32 const current = (U32)(ip - base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        if (ip != istart) {
            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
                                                  lastHashed[minMatchLength],
                                                  hashPower);
        } else {
            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
        }
        lastHashed = ip;

        /* Do not insert and do not look for a match */
        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
            ip++;
            continue;
        }

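        /* A position that passes the tag filter probes one bucket of
         * (1 << bucketSizeLog) entries sharing its small hash; candidates are
         * rejected by the 32-bit checksum before any byte comparison, so most
         * false positives are discarded cheaply. */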
        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   *params);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
                if (extDict) {
                    BYTE const* const curMatchBase =
                        cur->offset < dictLimit ? dictBase : base;
                    BYTE const* const pMatch = curMatchBase + cur->offset;
                    BYTE const* const matchEnd =
                        cur->offset < dictLimit ? dictEnd : iend;
                    BYTE const* const lowMatchPtr =
                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;

                    curForwardMatchLength = ZSTD_count_2segments(
                                                ip, pMatch, iend,
                                                matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowMatchPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                } else { /* !extDict */
                    BYTE const* const pMatch = base + cur->offset;
                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowPrefixPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                }

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                             hBits, current,
                                             *params);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        {
            /* Store the sequence:
             * ip = current - backwardMatchLength
             * The match is at (bestEntry->offset - backwardMatchLength)
             */
            U32 const matchIndex = bestEntry->offset;
            U32 const offset = current - matchIndex;
            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;

            /* Out of sequence storage */
            if (rawSeqStore->size == rawSeqStore->capacity)
                return ERROR(dstSize_tooSmall);
            seq->litLength = (U32)(ip - anchor);
            seq->matchLength = (U32)mLength;
            seq->offset = offset;
            rawSeqStore->size++;
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         *params);

        assert(ip + backwardMatchLength == lastHashed);

        /* Fill the hash table from lastHashed+1 to ip+mLength */
        /* Heuristic: don't need to fill the entire table at end of block */
        if (ip + mLength <= ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits, *params);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
    }
    return iend - anchor;
}

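/* Note: on success ZSTD_ldm_generateSequences_internal() returns the number
 * of trailing bytes after the last stored sequence (iend - anchor). The
 * caller, ZSTD_ldm_generateSequences(), carries these leftover literals over
 * and adds them to the first sequence generated for the next chunk. */
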
/*! ZSTD_ldm_reduceTable() :
 *  reduce table indexes by `reducerValue` */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
                                 U32 const reducerValue)
{
    U32 u;
    for (u = 0; u < size; u++) {
        if (table[u].offset < reducerValue) table[u].offset = 0;
        else table[u].offset -= reducerValue;
    }
}

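/* Note: ZSTD_ldm_reduceTable() is applied alongside
 * ZSTD_window_correctOverflow() below so that stored offsets keep pointing
 * at the same bytes after the window indices are re-based; entries whose
 * offset is smaller than the correction are zeroed and then always rejected
 * by the `cur->offset <= lowestIndex` check in the match search. */
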
size_t ZSTD_ldm_generateSequences(
        ldmState_t* ldmState, rawSeqStore_t* sequences,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    U32 const maxDist = 1U << params->windowLog;
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    size_t const kMaxChunkSize = 1 << 20;
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
    size_t chunk;
    size_t leftoverSize = 0;

    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
    /* Check that ZSTD_window_update() has been called for this chunk prior
     * to passing it to this function.
     */
    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    /* The input could be very large (in zstdmt), so it must be broken up into
     * chunks to enforce the maximum distance and handle overflow correction.
     */
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);
    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
        size_t const remaining = (size_t)(iend - chunkStart);
        BYTE const *const chunkEnd =
            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
        size_t const chunkSize = chunkEnd - chunkStart;
        size_t newLeftoverSize;
        size_t const prevSize = sequences->size;

        assert(chunkStart < iend);
        /* 1. Perform overflow correction if necessary. */
        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
            /* invalidate dictionaries on overflow correction */
            ldmState->loadedDictEnd = 0;
        }
        /* 2. We enforce the maximum offset allowed.
         *
         * kMaxChunkSize should be small enough that we don't lose too much of
         * the window through early invalidation.
         * TODO: * Test the chunk size.
         *       * Try invalidation after the sequence generation and test the
         *         offset against maxDist directly.
         *
         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
         * that any offset used is valid at the END of the sequence, since it may
         * be split into two sequences. This condition holds when using
         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
         * against maxDist directly, we'll have to carefully handle that case.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
        if (ZSTD_isError(newLeftoverSize))
            return newLeftoverSize;
        /* 4. We add the leftover literals from previous iterations to the first
         *    newly generated sequence, or add the `newLeftoverSize` if none are
         *    generated.
         */
        /* Prepend the leftover literals from the last call */
        if (prevSize < sequences->size) {
            sequences->seq[prevSize].litLength += (U32)leftoverSize;
            leftoverSize = newLeftoverSize;
        } else {
            assert(newLeftoverSize == chunkSize);
            leftoverSize += chunkSize;
        }
    }
    return 0;
}

void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
        if (srcSize <= seq->litLength) {
            /* Skip past srcSize literals */
            seq->litLength -= (U32)srcSize;
            return;
        }
        srcSize -= seq->litLength;
        seq->litLength = 0;
        if (srcSize < seq->matchLength) {
            /* Skip past the first srcSize of the match */
            seq->matchLength -= (U32)srcSize;
            if (seq->matchLength < minMatch) {
                /* The match is too short, omit it */
                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                    seq[1].litLength += seq[0].matchLength;
                }
                rawSeqStore->pos++;
            }
            return;
        }
        srcSize -= seq->matchLength;
        seq->matchLength = 0;
        rawSeqStore->pos++;
    }
}

/**
 * If the sequence length is longer than remaining then the sequence is split
 * between this block and the next.
 *
 * Returns the current sequence to handle, or if the rest of the block should
 * be literals, it returns a sequence with offset == 0.
 */
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
                                 U32 const remaining, U32 const minMatch)
{
    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
    assert(sequence.offset > 0);
    /* Likely: No partial sequence */
    if (remaining >= sequence.litLength + sequence.matchLength) {
        rawSeqStore->pos++;
        return sequence;
    }
    /* Cut the sequence short (offset == 0 ==> rest is literals). */
    if (remaining <= sequence.litLength) {
        sequence.offset = 0;
    } else if (remaining < sequence.litLength + sequence.matchLength) {
        sequence.matchLength = remaining - sequence.litLength;
        if (sequence.matchLength < minMatch) {
            sequence.offset = 0;
        }
    }
    /* Skip past `remaining` bytes for the future sequences. */
    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    return sequence;
}

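/* Note: ZSTD_ldm_blockCompress() below consumes rawSeqStore in order. For
 * each long-distance sequence it first runs the regular block compressor on
 * the literal gap, then records the long match itself via ZSTD_storeSeq()
 * and rotates the repcode history so the inner compressor keeps seeing
 * consistent rep offsets. */
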
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    void const* src, size_t srcSize)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    unsigned const minMatch = cParams->minMatch;
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    /* Input positions */
    BYTE const* ip = istart;

    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
    assert(rawSeqStore->pos <= rawSeqStore->size);
    assert(rawSeqStore->size <= rawSeqStore->capacity);
    /* Loop through each sequence and apply the block compressor to the lits */
    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
        /* maybeSplitSequence updates rawSeqStore->pos */
        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                   (U32)(iend - ip), minMatch);
        int i;
        /* End signal */
        if (sequence.offset == 0)
            break;

        assert(ip + sequence.litLength + sequence.matchLength <= iend);

        /* Fill tables for block compressor */
        ZSTD_ldm_limitTableUpdate(ms, ip);
        ZSTD_ldm_fillFastTables(ms, ip);
        /* Run the block compressor */
        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
        {
            size_t const newLitLength =
                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
            ip += sequence.litLength;
            /* Update the repcodes */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                          sequence.offset + ZSTD_REP_MOVE,
                          sequence.matchLength - MINMATCH);
            ip += sequence.matchLength;
        }
    }
    /* Fill the tables for the block compressor */
    ZSTD_ldm_limitTableUpdate(ms, ip);
    ZSTD_ldm_fillFastTables(ms, ip);
    /* Compress the last literals */
    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
}
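
/* Illustrative per-block call order (simplified sketch; the real drivers in
 * zstd_compress.c and zstdmt_compress.c also handle window updates, chunking
 * and error paths, and the variable names below are placeholders):
 *
 *   ZSTD_ldm_fillHashTable(&ldmState, dictStart, dictEnd, &params);  // optional: index a dictionary
 *   ZSTD_ldm_generateSequences(&ldmState, &rawSeqStore, &params, src, srcSize);
 *   ZSTD_ldm_blockCompress(&rawSeqStore, &ms, &seqStore, rep, src, srcSize);
 */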