Path: blob/main/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c
48774 views
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only1/*2* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.3* All rights reserved.4*5* This source code is licensed under both the BSD-style license (found in the6* LICENSE file in the root directory of this source tree) and the GPLv2 (found7* in the COPYING file in the root directory of this source tree).8* You may select, at your option, one of the above-listed licenses.9*/1011#include "zstd_compress_internal.h"12#include "zstd_lazy.h"131415/*-*************************************16* Binary Tree search17***************************************/1819static void20ZSTD_updateDUBT(ZSTD_matchState_t* ms,21const BYTE* ip, const BYTE* iend,22U32 mls)23{24const ZSTD_compressionParameters* const cParams = &ms->cParams;25U32* const hashTable = ms->hashTable;26U32 const hashLog = cParams->hashLog;2728U32* const bt = ms->chainTable;29U32 const btLog = cParams->chainLog - 1;30U32 const btMask = (1 << btLog) - 1;3132const BYTE* const base = ms->window.base;33U32 const target = (U32)(ip - base);34U32 idx = ms->nextToUpdate;3536if (idx != target)37DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",38idx, target, ms->window.dictLimit);39assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */40(void)iend;4142assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */43for ( ; idx < target ; idx++) {44size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */45U32 const matchIndex = hashTable[h];4647U32* const nextCandidatePtr = bt + 2*(idx&btMask);48U32* const sortMarkPtr = nextCandidatePtr + 1;4950DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);51hashTable[h] = idx; /* Update Hash Table */52*nextCandidatePtr = matchIndex; /* update BT like a chain */53*sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;54}55ms->nextToUpdate = target;56}575859/** ZSTD_insertDUBT1() :60* sort one already inserted but unsorted position61* assumption : current >= btlow == (current - btmask)62* doesn't fail */63static void64ZSTD_insertDUBT1(ZSTD_matchState_t* ms,65U32 current, const BYTE* inputEnd,66U32 nbCompares, U32 btLow,67const ZSTD_dictMode_e dictMode)68{69const ZSTD_compressionParameters* const cParams = &ms->cParams;70U32* const bt = ms->chainTable;71U32 const btLog = cParams->chainLog - 1;72U32 const btMask = (1 << btLog) - 1;73size_t commonLengthSmaller=0, commonLengthLarger=0;74const BYTE* const base = ms->window.base;75const BYTE* const dictBase = ms->window.dictBase;76const U32 dictLimit = ms->window.dictLimit;77const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;78const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;79const BYTE* const dictEnd = dictBase + dictLimit;80const BYTE* const prefixStart = base + dictLimit;81const BYTE* match;82U32* smallerPtr = bt + 2*(current&btMask);83U32* largerPtr = smallerPtr + 1;84U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */85U32 dummy32; /* to be nullified at the end */86U32 const windowValid = ms->window.lowLimit;87U32 const maxDistance = 1U << cParams->windowLog;88U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;899091DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",92current, dictLimit, windowLow);93assert(current >= btLow);94assert(ip < iend); /* condition for ZSTD_count */9596while (nbCompares-- && (matchIndex > windowLow)) {97U32* const nextPtr = bt + 2*(matchIndex & btMask);98size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */99assert(matchIndex < current);100/* note : all candidates are now supposed sorted,101* but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK102* when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */103104if ( (dictMode != ZSTD_extDict)105|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/106|| (current < dictLimit) /* both in extDict */) {107const BYTE* const mBase = ( (dictMode != ZSTD_extDict)108|| (matchIndex+matchLength >= dictLimit)) ?109base : dictBase;110assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */111|| (current < dictLimit) );112match = mBase + matchIndex;113matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);114} else {115match = dictBase + matchIndex;116matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);117if (matchIndex+matchLength >= dictLimit)118match = base + matchIndex; /* preparation for next read of match[matchLength] */119}120121DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",122current, matchIndex, (U32)matchLength);123124if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */125break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */126}127128if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */129/* match is smaller than current */130*smallerPtr = matchIndex; /* update smaller idx */131commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */132if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */133DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",134matchIndex, btLow, nextPtr[1]);135smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */136matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */137} else {138/* match is larger than current */139*largerPtr = matchIndex;140commonLengthLarger = matchLength;141if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */142DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",143matchIndex, btLow, nextPtr[0]);144largerPtr = nextPtr;145matchIndex = nextPtr[0];146} }147148*smallerPtr = *largerPtr = 0;149}150151152static size_t153ZSTD_DUBT_findBetterDictMatch (154ZSTD_matchState_t* ms,155const BYTE* const ip, const BYTE* const iend,156size_t* offsetPtr,157size_t bestLength,158U32 nbCompares,159U32 const mls,160const ZSTD_dictMode_e dictMode)161{162const ZSTD_matchState_t * const dms = ms->dictMatchState;163const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;164const U32 * const dictHashTable = dms->hashTable;165U32 const hashLog = dmsCParams->hashLog;166size_t const h = ZSTD_hashPtr(ip, hashLog, mls);167U32 dictMatchIndex = dictHashTable[h];168169const BYTE* const base = ms->window.base;170const BYTE* const prefixStart = base + ms->window.dictLimit;171U32 const current = (U32)(ip-base);172const BYTE* const dictBase = dms->window.base;173const BYTE* const dictEnd = dms->window.nextSrc;174U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);175U32 const dictLowLimit = dms->window.lowLimit;176U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;177178U32* const dictBt = dms->chainTable;179U32 const btLog = dmsCParams->chainLog - 1;180U32 const btMask = (1 << btLog) - 1;181U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;182183size_t commonLengthSmaller=0, commonLengthLarger=0;184185(void)dictMode;186assert(dictMode == ZSTD_dictMatchState);187188while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {189U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);190size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */191const BYTE* match = dictBase + dictMatchIndex;192matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);193if (dictMatchIndex+matchLength >= dictHighLimit)194match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */195196if (matchLength > bestLength) {197U32 matchIndex = dictMatchIndex + dictIndexDelta;198if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {199DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",200current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);201bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;202}203if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */204break; /* drop, to guarantee consistency (miss a little bit of compression) */205}206}207208if (match[matchLength] < ip[matchLength]) {209if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */210commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */211dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */212} else {213/* match is larger than current */214if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */215commonLengthLarger = matchLength;216dictMatchIndex = nextPtr[0];217}218}219220if (bestLength >= MINMATCH) {221U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;222DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",223current, (U32)bestLength, (U32)*offsetPtr, mIndex);224}225return bestLength;226227}228229230static size_t231ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,232const BYTE* const ip, const BYTE* const iend,233size_t* offsetPtr,234U32 const mls,235const ZSTD_dictMode_e dictMode)236{237const ZSTD_compressionParameters* const cParams = &ms->cParams;238U32* const hashTable = ms->hashTable;239U32 const hashLog = cParams->hashLog;240size_t const h = ZSTD_hashPtr(ip, hashLog, mls);241U32 matchIndex = hashTable[h];242243const BYTE* const base = ms->window.base;244U32 const current = (U32)(ip-base);245U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);246247U32* const bt = ms->chainTable;248U32 const btLog = cParams->chainLog - 1;249U32 const btMask = (1 << btLog) - 1;250U32 const btLow = (btMask >= current) ? 0 : current - btMask;251U32 const unsortLimit = MAX(btLow, windowLow);252253U32* nextCandidate = bt + 2*(matchIndex&btMask);254U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;255U32 nbCompares = 1U << cParams->searchLog;256U32 nbCandidates = nbCompares;257U32 previousCandidate = 0;258259DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);260assert(ip <= iend-8); /* required for h calculation */261262/* reach end of unsorted candidates list */263while ( (matchIndex > unsortLimit)264&& (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)265&& (nbCandidates > 1) ) {266DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",267matchIndex);268*unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */269previousCandidate = matchIndex;270matchIndex = *nextCandidate;271nextCandidate = bt + 2*(matchIndex&btMask);272unsortedMark = bt + 2*(matchIndex&btMask) + 1;273nbCandidates --;274}275276/* nullify last candidate if it's still unsorted277* simplification, detrimental to compression ratio, beneficial for speed */278if ( (matchIndex > unsortLimit)279&& (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {280DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",281matchIndex);282*nextCandidate = *unsortedMark = 0;283}284285/* batch sort stacked candidates */286matchIndex = previousCandidate;287while (matchIndex) { /* will end on matchIndex == 0 */288U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;289U32 const nextCandidateIdx = *nextCandidateIdxPtr;290ZSTD_insertDUBT1(ms, matchIndex, iend,291nbCandidates, unsortLimit, dictMode);292matchIndex = nextCandidateIdx;293nbCandidates++;294}295296/* find longest match */297{ size_t commonLengthSmaller = 0, commonLengthLarger = 0;298const BYTE* const dictBase = ms->window.dictBase;299const U32 dictLimit = ms->window.dictLimit;300const BYTE* const dictEnd = dictBase + dictLimit;301const BYTE* const prefixStart = base + dictLimit;302U32* smallerPtr = bt + 2*(current&btMask);303U32* largerPtr = bt + 2*(current&btMask) + 1;304U32 matchEndIdx = current + 8 + 1;305U32 dummy32; /* to be nullified at the end */306size_t bestLength = 0;307308matchIndex = hashTable[h];309hashTable[h] = current; /* Update Hash Table */310311while (nbCompares-- && (matchIndex > windowLow)) {312U32* const nextPtr = bt + 2*(matchIndex & btMask);313size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */314const BYTE* match;315316if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {317match = base + matchIndex;318matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);319} else {320match = dictBase + matchIndex;321matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);322if (matchIndex+matchLength >= dictLimit)323match = base + matchIndex; /* to prepare for next usage of match[matchLength] */324}325326if (matchLength > bestLength) {327if (matchLength > matchEndIdx - matchIndex)328matchEndIdx = matchIndex + (U32)matchLength;329if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )330bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;331if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */332if (dictMode == ZSTD_dictMatchState) {333nbCompares = 0; /* in addition to avoiding checking any334* further in this loop, make sure we335* skip checking in the dictionary. */336}337break; /* drop, to guarantee consistency (miss a little bit of compression) */338}339}340341if (match[matchLength] < ip[matchLength]) {342/* match is smaller than current */343*smallerPtr = matchIndex; /* update smaller idx */344commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */345if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */346smallerPtr = nextPtr+1; /* new "smaller" => larger of match */347matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */348} else {349/* match is larger than current */350*largerPtr = matchIndex;351commonLengthLarger = matchLength;352if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */353largerPtr = nextPtr;354matchIndex = nextPtr[0];355} }356357*smallerPtr = *largerPtr = 0;358359if (dictMode == ZSTD_dictMatchState && nbCompares) {360bestLength = ZSTD_DUBT_findBetterDictMatch(361ms, ip, iend,362offsetPtr, bestLength, nbCompares,363mls, dictMode);364}365366assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */367ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */368if (bestLength >= MINMATCH) {369U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;370DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",371current, (U32)bestLength, (U32)*offsetPtr, mIndex);372}373return bestLength;374}375}376377378/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */379FORCE_INLINE_TEMPLATE size_t380ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,381const BYTE* const ip, const BYTE* const iLimit,382size_t* offsetPtr,383const U32 mls /* template */,384const ZSTD_dictMode_e dictMode)385{386DEBUGLOG(7, "ZSTD_BtFindBestMatch");387if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */388ZSTD_updateDUBT(ms, ip, iLimit, mls);389return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);390}391392393static size_t394ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,395const BYTE* ip, const BYTE* const iLimit,396size_t* offsetPtr)397{398switch(ms->cParams.minMatch)399{400default : /* includes case 3 */401case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);402case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);403case 7 :404case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);405}406}407408409static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (410ZSTD_matchState_t* ms,411const BYTE* ip, const BYTE* const iLimit,412size_t* offsetPtr)413{414switch(ms->cParams.minMatch)415{416default : /* includes case 3 */417case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);418case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);419case 7 :420case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);421}422}423424425static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (426ZSTD_matchState_t* ms,427const BYTE* ip, const BYTE* const iLimit,428size_t* offsetPtr)429{430switch(ms->cParams.minMatch)431{432default : /* includes case 3 */433case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);434case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);435case 7 :436case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);437}438}439440441442/* *********************************443* Hash Chain444***********************************/445#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]446447/* Update chains up to ip (excluded)448Assumption : always within prefix (i.e. not within extDict) */449static U32 ZSTD_insertAndFindFirstIndex_internal(450ZSTD_matchState_t* ms,451const ZSTD_compressionParameters* const cParams,452const BYTE* ip, U32 const mls)453{454U32* const hashTable = ms->hashTable;455const U32 hashLog = cParams->hashLog;456U32* const chainTable = ms->chainTable;457const U32 chainMask = (1 << cParams->chainLog) - 1;458const BYTE* const base = ms->window.base;459const U32 target = (U32)(ip - base);460U32 idx = ms->nextToUpdate;461462while(idx < target) { /* catch up */463size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);464NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];465hashTable[h] = idx;466idx++;467}468469ms->nextToUpdate = target;470return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];471}472473U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {474const ZSTD_compressionParameters* const cParams = &ms->cParams;475return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);476}477478479/* inlining is important to hardwire a hot branch (template emulation) */480FORCE_INLINE_TEMPLATE481size_t ZSTD_HcFindBestMatch_generic (482ZSTD_matchState_t* ms,483const BYTE* const ip, const BYTE* const iLimit,484size_t* offsetPtr,485const U32 mls, const ZSTD_dictMode_e dictMode)486{487const ZSTD_compressionParameters* const cParams = &ms->cParams;488U32* const chainTable = ms->chainTable;489const U32 chainSize = (1 << cParams->chainLog);490const U32 chainMask = chainSize-1;491const BYTE* const base = ms->window.base;492const BYTE* const dictBase = ms->window.dictBase;493const U32 dictLimit = ms->window.dictLimit;494const BYTE* const prefixStart = base + dictLimit;495const BYTE* const dictEnd = dictBase + dictLimit;496const U32 current = (U32)(ip-base);497const U32 maxDistance = 1U << cParams->windowLog;498const U32 lowestValid = ms->window.lowLimit;499const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;500const U32 isDictionary = (ms->loadedDictEnd != 0);501const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;502const U32 minChain = current > chainSize ? current - chainSize : 0;503U32 nbAttempts = 1U << cParams->searchLog;504size_t ml=4-1;505506/* HC4 match finder */507U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);508509for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {510size_t currentMl=0;511if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {512const BYTE* const match = base + matchIndex;513assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */514if (match[ml] == ip[ml]) /* potentially better */515currentMl = ZSTD_count(ip, match, iLimit);516} else {517const BYTE* const match = dictBase + matchIndex;518assert(match+4 <= dictEnd);519if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */520currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;521}522523/* save best solution */524if (currentMl > ml) {525ml = currentMl;526*offsetPtr = current - matchIndex + ZSTD_REP_MOVE;527if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */528}529530if (matchIndex <= minChain) break;531matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);532}533534if (dictMode == ZSTD_dictMatchState) {535const ZSTD_matchState_t* const dms = ms->dictMatchState;536const U32* const dmsChainTable = dms->chainTable;537const U32 dmsChainSize = (1 << dms->cParams.chainLog);538const U32 dmsChainMask = dmsChainSize - 1;539const U32 dmsLowestIndex = dms->window.dictLimit;540const BYTE* const dmsBase = dms->window.base;541const BYTE* const dmsEnd = dms->window.nextSrc;542const U32 dmsSize = (U32)(dmsEnd - dmsBase);543const U32 dmsIndexDelta = dictLimit - dmsSize;544const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;545546matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];547548for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {549size_t currentMl=0;550const BYTE* const match = dmsBase + matchIndex;551assert(match+4 <= dmsEnd);552if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */553currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;554555/* save best solution */556if (currentMl > ml) {557ml = currentMl;558*offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;559if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */560}561562if (matchIndex <= dmsMinChain) break;563matchIndex = dmsChainTable[matchIndex & dmsChainMask];564}565}566567return ml;568}569570571FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (572ZSTD_matchState_t* ms,573const BYTE* ip, const BYTE* const iLimit,574size_t* offsetPtr)575{576switch(ms->cParams.minMatch)577{578default : /* includes case 3 */579case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);580case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);581case 7 :582case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);583}584}585586587static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (588ZSTD_matchState_t* ms,589const BYTE* ip, const BYTE* const iLimit,590size_t* offsetPtr)591{592switch(ms->cParams.minMatch)593{594default : /* includes case 3 */595case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);596case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);597case 7 :598case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);599}600}601602603FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (604ZSTD_matchState_t* ms,605const BYTE* ip, const BYTE* const iLimit,606size_t* offsetPtr)607{608switch(ms->cParams.minMatch)609{610default : /* includes case 3 */611case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);612case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);613case 7 :614case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);615}616}617618619/* *******************************620* Common parser - lazy strategy621*********************************/622typedef enum { search_hashChain, search_binaryTree } searchMethod_e;623624FORCE_INLINE_TEMPLATE size_t625ZSTD_compressBlock_lazy_generic(626ZSTD_matchState_t* ms, seqStore_t* seqStore,627U32 rep[ZSTD_REP_NUM],628const void* src, size_t srcSize,629const searchMethod_e searchMethod, const U32 depth,630ZSTD_dictMode_e const dictMode)631{632const BYTE* const istart = (const BYTE*)src;633const BYTE* ip = istart;634const BYTE* anchor = istart;635const BYTE* const iend = istart + srcSize;636const BYTE* const ilimit = iend - 8;637const BYTE* const base = ms->window.base;638const U32 prefixLowestIndex = ms->window.dictLimit;639const BYTE* const prefixLowest = base + prefixLowestIndex;640641typedef size_t (*searchMax_f)(642ZSTD_matchState_t* ms,643const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);644searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?645(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS646: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :647(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS648: ZSTD_HcFindBestMatch_selectMLS);649U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;650651const ZSTD_matchState_t* const dms = ms->dictMatchState;652const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?653dms->window.dictLimit : 0;654const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?655dms->window.base : NULL;656const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?657dictBase + dictLowestIndex : NULL;658const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?659dms->window.nextSrc : NULL;660const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?661prefixLowestIndex - (U32)(dictEnd - dictBase) :6620;663const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));664665DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);666667/* init */668ip += (dictAndPrefixLength == 0);669if (dictMode == ZSTD_noDict) {670U32 const current = (U32)(ip - base);671U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);672U32 const maxRep = current - windowLow;673if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;674if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;675}676if (dictMode == ZSTD_dictMatchState) {677/* dictMatchState repCode checks don't currently handle repCode == 0678* disabling. */679assert(offset_1 <= dictAndPrefixLength);680assert(offset_2 <= dictAndPrefixLength);681}682683/* Match Loop */684#if defined(__GNUC__) && defined(__x86_64__)685/* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the686* code alignment is perturbed. To fix the instability align the loop on 32-bytes.687*/688__asm__(".p2align 5");689#endif690while (ip < ilimit) {691size_t matchLength=0;692size_t offset=0;693const BYTE* start=ip+1;694695/* check repCode */696if (dictMode == ZSTD_dictMatchState) {697const U32 repIndex = (U32)(ip - base) + 1 - offset_1;698const BYTE* repMatch = (dictMode == ZSTD_dictMatchState699&& repIndex < prefixLowestIndex) ?700dictBase + (repIndex - dictIndexDelta) :701base + repIndex;702if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)703&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {704const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;705matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;706if (depth==0) goto _storeSequence;707}708}709if ( dictMode == ZSTD_noDict710&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {711matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;712if (depth==0) goto _storeSequence;713}714715/* first search (depth 0) */716{ size_t offsetFound = 999999999;717size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);718if (ml2 > matchLength)719matchLength = ml2, start = ip, offset=offsetFound;720}721722if (matchLength < 4) {723ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */724continue;725}726727/* let's try to find a better solution */728if (depth>=1)729while (ip<ilimit) {730ip ++;731if ( (dictMode == ZSTD_noDict)732&& (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {733size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;734int const gain2 = (int)(mlRep * 3);735int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);736if ((mlRep >= 4) && (gain2 > gain1))737matchLength = mlRep, offset = 0, start = ip;738}739if (dictMode == ZSTD_dictMatchState) {740const U32 repIndex = (U32)(ip - base) - offset_1;741const BYTE* repMatch = repIndex < prefixLowestIndex ?742dictBase + (repIndex - dictIndexDelta) :743base + repIndex;744if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)745&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {746const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;747size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;748int const gain2 = (int)(mlRep * 3);749int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);750if ((mlRep >= 4) && (gain2 > gain1))751matchLength = mlRep, offset = 0, start = ip;752}753}754{ size_t offset2=999999999;755size_t const ml2 = searchMax(ms, ip, iend, &offset2);756int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */757int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);758if ((ml2 >= 4) && (gain2 > gain1)) {759matchLength = ml2, offset = offset2, start = ip;760continue; /* search a better one */761} }762763/* let's find an even better one */764if ((depth==2) && (ip<ilimit)) {765ip ++;766if ( (dictMode == ZSTD_noDict)767&& (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {768size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;769int const gain2 = (int)(mlRep * 4);770int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);771if ((mlRep >= 4) && (gain2 > gain1))772matchLength = mlRep, offset = 0, start = ip;773}774if (dictMode == ZSTD_dictMatchState) {775const U32 repIndex = (U32)(ip - base) - offset_1;776const BYTE* repMatch = repIndex < prefixLowestIndex ?777dictBase + (repIndex - dictIndexDelta) :778base + repIndex;779if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)780&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {781const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;782size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;783int const gain2 = (int)(mlRep * 4);784int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);785if ((mlRep >= 4) && (gain2 > gain1))786matchLength = mlRep, offset = 0, start = ip;787}788}789{ size_t offset2=999999999;790size_t const ml2 = searchMax(ms, ip, iend, &offset2);791int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */792int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);793if ((ml2 >= 4) && (gain2 > gain1)) {794matchLength = ml2, offset = offset2, start = ip;795continue;796} } }797break; /* nothing found : store previous solution */798}799800/* NOTE:801* start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.802* (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which803* overflows the pointer, which is undefined behavior.804*/805/* catch up */806if (offset) {807if (dictMode == ZSTD_noDict) {808while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))809&& (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */810{ start--; matchLength++; }811}812if (dictMode == ZSTD_dictMatchState) {813U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));814const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;815const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;816while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */817}818offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);819}820/* store sequence */821_storeSequence:822{ size_t const litLength = start - anchor;823ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);824anchor = ip = start + matchLength;825}826827/* check immediate repcode */828if (dictMode == ZSTD_dictMatchState) {829while (ip <= ilimit) {830U32 const current2 = (U32)(ip-base);831U32 const repIndex = current2 - offset_2;832const BYTE* repMatch = dictMode == ZSTD_dictMatchState833&& repIndex < prefixLowestIndex ?834dictBase - dictIndexDelta + repIndex :835base + repIndex;836if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)837&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {838const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;839matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;840offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */841ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);842ip += matchLength;843anchor = ip;844continue;845}846break;847}848}849850if (dictMode == ZSTD_noDict) {851while ( ((ip <= ilimit) & (offset_2>0))852&& (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {853/* store sequence */854matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;855offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */856ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);857ip += matchLength;858anchor = ip;859continue; /* faster when present ... (?) */860} } }861862/* Save reps for next block */863rep[0] = offset_1 ? offset_1 : savedOffset;864rep[1] = offset_2 ? offset_2 : savedOffset;865866/* Return the last literals size */867return (size_t)(iend - anchor);868}869870871size_t ZSTD_compressBlock_btlazy2(872ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],873void const* src, size_t srcSize)874{875return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);876}877878size_t ZSTD_compressBlock_lazy2(879ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],880void const* src, size_t srcSize)881{882return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);883}884885size_t ZSTD_compressBlock_lazy(886ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],887void const* src, size_t srcSize)888{889return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);890}891892size_t ZSTD_compressBlock_greedy(893ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],894void const* src, size_t srcSize)895{896return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);897}898899size_t ZSTD_compressBlock_btlazy2_dictMatchState(900ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],901void const* src, size_t srcSize)902{903return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);904}905906size_t ZSTD_compressBlock_lazy2_dictMatchState(907ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],908void const* src, size_t srcSize)909{910return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);911}912913size_t ZSTD_compressBlock_lazy_dictMatchState(914ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],915void const* src, size_t srcSize)916{917return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);918}919920size_t ZSTD_compressBlock_greedy_dictMatchState(921ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],922void const* src, size_t srcSize)923{924return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);925}926927928FORCE_INLINE_TEMPLATE929size_t ZSTD_compressBlock_lazy_extDict_generic(930ZSTD_matchState_t* ms, seqStore_t* seqStore,931U32 rep[ZSTD_REP_NUM],932const void* src, size_t srcSize,933const searchMethod_e searchMethod, const U32 depth)934{935const BYTE* const istart = (const BYTE*)src;936const BYTE* ip = istart;937const BYTE* anchor = istart;938const BYTE* const iend = istart + srcSize;939const BYTE* const ilimit = iend - 8;940const BYTE* const base = ms->window.base;941const U32 dictLimit = ms->window.dictLimit;942const BYTE* const prefixStart = base + dictLimit;943const BYTE* const dictBase = ms->window.dictBase;944const BYTE* const dictEnd = dictBase + dictLimit;945const BYTE* const dictStart = dictBase + ms->window.lowLimit;946const U32 windowLog = ms->cParams.windowLog;947948typedef size_t (*searchMax_f)(949ZSTD_matchState_t* ms,950const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);951searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;952953U32 offset_1 = rep[0], offset_2 = rep[1];954955DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");956957/* init */958ip += (ip == prefixStart);959960/* Match Loop */961#if defined(__GNUC__) && defined(__x86_64__)962/* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the963* code alignment is perturbed. To fix the instability align the loop on 32-bytes.964*/965__asm__(".p2align 5");966#endif967while (ip < ilimit) {968size_t matchLength=0;969size_t offset=0;970const BYTE* start=ip+1;971U32 current = (U32)(ip-base);972973/* check repCode */974{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);975const U32 repIndex = (U32)(current+1 - offset_1);976const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;977const BYTE* const repMatch = repBase + repIndex;978if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */979& (offset_1 < current+1 - windowLow) ) /* note: we are searching at current+1 */980if (MEM_read32(ip+1) == MEM_read32(repMatch)) {981/* repcode detected we should take it */982const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;983matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;984if (depth==0) goto _storeSequence;985} }986987/* first search (depth 0) */988{ size_t offsetFound = 999999999;989size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);990if (ml2 > matchLength)991matchLength = ml2, start = ip, offset=offsetFound;992}993994if (matchLength < 4) {995ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */996continue;997}998999/* let's try to find a better solution */1000if (depth>=1)1001while (ip<ilimit) {1002ip ++;1003current++;1004/* check repCode */1005if (offset) {1006const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);1007const U32 repIndex = (U32)(current - offset_1);1008const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;1009const BYTE* const repMatch = repBase + repIndex;1010if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */1011& (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */1012if (MEM_read32(ip) == MEM_read32(repMatch)) {1013/* repcode detected */1014const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;1015size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;1016int const gain2 = (int)(repLength * 3);1017int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);1018if ((repLength >= 4) && (gain2 > gain1))1019matchLength = repLength, offset = 0, start = ip;1020} }10211022/* search match, depth 1 */1023{ size_t offset2=999999999;1024size_t const ml2 = searchMax(ms, ip, iend, &offset2);1025int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */1026int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);1027if ((ml2 >= 4) && (gain2 > gain1)) {1028matchLength = ml2, offset = offset2, start = ip;1029continue; /* search a better one */1030} }10311032/* let's find an even better one */1033if ((depth==2) && (ip<ilimit)) {1034ip ++;1035current++;1036/* check repCode */1037if (offset) {1038const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);1039const U32 repIndex = (U32)(current - offset_1);1040const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;1041const BYTE* const repMatch = repBase + repIndex;1042if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */1043& (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */1044if (MEM_read32(ip) == MEM_read32(repMatch)) {1045/* repcode detected */1046const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;1047size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;1048int const gain2 = (int)(repLength * 4);1049int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);1050if ((repLength >= 4) && (gain2 > gain1))1051matchLength = repLength, offset = 0, start = ip;1052} }10531054/* search match, depth 2 */1055{ size_t offset2=999999999;1056size_t const ml2 = searchMax(ms, ip, iend, &offset2);1057int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */1058int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);1059if ((ml2 >= 4) && (gain2 > gain1)) {1060matchLength = ml2, offset = offset2, start = ip;1061continue;1062} } }1063break; /* nothing found : store previous solution */1064}10651066/* catch up */1067if (offset) {1068U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));1069const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;1070const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;1071while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */1072offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);1073}10741075/* store sequence */1076_storeSequence:1077{ size_t const litLength = start - anchor;1078ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);1079anchor = ip = start + matchLength;1080}10811082/* check immediate repcode */1083while (ip <= ilimit) {1084const U32 repCurrent = (U32)(ip-base);1085const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);1086const U32 repIndex = repCurrent - offset_2;1087const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;1088const BYTE* const repMatch = repBase + repIndex;1089if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */1090& (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */1091if (MEM_read32(ip) == MEM_read32(repMatch)) {1092/* repcode detected we should take it */1093const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;1094matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;1095offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */1096ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);1097ip += matchLength;1098anchor = ip;1099continue; /* faster when present ... (?) */1100}1101break;1102} }11031104/* Save reps for next block */1105rep[0] = offset_1;1106rep[1] = offset_2;11071108/* Return the last literals size */1109return (size_t)(iend - anchor);1110}111111121113size_t ZSTD_compressBlock_greedy_extDict(1114ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1115void const* src, size_t srcSize)1116{1117return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);1118}11191120size_t ZSTD_compressBlock_lazy_extDict(1121ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1122void const* src, size_t srcSize)11231124{1125return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);1126}11271128size_t ZSTD_compressBlock_lazy2_extDict(1129ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1130void const* src, size_t srcSize)11311132{1133return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);1134}11351136size_t ZSTD_compressBlock_btlazy2_extDict(1137ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],1138void const* src, size_t srcSize)11391140{1141return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);1142}114311441145