/* Source: Utilities/cmzstd/lib/compress/zstd_double_fast.c (vendored zstd) */
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"

/* Fill both hash tables (long 8-byte hash and short mls-byte hash) with
 * positions from [ms->nextToUpdate, end), for tables destined for a CDict.
 * Entries are written via ZSTD_writeTaggedIndex with hash widths enlarged by
 * ZSTD_SHORT_CACHE_TAG_BITS, i.e. each slot packs an index together with a
 * short tag that later searches compare (ZSTD_comparePackedTags) before
 * dereferencing dictionary memory. */
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    U32 const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0) {
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
            }
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
            }
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

/* Same fill strategy as the CDict variant above, but for tables used directly
 * by a CCtx: indices are stored raw (no tag bits), as expected by the search
 * functions below. */
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32 const hBitsL = cParams->hashLog;
    U32 const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32 const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

/* Dispatch on table-fill purpose: CDict tables store tagged indices,
 * CCtx tables store raw indices (see the two helpers above). */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp == ZSTD_tfp_forCDict) {
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
    } else {
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
    }
}


/* Double-fast block compressor, no dictionary: every probed position checks a
 * long (8-byte) hash table and a short (mls-byte) hash table against the
 * current window prefix only.  The gap between probed positions ("step")
 * grows each time kStepIncr positions pass without a match, so incompressible
 * regions are skimmed quickly. */
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
    /* If the block starts at the very beginning of the prefix, skip index 0:
     * match candidates below must satisfy idx > prefixLowestIndex. */
    ip += ((ip - prefixLowest) == 0);
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        /* A rep offset that would reach below the window is disabled (set to 0)
         * but remembered in offsetSaved*, to be restored at _cleanup. */
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            if (idxl0 > prefixLowestIndex) {
                /* check prefix long match */
                if (MEM_read64(matchl0) == MEM_read64(ip)) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
                }
            }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            if (idxs0 > prefixLowestIndex) {
                /* check prefix short match */
                if (MEM_read32(matchs0) == MEM_read32(ip)) {
                    goto _search_next_long;
                }
            }

            /* No match at this position: after kStepIncr fruitless positions,
             * widen the stride and prefetch ahead. */
            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
#if defined(__aarch64__)
            PREFETCH_L1(ip+256);
#endif
        } while (ip1 <= ilimit);

_cleanup:
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

_search_next_long:

        /* check prefix long +1 match */
        if (idxl1 > prefixLowestIndex) {
            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
                ip = ip1;
                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
                offset = (U32)(ip-matchl1);
                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
                goto _match_found;
            }
        }

        /* if no long +1 match, explore the short match we found */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */

        /* fall-through */

_match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hashtable when ip1 is
             * greater than or equal to the new ip we will have after we're done
             * processing this match. Rather than perform that test directly
             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
             * more predictable test. The minmatch even if we take a short match is
             * 4 bytes, so as long as step, the distance between ip and ip1
             * (initially) is less than 4, we know ip1 < new ip. */
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
        }
    }
}


/* Double-fast block compressor with an attached dictionary (ms->dictMatchState).
 * Prefix candidates are tried first; dictionary tables are consulted only when
 * the prefix index is out of range, and their entries carry short tags
 * (ZSTD_SHORT_CACHE_TAG_BITS) checked via ZSTD_comparePackedTags before the
 * dictionary bytes are read.  Matches and repcodes may straddle the
 * dictionary/prefix boundary (ZSTD_count_2segments). */
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    /* maps a dictionary index into the shared index space below prefixLowestIndex */
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

    if (ms->prefetchCDictTables) {
        /* warm the dictionary tables before the search loop */
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes)
        PREFETCH_AREA(dictHashSmall, chainTableBytes)
    }

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if (matchIndexL > prefixLowestIndex) {
            /* check prefix long match */
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                goto _match_found;
            }
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        /* no candidate: skip ahead, faster as the distance since last match grows */
        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

_search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if (matchIndexL3 > prefixLowestIndex) {
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                    goto _match_found;
                }
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            /* short match lives in the dictionary segment */
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

/* Instantiate a static wrapper specialized for one dictMode and one minMatch
 * (mls) value, forwarding to the matching _generic template above. */
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


/* Public entry point, no dictionary: dispatch to the minMatch-specialized
 * noDict variant. */
size_t ZSTD_compressBlock_doubleFast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}


/* Public entry point, attached dictionary: dispatch to the minMatch-specialized
 * dictMatchState variant. */
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}


/* Double-fast block compressor where the lower part of the window (indices
 * below prefixStartIndex) lives in a separate "extDict" buffer: candidates,
 * repcodes and match lengths may span the two segments
 * (ZSTD_count_2segments), so each index is first resolved against either
 * dictBase or base. */
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32 const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32 const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                /* short match at ip: before taking it, try for a long match at ip+1 */
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
                    & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

/* Public entry point, external-dictionary window: dispatch to the
 * minMatch-specialized extDict variant. */
size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}