Path: blob/main/sys/contrib/zstd/lib/compress/zstd_double_fast.c
48378 views
/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"


/* Pre-loads the hash tables (long-hash table `hashTable`, short-hash table
 * `chainTable`) with positions from [base+nextToUpdate, end-HASH_READ_SIZE].
 * Positions are visited in strides of `fastHashFillStep` (3); the first
 * position of each stride always goes into both tables, while the remaining
 * positions only fill empty slots of the large table, and only when
 * dtlm == ZSTD_dtlm_full (ZSTD_dtlm_fast breaks out after the first position). */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}


/* Double-fast block compressor, no-dictionary variant.
 * Searches with two hash tables: a "long" table keyed on 8 bytes and a
 * "short" table keyed on `mls` bytes.  For each position it checks, in order:
 * repcode at ip+1, long match at ip, then (if only a short match exists)
 * a long match at ip+1 before settling for the short match.
 * `mls` is a template parameter (one instantiation per minMatch value).
 * Returns the size of the last literals (bytes after the final sequence). */
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved = 0;   /* holds a rep offset temporarily disabled for being out of range */

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
    ip += ((ip - prefixLowest) == 0);   /* skip first byte: position 0 cannot be referenced */
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        /* disable rep offsets that would reach below the valid window;
         * the saved value is restored into rep[] on exit */
        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            if (idxl0 > prefixLowestIndex) {
                /* check prefix long match */
                if (MEM_read64(matchl0) == MEM_read64(ip)) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
                }
            }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            if (idxs0 > prefixLowestIndex) {
                /* check prefix short match */
                if (MEM_read32(matchs0) == MEM_read32(ip)) {
                    goto _search_next_long;
                }
            }

            /* no match this position: possibly accelerate the scan */
            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
#if defined(__aarch64__)
            PREFETCH_L1(ip+256);
#endif
        } while (ip1 <= ilimit);

    _cleanup:
        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved;
        rep[1] = offset_2 ? offset_2 : offsetSaved;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

    _search_next_long:

        /* check prefix long +1 match */
        if (idxl1 > prefixLowestIndex) {
            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
                ip = ip1;
                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
                offset = (U32)(ip-matchl1);
                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
                goto _match_found;
            }
        }

        /* if no long +1 match, explore the short match we found */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */

        /* fall-through */

    _match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hashtable when ip1 is
             * greater than or equal to the new ip we will have after we're done
             * processing this match. Rather than perform that test directly
             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
             * more predictable test. The minmatch even if we take a short match is
             * 4 bytes, so as long as step, the distance between ip and ip1
             * (initially) is less than 4, we know ip1 < new ip. */
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

    _match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
        }
    }
}


/* Double-fast block compressor, dictMatchState variant.
 * Same two-table search as the noDict variant, but when a table index falls
 * below prefixLowestIndex the candidate is looked up in the attached
 * dictionary's tables instead (indices translated via dictIndexDelta), and
 * matches may extend across the dictionary/prefix boundary
 * (ZSTD_count_2segments).  Returns the size of the last literals. */
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved = 0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    /* maps a current-window index below prefixLowestIndex into the dict's index space */
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog;
    const U32 dictHBitsS           = dictCParams->chainLog;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
            goto _match_stored;
        }

        if (matchIndexL > prefixLowestIndex) {
            /* check prefix long match */
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                goto _match_found;
            }
        } else {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictHashLong[dictHL];
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictHashSmall[dictHS];
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        /* no match: skip ahead, faster the longer we've gone without a match */
        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

_search_next_long:

        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if (matchIndexL3 > prefixLowestIndex) {
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                    goto _match_found;
                }
            } else {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            /* short match candidate lives in the dictionary segment */
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

/* Generates a static non-template wrapper for one (dictMode, mls) pair,
 * so the mls dispatch below resolves to fully-specialized code. */
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


/* Public entry point, no-dictionary mode: dispatches on minMatch (4..7). */
size_t ZSTD_compressBlock_doubleFast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}


/* Public entry point, dictMatchState mode: dispatches on minMatch (4..7). */
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}


/* Double-fast block compressor, external-dictionary (extDict) variant.
 * Candidate and rep positions may resolve into either the current window
 * (base) or the out-of-place dictionary segment (dictBase), selected by
 * comparing the index against prefixStartIndex; matches crossing the
 * boundary use ZSTD_count_2segments.  Returns the size of the last literals. */
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                /* short match found: before taking it, look for a long match at ip+1 */
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
                    & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

/* Public entry point, extDict mode: dispatches on minMatch (4..7). */
size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}