Path: blob/master/Utilities/cmzstd/lib/compress/zstd_double_fast.c
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"

#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR

static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0) {
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
            }
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
            }
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp == ZSTD_tfp_forCDict) {
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
    } else {
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
    }
}

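/* Illustration (a sketch, not upstream code): the ForCDict fill path above
 * stores "tagged" table entries. Assuming the layout from
 * zstd_compress_internal.h with ZSTD_SHORT_CACHE_TAG_BITS == 8, hashes are
 * computed at hashLog + 8 bits: the top hashLog bits select the bucket, and
 * the low 8 bits travel with the index as a tag, letting a later lookup
 * reject most false bucket hits without dereferencing dictionary bytes.
 * Roughly:
 */
#if 0   /* illustrative only; hypothetical helper name */
static void sketch_writeTaggedIndex(U32* table, size_t hashAndTag, U32 index)
{
    size_t const bucket = hashAndTag >> 8;           /* top bits pick the slot */
    U32    const tag    = (U32)(hashAndTag & 0xFF);  /* low 8 bits are the tag */
    table[bucket] = (index << 8) | tag;   /* index must fit in 24 bits */
}
#endif
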
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */
    const BYTE* matchs0_safe; /* matchs0 or safe address */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */
    /* Array of ~random data; should have low probability of matching data.
     * We load from here instead of from the tables, if matchl0/matchl1 are
     * invalid indices. Used to avoid unpredictable branches. */
    const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
    ip += ((ip - prefixLowest) == 0);
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

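            /* Background for the check below (explanatory note, not upstream
             * text): ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0])
             * acts like "(idxl0 > prefixLowestIndex) ? matchl0 : &dummy[0]" but is
             * written to encourage a conditional move. Reading 8 bytes from
             * dummy[] is always in-bounds (the array holds 10 bytes), and its
             * contents are unlikely to equal the input, so the speculative
             * MEM_read64() compare is safe and almost always fails cheaply. */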
            /* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch.
             * However expression below compiles into conditional move. Since
             * match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
             * if there is a match, all branches become predictable. */
            {   const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);

                /* check prefix long match */
                if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
            }   }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            /* Same optimization as matchl0 above */
            matchs0_safe = ZSTD_selectAddr(idxs0, prefixLowestIndex, matchs0, &dummy[0]);

            /* check prefix short match */
            if (MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) {
                goto _search_next_long;
            }

            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
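
            /* Worked example of the stride scheme above (assuming upstream's
             * kSearchStrength == 8, i.e. kStepIncr == 256): starting from the
             * last stored match, positions are probed with step 1 for the
             * first 256 bytes, step 2 for the next 256, step 3 after that,
             * and so on. Incompressible stretches are skipped progressively
             * faster, and step resets to 1 once a match is stored. */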
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
#if defined(__aarch64__)
            PREFETCH_L1(ip+256);
#endif
        } while (ip1 <= ilimit);

_cleanup:
    /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
     * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved1;
    rep[1] = offset_2 ? offset_2 : offsetSaved2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);

_search_next_long:

    /* short match found: let's check for a longer one */
    mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
    offset = (U32)(ip - matchs0);

    /* check long match at +1 position */
    if ((idxl1 > prefixLowestIndex) && (MEM_read64(matchl1) == MEM_read64(ip1))) {
        size_t const l1len = ZSTD_count(ip1+8, matchl1+8, iend) + 8;
        if (l1len > mLength) {
            /* use the long match instead */
            ip = ip1;
            mLength = l1len;
            offset = (U32)(ip-matchl1);
            matchs0 = matchl1;
        }
    }

    while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* complete backward */

    /* fall-through */

_match_found: /* requires ip, offset, mLength */
    offset_2 = offset_1;
    offset_1 = offset;

    if (step < 4) {
        /* It is unsafe to write this value back to the hashtable when ip1 is
         * greater than or equal to the new ip we will have after we're done
         * processing this match. Rather than perform that test directly
         * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
         * more predictable test. The minmatch even if we take a short match is
         * 4 bytes, so as long as step, the distance between ip and ip1
         * (initially) is less than 4, we know ip1 < new ip. */
        hashLong[hl1] = (U32)(ip1 - base);
    }

    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
    /* match found */
    ip += mLength;
    anchor = ip;

    if (ip <= ilimit) {
        /* Complementary insertion */
        /* done after iLimit test, as candidates could be > iend-8 */
        {   U32 const indexToInsert = curr+2;
            hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
            hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
            hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
        }

        /* check immediate repcode */
        while ( (ip <= ilimit)
             && ( (offset_2>0)
                & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
            /* store sequence */
            size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
            U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
            hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
            hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
            ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
            ip += rLength;
            anchor = ip;
            continue;   /* faster when present ... (?) */
        }
    }
    }
}

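/* The dictMatchState variant below reads the dictionary's tagged tables
 * (filled by ZSTD_fillDoubleHashTableForCDict above). A sketch of the tag
 * check, under the same 8-bit-tag assumption: an entry is considered only if
 * its low 8 tag bits equal those of the query hash, so roughly 255 of 256
 * spurious bucket collisions are rejected before any dictionary bytes are
 * touched. Note also that the dict variants keep the classic
 * ((ip - anchor) >> kSearchStrength) + 1 skip instead of the growing-stride
 * scheme of the noDict path above. */
#if 0   /* illustrative only; hypothetical helper name */
static int sketch_comparePackedTags(U32 packedEntry, size_t hashAndTag)
{
    return (packedEntry & 0xFF) == (U32)(hashAndTag & 0xFF);  /* tags only */
}
#endif
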
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_MatchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

    if (ms->prefetchCDictTables) {
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes);
        PREFETCH_AREA(dictHashSmall, chainTableBytes);
    }

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
            /* check prefix long match */
            mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
            offset = (U32)(ip-matchLong);
            while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
            goto _match_found;
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; }   /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* short match candidate */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

_search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
                mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                ip++;
                offset = (U32)(ip-matchL3);
                while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; }   /* catch up */
                goto _match_found;
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; }   /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                        \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(             \
            ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
            void const* src, size_t srcSize)                                    \
    {                                                                           \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }
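
/* For reference, a sketch of what one instantiation below expands to
 * (whitespace added; the preprocessor generates exactly this shape): */
#if 0
static size_t ZSTD_compressBlock_doubleFast_noDict_4(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 4);
}
#endif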

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


size_t ZSTD_compressBlock_doubleFast(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}
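
/* Context (a sketch; the dispatch itself lives in zstd_compress.c): block
 * compression resolves a function pointer by strategy and dictionary mode,
 * so cParams selecting ZSTD_dfast land here, in
 * ZSTD_compressBlock_doubleFast_dictMatchState, or in the extDict variant
 * below. minMatch is validated elsewhere against [3, 7]; 3 is served by the
 * mls==4 template via the default label above. */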

size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}


static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
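
    /* A note on the two-segment index space below (describing the existing
     * scheme): candidate indices below prefixStartIndex belong to the old
     * ("dict") segment and are dereferenced via dictBase, while indices at or
     * above it are dereferenced via base. A match may begin in the old
     * segment and continue into the current one, which is why lengths are
     * measured with ZSTD_count_2segments() against dictEnd/iend. */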
    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if (((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
                    & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
        }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}

#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */