Path: blob/master/Utilities/cmzstd/lib/compress/zstd_preSplit.c
4998 views
/*1* Copyright (c) Meta Platforms, Inc. and affiliates.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/910#include "../common/compiler.h" /* ZSTD_ALIGNOF */11#include "../common/mem.h" /* S64 */12#include "../common/zstd_deps.h" /* ZSTD_memset */13#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */14#include "hist.h" /* HIST_add */15#include "zstd_preSplit.h"161718#define BLOCKSIZE_MIN 350019#define THRESHOLD_PENALTY_RATE 1620#define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2)21#define THRESHOLD_PENALTY 32223#define HASHLENGTH 224#define HASHLOG_MAX 1025#define HASHTABLESIZE (1 << HASHLOG_MAX)26#define HASHMASK (HASHTABLESIZE - 1)27#define KNUTH 0x9e3779b92829/* for hashLog > 8, hash 2 bytes.30* for hashLog == 8, just take the byte, no hashing.31* The speed of this method relies on compile-time constant propagation */32FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)33{34assert(hashLog >= 8);35if (hashLog == 8) return (U32)((const BYTE*)p)[0];36assert(hashLog <= HASHLOG_MAX);37return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);38}394041typedef struct {42unsigned events[HASHTABLESIZE];43size_t nbEvents;44} Fingerprint;45typedef struct {46Fingerprint pastEvents;47Fingerprint newEvents;48} FPStats;4950static void initStats(FPStats* fpstats)51{52ZSTD_memset(fpstats, 0, sizeof(FPStats));53}5455FORCE_INLINE_TEMPLATE void56addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)57{58const char* p = (const char*)src;59size_t limit = srcSize - HASHLENGTH + 1;60size_t n;61assert(srcSize >= HASHLENGTH);62for (n = 0; n < limit; n+=samplingRate) {63fp->events[hash2(p+n, hashLog)]++;64}65fp->nbEvents += limit/samplingRate;66}6768FORCE_INLINE_TEMPLATE void69recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)70{71ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));72fp->nbEvents = 0;73addEvents_generic(fp, src, srcSize, samplingRate, hashLog);74}7576typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);7778#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate7980#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \81static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \82{ \83recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \84}8586ZSTD_GEN_RECORD_FINGERPRINT(1, 10)87ZSTD_GEN_RECORD_FINGERPRINT(5, 10)88ZSTD_GEN_RECORD_FINGERPRINT(11, 9)89ZSTD_GEN_RECORD_FINGERPRINT(43, 8)909192static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }9394static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)95{96U64 distance = 0;97size_t n;98assert(hashLog <= HASHLOG_MAX);99for (n = 0; n < ((size_t)1 << hashLog); n++) {100distance +=101abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);102}103return distance;104}105106/* Compare newEvents with pastEvents107* return 1 when considered "too different"108*/109static int compareFingerprints(const Fingerprint* ref,110const Fingerprint* newfp,111int penalty,112unsigned hashLog)113{114assert(ref->nbEvents > 0);115assert(newfp->nbEvents > 0);116{ U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;117U64 deviation = fpDistance(ref, newfp, hashLog);118U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;119return deviation >= threshold;120}121}122123static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp)124{125size_t n;126for (n = 0; n < HASHTABLESIZE; n++) {127acc->events[n] += newfp->events[n];128}129acc->nbEvents += newfp->nbEvents;130}131132static void flushEvents(FPStats* fpstats)133{134size_t n;135for (n = 0; n < HASHTABLESIZE; n++) {136fpstats->pastEvents.events[n] = fpstats->newEvents.events[n];137}138fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents;139ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));140}141142static void removeEvents(Fingerprint* acc, const Fingerprint* slice)143{144size_t n;145for (n = 0; n < HASHTABLESIZE; n++) {146assert(acc->events[n] >= slice->events[n]);147acc->events[n] -= slice->events[n];148}149acc->nbEvents -= slice->nbEvents;150}151152#define CHUNKSIZE (8 << 10)153static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,154int level,155void* workspace, size_t wkspSize)156{157static const RecordEvents_f records_fs[] = {158FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)159};160static const unsigned hashParams[] = { 8, 9, 10, 10 };161const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);162FPStats* const fpstats = (FPStats*)workspace;163const char* p = (const char*)blockStart;164int penalty = THRESHOLD_PENALTY;165size_t pos = 0;166assert(blockSize == (128 << 10));167assert(workspace != NULL);168assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);169ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));170assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;171172initStats(fpstats);173record_f(&fpstats->pastEvents, p, CHUNKSIZE);174for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {175record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);176if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {177return pos;178} else {179mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);180if (penalty > 0) penalty--;181}182}183assert(pos == blockSize);184return blockSize;185(void)flushEvents; (void)removeEvents;186}187188/* ZSTD_splitBlock_fromBorders(): very fast strategy :189* compare fingerprint from beginning and end of the block,190* derive from their difference if it's preferable to split in the middle,191* repeat the process a second time, for finer grained decision.192* 3 times did not brought improvements, so I stopped at 2.193* Benefits are good enough for a cheap heuristic.194* More accurate splitting saves more, but speed impact is also more perceptible.195* For better accuracy, use more elaborate variant *_byChunks.196*/197static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,198void* workspace, size_t wkspSize)199{200#define SEGMENT_SIZE 512201FPStats* const fpstats = (FPStats*)workspace;202Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));203assert(blockSize == (128 << 10));204assert(workspace != NULL);205assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);206ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));207assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;208209initStats(fpstats);210HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);211HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);212fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;213if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))214return blockSize;215216HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);217middleEvents->nbEvents = SEGMENT_SIZE;218{ U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);219U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);220U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;221if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)222return 64 KB;223return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;224}225}226227size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,228int level,229void* workspace, size_t wkspSize)230{231DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);232assert(0<=level && level<=4);233if (level == 0)234return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);235/* level >= 1*/236return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);237}238239240