Path: blob/main/sys/contrib/openzfs/module/zstd/lib/compress/zstd_preSplit.c
178701 views
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only1/*2* Copyright (c) Meta Platforms, Inc. and affiliates.3* All rights reserved.4*5* This source code is licensed under both the BSD-style license (found in the6* LICENSE file in the root directory of this source tree) and the GPLv2 (found7* in the COPYING file in the root directory of this source tree).8* You may select, at your option, one of the above-listed licenses.9*/1011#include "../common/compiler.h" /* ZSTD_ALIGNOF */12#include "../common/mem.h" /* S64 */13#include "../common/zstd_deps.h" /* ZSTD_memset */14#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */15#include "hist.h" /* HIST_add */16#include "zstd_preSplit.h"171819#define BLOCKSIZE_MIN 350020#define THRESHOLD_PENALTY_RATE 1621#define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2)22#define THRESHOLD_PENALTY 32324#define HASHLENGTH 225#define HASHLOG_MAX 1026#define HASHTABLESIZE (1 << HASHLOG_MAX)27#define HASHMASK (HASHTABLESIZE - 1)28#define KNUTH 0x9e3779b92930/* for hashLog > 8, hash 2 bytes.31* for hashLog == 8, just take the byte, no hashing.32* The speed of this method relies on compile-time constant propagation */33FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)34{35assert(hashLog >= 8);36if (hashLog == 8) return (U32)((const BYTE*)p)[0];37assert(hashLog <= HASHLOG_MAX);38return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);39}404142typedef struct {43unsigned events[HASHTABLESIZE];44size_t nbEvents;45} Fingerprint;46typedef struct {47Fingerprint pastEvents;48Fingerprint newEvents;49} FPStats;5051static void initStats(FPStats* fpstats)52{53ZSTD_memset(fpstats, 0, sizeof(FPStats));54}5556FORCE_INLINE_TEMPLATE void57addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)58{59const char* p = (const char*)src;60size_t limit = srcSize - HASHLENGTH + 1;61size_t n;62assert(srcSize >= HASHLENGTH);63for (n = 0; n < limit; n+=samplingRate) {64fp->events[hash2(p+n, hashLog)]++;65}66fp->nbEvents += limit/samplingRate;67}6869FORCE_INLINE_TEMPLATE void70recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)71{72ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));73fp->nbEvents = 0;74addEvents_generic(fp, src, srcSize, samplingRate, hashLog);75}7677typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);7879#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate8081#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \82static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \83{ \84recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \85}8687ZSTD_GEN_RECORD_FINGERPRINT(1, 10)88ZSTD_GEN_RECORD_FINGERPRINT(5, 10)89ZSTD_GEN_RECORD_FINGERPRINT(11, 9)90ZSTD_GEN_RECORD_FINGERPRINT(43, 8)919293static U64 ZSTD_abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }9495static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)96{97U64 distance = 0;98size_t n;99assert(hashLog <= HASHLOG_MAX);100for (n = 0; n < ((size_t)1 << hashLog); n++) {101distance +=102ZSTD_abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);103}104return distance;105}106107/* Compare newEvents with pastEvents108* return 1 when considered "too different"109*/110static int compareFingerprints(const Fingerprint* ref,111const Fingerprint* newfp,112int penalty,113unsigned hashLog)114{115assert(ref->nbEvents > 0);116assert(newfp->nbEvents > 0);117{ U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;118U64 deviation = fpDistance(ref, newfp, hashLog);119U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;120return deviation >= threshold;121}122}123124static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp)125{126size_t n;127for (n = 0; n < HASHTABLESIZE; n++) {128acc->events[n] += newfp->events[n];129}130acc->nbEvents += newfp->nbEvents;131}132133static void flushEvents(FPStats* fpstats)134{135size_t n;136for (n = 0; n < HASHTABLESIZE; n++) {137fpstats->pastEvents.events[n] = fpstats->newEvents.events[n];138}139fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents;140ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));141}142143static void removeEvents(Fingerprint* acc, const Fingerprint* slice)144{145size_t n;146for (n = 0; n < HASHTABLESIZE; n++) {147assert(acc->events[n] >= slice->events[n]);148acc->events[n] -= slice->events[n];149}150acc->nbEvents -= slice->nbEvents;151}152153#define CHUNKSIZE (8 << 10)154static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,155int level,156void* workspace, size_t wkspSize)157{158static const RecordEvents_f records_fs[] = {159FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)160};161static const unsigned hashParams[] = { 8, 9, 10, 10 };162const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);163FPStats* const fpstats = (FPStats*)workspace;164const char* p = (const char*)blockStart;165int penalty = THRESHOLD_PENALTY;166size_t pos = 0;167assert(blockSize == (128 << 10));168assert(workspace != NULL);169assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);170ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));171assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;172173initStats(fpstats);174record_f(&fpstats->pastEvents, p, CHUNKSIZE);175for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {176record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);177if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {178return pos;179} else {180mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);181if (penalty > 0) penalty--;182}183}184assert(pos == blockSize);185return blockSize;186(void)flushEvents; (void)removeEvents;187}188189/* ZSTD_splitBlock_fromBorders(): very fast strategy :190* compare fingerprint from beginning and end of the block,191* derive from their difference if it's preferable to split in the middle,192* repeat the process a second time, for finer grained decision.193* 3 times did not brought improvements, so I stopped at 2.194* Benefits are good enough for a cheap heuristic.195* More accurate splitting saves more, but speed impact is also more perceptible.196* For better accuracy, use more elaborate variant *_byChunks.197*/198static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,199void* workspace, size_t wkspSize)200{201#define SEGMENT_SIZE 512202FPStats* const fpstats = (FPStats*)workspace;203Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));204assert(blockSize == (128 << 10));205assert(workspace != NULL);206assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);207ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));208assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;209210initStats(fpstats);211HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);212HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);213fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;214if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))215return blockSize;216217HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);218middleEvents->nbEvents = SEGMENT_SIZE;219{ U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);220U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);221U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;222if (ZSTD_abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)223return 64 KB;224return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;225}226}227228size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,229int level,230void* workspace, size_t wkspSize)231{232DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);233assert(0<=level && level<=4);234if (level == 0)235return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);236/* level >= 1*/237return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);238}239240241