/* Path: blob/master/Utilities/cmzstd/lib/common/zstd_internal.h (3158 views) */
/*1* Copyright (c) Meta Platforms, Inc. and affiliates.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/910#ifndef ZSTD_CCOMMON_H_MODULE11#define ZSTD_CCOMMON_H_MODULE1213/* this module contains definitions which must be identical14* across compression, decompression and dictBuilder.15* It also contains a few functions useful to at least 2 of them16* and which benefit from being inlined */1718/*-*************************************19* Dependencies20***************************************/21#include "compiler.h"22#include "cpu.h"23#include "mem.h"24#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */25#include "error_private.h"26#define ZSTD_STATIC_LINKING_ONLY27#include "../zstd.h"28#define FSE_STATIC_LINKING_ONLY29#include "fse.h"30#include "huf.h"31#ifndef XXH_STATIC_LINKING_ONLY32# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */33#endif34#include "xxhash.h" /* XXH_reset, update, digest */35#ifndef ZSTD_NO_TRACE36# include "zstd_trace.h"37#else38# define ZSTD_TRACE 039#endif4041#if defined (__cplusplus)42extern "C" {43#endif4445/* ---- static assert (debug) --- */46#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)47#define ZSTD_isError ERR_isError /* for inlining */48#define FSE_isError ERR_isError49#define HUF_isError ERR_isError505152/*-*************************************53* shared macros54***************************************/55#undef MIN56#undef MAX57#define MIN(a,b) ((a)<(b) ? (a) : (b))58#define MAX(a,b) ((a)>(b) ? 
(a) : (b))59#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))606162/*-*************************************63* Common constants64***************************************/65#define ZSTD_OPT_NUM (1<<12)6667#define ZSTD_REP_NUM 3 /* number of repcodes */68static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };6970#define KB *(1 <<10)71#define MB *(1 <<20)72#define GB *(1U<<30)7374#define BIT7 12875#define BIT6 6476#define BIT5 3277#define BIT4 1678#define BIT1 279#define BIT0 18081#define ZSTD_WINDOWLOG_ABSOLUTEMIN 1082static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };83static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };8485#define ZSTD_FRAMEIDSIZE 4 /* magic number size */8687#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */88static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;89typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;9091#define ZSTD_FRAMECHECKSUMSIZE 49293#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */94#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */95#define MIN_LITERALS_FOR_4_STREAMS 69697typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;9899#define LONGNBSEQ 0x7F00100101#define MINMATCH 3102103#define Litbits 8104#define LitHufLog 11105#define MaxLit ((1<<Litbits) - 1)106#define MaxML 52107#define MaxLL 35108#define DefaultMaxOff 28109#define MaxOff 31110#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */111#define MLFSELog 9112#define LLFSELog 9113#define OffFSELog 8114#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)115#define MaxMLBits 16116#define MaxLLBits 16117118#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */119/* Each table cannot take more than #symbols * FSELog bits */120#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * 
MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)121122static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {1230, 0, 0, 0, 0, 0, 0, 0,1240, 0, 0, 0, 0, 0, 0, 0,1251, 1, 1, 1, 2, 2, 3, 3,1264, 6, 7, 8, 9,10,11,12,12713,14,15,16128};129static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {1304, 3, 2, 2, 2, 2, 2, 2,1312, 2, 2, 2, 2, 1, 1, 1,1322, 2, 2, 2, 2, 2, 2, 2,1332, 3, 2, 1, 1, 1, 1, 1,134-1,-1,-1,-1135};136#define LL_DEFAULTNORMLOG 6 /* for static allocation */137static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;138139static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {1400, 0, 0, 0, 0, 0, 0, 0,1410, 0, 0, 0, 0, 0, 0, 0,1420, 0, 0, 0, 0, 0, 0, 0,1430, 0, 0, 0, 0, 0, 0, 0,1441, 1, 1, 1, 2, 2, 3, 3,1454, 4, 5, 7, 8, 9,10,11,14612,13,14,15,16147};148static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {1491, 4, 3, 2, 2, 2, 2, 2,1502, 1, 1, 1, 1, 1, 1, 1,1511, 1, 1, 1, 1, 1, 1, 1,1521, 1, 1, 1, 1, 1, 1, 1,1531, 1, 1, 1, 1, 1, 1, 1,1541, 1, 1, 1, 1, 1,-1,-1,155-1,-1,-1,-1,-1156};157#define ML_DEFAULTNORMLOG 6 /* for static allocation */158static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;159160static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {1611, 1, 1, 1, 1, 1, 2, 2,1622, 1, 1, 1, 1, 1, 1, 1,1631, 1, 1, 1, 1, 1, 1, 1,164-1,-1,-1,-1,-1165};166#define OF_DEFAULTNORMLOG 5 /* for static allocation */167static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;168169170/*-*******************************************171* Shared functions to include for inlining172*********************************************/173static void ZSTD_copy8(void* dst, const void* src) {174#if defined(ZSTD_ARCH_ARM_NEON)175vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));176#else177ZSTD_memcpy(dst, src, 8);178#endif179}180#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }181182/* Need to use memmove here since the literal buffer can now be located within183the dst buffer. 
In circumstances where the op "catches up" to where the184literal buffer is, there can be partial overlaps in this call on the final185copy if the literal is being shifted by less than 16 bytes. */186static void ZSTD_copy16(void* dst, const void* src) {187#if defined(ZSTD_ARCH_ARM_NEON)188vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));189#elif defined(ZSTD_ARCH_X86_SSE2)190_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));191#elif defined(__clang__)192ZSTD_memmove(dst, src, 16);193#else194/* ZSTD_memmove is not inlined properly by gcc */195BYTE copy16_buf[16];196ZSTD_memcpy(copy16_buf, src, 16);197ZSTD_memcpy(dst, copy16_buf, 16);198#endif199}200#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }201202#define WILDCOPY_OVERLENGTH 32203#define WILDCOPY_VECLEN 16204205typedef enum {206ZSTD_no_overlap,207ZSTD_overlap_src_before_dst208/* ZSTD_overlap_dst_before_src, */209} ZSTD_overlap_e;210211/*! ZSTD_wildcopy() :212* Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)213* @param ovtype controls the overlap detection214* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.215* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.216* The src buffer must be before the dst buffer.217*/218MEM_STATIC FORCE_INLINE_ATTR219void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)220{221ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;222const BYTE* ip = (const BYTE*)src;223BYTE* op = (BYTE*)dst;224BYTE* const oend = op + length;225226if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {227/* Handle short offset copies. 
*/228do {229COPY8(op, ip)230} while (op < oend);231} else {232assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);233/* Separate out the first COPY16() call because the copy length is234* almost certain to be short, so the branches have different235* probabilities. Since it is almost certain to be short, only do236* one COPY16() in the first call. Then, do two calls per loop since237* at that point it is more likely to have a high trip count.238*/239ZSTD_copy16(op, ip);240if (16 >= length) return;241op += 16;242ip += 16;243do {244COPY16(op, ip);245COPY16(op, ip);246}247while (op < oend);248}249}250251MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)252{253size_t const length = MIN(dstCapacity, srcSize);254if (length > 0) {255ZSTD_memcpy(dst, src, length);256}257return length;258}259260/* define "workspace is too large" as this number of times larger than needed */261#define ZSTD_WORKSPACETOOLARGE_FACTOR 3262263/* when workspace is continuously too large264* during at least this number of times,265* context's memory usage is considered wasteful,266* because it's sized to handle a worst case scenario which rarely happens.267* In which case, resize it down to free some memory */268#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128269270/* Controls whether the input/output buffer is buffered or stable. */271typedef enum {272ZSTD_bm_buffered = 0, /* Buffer the input/output */273ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */274} ZSTD_bufferMode_e;275276277/*-*******************************************278* Private declarations279*********************************************/280typedef struct seqDef_s {281U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */282U16 litLength;283U16 mlBase; /* mlBase == matchLength - MINMATCH */284} seqDef;285286/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. 
*/287typedef enum {288ZSTD_llt_none = 0, /* no longLengthType */289ZSTD_llt_literalLength = 1, /* represents a long literal */290ZSTD_llt_matchLength = 2 /* represents a long match */291} ZSTD_longLengthType_e;292293typedef struct {294seqDef* sequencesStart;295seqDef* sequences; /* ptr to end of sequences */296BYTE* litStart;297BYTE* lit; /* ptr to end of literals */298BYTE* llCode;299BYTE* mlCode;300BYTE* ofCode;301size_t maxNbSeq;302size_t maxNbLit;303304/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength305* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment306* the existing value of the litLength or matchLength by 0x10000.307*/308ZSTD_longLengthType_e longLengthType;309U32 longLengthPos; /* Index of the sequence to apply long length modification to */310} seqStore_t;311312typedef struct {313U32 litLength;314U32 matchLength;315} ZSTD_sequenceLength;316317/**318* Returns the ZSTD_sequenceLength for the given sequences. 
It handles the decoding of long sequences319* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.320*/321MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)322{323ZSTD_sequenceLength seqLen;324seqLen.litLength = seq->litLength;325seqLen.matchLength = seq->mlBase + MINMATCH;326if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {327if (seqStore->longLengthType == ZSTD_llt_literalLength) {328seqLen.litLength += 0x10000;329}330if (seqStore->longLengthType == ZSTD_llt_matchLength) {331seqLen.matchLength += 0x10000;332}333}334return seqLen;335}336337/**338* Contains the compressed frame size and an upper-bound for the decompressed frame size.339* Note: before using `compressedSize`, check for errors using ZSTD_isError().340* similarly, before using `decompressedBound`, check for errors using:341* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`342*/343typedef struct {344size_t nbBlocks;345size_t compressedSize;346unsigned long long decompressedBound;347} ZSTD_frameSizeInfo; /* decompress & legacy */348349const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */350int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */351352353/* ZSTD_invalidateRepCodes() :354* ensures next compression will not use repcodes from previous block.355* Note : only works with regular variant;356* do not use with extDict variant ! */357void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */358359360typedef struct {361blockType_e blockType;362U32 lastBlock;363U32 origSize;364} blockProperties_t; /* declared here for decompress and fullbench */365366/*! 
ZSTD_getcBlockSize() :367* Provides the size of compressed block from block header `src` */368/* Used by: decompress, fullbench (does not get its definition from here) */369size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,370blockProperties_t* bpPtr);371372/*! ZSTD_decodeSeqHeaders() :373* decode sequence header from src */374/* Used by: decompress, fullbench (does not get its definition from here) */375size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,376const void* src, size_t srcSize);377378/**379* @returns true iff the CPU supports dynamic BMI2 dispatch.380*/381MEM_STATIC int ZSTD_cpuSupportsBmi2(void)382{383ZSTD_cpuid_t cpuid = ZSTD_cpuid();384return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);385}386387#if defined (__cplusplus)388}389#endif390391#endif /* ZSTD_CCOMMON_H_MODULE */392393394