/* Path: blob/master/Utilities/cmzstd/lib/compress/zstd_compress_literals.c
 * 3158 views */
/*1* Copyright (c) Meta Platforms, Inc. and affiliates.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/910/*-*************************************11* Dependencies12***************************************/13#include "zstd_compress_literals.h"141516/* **************************************************************17* Debug Traces18****************************************************************/19#if DEBUGLEVEL >= 22021static size_t showHexa(const void* src, size_t srcSize)22{23const BYTE* const ip = (const BYTE*)src;24size_t u;25for (u=0; u<srcSize; u++) {26RAWLOG(5, " %02X", ip[u]); (void)ip;27}28RAWLOG(5, " \n");29return srcSize;30}3132#endif333435/* **************************************************************36* Literals compression - special cases37****************************************************************/38size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)39{40BYTE* const ostart = (BYTE*)dst;41U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);4243DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);4445RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");4647switch(flSize)48{49case 1: /* 2 - 1 - 5 */50ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));51break;52case 2: /* 2 - 2 - 12 */53MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));54break;55case 3: /* 2 - 2 - 20 */56MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));57break;58default: /* not necessary : flSize is {1,2,3} */59assert(0);60}6162ZSTD_memcpy(ostart + flSize, src, srcSize);63DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));64return srcSize + 
flSize;65}6667static int allBytesIdentical(const void* src, size_t srcSize)68{69assert(srcSize >= 1);70assert(src != NULL);71{ const BYTE b = ((const BYTE*)src)[0];72size_t p;73for (p=1; p<srcSize; p++) {74if (((const BYTE*)src)[p] != b) return 0;75}76return 1;77}78}7980size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)81{82BYTE* const ostart = (BYTE*)dst;83U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);8485assert(dstCapacity >= 4); (void)dstCapacity;86assert(allBytesIdentical(src, srcSize));8788switch(flSize)89{90case 1: /* 2 - 1 - 5 */91ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));92break;93case 2: /* 2 - 2 - 12 */94MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));95break;96case 3: /* 2 - 2 - 20 */97MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));98break;99default: /* not necessary : flSize is {1,2,3} */100assert(0);101}102103ostart[flSize] = *(const BYTE*)src;104DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);105return flSize+1;106}107108/* ZSTD_minLiteralsToCompress() :109* returns minimal amount of literals110* for literal compression to even be attempted.111* Minimum is made tighter as compression strategy increases.112*/113static size_t114ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)115{116assert((int)strategy >= 0);117assert((int)strategy <= 9);118/* btultra2 : min 8 bytes;119* then 2x larger for each successive compression strategy120* max threshold 64 bytes */121{ int const shift = MIN(9-(int)strategy, 3);122size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 
6 : (size_t)8 << shift;123DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);124return mintc;125}126}127128size_t ZSTD_compressLiterals (129void* dst, size_t dstCapacity,130const void* src, size_t srcSize,131void* entropyWorkspace, size_t entropyWorkspaceSize,132const ZSTD_hufCTables_t* prevHuf,133ZSTD_hufCTables_t* nextHuf,134ZSTD_strategy strategy,135int disableLiteralCompression,136int suspectUncompressible,137int bmi2)138{139size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);140BYTE* const ostart = (BYTE*)dst;141U32 singleStream = srcSize < 256;142symbolEncodingType_e hType = set_compressed;143size_t cLitSize;144145DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",146disableLiteralCompression, (U32)srcSize, dstCapacity);147148DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));149150/* Prepare nextEntropy assuming reusing the existing table */151ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));152153if (disableLiteralCompression)154return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);155156/* if too small, don't even attempt compression (speed opt) */157if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))158return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);159160RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");161{ HUF_repeat repeat = prevHuf->repeatMode;162int const flags = 0163| (bmi2 ? HUF_flags_bmi2 : 0)164| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)165| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)166| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);167168typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);169huf_compress_f huf_compress;170if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;171huf_compress = singleStream ? 
HUF_compress1X_repeat : HUF_compress4X_repeat;172cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,173src, srcSize,174HUF_SYMBOLVALUE_MAX, LitHufLog,175entropyWorkspace, entropyWorkspaceSize,176(HUF_CElt*)nextHuf->CTable,177&repeat, flags);178DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);179if (repeat != HUF_repeat_none) {180/* reused the existing table */181DEBUGLOG(5, "reusing statistics from previous huffman block");182hType = set_repeat;183}184}185186{ size_t const minGain = ZSTD_minGain(srcSize, strategy);187if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {188ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));189return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);190} }191if (cLitSize==1) {192/* A return value of 1 signals that the alphabet consists of a single symbol.193* However, in some rare circumstances, it could be the compressed size (a single byte).194* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.195* (it's also necessary to not generate statistics).196* Therefore, in such a case, actively check that all bytes are identical. 
*/197if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {198ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));199return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);200} }201202if (hType == set_compressed) {203/* using a newly constructed table */204nextHuf->repeatMode = HUF_repeat_check;205}206207/* Build header */208switch(lhSize)209{210case 3: /* 2 - 2 - 10 - 10 */211if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);212{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);213MEM_writeLE24(ostart, lhc);214break;215}216case 4: /* 2 - 2 - 14 - 14 */217assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);218{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);219MEM_writeLE32(ostart, lhc);220break;221}222case 5: /* 2 - 2 - 18 - 18 */223assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);224{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);225MEM_writeLE32(ostart, lhc);226ostart[4] = (BYTE)(cLitSize >> 10);227break;228}229default: /* not possible : lhSize is {3,4,5} */230assert(0);231}232DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));233return lhSize+cLitSize;234}235236237