Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Kitware
GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmzstd/lib/compress/zstd_compress_literals.c
3158 views
1
/*
2
* Copyright (c) Meta Platforms, Inc. and affiliates.
3
* All rights reserved.
4
*
5
* This source code is licensed under both the BSD-style license (found in the
6
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
* in the COPYING file in the root directory of this source tree).
8
* You may select, at your option, one of the above-listed licenses.
9
*/
10
11
/*-*************************************
12
* Dependencies
13
***************************************/
14
#include "zstd_compress_literals.h"
15
16
17
/* **************************************************************
18
* Debug Traces
19
****************************************************************/
20
#if DEBUGLEVEL >= 2
21
22
static size_t showHexa(const void* src, size_t srcSize)
23
{
24
const BYTE* const ip = (const BYTE*)src;
25
size_t u;
26
for (u=0; u<srcSize; u++) {
27
RAWLOG(5, " %02X", ip[u]); (void)ip;
28
}
29
RAWLOG(5, " \n");
30
return srcSize;
31
}
32
33
#endif
34
35
36
/* **************************************************************
37
* Literals compression - special cases
38
****************************************************************/
39
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
40
{
41
BYTE* const ostart = (BYTE*)dst;
42
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
43
44
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
45
46
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
47
48
switch(flSize)
49
{
50
case 1: /* 2 - 1 - 5 */
51
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
52
break;
53
case 2: /* 2 - 2 - 12 */
54
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
55
break;
56
case 3: /* 2 - 2 - 20 */
57
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
58
break;
59
default: /* not necessary : flSize is {1,2,3} */
60
assert(0);
61
}
62
63
ZSTD_memcpy(ostart + flSize, src, srcSize);
64
DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
65
return srcSize + flSize;
66
}
67
68
static int allBytesIdentical(const void* src, size_t srcSize)
69
{
70
assert(srcSize >= 1);
71
assert(src != NULL);
72
{ const BYTE b = ((const BYTE*)src)[0];
73
size_t p;
74
for (p=1; p<srcSize; p++) {
75
if (((const BYTE*)src)[p] != b) return 0;
76
}
77
return 1;
78
}
79
}
80
81
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
82
{
83
BYTE* const ostart = (BYTE*)dst;
84
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
85
86
assert(dstCapacity >= 4); (void)dstCapacity;
87
assert(allBytesIdentical(src, srcSize));
88
89
switch(flSize)
90
{
91
case 1: /* 2 - 1 - 5 */
92
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
93
break;
94
case 2: /* 2 - 2 - 12 */
95
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
96
break;
97
case 3: /* 2 - 2 - 20 */
98
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
99
break;
100
default: /* not necessary : flSize is {1,2,3} */
101
assert(0);
102
}
103
104
ostart[flSize] = *(const BYTE*)src;
105
DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
106
return flSize+1;
107
}
108
109
/* ZSTD_minLiteralsToCompress() :
110
* returns minimal amount of literals
111
* for literal compression to even be attempted.
112
* Minimum is made tighter as compression strategy increases.
113
*/
114
static size_t
115
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
116
{
117
assert((int)strategy >= 0);
118
assert((int)strategy <= 9);
119
/* btultra2 : min 8 bytes;
120
* then 2x larger for each successive compression strategy
121
* max threshold 64 bytes */
122
{ int const shift = MIN(9-(int)strategy, 3);
123
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
124
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
125
return mintc;
126
}
127
}
128
129
size_t ZSTD_compressLiterals (
130
void* dst, size_t dstCapacity,
131
const void* src, size_t srcSize,
132
void* entropyWorkspace, size_t entropyWorkspaceSize,
133
const ZSTD_hufCTables_t* prevHuf,
134
ZSTD_hufCTables_t* nextHuf,
135
ZSTD_strategy strategy,
136
int disableLiteralCompression,
137
int suspectUncompressible,
138
int bmi2)
139
{
140
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
141
BYTE* const ostart = (BYTE*)dst;
142
U32 singleStream = srcSize < 256;
143
symbolEncodingType_e hType = set_compressed;
144
size_t cLitSize;
145
146
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
147
disableLiteralCompression, (U32)srcSize, dstCapacity);
148
149
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
150
151
/* Prepare nextEntropy assuming reusing the existing table */
152
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
153
154
if (disableLiteralCompression)
155
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
156
157
/* if too small, don't even attempt compression (speed opt) */
158
if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
159
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
160
161
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
162
{ HUF_repeat repeat = prevHuf->repeatMode;
163
int const flags = 0
164
| (bmi2 ? HUF_flags_bmi2 : 0)
165
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
166
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
167
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
168
169
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
170
huf_compress_f huf_compress;
171
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
172
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
173
cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
174
src, srcSize,
175
HUF_SYMBOLVALUE_MAX, LitHufLog,
176
entropyWorkspace, entropyWorkspaceSize,
177
(HUF_CElt*)nextHuf->CTable,
178
&repeat, flags);
179
DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
180
if (repeat != HUF_repeat_none) {
181
/* reused the existing table */
182
DEBUGLOG(5, "reusing statistics from previous huffman block");
183
hType = set_repeat;
184
}
185
}
186
187
{ size_t const minGain = ZSTD_minGain(srcSize, strategy);
188
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
189
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
190
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
191
} }
192
if (cLitSize==1) {
193
/* A return value of 1 signals that the alphabet consists of a single symbol.
194
* However, in some rare circumstances, it could be the compressed size (a single byte).
195
* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
196
* (it's also necessary to not generate statistics).
197
* Therefore, in such a case, actively check that all bytes are identical. */
198
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
199
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
200
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
201
} }
202
203
if (hType == set_compressed) {
204
/* using a newly constructed table */
205
nextHuf->repeatMode = HUF_repeat_check;
206
}
207
208
/* Build header */
209
switch(lhSize)
210
{
211
case 3: /* 2 - 2 - 10 - 10 */
212
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
213
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
214
MEM_writeLE24(ostart, lhc);
215
break;
216
}
217
case 4: /* 2 - 2 - 14 - 14 */
218
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
219
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
220
MEM_writeLE32(ostart, lhc);
221
break;
222
}
223
case 5: /* 2 - 2 - 18 - 18 */
224
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
225
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
226
MEM_writeLE32(ostart, lhc);
227
ostart[4] = (BYTE)(cLitSize >> 10);
228
break;
229
}
230
default: /* not possible : lhSize is {3,4,5} */
231
assert(0);
232
}
233
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
234
return lhSize+cLitSize;
235
}
236
237