Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/module/zstd/lib/compress/zstd_ldm.c
48774 views
1
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2
/*
3
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
4
* All rights reserved.
5
*
6
* This source code is licensed under both the BSD-style license (found in the
7
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
8
* in the COPYING file in the root directory of this source tree).
9
* You may select, at your option, one of the above-listed licenses.
10
*/
11
12
#include "zstd_ldm.h"
13
14
#include "../common/debug.h"
15
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
16
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
17
18
#define LDM_BUCKET_SIZE_LOG 3
19
#define LDM_MIN_MATCH_LENGTH 64
20
#define LDM_HASH_RLOG 7
21
#define LDM_HASH_CHAR_OFFSET 10
22
23
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
24
ZSTD_compressionParameters const* cParams)
25
{
26
params->windowLog = cParams->windowLog;
27
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
28
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
29
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
30
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
31
if (cParams->strategy >= ZSTD_btopt) {
32
/* Get out of the way of the optimal parser */
33
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
34
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
35
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
36
params->minMatchLength = minMatch;
37
}
38
if (params->hashLog == 0) {
39
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
40
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
41
}
42
if (params->hashRateLog == 0) {
43
params->hashRateLog = params->windowLog < params->hashLog
44
? 0
45
: params->windowLog - params->hashLog;
46
}
47
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
48
}
49
50
size_t ZSTD_ldm_getTableSize(ldmParams_t params)
51
{
52
size_t const ldmHSize = ((size_t)1) << params.hashLog;
53
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
54
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
55
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
56
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
57
return params.enableLdm ? totalSize : 0;
58
}
59
60
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
61
{
62
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
63
}
64
65
/** ZSTD_ldm_getSmallHash() :
66
* numBits should be <= 32
67
* If numBits==0, returns 0.
68
* @return : the most significant numBits of value. */
69
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
70
{
71
assert(numBits <= 32);
72
return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
73
}
74
75
/** ZSTD_ldm_getChecksum() :
76
* numBitsToDiscard should be <= 32
77
* @return : the next most significant 32 bits after numBitsToDiscard */
78
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
79
{
80
assert(numBitsToDiscard <= 32);
81
return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
82
}
83
84
/** ZSTD_ldm_getTag() ;
85
* Given the hash, returns the most significant numTagBits bits
86
* after (32 + hbits) bits.
87
*
88
* If there are not enough bits remaining, return the last
89
* numTagBits bits. */
90
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
91
{
92
assert(numTagBits < 32 && hbits <= 32);
93
if (32 - hbits < numTagBits) {
94
return hash & (((U32)1 << numTagBits) - 1);
95
} else {
96
return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
97
}
98
}
99
100
/** ZSTD_ldm_getBucket() :
101
* Returns a pointer to the start of the bucket associated with hash. */
102
static ldmEntry_t* ZSTD_ldm_getBucket(
103
ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
104
{
105
return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
106
}
107
108
/** ZSTD_ldm_insertEntry() :
109
* Insert the entry with corresponding hash into the hash table */
110
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
111
size_t const hash, const ldmEntry_t entry,
112
ldmParams_t const ldmParams)
113
{
114
BYTE* const bucketOffsets = ldmState->bucketOffsets;
115
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
116
bucketOffsets[hash]++;
117
bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
118
}
119
120
/** ZSTD_ldm_makeEntryAndInsertByTag() :
121
*
122
* Gets the small hash, checksum, and tag from the rollingHash.
123
*
124
* If the tag matches (1 << ldmParams.hashRateLog)-1, then
125
* creates an ldmEntry from the offset, and inserts it into the hash table.
126
*
127
* hBits is the length of the small hash, which is the most significant hBits
128
* of rollingHash. The checksum is the next 32 most significant bits, followed
129
* by ldmParams.hashRateLog bits that make up the tag. */
130
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
131
U64 const rollingHash,
132
U32 const hBits,
133
U32 const offset,
134
ldmParams_t const ldmParams)
135
{
136
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
137
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
138
if (tag == tagMask) {
139
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
140
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
141
ldmEntry_t entry;
142
entry.offset = offset;
143
entry.checksum = checksum;
144
ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
145
}
146
}
147
148
/** ZSTD_ldm_countBackwardsMatch() :
149
* Returns the number of bytes that match backwards before pIn and pMatch.
150
*
151
* We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
152
static size_t ZSTD_ldm_countBackwardsMatch(
153
const BYTE* pIn, const BYTE* pAnchor,
154
const BYTE* pMatch, const BYTE* pBase)
155
{
156
size_t matchLength = 0;
157
while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
158
pIn--;
159
pMatch--;
160
matchLength++;
161
}
162
return matchLength;
163
}
164
165
/** ZSTD_ldm_fillFastTables() :
166
*
167
* Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
168
* This is similar to ZSTD_loadDictionaryContent.
169
*
170
* The tables for the other strategies are filled within their
171
* block compressors. */
172
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
173
void const* end)
174
{
175
const BYTE* const iend = (const BYTE*)end;
176
177
switch(ms->cParams.strategy)
178
{
179
case ZSTD_fast:
180
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
181
break;
182
183
case ZSTD_dfast:
184
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
185
break;
186
187
case ZSTD_greedy:
188
case ZSTD_lazy:
189
case ZSTD_lazy2:
190
case ZSTD_btlazy2:
191
case ZSTD_btopt:
192
case ZSTD_btultra:
193
case ZSTD_btultra2:
194
break;
195
default:
196
assert(0); /* not possible : not a valid strategy id */
197
}
198
199
return 0;
200
}
201
202
/** ZSTD_ldm_fillLdmHashTable() :
203
*
204
* Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
205
* lastHash is the rolling hash that corresponds to lastHashed.
206
*
207
* Returns the rolling hash corresponding to position iend-1. */
208
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
209
U64 lastHash, const BYTE* lastHashed,
210
const BYTE* iend, const BYTE* base,
211
U32 hBits, ldmParams_t const ldmParams)
212
{
213
U64 rollingHash = lastHash;
214
const BYTE* cur = lastHashed + 1;
215
216
while (cur < iend) {
217
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
218
cur[ldmParams.minMatchLength-1],
219
state->hashPower);
220
ZSTD_ldm_makeEntryAndInsertByTag(state,
221
rollingHash, hBits,
222
(U32)(cur - base), ldmParams);
223
++cur;
224
}
225
return rollingHash;
226
}
227
228
void ZSTD_ldm_fillHashTable(
229
ldmState_t* state, const BYTE* ip,
230
const BYTE* iend, ldmParams_t const* params)
231
{
232
DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
233
if ((size_t)(iend - ip) >= params->minMatchLength) {
234
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
235
ZSTD_ldm_fillLdmHashTable(
236
state, startingHash, ip, iend - params->minMatchLength, state->window.base,
237
params->hashLog - params->bucketSizeLog,
238
*params);
239
}
240
}
241
242
243
/** ZSTD_ldm_limitTableUpdate() :
244
*
245
* Sets cctx->nextToUpdate to a position corresponding closer to anchor
246
* if it is far way
247
* (after a long match, only update tables a limited amount). */
248
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
249
{
250
U32 const current = (U32)(anchor - ms->window.base);
251
if (current > ms->nextToUpdate + 1024) {
252
ms->nextToUpdate =
253
current - MIN(512, current - ms->nextToUpdate - 1024);
254
}
255
}
256
257
static size_t ZSTD_ldm_generateSequences_internal(
258
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
259
ldmParams_t const* params, void const* src, size_t srcSize)
260
{
261
/* LDM parameters */
262
int const extDict = ZSTD_window_hasExtDict(ldmState->window);
263
U32 const minMatchLength = params->minMatchLength;
264
U64 const hashPower = ldmState->hashPower;
265
U32 const hBits = params->hashLog - params->bucketSizeLog;
266
U32 const ldmBucketSize = 1U << params->bucketSizeLog;
267
U32 const hashRateLog = params->hashRateLog;
268
U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
269
/* Prefix and extDict parameters */
270
U32 const dictLimit = ldmState->window.dictLimit;
271
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
272
BYTE const* const base = ldmState->window.base;
273
BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
274
BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
275
BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
276
BYTE const* const lowPrefixPtr = base + dictLimit;
277
/* Input bounds */
278
BYTE const* const istart = (BYTE const*)src;
279
BYTE const* const iend = istart + srcSize;
280
BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
281
/* Input positions */
282
BYTE const* anchor = istart;
283
BYTE const* ip = istart;
284
/* Rolling hash */
285
BYTE const* lastHashed = NULL;
286
U64 rollingHash = 0;
287
288
while (ip <= ilimit) {
289
size_t mLength;
290
U32 const current = (U32)(ip - base);
291
size_t forwardMatchLength = 0, backwardMatchLength = 0;
292
ldmEntry_t* bestEntry = NULL;
293
if (ip != istart) {
294
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
295
lastHashed[minMatchLength],
296
hashPower);
297
} else {
298
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
299
}
300
lastHashed = ip;
301
302
/* Do not insert and do not look for a match */
303
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
304
ip++;
305
continue;
306
}
307
308
/* Get the best entry and compute the match lengths */
309
{
310
ldmEntry_t* const bucket =
311
ZSTD_ldm_getBucket(ldmState,
312
ZSTD_ldm_getSmallHash(rollingHash, hBits),
313
*params);
314
ldmEntry_t* cur;
315
size_t bestMatchLength = 0;
316
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
317
318
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
319
size_t curForwardMatchLength, curBackwardMatchLength,
320
curTotalMatchLength;
321
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
322
continue;
323
}
324
if (extDict) {
325
BYTE const* const curMatchBase =
326
cur->offset < dictLimit ? dictBase : base;
327
BYTE const* const pMatch = curMatchBase + cur->offset;
328
BYTE const* const matchEnd =
329
cur->offset < dictLimit ? dictEnd : iend;
330
BYTE const* const lowMatchPtr =
331
cur->offset < dictLimit ? dictStart : lowPrefixPtr;
332
333
curForwardMatchLength = ZSTD_count_2segments(
334
ip, pMatch, iend,
335
matchEnd, lowPrefixPtr);
336
if (curForwardMatchLength < minMatchLength) {
337
continue;
338
}
339
curBackwardMatchLength =
340
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
341
lowMatchPtr);
342
curTotalMatchLength = curForwardMatchLength +
343
curBackwardMatchLength;
344
} else { /* !extDict */
345
BYTE const* const pMatch = base + cur->offset;
346
curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
347
if (curForwardMatchLength < minMatchLength) {
348
continue;
349
}
350
curBackwardMatchLength =
351
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
352
lowPrefixPtr);
353
curTotalMatchLength = curForwardMatchLength +
354
curBackwardMatchLength;
355
}
356
357
if (curTotalMatchLength > bestMatchLength) {
358
bestMatchLength = curTotalMatchLength;
359
forwardMatchLength = curForwardMatchLength;
360
backwardMatchLength = curBackwardMatchLength;
361
bestEntry = cur;
362
}
363
}
364
}
365
366
/* No match found -- continue searching */
367
if (bestEntry == NULL) {
368
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
369
hBits, current,
370
*params);
371
ip++;
372
continue;
373
}
374
375
/* Match found */
376
mLength = forwardMatchLength + backwardMatchLength;
377
ip -= backwardMatchLength;
378
379
{
380
/* Store the sequence:
381
* ip = current - backwardMatchLength
382
* The match is at (bestEntry->offset - backwardMatchLength)
383
*/
384
U32 const matchIndex = bestEntry->offset;
385
U32 const offset = current - matchIndex;
386
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
387
388
/* Out of sequence storage */
389
if (rawSeqStore->size == rawSeqStore->capacity)
390
return ERROR(dstSize_tooSmall);
391
seq->litLength = (U32)(ip - anchor);
392
seq->matchLength = (U32)mLength;
393
seq->offset = offset;
394
rawSeqStore->size++;
395
}
396
397
/* Insert the current entry into the hash table */
398
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
399
(U32)(lastHashed - base),
400
*params);
401
402
assert(ip + backwardMatchLength == lastHashed);
403
404
/* Fill the hash table from lastHashed+1 to ip+mLength*/
405
/* Heuristic: don't need to fill the entire table at end of block */
406
if (ip + mLength <= ilimit) {
407
rollingHash = ZSTD_ldm_fillLdmHashTable(
408
ldmState, rollingHash, lastHashed,
409
ip + mLength, base, hBits, *params);
410
lastHashed = ip + mLength - 1;
411
}
412
ip += mLength;
413
anchor = ip;
414
}
415
return iend - anchor;
416
}
417
418
/*! ZSTD_ldm_reduceTable() :
419
* reduce table indexes by `reducerValue` */
420
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
421
U32 const reducerValue)
422
{
423
U32 u;
424
for (u = 0; u < size; u++) {
425
if (table[u].offset < reducerValue) table[u].offset = 0;
426
else table[u].offset -= reducerValue;
427
}
428
}
429
430
size_t ZSTD_ldm_generateSequences(
431
ldmState_t* ldmState, rawSeqStore_t* sequences,
432
ldmParams_t const* params, void const* src, size_t srcSize)
433
{
434
U32 const maxDist = 1U << params->windowLog;
435
BYTE const* const istart = (BYTE const*)src;
436
BYTE const* const iend = istart + srcSize;
437
size_t const kMaxChunkSize = 1 << 20;
438
size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
439
size_t chunk;
440
size_t leftoverSize = 0;
441
442
assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
443
/* Check that ZSTD_window_update() has been called for this chunk prior
444
* to passing it to this function.
445
*/
446
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
447
/* The input could be very large (in zstdmt), so it must be broken up into
448
* chunks to enforce the maximum distance and handle overflow correction.
449
*/
450
assert(sequences->pos <= sequences->size);
451
assert(sequences->size <= sequences->capacity);
452
for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
453
BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
454
size_t const remaining = (size_t)(iend - chunkStart);
455
BYTE const *const chunkEnd =
456
(remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
457
size_t const chunkSize = chunkEnd - chunkStart;
458
size_t newLeftoverSize;
459
size_t const prevSize = sequences->size;
460
461
assert(chunkStart < iend);
462
/* 1. Perform overflow correction if necessary. */
463
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
464
U32 const ldmHSize = 1U << params->hashLog;
465
U32 const correction = ZSTD_window_correctOverflow(
466
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
467
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
468
/* invalidate dictionaries on overflow correction */
469
ldmState->loadedDictEnd = 0;
470
}
471
/* 2. We enforce the maximum offset allowed.
472
*
473
* kMaxChunkSize should be small enough that we don't lose too much of
474
* the window through early invalidation.
475
* TODO: * Test the chunk size.
476
* * Try invalidation after the sequence generation and test the
477
* the offset against maxDist directly.
478
*
479
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
480
* that any offset used is valid at the END of the sequence, since it may
481
* be split into two sequences. This condition holds when using
482
* ZSTD_window_enforceMaxDist(), but if we move to checking offsets
483
* against maxDist directly, we'll have to carefully handle that case.
484
*/
485
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
486
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
487
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
488
ldmState, sequences, params, chunkStart, chunkSize);
489
if (ZSTD_isError(newLeftoverSize))
490
return newLeftoverSize;
491
/* 4. We add the leftover literals from previous iterations to the first
492
* newly generated sequence, or add the `newLeftoverSize` if none are
493
* generated.
494
*/
495
/* Prepend the leftover literals from the last call */
496
if (prevSize < sequences->size) {
497
sequences->seq[prevSize].litLength += (U32)leftoverSize;
498
leftoverSize = newLeftoverSize;
499
} else {
500
assert(newLeftoverSize == chunkSize);
501
leftoverSize += chunkSize;
502
}
503
}
504
return 0;
505
}
506
507
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
508
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
509
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
510
if (srcSize <= seq->litLength) {
511
/* Skip past srcSize literals */
512
seq->litLength -= (U32)srcSize;
513
return;
514
}
515
srcSize -= seq->litLength;
516
seq->litLength = 0;
517
if (srcSize < seq->matchLength) {
518
/* Skip past the first srcSize of the match */
519
seq->matchLength -= (U32)srcSize;
520
if (seq->matchLength < minMatch) {
521
/* The match is too short, omit it */
522
if (rawSeqStore->pos + 1 < rawSeqStore->size) {
523
seq[1].litLength += seq[0].matchLength;
524
}
525
rawSeqStore->pos++;
526
}
527
return;
528
}
529
srcSize -= seq->matchLength;
530
seq->matchLength = 0;
531
rawSeqStore->pos++;
532
}
533
}
534
535
/**
536
* If the sequence length is longer than remaining then the sequence is split
537
* between this block and the next.
538
*
539
* Returns the current sequence to handle, or if the rest of the block should
540
* be literals, it returns a sequence with offset == 0.
541
*/
542
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
543
U32 const remaining, U32 const minMatch)
544
{
545
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
546
assert(sequence.offset > 0);
547
/* Likely: No partial sequence */
548
if (remaining >= sequence.litLength + sequence.matchLength) {
549
rawSeqStore->pos++;
550
return sequence;
551
}
552
/* Cut the sequence short (offset == 0 ==> rest is literals). */
553
if (remaining <= sequence.litLength) {
554
sequence.offset = 0;
555
} else if (remaining < sequence.litLength + sequence.matchLength) {
556
sequence.matchLength = remaining - sequence.litLength;
557
if (sequence.matchLength < minMatch) {
558
sequence.offset = 0;
559
}
560
}
561
/* Skip past `remaining` bytes for the future sequences. */
562
ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
563
return sequence;
564
}
565
566
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
567
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
568
void const* src, size_t srcSize)
569
{
570
const ZSTD_compressionParameters* const cParams = &ms->cParams;
571
unsigned const minMatch = cParams->minMatch;
572
ZSTD_blockCompressor const blockCompressor =
573
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
574
/* Input bounds */
575
BYTE const* const istart = (BYTE const*)src;
576
BYTE const* const iend = istart + srcSize;
577
/* Input positions */
578
BYTE const* ip = istart;
579
580
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
581
assert(rawSeqStore->pos <= rawSeqStore->size);
582
assert(rawSeqStore->size <= rawSeqStore->capacity);
583
/* Loop through each sequence and apply the block compressor to the lits */
584
while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
585
/* maybeSplitSequence updates rawSeqStore->pos */
586
rawSeq const sequence = maybeSplitSequence(rawSeqStore,
587
(U32)(iend - ip), minMatch);
588
int i;
589
/* End signal */
590
if (sequence.offset == 0)
591
break;
592
593
assert(ip + sequence.litLength + sequence.matchLength <= iend);
594
595
/* Fill tables for block compressor */
596
ZSTD_ldm_limitTableUpdate(ms, ip);
597
ZSTD_ldm_fillFastTables(ms, ip);
598
/* Run the block compressor */
599
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
600
{
601
size_t const newLitLength =
602
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
603
ip += sequence.litLength;
604
/* Update the repcodes */
605
for (i = ZSTD_REP_NUM - 1; i > 0; i--)
606
rep[i] = rep[i-1];
607
rep[0] = sequence.offset;
608
/* Store the sequence */
609
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
610
sequence.offset + ZSTD_REP_MOVE,
611
sequence.matchLength - MINMATCH);
612
ip += sequence.matchLength;
613
}
614
}
615
/* Fill the tables for the block compressor */
616
ZSTD_ldm_limitTableUpdate(ms, ip);
617
ZSTD_ldm_fillFastTables(ms, ip);
618
/* Compress the last literals */
619
return blockCompressor(ms, seqStore, rep, ip, iend - ip);
620
}
621
622