Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/lzma/src/LzFind.c
4253 views
1
/* LzFind.c -- Match finder for LZ algorithms
2
2024-03-01 : Igor Pavlov : Public domain */
3
4
#include "Precomp.h"
5
6
#include <string.h>
7
// #include <stdio.h>
8
9
#include "CpuArch.h"
10
#include "LzFind.h"
11
#include "LzHash.h"
12
13
#define kBlockMoveAlign (1 << 7) // alignment for memmove()
14
#define kBlockSizeAlign (1 << 16) // alignment for block allocation
15
#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictionary
16
17
#define kEmptyHashValue 0
18
19
#define kMaxValForNormalize ((UInt32)0)
20
// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
21
22
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
23
24
#define GET_AVAIL_BYTES(p) \
25
Inline_MatchFinder_GetNumAvailableBytes(p)
26
27
28
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
29
#define kFix5HashSize kFix4HashSize
30
31
/*
32
HASH2_CALC:
33
if (hv) match, then cur[0] and cur[1] also match
34
*/
35
#define HASH2_CALC hv = GetUi16(cur);
36
37
// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
38
39
/*
40
HASH3_CALC:
41
if (cur[0]) and (h2) match, then cur[1] also match
42
if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
43
*/
44
#define HASH3_CALC { \
45
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
46
h2 = temp & (kHash2Size - 1); \
47
hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
48
49
#define HASH4_CALC { \
50
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
51
h2 = temp & (kHash2Size - 1); \
52
temp ^= ((UInt32)cur[2] << 8); \
53
h3 = temp & (kHash3Size - 1); \
54
hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
55
56
#define HASH5_CALC { \
57
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
58
h2 = temp & (kHash2Size - 1); \
59
temp ^= ((UInt32)cur[2] << 8); \
60
h3 = temp & (kHash3Size - 1); \
61
temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
62
/* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
63
hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
64
65
#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
66
67
68
/* Free the sliding input-window buffer and reset the pointer.
   The buffer is freed unconditionally (the old (!p->directInput) guard is
   commented out); in directInput mode bufBase is presumably NULL already —
   ISzAlloc_Free on NULL is expected to be a no-op (TODO confirm in allocator). */
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
{
  // if (!p->directInput)
  {
    ISzAlloc_Free(alloc, p->bufBase);
    p->bufBase = NULL;
  }
}
76
77
78
/* (Re)allocate the input-window buffer of (blockSize) bytes.
   Reuses the existing buffer when the requested size is unchanged.
   Returns nonzero on success (p->bufBase != NULL), 0 on failure or
   when blockSize == 0. */
static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
{
  if (blockSize == 0)
    return 0;
  if (!p->bufBase || p->blockSize != blockSize)
  {
    // size_t blockSizeT;
    LzInWindow_Free(p, alloc);
    p->blockSize = blockSize;
    // blockSizeT = blockSize;

    // printf("\nblockSize = 0x%x\n", blockSize);
    /*
    #if defined _WIN64
    // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
    // we use UInt32 type for (p->blockSize), because
    // we don't want to wrap over 4 GiB,
    // when we use (p->streamPos - p->pos) that is UInt32.
    if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
    {
      blockSizeT = ((size_t)1 << 32);
      printf("\nchanged to blockSizeT = 4GiB\n");
    }
    #endif
    */

    p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
    // printf("\nbufferBase = %p\n", p->bufBase);
    // return 0; // for debug
  }
  return (p->bufBase != NULL);
}
110
111
/* IMatchFinder vtable callback: return the read pointer at the current
   position inside the input window. */
static const Byte *MatchFinder_GetPointerToCurrentPos(void *p)
{
  return ((CMatchFinder *)p)->buffer;
}
115
116
/* IMatchFinder vtable callback: number of bytes available after the current
   position (streamPos - pos, via the GET_AVAIL_BYTES macro). */
static UInt32 MatchFinder_GetNumAvailableBytes(void *p)
{
  return GET_AVAIL_BYTES((CMatchFinder *)p);
}
120
121
122
/* Fill the input window from the stream until more than (keepSizeAfter)
   bytes are available, the stream ends, or an error occurs.
   In directInput mode no copying happens: only the position counters are
   advanced over the caller-provided buffer. */
Z7_NO_INLINE
static void MatchFinder_ReadBlock(CMatchFinder *p)
{
  if (p->streamEndWasReached || p->result != SZ_OK)
    return;

  /* We use (p->streamPos - p->pos) value.
     (p->streamPos < p->pos) is allowed. */

  if (p->directInput)
  {
    /* advance over the external buffer; cap so that
       (streamPos - pos) never exceeds 0xFFFFFFFF */
    UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
    if (curSize > p->directInputRem)
      curSize = (UInt32)p->directInputRem;
    p->streamPos += curSize;
    p->directInputRem -= curSize;
    if (p->directInputRem == 0)
      p->streamEndWasReached = 1;
    return;
  }

  for (;;)
  {
    const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
    size_t size = (size_t)(p->bufBase + p->blockSize - dest);
    if (size == 0)
    {
      /* we call ReadBlock() after NeedMove() and MoveBlock().
         NeedMove() and MoveBlock() provide more than (keepSizeAfter)
         to the end of (blockSize).
         So we don't execute this branch in normal code flow.
         We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
      */
      // p->result = SZ_ERROR_FAIL; // we can show error here
      return;
    }

    // #define kRead 3
    // if (size > kRead) size = kRead; // for debug

    /*
    // we need cast (Byte *)dest.
    #ifdef __clang__
    #pragma GCC diagnostic ignored "-Wcast-qual"
    #endif
    */
    // (p->bufBase + (dest - p->bufBase)) == dest, but avoids the const cast
    p->result = ISeqInStream_Read(p->stream,
        p->bufBase + (dest - p->bufBase), &size);
    if (p->result != SZ_OK)
      return;
    if (size == 0)
    {
      p->streamEndWasReached = 1;
      return;
    }
    p->streamPos += (UInt32)size;
    if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
      return;
    /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function
       (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
  }

  // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
}
186
187
188
189
/* Slide the window: move the still-needed tail (keepSizeBefore history plus
   the available bytes) to the front of bufBase, so ReadBlock() regains space.
   The source offset is rounded down to kBlockMoveAlign; the remainder is kept
   in front of the history, which preserves the buffer's alignment phase. */
Z7_NO_INLINE
void MatchFinder_MoveBlock(CMatchFinder *p)
{
  const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
  const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
  p->buffer = p->bufBase + keepBefore;
  memmove(p->bufBase,
      p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
      keepBefore + (size_t)GET_AVAIL_BYTES(p));
}
199
200
/* We call MoveBlock() before ReadBlock().
201
So MoveBlock() can be wasteful operation, if the whole input data
202
can fit in current block even without calling MoveBlock().
203
in important case where (dataSize <= historySize)
204
condition (p->blockSize > dataSize + p->keepSizeAfter) is met
205
So there is no MoveBlock() in that case.
206
*/
207
208
/* Return nonzero when the window must be slid (MoveBlock) before reading:
   i.e. less than or equal to keepSizeAfter bytes of raw space remain at the
   end of the block. Never needed for directInput or after stream end/error. */
int MatchFinder_NeedMove(CMatchFinder *p)
{
  if (p->directInput)
    return 0;
  if (p->streamEndWasReached || p->result != SZ_OK)
    return 0;
  return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
}
216
217
/* Read more input only when the available bytes dropped to keepSizeAfter
   or below. */
void MatchFinder_ReadIfRequired(CMatchFinder *p)
{
  if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
    MatchFinder_ReadBlock(p);
}
222
223
224
225
/* Default tuning: bt4 match finder (binary tree, 4 hash bytes),
   cut value 32, automatic hash size (numHashOutBits == 0), small hash. */
static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
{
  p->cutValue = 32;
  p->btMode = 1;
  p->numHashBytes = 4;
  p->numHashBytes_Min = 2;
  p->numHashOutBits = 0;
  p->bigHash = 0;
}
234
235
#define kCrcPoly 0xEDB88320
236
237
/* One-time construction: null out pointers, install default settings,
   and build the 256-entry reflected CRC-32 table (poly kCrcPoly) that the
   HASH*_CALC macros use. Does not allocate; see MatchFinder_Create(). */
void MatchFinder_Construct(CMatchFinder *p)
{
  p->buffer = NULL;
  p->bufBase = NULL;
  p->directInput = 0;
  p->stream = NULL;
  p->hash = NULL;
  p->expectedDataSize = (UInt64)(Int64)-1;
  MatchFinder_SetDefaultSettings(p);

  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 r = (UInt32)i;
      unsigned j;
      for (j = 0; j < 8; j++)
      {
        /* standard reflected CRC-32 bit step */
        if (r & 1)
          r = (r >> 1) ^ kCrcPoly;
        else
          r >>= 1;
      }
      p->crc[i] = r;
    }
  }
}
257
258
#undef kCrcPoly
259
260
/* Free the hash/son reference array owned by this class (p->son points into
   the same allocation, so only p->hash is freed). */
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
{
  ISzAlloc_Free(alloc, p->hash);
  p->hash = NULL;
}
265
266
/* Release everything MatchFinder_Create() allocated: the hash/son arrays
   and the input-window buffer. */
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
{
  MatchFinder_FreeThisClassMemory(p, alloc);
  LzInWindow_Free(p, alloc);
}
271
272
/* Allocate (num) CLzRef entries, returning NULL if num * sizeof(CLzRef)
   would overflow size_t (checked by dividing back). */
static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
{
  const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
  if (sizeInBytes / sizeof(CLzRef) != num)
    return NULL;
  return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
}
279
280
#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
281
#error Stop_Compiling_Bad_Reserve
282
#endif
283
284
285
286
/* Compute the input-window block size for the given history size:
   keepSizeBefore + keepSizeAfter plus a reserve (~1/2 or ~1/4 of that, to
   reduce MoveBlock frequency), rounded to kBlockSizeAlign.
   All arithmetic is UInt32; returns 0 on any 32-bit overflow or when the
   settings would leave too little reserve (rejected as too slow). */
static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
{
  UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
  /*
  if (historySize > kMaxHistorySize)
    return 0;
  */
  // printf("\nhistorySize == 0x%x\n", historySize);

  if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
    return 0;

  {
    const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
    const UInt32 rem = kBlockSizeMax - blockSize;
    const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
        + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
    if (blockSize >= kBlockSizeMax
        || rem < kBlockSizeReserveMin) // we reject settings that will be slow
      return 0;
    if (reserve >= rem)
      blockSize = kBlockSizeMax;
    else
    {
      blockSize += reserve;
      blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
    }
  }
  // printf("\n LzFind_blockSize = %x\n", blockSize);
  // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
  return blockSize;
}
318
319
320
// input is historySize
321
/* Derive the main hash mask from a size hint (historySize or expected data
   size): round (hs) up to (2^k - 1), clamp to 24 bits for 3-byte hashing,
   force at least 16 low bits set, and for bt5/hc5 guarantee the minimum
   recommended size. Variant used when numHashOutBits != 0 (no halving). */
static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
{
  const UInt32 numHashBytes = p->numHashBytes;
  if (numHashBytes == 2)
    return (1 << 16) - 1;
  if (hs != 0)
    hs--;
  /* smear the highest set bit downward: hs becomes (2^k - 1) */
  {
    unsigned shift;
    for (shift = 1; shift <= 8; shift <<= 1)
      hs |= hs >> shift;
  }
  // we propagated 16 bits in (hs). Low 16 bits must be set later
  if (hs >= (1 << 24) && numHashBytes == 3)
    hs = (1 << 24) - 1;
  /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
  // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
  hs |= (1 << 16) - 1; /* don't change it! */
  // bt5: we adjust the size with recommended minimum size
  if (numHashBytes >= 5)
    hs |= (256 << kLzHash_CrcShift_2) - 1;
  return hs;
}
345
346
// input is historySize
347
/* Derive the main hash mask from a size hint (historySize or expected data
   size). Same shape as MatchFinder_GetHashMask2(), but halves the
   propagated value once (and once more above 2^24 for numHashBytes > 3),
   giving a smaller default hash table. Used when numHashOutBits == 0. */
static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
{
  if (p->numHashBytes == 2)
    return (1 << 16) - 1;
  if (hs != 0)
    hs--;
  hs |= (hs >> 1);
  hs |= (hs >> 2);
  hs |= (hs >> 4);
  hs |= (hs >> 8);
  // we propagated 16 bits in (hs). Low 16 bits must be set later
  hs >>= 1;
  if (hs >= (1 << 24))
  {
    if (p->numHashBytes == 3)
      hs = (1 << 24) - 1;
    else
      hs >>= 1;
    /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
  }
  // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
  hs |= (1 << 16) - 1; /* don't change it! */
  // bt5: we adjust the size with recommended minimum size
  if (p->numHashBytes >= 5)
    hs |= (256 << kLzHash_CrcShift_2) - 1;
  return hs;
}
374
375
376
/* Allocate/configure the match finder for (historySize) plus the extra
   before/after margins. Computes keepSizeBefore/After, the window block,
   the hash mask (full mask in (hs), possibly reduced current mask in
   (hsCur) when expectedDataSize < historySize), the fixed low-order hash
   tables, and one combined hash+son CLzRef array.
   Returns 1 on success, 0 on failure (everything freed). */
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
    ISzAllocPtr alloc)
{
  /* we need one additional byte in (p->keepSizeBefore),
     since we use MoveBlock() after (p->pos++) and before dictionary using */
  // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
  p->keepSizeBefore = historySize + keepAddBufferBefore + 1;

  keepAddBufferAfter += matchMaxLen;
  /* we need (p->keepSizeAfter >= p->numHashBytes) */
  if (keepAddBufferAfter < p->numHashBytes)
    keepAddBufferAfter = p->numHashBytes;
  // keepAddBufferAfter -= 2; // for debug
  p->keepSizeAfter = keepAddBufferAfter;

  if (p->directInput)
    p->blockSize = 0;
  if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
  {
    size_t hashSizeSum;
    {
      UInt32 hs;    // full hash mask (table is sized from this)
      UInt32 hsCur; // mask actually used now (may be smaller for small data)

      if (p->numHashOutBits != 0)
      {
        /* explicit hash size requested: clamp to the bits the hash
           function can produce, then to the history-derived mask */
        unsigned numBits = p->numHashOutBits;
        const unsigned nbMax =
            (p->numHashBytes == 2 ? 16 :
            (p->numHashBytes == 3 ? 24 : 32));
        if (numBits > nbMax)
          numBits = nbMax;
        if (numBits >= 32)
          hs = (UInt32)0 - 1;
        else
          hs = ((UInt32)1 << numBits) - 1;
        // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
        hs |= (1 << 16) - 1; /* don't change it! */
        if (p->numHashBytes >= 5)
          hs |= (256 << kLzHash_CrcShift_2) - 1;
        {
          const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
          if (hs > hs2)
            hs = hs2;
        }
        hsCur = hs;
        if (p->expectedDataSize < historySize)
        {
          const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
          if (hsCur > hs2)
            hsCur = hs2;
        }
      }
      else
      {
        hs = MatchFinder_GetHashMask(p, historySize);
        hsCur = hs;
        if (p->expectedDataSize < historySize)
        {
          hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
          if (hsCur > hs) // is it possible?
            hsCur = hs;
        }
      }

      p->hashMask = hsCur;

      hashSizeSum = hs;
      hashSizeSum++;
      if (hashSizeSum < hs) // overflow of (mask + 1)
        return 0;
      {
        /* small fixed tables for the 2- and 3-byte hashes, laid out in
           front of the main hash table */
        UInt32 fixedHashSize = 0;
        if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
        if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
        // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
        hashSizeSum += fixedHashSize;
        p->fixedHashSize = fixedHashSize;
      }
    }

    p->matchMaxLen = matchMaxLen;

    {
      size_t newSize;
      size_t numSons;
      const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
      p->historySize = historySize;
      p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)

      numSons = newCyclicBufferSize;
      if (p->btMode)
        numSons <<= 1; // binary tree: two links per position
      newSize = hashSizeSum + numSons;

      if (numSons < newCyclicBufferSize || newSize < numSons) // size_t overflow
        return 0;

      // aligned size is not required here, but it can be better for some loops
      #define NUM_REFS_ALIGN_MASK 0xF
      newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;

      // 22.02: we don't reallocate buffer, if old size is enough
      if (p->hash && p->numRefs >= newSize)
        return 1;

      MatchFinder_FreeThisClassMemory(p, alloc);
      p->numRefs = newSize;
      p->hash = AllocRefs(newSize, alloc);

      if (p->hash)
      {
        p->son = p->hash + hashSizeSum; // son array follows all hash tables
        return 1;
      }
    }
  }

  MatchFinder_Free(p, alloc);
  return 0;
}
498
499
500
/* Recompute (p->lenLimit) and (p->posLimit): how many positions can be
   processed before the next MatchFinder_CheckLimits() call. The step count
   (n) is the minimum of: distance to the normalization point, distance to
   the cyclic-buffer wrap, and the available-bytes budget (k). */
static void MatchFinder_SetLimits(CMatchFinder *p)
{
  UInt32 k;
  UInt32 n = kMaxValForNormalize - p->pos;
  if (n == 0)
    n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)

  k = p->cyclicBufferSize - p->cyclicBufferPos;
  if (k < n)
    n = k;

  k = GET_AVAIL_BYTES(p);
  {
    const UInt32 ksa = p->keepSizeAfter;
    UInt32 mm = p->matchMaxLen;
    if (k > ksa)
      k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
    else if (k >= mm)
    {
      // the limitation for (p->lenLimit) update
      k -= mm; // optimization : to reduce the number of checks
      k++;
      // k = 1; // non-optimized version : for debug
    }
    else
    {
      // near stream end: matches can only be as long as what is left
      mm = k;
      if (k != 0)
        k = 1;
    }
    p->lenLimit = mm;
  }
  if (k < n)
    n = k;

  p->posLimit = p->pos + n;
}
537
538
539
/* Reset the fixed (2/3-byte) hash tables to kEmptyHashValue (the
   NO-VALUE marker, 0). */
void MatchFinder_Init_LowHash(CMatchFinder *p)
{
  CLzRef *item = p->hash;
  CLzRef *const end = item + p->fixedHashSize;
  while (item != end)
    *item++ = kEmptyHashValue;
}
547
548
549
/* Reset the main hash table (hashMask + 1 entries, located after the
   fixed tables) to kEmptyHashValue (0). */
void MatchFinder_Init_HighHash(CMatchFinder *p)
{
  CLzRef *item = p->hash + p->fixedHashSize;
  CLzRef *const end = item + ((size_t)p->hashMask + 1);
  while (item != end)
    *item++ = kEmptyHashValue;
}
557
558
559
/* Reset stream/window state: rewind the buffer pointer (unless the caller
   owns it in directInput mode) and set pos/streamPos to 1. */
void MatchFinder_Init_4(CMatchFinder *p)
{
  if (!p->directInput)
    p->buffer = p->bufBase;
  {
    /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
       the code in CMatchFinderMt expects (pos = 1) */
    p->pos =
    p->streamPos =
        1; // it's smallest optimal value. do not change it
        // 0; // for debug
  }
  p->result = SZ_OK;
  p->streamEndWasReached = 0;
}
574
575
576
// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
577
#define CYC_TO_POS_OFFSET 0
578
// #define CYC_TO_POS_OFFSET 1 // for debug
579
580
/* Full (re)initialization: clear both hash regions, reset positions,
   prime the window with the first read, and compute the initial limits. */
void MatchFinder_Init(void *_p)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  MatchFinder_Init_HighHash(p);
  MatchFinder_Init_LowHash(p);
  MatchFinder_Init_4(p);
  // if (readData)
  MatchFinder_ReadBlock(p);

  /* if we init (cyclicBufferPos = pos), then we can use one variable
     instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
  // p->cyclicBufferPos = 0; // smallest value
  // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
  MatchFinder_SetLimits(p);
}
596
597
598
599
#ifdef MY_CPU_X86_OR_AMD64
600
#if defined(__clang__) && (__clang_major__ >= 4) \
601
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
602
// || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
603
604
#define USE_LZFIND_SATUR_SUB_128
605
#define USE_LZFIND_SATUR_SUB_256
606
#define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
607
#define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
608
#elif defined(_MSC_VER)
609
#if (_MSC_VER >= 1600)
610
#define USE_LZFIND_SATUR_SUB_128
611
#endif
612
#if (_MSC_VER >= 1900)
613
#define USE_LZFIND_SATUR_SUB_256
614
#endif
615
#endif
616
617
#elif defined(MY_CPU_ARM64) \
618
/* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */
619
620
#if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
621
|| defined(__GNUC__) && (__GNUC__ >= 6)
622
#define USE_LZFIND_SATUR_SUB_128
623
#ifdef MY_CPU_ARM64
624
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
625
#else
626
#define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon")))
627
#endif
628
629
#elif defined(_MSC_VER)
630
#if (_MSC_VER >= 1910)
631
#define USE_LZFIND_SATUR_SUB_128
632
#endif
633
#endif
634
635
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
636
#include <arm64_neon.h>
637
#else
638
#include <arm_neon.h>
639
#endif
640
641
#endif
642
643
644
#ifdef USE_LZFIND_SATUR_SUB_128
645
646
// #define Z7_SHOW_HW_STATUS
647
648
#ifdef Z7_SHOW_HW_STATUS
649
#include <stdio.h>
650
#define PRF(x) x
651
PRF(;)
652
#else
653
#define PRF(x)
654
#endif
655
656
657
#ifdef MY_CPU_ARM_OR_ARM64
658
659
#ifdef MY_CPU_ARM64
660
// #define FORCE_LZFIND_SATUR_SUB_128
661
#endif
662
typedef uint32x4_t LzFind_v128;
663
#define SASUB_128_V(v, s) \
664
vsubq_u32(vmaxq_u32(v, s), s)
665
666
#else // MY_CPU_ARM_OR_ARM64
667
668
#include <smmintrin.h> // sse4.1
669
670
typedef __m128i LzFind_v128;
671
// SSE 4.1
672
#define SASUB_128_V(v, s) \
673
_mm_sub_epi32(_mm_max_epu32(v, s), s)
674
675
#endif // MY_CPU_ARM_OR_ARM64
676
677
678
#define SASUB_128(i) \
679
*( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
680
*(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
681
682
683
/* 128-bit SIMD saturating subtract over [items, lim): each UInt32 becomes
   max(v, subValue) - subValue, i.e. v - subValue clamped at 0 — exactly
   the scalar SASUB_32 semantics. Processes 16 refs per iteration; the
   caller guarantees (lim - items) is a multiple of 16 and proper alignment
   (see MatchFinder_Normalize3). NEON on ARM, SSE4.1 on x86. */
Z7_NO_INLINE
static
#ifdef LZFIND_ATTRIB_SSE41
LZFIND_ATTRIB_SSE41
#endif
void
Z7_FASTCALL
LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
  const LzFind_v128 sub2 =
  #ifdef MY_CPU_ARM_OR_ARM64
      vdupq_n_u32(subValue);
  #else
      _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
  #endif
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  do
  {
    SASUB_128(0)  SASUB_128(1)  items += 2 * 4;
    SASUB_128(0)  SASUB_128(1)  items += 2 * 4;
  }
  while (items != lim);
}
706
707
708
709
#ifdef USE_LZFIND_SATUR_SUB_256
710
711
#include <immintrin.h> // avx
712
/*
713
clang :immintrin.h uses
714
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
715
defined(__AVX2__)
716
#include <avx2intrin.h>
717
#endif
718
so we need <avxintrin.h> for clang-cl */
719
720
#if defined(__clang__)
721
#include <avxintrin.h>
722
#include <avx2intrin.h>
723
#endif
724
725
// AVX2:
726
#define SASUB_256(i) \
727
*( __m256i *)( void *)(items + (i) * 8) = \
728
_mm256_sub_epi32(_mm256_max_epu32( \
729
*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
730
731
/* AVX2 variant of the saturating subtract: 8 refs per vector, 32 per
   iteration. Same clamp-at-zero semantics as SASUB_32 / SaturSub_128. */
Z7_NO_INLINE
static
#ifdef LZFIND_ATTRIB_AVX2
LZFIND_ATTRIB_AVX2
#endif
void
Z7_FASTCALL
LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
  const __m256i sub2 = _mm256_set_epi32(
      (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
      (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  do
  {
    SASUB_256(0)  SASUB_256(1)  items += 2 * 8;
    SASUB_256(0)  SASUB_256(1)  items += 2 * 8;
  }
  while (items != lim);
}
751
#endif // USE_LZFIND_SATUR_SUB_256
752
753
#ifndef FORCE_LZFIND_SATUR_SUB_128
754
typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
755
UInt32 subValue, CLzRef *items, const CLzRef *lim);
756
static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
757
#endif // FORCE_LZFIND_SATUR_SUB_128
758
759
#endif // USE_LZFIND_SATUR_SUB_128
760
761
762
// kEmptyHashValue must be zero
763
// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
764
#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
765
766
#ifdef FORCE_LZFIND_SATUR_SUB_128
767
768
#define DEFAULT_SaturSub LzFind_SaturSub_128
769
770
#else
771
772
#define DEFAULT_SaturSub LzFind_SaturSub_32
773
774
/* Scalar fallback for the saturating subtract: v = max(v, subValue) -
   subValue for every ref in [items, lim). Unrolled 8x; the caller
   guarantees the range length is a multiple of the unroll factor. */
Z7_NO_INLINE
static
void
Z7_FASTCALL
LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  do
  {
    SASUB_32(0)  SASUB_32(1)  items += 2;
    SASUB_32(0)  SASUB_32(1)  items += 2;
    SASUB_32(0)  SASUB_32(1)  items += 2;
    SASUB_32(0)  SASUB_32(1)  items += 2;
  }
  while (items != lim);
}
790
791
#endif
792
793
794
/* Normalize (numItems) refs: subtract subValue with clamping at 0
   (kEmptyHashValue). Structure: a scalar prologue until (items) reaches a
   LZFIND_NORM_ALIGN_BLOCK_SIZE boundary, a bulk pass over whole aligned
   groups (runtime-dispatched SIMD when available, scalar otherwise), and a
   scalar epilogue for the remainder. */
Z7_NO_INLINE
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
{
  #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  // prologue: advance one ref at a time until the pointer is block-aligned
  for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
  {
    SASUB_32(0)
    items++;
  }
  {
    // bulk: whole groups of (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4) refs
    const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
    CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
    numItems &= k_Align_Mask;
    if (items != lim)
    {
      #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
      if (g_LzFind_SaturSub)
        g_LzFind_SaturSub(subValue, items, lim);
      else
      #endif
        DEFAULT_SaturSub(subValue, items, lim);
    }
    items = lim;
  }
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  // epilogue: leftover refs
  for (; numItems != 0; numItems--)
  {
    SASUB_32(0)
    items++;
  }
}
826
827
828
829
// call MatchFinder_CheckLimits() only after (p->pos++) update
830
831
/* Called when (p->pos) reaches (p->posLimit), i.e. after (p->pos++):
   refill the window (sliding it first if needed), normalize all stored
   positions when (pos) hits kMaxValForNormalize, wrap the cyclic buffer
   position, then recompute the limits. */
Z7_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p)
{
  if (// !p->streamEndWasReached && p->result == SZ_OK &&
      p->keepSizeAfter == GET_AVAIL_BYTES(p))
  {
    // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
    if (MatchFinder_NeedMove(p))
      MatchFinder_MoveBlock(p);
    MatchFinder_ReadBlock(p);
  }

  if (p->pos == kMaxValForNormalize)
    if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
      /*
         if we disable normalization for last bytes of data, and
         if (data_size == 4 GiB), we don't call wasteful normalization,
         but (pos) will be wrapped over Zero (0) in that case.
         And we cannot resume later to normal operation
      */
    {
      // MatchFinder_Normalize(p);
      /* after normalization we need (p->pos >= p->historySize + 1); */
      /* we can reduce subValue to aligned value, if want to keep alignment
         of (p->pos) and (p->buffer) for speculated accesses. */
      const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
      // const UInt32 subValue = (1 << 15); // for debug
      // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
      MatchFinder_REDUCE_OFFSETS(p, subValue)
      // normalize all hash tables, then the son links
      MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
      {
        size_t numSonRefs = p->cyclicBufferSize;
        if (p->btMode)
          numSonRefs <<= 1;
        MatchFinder_Normalize3(subValue, p->son, numSonRefs);
      }
    }

  if (p->cyclicBufferPos == p->cyclicBufferSize)
    p->cyclicBufferPos = 0;

  MatchFinder_SetLimits(p);
}
874
875
876
/*
877
(lenLimit > maxLen)
878
*/
879
/*
(lenLimit > maxLen)
Hash-chain match search: walk the single-linked chain starting at curMatch,
record each strictly longer match as a (len, dist-1) pair in (d), and stop
on an empty ref, a delta outside the cyclic buffer, chain exhaustion
(cutValue), or a match of full lenLimit. Returns the advanced (d).
*/
Z7_FORCE_INLINE
static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
    UInt32 *d, unsigned maxLen)
{
  /* reference (non-optimized) version of the loop below:
  son[_cyclicBufferPos] = curMatch;
  for (;;)
  {
    UInt32 delta = pos - curMatch;
    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
      return d;
    {
      const Byte *pb = cur - delta;
      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
      if (pb[maxLen] == cur[maxLen] && *pb == *cur)
      {
        UInt32 len = 0;
        while (++len != lenLimit)
          if (pb[len] != cur[len])
            break;
        if (maxLen < len)
        {
          maxLen = len;
          *d++ = len;
          *d++ = delta - 1;
          if (len == lenLimit)
            return d;
        }
      }
    }
  }
  */

  const Byte *lim = cur + lenLimit;
  son[_cyclicBufferPos] = curMatch; // link current position into the chain

  do
  {
    UInt32 delta;

    if (curMatch == 0)
      break; // kEmptyHashValue: end of chain
    // if (curMatch2 >= curMatch) return NULL;
    delta = pos - curMatch;
    if (delta >= _cyclicBufferSize)
      break; // candidate fell out of the window
    {
      ptrdiff_t diff;
      // follow the chain before comparing bytes
      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
      diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
      // quick reject: candidate must at least extend the current best
      if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
      {
        const Byte *c = cur;
        while (*c == c[diff])
        {
          if (++c == lim)
          {
            // full-length match: record and stop searching
            d[0] = (UInt32)(lim - cur);
            d[1] = delta - 1;
            return d + 2;
          }
        }
        {
          const unsigned len = (unsigned)(c - cur);
          if (maxLen < len)
          {
            maxLen = len;
            d[0] = (UInt32)len;
            d[1] = delta - 1;
            d += 2;
          }
        }
      }
    }
  }
  while (--cutValue);

  return d;
}
959
960
961
/* Binary-tree match search with tree update: descend the tree rooted at the
   two son[] slots for _cyclicBufferPos, recording each strictly longer
   match as (len, dist-1) in (d), while re-linking nodes so the tree stays
   ordered by suffix. (len0/len1) track the guaranteed common prefix on the
   smaller/greater subtree paths. Stops at cutValue, a full lenLimit match,
   or when curMatch falls at/below cmCheck (out of window). Returns the
   advanced (d). */
Z7_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
    UInt32 *d, UInt32 maxLen)
{
  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; // "greater" child slot
  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);     // "smaller" child slot
  unsigned len0 = 0, len1 = 0;

  UInt32 cmCheck;

  // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }

  // lowest position still inside the window
  cmCheck = (UInt32)(pos - _cyclicBufferSize);
  if ((UInt32)pos <= _cyclicBufferSize)
    cmCheck = 0;

  if (cmCheck < curMatch)
  do
  {
    const UInt32 delta = pos - curMatch;
    {
      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
      const Byte *pb = cur - delta;
      // both paths share at least min(len0, len1) bytes with cur
      unsigned len = (len0 < len1 ? len0 : len1);
      const UInt32 pair0 = pair[0];
      if (pb[len] == cur[len])
      {
        if (++len != lenLimit && pb[len] == cur[len])
          while (++len != lenLimit)
            if (pb[len] != cur[len])
              break;
        if (maxLen < len)
        {
          maxLen = (UInt32)len;
          *d++ = (UInt32)len;
          *d++ = delta - 1;
          if (len == lenLimit)
          {
            // best possible match: adopt this node's children and stop
            *ptr1 = pair0;
            *ptr0 = pair[1];
            return d;
          }
        }
      }
      if (pb[len] < cur[len])
      {
        // candidate suffix is smaller: link it on the smaller side, descend right
        *ptr1 = curMatch;
        // const UInt32 curMatch2 = pair[1];
        // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
        // curMatch = curMatch2;
        curMatch = pair[1];
        ptr1 = pair + 1;
        len1 = len;
      }
      else
      {
        // candidate suffix is greater: link it on the greater side, descend left
        *ptr0 = curMatch;
        curMatch = pair[0];
        ptr0 = pair;
        len0 = len;
      }
    }
  }
  while (--cutValue && cmCheck < curMatch);

  // terminate both dangling subtree slots
  *ptr0 = *ptr1 = kEmptyHashValue;
  return d;
}
1030
1031
1032
/* Same binary-tree descent and re-linking as GetMatchesSpec1(), but only
   updates the tree — no matches are reported. Used when positions are
   skipped without emitting match candidates. */
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
{
  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; // "greater" child slot
  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);     // "smaller" child slot
  unsigned len0 = 0, len1 = 0;

  UInt32 cmCheck;

  // lowest position still inside the window
  cmCheck = (UInt32)(pos - _cyclicBufferSize);
  if ((UInt32)pos <= _cyclicBufferSize)
    cmCheck = 0;

  if (// curMatch >= pos || // failure
      cmCheck < curMatch)
  do
  {
    const UInt32 delta = pos - curMatch;
    {
      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
      const Byte *pb = cur - delta;
      unsigned len = (len0 < len1 ? len0 : len1);
      if (pb[len] == cur[len])
      {
        while (++len != lenLimit)
          if (pb[len] != cur[len])
            break;
        {
          if (len == lenLimit)
          {
            // full match: adopt this node's children and stop
            *ptr1 = pair[0];
            *ptr0 = pair[1];
            return;
          }
        }
      }
      if (pb[len] < cur[len])
      {
        *ptr1 = curMatch;
        curMatch = pair[1];
        ptr1 = pair + 1;
        len1 = len;
      }
      else
      {
        *ptr0 = curMatch;
        curMatch = pair[0];
        ptr0 = pair;
        len0 = len;
      }
    }
  }
  while (--cutValue && cmCheck < curMatch);

  // terminate both dangling subtree slots
  *ptr0 = *ptr1 = kEmptyHashValue;
  return;
}
1089
1090
1091
#define MOVE_POS \
1092
p->cyclicBufferPos++; \
1093
p->buffer++; \
1094
{ const UInt32 pos1 = p->pos + 1; \
1095
p->pos = pos1; \
1096
if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
1097
1098
#define MOVE_POS_RET MOVE_POS return distances;
1099
1100
/* Advance one position without a hash/tree update (used when
   lenLimit < minLen near the end of data). */
Z7_NO_INLINE
static void MatchFinder_MovePos(CMatchFinder *p)
{
  /* we go here at the end of stream data, when (avail < num_hash_bytes)
     We don't update sons[cyclicBufferPos << btMode].
     So (sons) record will contain junk. And we cannot resume match searching
     to normal operation, even if we will provide more input data in buffer.
     p->sons[p->cyclicBufferPos << p->btMode] = 0;  // kEmptyHashValue
     if (p->btMode)
        p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0;  // kEmptyHashValue
  */
  MOVE_POS
}
1113
1114
/* GET_MATCHES_HEADER2: shared prologue of GetMatches/Skip functions.
   Declares (hv, cur, curMatch), loads lenLimit, and if fewer than (minLen)
   bytes are available at the current position, advances one byte and bails
   out via (ret_op) — no match search is possible there. */
#define GET_MATCHES_HEADER2(minLen, ret_op) \
  UInt32 hv; const Byte *cur; UInt32 curMatch; \
  UInt32 lenLimit = p->lenLimit; \
  if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \
  cur = p->buffer;

/* Prologue for GetMatches: on insufficient data, return the unchanged
   distances pointer (zero matches written). */
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)

/* Prologue for the binary-tree Skip functions: opens the per-position
   do-loop; on insufficient data just continue with the next position. */
#define SKIP_HEADER(minLen) \
  do { GET_MATCHES_HEADER2(minLen, continue)

/* Common argument list for GetMatchesSpec1 / Hc_GetMatchesSpec /
   SkipMatchesSpec. */
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \
  p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue

/* Epilogue for the binary-tree Skip functions: update the tree at the
   current position, advance one byte, and repeat (num) times. */
#define SKIP_FOOTER \
  SkipMatchesSpec(MF_PARAMS(p)); \
  MOVE_POS \
  } while (--num);

/* Epilogue for GetMatches: run the full search routine (func), which
   appends (len, dist - 1) pairs after the short-hash matches already
   recorded, then advance and return the new distances pointer.
   (_maxLen_) is the best match length found so far from the hash heads. */
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
  distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \
  MOVE_POS_RET

/* Binary-tree variant of the epilogue. */
#define GET_MATCHES_FOOTER_BT(_maxLen_) \
  GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)

/* Hash-chain variant of the epilogue. */
#define GET_MATCHES_FOOTER_HC(_maxLen_) \
  GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)

/* UPDATE_maxLen: extend the match at back-distance (d2) byte by byte,
   starting from offset (maxLen), stopping at lenLimit or first mismatch. */
#define UPDATE_maxLen { \
  const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
  const Byte *c = cur + maxLen; \
  const Byte *lim = cur + lenLimit; \
  for (; c != lim; c++) if (*(c + diff) != *c) break; \
  maxLen = (unsigned)(c - cur); }
1150
1151
/* Binary-tree match finder, 2-byte hash.
   Looks up / updates the single hash head for the 16-bit value at cur,
   then searches the tree with minimum useful length 1 (so any 2-byte
   match found by the tree search is reported). Returns the advanced
   distances write pointer. */
static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  GET_MATCHES_HEADER(2)
  HASH2_CALC
  curMatch = p->hash[hv];
  p->hash[hv] = p->pos;
  GET_MATCHES_FOOTER_BT(1)
}
1160
1161
/* Binary-tree match finder for the Zip/Deflate-style 3-byte hash
   (HASH_ZIP_CALC). Single hash table, minimum reported length 3
   (maxLen seed of 2 passed to the tree search). */
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  GET_MATCHES_HEADER(3)
  HASH_ZIP_CALC
  curMatch = p->hash[hv];
  p->hash[hv] = p->pos;
  GET_MATCHES_FOOTER_BT(2)
}
1169
1170
1171
/* SET_mmm: compute the maximum valid match distance at this position:
   bounded by the cyclic-buffer (dictionary) size and by the number of
   bytes processed so far (pos), whichever is smaller. */
#define SET_mmm \
  mmm = p->cyclicBufferSize; \
  if (pos < mmm) \
    mmm = pos;
1175
1176
1177
/* Binary-tree match finder, 3-byte main hash plus a 2-byte sub-hash.
   The 2-byte hash head gives a cheap candidate (d2) that the tree search
   would miss for short matches; if it matches, it is extended and emitted
   as a (len, dist - 1) pair before the tree search runs.
   If that candidate already reaches lenLimit, the tree is only updated
   (SkipMatchesSpec) and the function returns early. */
static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, d2, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(3)

  HASH3_CALC

  hash = p->hash;
  pos = p->pos;

  /* distance back to the previous position with the same 2-byte hash */
  d2 = pos - hash[h2];

  curMatch = (hash + kFix3HashSize)[hv];

  hash[h2] = pos;
  (hash + kFix3HashSize)[hv] = pos;

  SET_mmm

  maxLen = 2;

  if (d2 < mmm && *(cur - d2) == *cur)
  {
    /* 2-byte hash hit confirmed (first byte matches; hv equality implies
       the second — see HASH2/HASH3 comments above): extend and record */
    UPDATE_maxLen
    distances[0] = (UInt32)maxLen;
    distances[1] = d2 - 1;
    distances += 2;
    if (maxLen == lenLimit)
    {
      /* best possible length already found: just maintain the tree */
      SkipMatchesSpec(MF_PARAMS(p));
      MOVE_POS_RET
    }
  }

  GET_MATCHES_FOOTER_BT(maxLen)
}
1217
1218
1219
/* Binary-tree match finder, 4-byte main hash plus 2-byte and 3-byte
   sub-hashes. The short-hash heads supply cheap length-2/3 candidates
   (d2, d3) which are verified, extended and emitted before the tree
   search; the single-iteration for(;;) loop exists only so the candidate
   logic can bail out early with `break`. */
static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, h3, d2, d3, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(4)

  HASH4_CALC

  hash = p->hash;
  pos = p->pos;

  /* back-distances to the previous positions with equal 2-/3-byte hashes */
  d2 = pos - hash [h2];
  d3 = pos - (hash + kFix3HashSize)[h3];
  curMatch = (hash + kFix4HashSize)[hv];

  hash [h2] = pos;
  (hash + kFix3HashSize)[h3] = pos;
  (hash + kFix4HashSize)[hv] = pos;

  SET_mmm

  maxLen = 3;

  for (;;)
  {
    if (d2 < mmm && *(cur - d2) == *cur)
    {
      /* length-2 candidate confirmed */
      distances[0] = 2;
      distances[1] = d2 - 1;
      distances += 2;
      if (*(cur - d2 + 2) == cur[2])
      {
        /* same candidate also matches byte 2: keep d2 for extension.
           // distances[-2] = 3; */
      }
      else if (d3 < mmm && *(cur - d3) == *cur)
      {
        /* switch to the 3-byte-hash candidate for extension */
        d2 = d3;
        distances[1] = d3 - 1;
        distances += 2;
      }
      else
        break;
    }
    else if (d3 < mmm && *(cur - d3) == *cur)
    {
      d2 = d3;
      distances[1] = d3 - 1;
      distances += 2;
    }
    else
      break;

    /* extend the surviving candidate and patch its recorded length */
    UPDATE_maxLen
    distances[-2] = (UInt32)maxLen;
    if (maxLen == lenLimit)
    {
      /* cannot do better: maintain the tree and return early */
      SkipMatchesSpec(MF_PARAMS(p));
      MOVE_POS_RET
    }
    break;
  }

  GET_MATCHES_FOOTER_BT(maxLen)
}
1286
1287
1288
/* Binary-tree match finder, 5-byte main hash plus 2-byte and 3-byte
   sub-hashes (the 4-byte sub-hash is compiled out — see the commented
   h4 lines). Short-hash candidates are verified up to length 3 inline;
   byte 3 is checked explicitly before the full UPDATE_maxLen extension. */
static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, h3, d2, d3, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(5)

  HASH5_CALC

  hash = p->hash;
  pos = p->pos;

  d2 = pos - hash [h2];
  d3 = pos - (hash + kFix3HashSize)[h3];
  // d4 = pos - (hash + kFix4HashSize)[h4];

  curMatch = (hash + kFix5HashSize)[hv];

  hash [h2] = pos;
  (hash + kFix3HashSize)[h3] = pos;
  // (hash + kFix4HashSize)[h4] = pos;
  (hash + kFix5HashSize)[hv] = pos;

  SET_mmm

  maxLen = 4;

  for (;;)
  {
    if (d2 < mmm && *(cur - d2) == *cur)
    {
      /* length-2 candidate confirmed */
      distances[0] = 2;
      distances[1] = d2 - 1;
      distances += 2;
      if (*(cur - d2 + 2) == cur[2])
      {
        /* candidate also matches byte 2: keep d2 */
      }
      else if (d3 < mmm && *(cur - d3) == *cur)
      {
        /* switch to the 3-byte-hash candidate */
        distances[1] = d3 - 1;
        distances += 2;
        d2 = d3;
      }
      else
        break;
    }
    else if (d3 < mmm && *(cur - d3) == *cur)
    {
      distances[1] = d3 - 1;
      distances += 2;
      d2 = d3;
    }
    else
      break;

    /* candidate is at least length 3; stop here unless byte 3 matches too */
    distances[-2] = 3;
    if (*(cur - d2 + 3) != cur[3])
      break;
    UPDATE_maxLen
    distances[-2] = (UInt32)maxLen;
    if (maxLen == lenLimit)
    {
      SkipMatchesSpec(MF_PARAMS(p));
      MOVE_POS_RET
    }
    break;
  }

  GET_MATCHES_FOOTER_BT(maxLen)
}
1360
1361
1362
/* Hash-chain match finder, 4-byte main hash plus 2-/3-byte sub-hashes.
   Same short-hash candidate logic as Bt4_MatchFinder_GetMatches, but on
   early exit (maxLen == lenLimit) it only writes the chain link
   son[cyclicBufferPos] = curMatch instead of a tree update, and the
   final search uses Hc_GetMatchesSpec. */
static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, h3, d2, d3, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(4)

  HASH4_CALC

  hash = p->hash;
  pos = p->pos;

  d2 = pos - hash [h2];
  d3 = pos - (hash + kFix3HashSize)[h3];
  curMatch = (hash + kFix4HashSize)[hv];

  hash [h2] = pos;
  (hash + kFix3HashSize)[h3] = pos;
  (hash + kFix4HashSize)[hv] = pos;

  SET_mmm

  maxLen = 3;

  for (;;)
  {
    if (d2 < mmm && *(cur - d2) == *cur)
    {
      /* length-2 candidate confirmed */
      distances[0] = 2;
      distances[1] = d2 - 1;
      distances += 2;
      if (*(cur - d2 + 2) == cur[2])
      {
        /* candidate also matches byte 2: keep d2.
           // distances[-2] = 3; */
      }
      else if (d3 < mmm && *(cur - d3) == *cur)
      {
        /* switch to the 3-byte-hash candidate */
        d2 = d3;
        distances[1] = d3 - 1;
        distances += 2;
      }
      else
        break;
    }
    else if (d3 < mmm && *(cur - d3) == *cur)
    {
      d2 = d3;
      distances[1] = d3 - 1;
      distances += 2;
    }
    else
      break;

    UPDATE_maxLen
    distances[-2] = (UInt32)maxLen;
    if (maxLen == lenLimit)
    {
      /* best possible length found: just link the chain and return */
      p->son[p->cyclicBufferPos] = curMatch;
      MOVE_POS_RET
    }
    break;
  }

  GET_MATCHES_FOOTER_HC(maxLen)
}
1429
1430
1431
/* Hash-chain match finder, 5-byte main hash plus 2-/3-byte sub-hashes
   (4-byte sub-hash compiled out). Mirrors Bt5's candidate logic with the
   hash-chain early exit (link son[cyclicBufferPos] instead of a tree
   update) and Hc_GetMatchesSpec for the final search. */
static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  UInt32 mmm;
  UInt32 h2, h3, d2, d3, pos;
  unsigned maxLen;
  UInt32 *hash;
  GET_MATCHES_HEADER(5)

  HASH5_CALC

  hash = p->hash;
  pos = p->pos;

  d2 = pos - hash [h2];
  d3 = pos - (hash + kFix3HashSize)[h3];
  // d4 = pos - (hash + kFix4HashSize)[h4];

  curMatch = (hash + kFix5HashSize)[hv];

  hash [h2] = pos;
  (hash + kFix3HashSize)[h3] = pos;
  // (hash + kFix4HashSize)[h4] = pos;
  (hash + kFix5HashSize)[hv] = pos;

  SET_mmm

  maxLen = 4;

  for (;;)
  {
    if (d2 < mmm && *(cur - d2) == *cur)
    {
      /* length-2 candidate confirmed */
      distances[0] = 2;
      distances[1] = d2 - 1;
      distances += 2;
      if (*(cur - d2 + 2) == cur[2])
      {
        /* candidate also matches byte 2: keep d2 */
      }
      else if (d3 < mmm && *(cur - d3) == *cur)
      {
        /* switch to the 3-byte-hash candidate */
        distances[1] = d3 - 1;
        distances += 2;
        d2 = d3;
      }
      else
        break;
    }
    else if (d3 < mmm && *(cur - d3) == *cur)
    {
      distances[1] = d3 - 1;
      distances += 2;
      d2 = d3;
    }
    else
      break;

    /* candidate is at least length 3; require byte 3 before extending */
    distances[-2] = 3;
    if (*(cur - d2 + 3) != cur[3])
      break;
    UPDATE_maxLen
    distances[-2] = (UInt32)maxLen;
    if (maxLen == lenLimit)
    {
      /* best possible length found: just link the chain and return */
      p->son[p->cyclicBufferPos] = curMatch;
      MOVE_POS_RET
    }
    break;
  }

  GET_MATCHES_FOOTER_HC(maxLen)
}
1503
1504
1505
/* Hash-chain match finder for the Zip/Deflate-style 3-byte hash.
   Single hash table; minimum reported length 3 (maxLen seed of 2). */
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
  GET_MATCHES_HEADER(3)
  HASH_ZIP_CALC
  curMatch = p->hash[hv];
  p->hash[hv] = p->pos;
  GET_MATCHES_FOOTER_HC(2)
}
1513
1514
1515
/* Skip (num) positions for the BT2 finder: for each position, refresh the
   2-byte hash head and update the binary tree without collecting matches. */
static void Bt2_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  SKIP_HEADER(2)
  {
    HASH2_CALC
    curMatch = p->hash[hv];
    p->hash[hv] = p->pos;
  }
  SKIP_FOOTER
}
1526
1527
/* Skip (num) positions for the Zip-hash BT finder: refresh the hash head
   and update the binary tree at each position, collecting no matches. */
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  SKIP_HEADER(3)
  {
    HASH_ZIP_CALC
    curMatch = p->hash[hv];
    p->hash[hv] = p->pos;
  }
  SKIP_FOOTER
}
1537
1538
/* Skip (num) positions for the BT3 finder: refresh both the 2-byte and
   3-byte hash heads at each position and update the binary tree. */
static void Bt3_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  SKIP_HEADER(3)
  {
    UInt32 h2;
    UInt32 *hash;
    HASH3_CALC
    hash = p->hash;
    curMatch = (hash + kFix3HashSize)[hv];
    hash[h2] =
    (hash + kFix3HashSize)[hv] = p->pos;
  }
  SKIP_FOOTER
}
1553
1554
/* Skip (num) positions for the BT4 finder: refresh the 2-, 3- and 4-byte
   hash heads at each position and update the binary tree. */
static void Bt4_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  SKIP_HEADER(4)
  {
    UInt32 h2, h3;
    UInt32 *hash;
    HASH4_CALC
    hash = p->hash;
    curMatch = (hash + kFix4HashSize)[hv];
    hash [h2] =
    (hash + kFix3HashSize)[h3] =
    (hash + kFix4HashSize)[hv] = p->pos;
  }
  SKIP_FOOTER
}
1570
1571
/* Skip (num) positions for the BT5 finder: refresh the 2-, 3- and 5-byte
   hash heads (4-byte sub-hash compiled out) and update the binary tree. */
static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  SKIP_HEADER(5)
  {
    UInt32 h2, h3;
    UInt32 *hash;
    HASH5_CALC
    hash = p->hash;
    curMatch = (hash + kFix5HashSize)[hv];
    hash [h2] =
    (hash + kFix3HashSize)[h3] =
    // (hash + kFix4HashSize)[h4] =
    (hash + kFix5HashSize)[hv] = p->pos;
  }
  SKIP_FOOTER
}
1588
1589
1590
/* HC_SKIP_HEADER: prologue for hash-chain Skip functions.
   Unlike the BT skip loop, this processes positions in batches: (num2) is
   clipped to the bytes remaining before posLimit, so the inner loop runs
   with local copies of (cur, pos, son, hash) and no per-position limit
   checks. If fewer than (minLen) bytes are available, it falls back to
   MatchFinder_MovePos one position at a time. */
#define HC_SKIP_HEADER(minLen) \
  do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
    const Byte *cur; \
    UInt32 *hash; \
    UInt32 *son; \
    UInt32 pos = p->pos; \
    UInt32 num2 = num; \
    /* (p->pos == p->posLimit) is not allowed here !!! */ \
    { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
    num -= num2; \
    { const UInt32 cycPos = p->cyclicBufferPos; \
      son = p->son + cycPos; \
      p->cyclicBufferPos = cycPos + num2; } \
    cur = p->buffer; \
    hash = p->hash; \
    do { \
      UInt32 curMatch; \
      UInt32 hv;


/* HC_SKIP_FOOTER: closes the batched loop — stores the chain link for
   each position, then writes the advanced (cur, pos) back into the finder
   state and runs the limit check once per batch. The outer loop repeats
   until all (num) positions are consumed. */
#define HC_SKIP_FOOTER \
  cur++; pos++; *son++ = curMatch; \
  } while (--num2); \
  p->buffer = cur; \
  p->pos = pos; \
  if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
  }} while(num); \
1617
1618
1619
/* Skip (num) positions for the HC4 finder: per position, refresh the
   2-, 3- and 4-byte hash heads; the chain link itself is written by
   HC_SKIP_FOOTER. */
static void Hc4_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  HC_SKIP_HEADER(4)

    UInt32 h2, h3;
    HASH4_CALC
    curMatch = (hash + kFix4HashSize)[hv];
    hash [h2] =
    (hash + kFix3HashSize)[h3] =
    (hash + kFix4HashSize)[hv] = pos;

  HC_SKIP_FOOTER
}
1633
1634
1635
/* Skip (num) positions for the HC5 finder: per position, refresh the
   2-, 3- and 5-byte hash heads (4-byte sub-hash compiled out); the chain
   link is written by HC_SKIP_FOOTER. */
static void Hc5_MatchFinder_Skip(void *_p, UInt32 num)
{
  CMatchFinder *p = (CMatchFinder *)_p;
  HC_SKIP_HEADER(5)

    UInt32 h2, h3;
    HASH5_CALC
    curMatch = (hash + kFix5HashSize)[hv];
    hash [h2] =
    (hash + kFix3HashSize)[h3] =
    // (hash + kFix4HashSize)[h4] =
    (hash + kFix5HashSize)[hv] = pos;

  HC_SKIP_FOOTER
}
1650
1651
1652
/* Skip (num) positions for the Zip-hash HC finder: refresh the single
   hash head per position; the chain link is written by HC_SKIP_FOOTER. */
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
  HC_SKIP_HEADER(3)

    HASH_ZIP_CALC
    curMatch = hash[hv];
    hash[hv] = pos;

  HC_SKIP_FOOTER
}
1662
1663
1664
/* MatchFinder_CreateVTable: populate the IMatchFinder2 dispatch table with
   the GetMatches/Skip pair matching the finder's configuration.
   Hash-chain mode (btMode == 0) offers only the 4- and 5-byte variants;
   binary-tree mode selects by numHashBytes (2, 3, 4, or 5+). */
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
{
  vTable->Init = MatchFinder_Init;
  vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos;

  if (!p->btMode)
  {
    /* hash-chain mode: numHashBytes <= 4 selects HC4, otherwise HC5 */
    if (p->numHashBytes <= 4)
    {
      vTable->GetMatches = Hc4_MatchFinder_GetMatches;
      vTable->Skip = Hc4_MatchFinder_Skip;
    }
    else
    {
      vTable->GetMatches = Hc5_MatchFinder_GetMatches;
      vTable->Skip = Hc5_MatchFinder_Skip;
    }
    return;
  }

  /* binary-tree mode */
  switch (p->numHashBytes)
  {
    case 2:
      vTable->GetMatches = Bt2_MatchFinder_GetMatches;
      vTable->Skip = Bt2_MatchFinder_Skip;
      break;
    case 3:
      vTable->GetMatches = Bt3_MatchFinder_GetMatches;
      vTable->Skip = Bt3_MatchFinder_Skip;
      break;
    case 4:
      vTable->GetMatches = Bt4_MatchFinder_GetMatches;
      vTable->Skip = Bt4_MatchFinder_Skip;
      break;
    default:
      vTable->GetMatches = Bt5_MatchFinder_GetMatches;
      vTable->Skip = Bt5_MatchFinder_Skip;
      break;
  }
}
1703
1704
1705
1706
/* LzFindPrepare: one-time runtime selection of the vectorized
   saturating-subtract routine used during hash normalization.
   Probes CPU features and stores the best available implementation in
   g_LzFind_SaturSub: NEON on ARM/ARM64, otherwise SSE4.1 and — when
   USE_LZFIND_SATUR_SUB_256 is enabled — AVX2 on x86. If no vector
   support is detected, (f) stays NULL and the scalar path is used.
   With FORCE_LZFIND_SATUR_SUB_128 defined, the choice is made at
   compile time and this function is a no-op. */
void LzFindPrepare(void)
{
#ifndef FORCE_LZFIND_SATUR_SUB_128
#ifdef USE_LZFIND_SATUR_SUB_128
  LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
#ifdef MY_CPU_ARM_OR_ARM64
  {
    if (CPU_IsSupported_NEON())
    {
      // #pragma message ("=== LzFind NEON")
      PRF(printf("\n=== LzFind NEON\n"));
      f = LzFind_SaturSub_128;
    }
    // f = 0; // for debug
  }
#else // MY_CPU_ARM_OR_ARM64
  if (CPU_IsSupported_SSE41())
  {
    // #pragma message ("=== LzFind SSE41")
    PRF(printf("\n=== LzFind SSE41\n"));
    f = LzFind_SaturSub_128;

#ifdef USE_LZFIND_SATUR_SUB_256
    if (CPU_IsSupported_AVX2())
    {
      // #pragma message ("=== LzFind AVX2")
      PRF(printf("\n=== LzFind AVX2\n"));
      f = LzFind_SaturSub_256;
    }
#endif
  }
#endif // MY_CPU_ARM_OR_ARM64
  g_LzFind_SaturSub = f;
#endif // USE_LZFIND_SATUR_SUB_128
#endif // FORCE_LZFIND_SATUR_SUB_128
}
1742
1743
1744
#undef MOVE_POS
1745
#undef MOVE_POS_RET
1746
#undef PRF
1747
1748