Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/lzma/src/LzFindOpt.c
4253 views
1
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
2
2023-04-02 : Igor Pavlov : Public domain */
3
4
#include "Precomp.h"
5
6
#include "CpuArch.h"
7
#include "LzFind.h"
8
9
// #include "LzFindMt.h"
10
11
// #define LOG_ITERS
12
13
// #define LOG_THREAD
14
15
#ifdef LOG_THREAD
16
#include <stdio.h>
17
#define PRF(x) x
18
#else
19
// #define PRF(x)
20
#endif
21
22
#ifdef LOG_ITERS
23
#include <stdio.h>
24
UInt64 g_NumIters_Tree;
25
UInt64 g_NumIters_Loop;
26
UInt64 g_NumIters_Bytes;
27
#define LOG_ITER(x) x
28
#else
29
#define LOG_ITER(x)
30
#endif
31
32
// ---------- BT THREAD ----------
33
34
#define USE_SON_PREFETCH
35
#define USE_LONG_MATCH_OPT
36
37
#define kEmptyHashValue 0
38
39
// #define CYC_TO_POS_OFFSET 0
40
41
// #define CYC_TO_POS_OFFSET 1 // for debug
42
43
/*
44
Z7_NO_INLINE
45
UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
46
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
47
{
48
do
49
{
50
UInt32 delta;
51
if (hash == size)
52
break;
53
delta = *hash++;
54
55
if (delta == 0 || delta > (UInt32)pos)
56
return NULL;
57
58
lenLimit++;
59
60
if (delta == (UInt32)pos)
61
{
62
CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
63
*d++ = 0;
64
ptr1[0] = kEmptyHashValue;
65
ptr1[1] = kEmptyHashValue;
66
}
67
else
68
{
69
UInt32 *_distances = ++d;
70
71
CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
72
CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
73
74
const Byte *len0 = cur, *len1 = cur;
75
UInt32 cutValue = _cutValue;
76
const Byte *maxLen = cur + _maxLen;
77
78
for (LOG_ITER(g_NumIters_Tree++);;)
79
{
80
LOG_ITER(g_NumIters_Loop++);
81
{
82
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
83
CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
84
const Byte *len = (len0 < len1 ? len0 : len1);
85
86
#ifdef USE_SON_PREFETCH
87
const UInt32 pair0 = *pair;
88
#endif
89
90
if (len[diff] == len[0])
91
{
92
if (++len != lenLimit && len[diff] == len[0])
93
while (++len != lenLimit)
94
{
95
LOG_ITER(g_NumIters_Bytes++);
96
if (len[diff] != len[0])
97
break;
98
}
99
if (maxLen < len)
100
{
101
maxLen = len;
102
*d++ = (UInt32)(len - cur);
103
*d++ = delta - 1;
104
105
if (len == lenLimit)
106
{
107
const UInt32 pair1 = pair[1];
108
*ptr1 =
109
#ifdef USE_SON_PREFETCH
110
pair0;
111
#else
112
pair[0];
113
#endif
114
*ptr0 = pair1;
115
116
_distances[-1] = (UInt32)(d - _distances);
117
118
#ifdef USE_LONG_MATCH_OPT
119
120
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
121
break;
122
123
{
124
for (;;)
125
{
126
hash++;
127
pos++;
128
cur++;
129
lenLimit++;
130
{
131
CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
132
#if 0
133
*(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
134
#else
135
const UInt32 p0 = ptr[0 + (diff * 2)];
136
const UInt32 p1 = ptr[1 + (diff * 2)];
137
ptr[0] = p0;
138
ptr[1] = p1;
139
// ptr[0] = ptr[0 + (diff * 2)];
140
// ptr[1] = ptr[1 + (diff * 2)];
141
#endif
142
}
143
// PrintSon(son + 2, pos - 1);
144
// printf("\npos = %x delta = %x\n", pos, delta);
145
len++;
146
*d++ = 2;
147
*d++ = (UInt32)(len - cur);
148
*d++ = delta - 1;
149
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
150
break;
151
}
152
}
153
#endif
154
155
break;
156
}
157
}
158
}
159
160
{
161
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
162
if (len[diff] < len[0])
163
{
164
delta = pair[1];
165
if (delta >= curMatch)
166
return NULL;
167
*ptr1 = curMatch;
168
ptr1 = pair + 1;
169
len1 = len;
170
}
171
else
172
{
173
delta = *pair;
174
if (delta >= curMatch)
175
return NULL;
176
*ptr0 = curMatch;
177
ptr0 = pair;
178
len0 = len;
179
}
180
181
delta = (UInt32)pos - delta;
182
183
if (--cutValue == 0 || delta >= pos)
184
{
185
*ptr0 = *ptr1 = kEmptyHashValue;
186
_distances[-1] = (UInt32)(d - _distances);
187
break;
188
}
189
}
190
}
191
} // for (tree iterations)
192
}
193
pos++;
194
cur++;
195
}
196
while (d < limit);
197
*posRes = (UInt32)pos;
198
return d;
199
}
200
*/
201
202
/* define cbs if you use 2 functions.
203
GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
204
GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
205
206
do not define cbs if you use 1 function:
207
GetMatchesSpecN_2()
208
*/
209
210
// #define cbs _cyclicBufferSize
211
212
/*
213
we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
214
to eliminate "movsx" BUG in old MSVC x64 compiler.
215
*/
216
217
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
218
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
219
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
220
UInt32 *posRes);
221
222
Z7_NO_INLINE
223
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
224
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
225
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
226
UInt32 *posRes)
227
{
228
do // while (hash != size)
229
{
230
UInt32 delta;
231
232
#ifndef cbs
233
UInt32 cbs;
234
#endif
235
236
if (hash == size)
237
break;
238
239
delta = *hash++;
240
241
if (delta == 0)
242
return NULL;
243
244
lenLimit++;
245
246
#ifndef cbs
247
cbs = _cyclicBufferSize;
248
if ((UInt32)pos < cbs)
249
{
250
if (delta > (UInt32)pos)
251
return NULL;
252
cbs = (UInt32)pos;
253
}
254
#endif
255
256
if (delta >= cbs)
257
{
258
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
259
*d++ = 0;
260
ptr1[0] = kEmptyHashValue;
261
ptr1[1] = kEmptyHashValue;
262
}
263
else
264
{
265
UInt32 *_distances = ++d;
266
267
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
268
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
269
270
UInt32 cutValue = _cutValue;
271
const Byte *len0 = cur, *len1 = cur;
272
const Byte *maxLen = cur + _maxLen;
273
274
// if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
275
for (LOG_ITER(g_NumIters_Tree++);;)
276
{
277
LOG_ITER(g_NumIters_Loop++);
278
{
279
// SPEC code
280
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
281
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
282
) << 1);
283
284
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
285
const Byte *len = (len0 < len1 ? len0 : len1);
286
287
#ifdef USE_SON_PREFETCH
288
const UInt32 pair0 = *pair;
289
#endif
290
291
if (len[diff] == len[0])
292
{
293
if (++len != lenLimit && len[diff] == len[0])
294
while (++len != lenLimit)
295
{
296
LOG_ITER(g_NumIters_Bytes++);
297
if (len[diff] != len[0])
298
break;
299
}
300
if (maxLen < len)
301
{
302
maxLen = len;
303
*d++ = (UInt32)(len - cur);
304
*d++ = delta - 1;
305
306
if (len == lenLimit)
307
{
308
const UInt32 pair1 = pair[1];
309
*ptr1 =
310
#ifdef USE_SON_PREFETCH
311
pair0;
312
#else
313
pair[0];
314
#endif
315
*ptr0 = pair1;
316
317
_distances[-1] = (UInt32)(d - _distances);
318
319
#ifdef USE_LONG_MATCH_OPT
320
321
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
322
break;
323
324
{
325
for (;;)
326
{
327
*d++ = 2;
328
*d++ = (UInt32)(lenLimit - cur);
329
*d++ = delta - 1;
330
cur++;
331
lenLimit++;
332
// SPEC
333
_cyclicBufferPos++;
334
{
335
// SPEC code
336
CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
337
const CLzRef *src = dest + ((diff
338
+ (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
339
// CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
340
#if 0
341
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
342
#else
343
const UInt32 p0 = src[0];
344
const UInt32 p1 = src[1];
345
dest[0] = p0;
346
dest[1] = p1;
347
#endif
348
}
349
pos++;
350
hash++;
351
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
352
break;
353
} // for() end for long matches
354
}
355
#endif
356
357
break; // break from TREE iterations
358
}
359
}
360
}
361
{
362
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
363
if (len[diff] < len[0])
364
{
365
delta = pair[1];
366
*ptr1 = curMatch;
367
ptr1 = pair + 1;
368
len1 = len;
369
if (delta >= curMatch)
370
return NULL;
371
}
372
else
373
{
374
delta = *pair;
375
*ptr0 = curMatch;
376
ptr0 = pair;
377
len0 = len;
378
if (delta >= curMatch)
379
return NULL;
380
}
381
delta = (UInt32)pos - delta;
382
383
if (--cutValue == 0 || delta >= cbs)
384
{
385
*ptr0 = *ptr1 = kEmptyHashValue;
386
_distances[-1] = (UInt32)(d - _distances);
387
break;
388
}
389
}
390
}
391
} // for (tree iterations)
392
}
393
pos++;
394
_cyclicBufferPos++;
395
cur++;
396
}
397
while (d < limit);
398
*posRes = (UInt32)pos;
399
return d;
400
}
401
402
403
404
/*
405
typedef UInt32 uint32plus; // size_t
406
407
UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
408
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
409
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
410
UInt32 *posRes)
411
{
412
do // while (hash != size)
413
{
414
UInt32 delta;
415
416
#ifndef cbs
417
UInt32 cbs;
418
#endif
419
420
if (hash == size)
421
break;
422
423
delta = *hash++;
424
425
if (delta == 0)
426
return NULL;
427
428
#ifndef cbs
429
cbs = _cyclicBufferSize;
430
if ((UInt32)pos < cbs)
431
{
432
if (delta > (UInt32)pos)
433
return NULL;
434
cbs = (UInt32)pos;
435
}
436
#endif
437
438
if (delta >= cbs)
439
{
440
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
441
*d++ = 0;
442
ptr1[0] = kEmptyHashValue;
443
ptr1[1] = kEmptyHashValue;
444
}
445
else
446
{
447
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
448
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
449
UInt32 *_distances = ++d;
450
uint32plus len0 = 0, len1 = 0;
451
UInt32 cutValue = _cutValue;
452
uint32plus maxLen = _maxLen;
453
// lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
454
455
for (LOG_ITER(g_NumIters_Tree++);;)
456
{
457
LOG_ITER(g_NumIters_Loop++);
458
{
459
// const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
460
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
461
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
462
) << 1);
463
const Byte *pb = cur - delta;
464
uint32plus len = (len0 < len1 ? len0 : len1);
465
466
#ifdef USE_SON_PREFETCH
467
const UInt32 pair0 = *pair;
468
#endif
469
470
if (pb[len] == cur[len])
471
{
472
if (++len != lenLimit && pb[len] == cur[len])
473
while (++len != lenLimit)
474
if (pb[len] != cur[len])
475
break;
476
if (maxLen < len)
477
{
478
maxLen = len;
479
*d++ = (UInt32)len;
480
*d++ = delta - 1;
481
if (len == lenLimit)
482
{
483
{
484
const UInt32 pair1 = pair[1];
485
*ptr0 = pair1;
486
*ptr1 =
487
#ifdef USE_SON_PREFETCH
488
pair0;
489
#else
490
pair[0];
491
#endif
492
}
493
494
_distances[-1] = (UInt32)(d - _distances);
495
496
#ifdef USE_LONG_MATCH_OPT
497
498
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
499
break;
500
501
{
502
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
503
for (;;)
504
{
505
*d++ = 2;
506
*d++ = (UInt32)lenLimit;
507
*d++ = delta - 1;
508
_cyclicBufferPos++;
509
{
510
CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
511
const CLzRef *src = dest + ((diff +
512
(ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
513
#if 0
514
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
515
#else
516
const UInt32 p0 = src[0];
517
const UInt32 p1 = src[1];
518
dest[0] = p0;
519
dest[1] = p1;
520
#endif
521
}
522
hash++;
523
pos++;
524
cur++;
525
pb++;
526
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
527
break;
528
}
529
}
530
#endif
531
532
break;
533
}
534
}
535
}
536
{
537
const UInt32 curMatch = (UInt32)pos - delta;
538
if (pb[len] < cur[len])
539
{
540
delta = pair[1];
541
*ptr1 = curMatch;
542
ptr1 = pair + 1;
543
len1 = len;
544
}
545
else
546
{
547
delta = *pair;
548
*ptr0 = curMatch;
549
ptr0 = pair;
550
len0 = len;
551
}
552
553
{
554
if (delta >= curMatch)
555
return NULL;
556
delta = (UInt32)pos - delta;
557
if (delta >= cbs
558
// delta >= _cyclicBufferSize || delta >= pos
559
|| --cutValue == 0)
560
{
561
*ptr0 = *ptr1 = kEmptyHashValue;
562
_distances[-1] = (UInt32)(d - _distances);
563
break;
564
}
565
}
566
}
567
}
568
} // for (tree iterations)
569
}
570
pos++;
571
_cyclicBufferPos++;
572
cur++;
573
}
574
while (d < limit);
575
*posRes = (UInt32)pos;
576
return d;
577
}
578
*/
579
580