Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/lzma/src/Bcj2.c
4253 views
1
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2
2023-03-01 : Igor Pavlov : Public domain */
3
4
#include "Precomp.h"
5
6
#include "Bcj2.h"
7
#include "CpuArch.h"
8
9
#define kTopValue ((UInt32)1 << 24)
10
#define kNumBitModelTotalBits 11
11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
12
#define kNumMoveBits 5
13
14
// UInt32 bcj2_stats[256 + 2][2];
15
16
void Bcj2Dec_Init(CBcj2Dec *p)
17
{
18
unsigned i;
19
p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
20
p->ip = 0;
21
p->temp = 0;
22
p->range = 0;
23
p->code = 0;
24
for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
25
p->probs[i] = kBitModelTotal >> 1;
26
}
27
28
SRes Bcj2Dec_Decode(CBcj2Dec *p)
29
{
30
UInt32 v = p->temp;
31
// const Byte *src;
32
if (p->range <= 5)
33
{
34
UInt32 code = p->code;
35
p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
36
for (; p->range != 5; p->range++)
37
{
38
if (p->range == 1 && code != 0)
39
return SZ_ERROR_DATA;
40
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
41
{
42
p->state = BCJ2_STREAM_RC;
43
return SZ_OK;
44
}
45
code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
46
p->code = code;
47
}
48
if (code == 0xffffffff)
49
return SZ_ERROR_DATA;
50
p->range = 0xffffffff;
51
}
52
// else
53
{
54
unsigned state = p->state;
55
// we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
56
if (BCJ2_IS_32BIT_STREAM(state))
57
{
58
const Byte *cur = p->bufs[state];
59
if (cur == p->lims[state])
60
return SZ_OK;
61
p->bufs[state] = cur + 4;
62
{
63
const UInt32 ip = p->ip + 4;
64
v = GetBe32a(cur) - ip;
65
p->ip = ip;
66
}
67
state = BCJ2_DEC_STATE_ORIG_0;
68
}
69
if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
70
{
71
Byte *dest = p->dest;
72
for (;;)
73
{
74
if (dest == p->destLim)
75
{
76
p->state = state;
77
p->temp = v;
78
return SZ_OK;
79
}
80
*dest++ = (Byte)v;
81
p->dest = dest;
82
if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
83
break;
84
v >>= 8;
85
}
86
}
87
}
88
89
// src = p->bufs[BCJ2_STREAM_MAIN];
90
for (;;)
91
{
92
/*
93
if (BCJ2_IS_32BIT_STREAM(p->state))
94
p->state = BCJ2_DEC_STATE_OK;
95
else
96
*/
97
{
98
if (p->range < kTopValue)
99
{
100
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
101
{
102
p->state = BCJ2_STREAM_RC;
103
p->temp = v;
104
return SZ_OK;
105
}
106
p->range <<= 8;
107
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
108
}
109
{
110
const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
111
const Byte *srcLim;
112
Byte *dest = p->dest;
113
{
114
const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
115
SizeT num = (SizeT)(p->destLim - dest);
116
if (num >= rem)
117
num = rem;
118
#define NUM_ITERS 4
119
#if (NUM_ITERS & (NUM_ITERS - 1)) == 0
120
num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x))
121
#else
122
num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
123
#endif
124
srcLim = src + num;
125
}
126
127
#define NUM_SHIFT_BITS 24
128
#define ONE_ITER(indx) { \
129
const unsigned b = src[indx]; \
130
*dest++ = (Byte)b; \
131
v = (v << NUM_SHIFT_BITS) | b; \
132
if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
133
if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
134
((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
135
/* ++dest */; /* v = b; */ }
136
137
if (src != srcLim)
138
for (;;)
139
{
140
/* The dependency chain of 2-cycle for (v) calculation is not big problem here.
141
But we can remove dependency chain with v = b in the end of loop. */
142
ONE_ITER(0)
143
#if (NUM_ITERS > 1)
144
ONE_ITER(1)
145
#if (NUM_ITERS > 2)
146
ONE_ITER(2)
147
#if (NUM_ITERS > 3)
148
ONE_ITER(3)
149
#if (NUM_ITERS > 4)
150
ONE_ITER(4)
151
#if (NUM_ITERS > 5)
152
ONE_ITER(5)
153
#if (NUM_ITERS > 6)
154
ONE_ITER(6)
155
#if (NUM_ITERS > 7)
156
ONE_ITER(7)
157
#endif
158
#endif
159
#endif
160
#endif
161
#endif
162
#endif
163
#endif
164
165
src += NUM_ITERS;
166
if (src == srcLim)
167
break;
168
}
169
170
if (src == srcLim)
171
#if (NUM_ITERS > 1)
172
for (;;)
173
#endif
174
{
175
#if (NUM_ITERS > 1)
176
if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
177
#endif
178
{
179
const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
180
p->bufs[BCJ2_STREAM_MAIN] = src;
181
p->dest = dest;
182
p->ip += (UInt32)num;
183
/* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
184
p->state =
185
src == p->lims[BCJ2_STREAM_MAIN] ?
186
(unsigned)BCJ2_STREAM_MAIN :
187
(unsigned)BCJ2_DEC_STATE_ORIG;
188
p->temp = v;
189
return SZ_OK;
190
}
191
#if (NUM_ITERS > 1)
192
ONE_ITER(0)
193
src++;
194
#endif
195
}
196
197
{
198
const SizeT num = (SizeT)(dest - p->dest);
199
p->dest = dest; // p->dest += num;
200
p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
201
p->ip += (UInt32)num;
202
}
203
{
204
UInt32 bound, ttt;
205
CBcj2Prob *prob; // unsigned index;
206
/*
207
prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
208
2 + (Byte)(v >> 8) :
209
((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1));
210
*/
211
{
212
const unsigned c = ((v + 0x17) >> 6) & 1;
213
prob = p->probs + (unsigned)
214
(((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
215
// (Byte)
216
// 8x->0 : e9->1 : xxe8->xx+2
217
// 8x->0x100 : e9->0x101 : xxe8->xx
218
// (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
219
// (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
220
}
221
ttt = *prob;
222
bound = (p->range >> kNumBitModelTotalBits) * ttt;
223
if (p->code < bound)
224
{
225
// bcj2_stats[prob - p->probs][0]++;
226
p->range = bound;
227
*prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
228
continue;
229
}
230
{
231
// bcj2_stats[prob - p->probs][1]++;
232
p->range -= bound;
233
p->code -= bound;
234
*prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
235
}
236
}
237
}
238
}
239
{
240
/* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
241
// const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
242
const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
243
const Byte *cur = p->bufs[cj];
244
Byte *dest;
245
SizeT rem;
246
if (cur == p->lims[cj])
247
{
248
p->state = cj;
249
break;
250
}
251
v = GetBe32a(cur);
252
p->bufs[cj] = cur + 4;
253
{
254
const UInt32 ip = p->ip + 4;
255
v -= ip;
256
p->ip = ip;
257
}
258
dest = p->dest;
259
rem = (SizeT)(p->destLim - dest);
260
if (rem < 4)
261
{
262
if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8;
263
if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8;
264
if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}}
265
p->temp = v;
266
p->dest = dest + rem;
267
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
268
break;
269
}
270
SetUi32(dest, v)
271
v >>= 24;
272
p->dest = dest + 4;
273
}
274
}
275
276
if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])
277
{
278
p->range <<= 8;
279
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
280
}
281
return SZ_OK;
282
}
283
284
#undef NUM_ITERS
285
#undef ONE_ITER
286
#undef NUM_SHIFT_BITS
287
#undef kTopValue
288
#undef kNumBitModelTotalBits
289
#undef kBitModelTotal
290
#undef kNumMoveBits
291
292