GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/xz-embedded/linux/lib/xz/xz_dec_bcj.c

/*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <[email protected]>
 *          Igor Pavlov <https://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"

/*
 * The rest of the file is inside this ifdef. It makes things a little more
 * convenient when building without support for any BCJ filters.
 */
#ifdef XZ_DEC_BCJ

struct xz_dec_bcj {
	/* Type of the BCJ filter being used */
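	/* The numeric values match the Filter IDs used in the .xz format. */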
	enum {
		BCJ_X86 = 4,		/* x86 or x86-64 */
		BCJ_POWERPC = 5,	/* Big endian only */
		BCJ_IA64 = 6,		/* Big or little endian */
		BCJ_ARM = 7,		/* Little endian only */
		BCJ_ARMTHUMB = 8,	/* Little endian only */
		BCJ_SPARC = 9		/* Big or little endian */
	} type;

	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned XZ_STREAM_END.
	 */
	enum xz_ret ret;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). We care only about the lowest 32
	 * bits so this doesn't need to be uint64_t even with big files.
	 */
	uint32_t pos;

	/* x86 filter state */
	uint32_t x86_prev_mask;

	/* Temporary space to hold the variables from struct xz_buf */
	uint8_t *out;
	size_t out_pos;
	size_t out_size;

	struct {
		/* Amount of already filtered data in the beginning of buf */
		size_t filtered;

		/* Total amount of data currently stored in buf */
		size_t size;

		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type		Alignment	Look-ahead
		 * x86		    1		    4
		 * PowerPC	    4		    0
		 * IA-64	   16		    0
		 * ARM		    4		    0
		 * ARM-Thumb	    2		    2
		 * SPARC	    4		    0
		 */
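		/* Worst case in the table above: IA-64 needs 16 + 2 * 0 = 16 bytes. */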
		uint8_t buf[16];
	} temp;
};

#ifdef XZ_DEC_X86
/*
 * This is used to test the most significant byte of a memory address
 * in an x86 instruction.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
	return b == 0x00 || b == 0xFF;
}
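
/*
 * x86 BCJ filter: 0xE8 is the opcode of a near CALL and 0xE9 of a near JMP,
 * both followed by a 32-bit displacement, so (buf[i] & 0xFE) == 0xE8 matches
 * either one. The encoder stored the displacements as absolute addresses;
 * subtracting the current position here restores the relative form.
 * prev_mask remembers which of the preceding bytes were 0xE8/0xE9 so that
 * byte sequences unlikely to be real instructions can be skipped.
 */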
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	static const bool mask_to_allowed_status[8]
		= { true, true, true, false, true, false, false, false };

	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

	size_t i;
	size_t prev_pos = (size_t)-1;
	uint32_t prev_mask = s->x86_prev_mask;
	uint32_t src;
	uint32_t dest;
	uint32_t j;
	uint8_t b;

	if (size <= 4)
		return 0;

	size -= 4;
	for (i = 0; i < size; ++i) {
		if ((buf[i] & 0xFE) != 0xE8)
			continue;

		prev_pos = i - prev_pos;
		if (prev_pos > 3) {
			prev_mask = 0;
		} else {
			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
			if (prev_mask != 0) {
				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
				if (!mask_to_allowed_status[prev_mask]
						|| bcj_x86_test_msbyte(b)) {
					prev_pos = i;
					prev_mask = (prev_mask << 1) | 1;
					continue;
				}
			}
		}

		prev_pos = i;

		if (bcj_x86_test_msbyte(buf[i + 4])) {
			src = get_unaligned_le32(buf + i + 1);
			while (true) {
				dest = src - (s->pos + (uint32_t)i + 5);
				if (prev_mask == 0)
					break;

				j = mask_to_bit_num[prev_mask] * 8;
				b = (uint8_t)(dest >> (24 - j));
				if (!bcj_x86_test_msbyte(b))
					break;

				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
			}

			dest &= 0x01FFFFFF;
			dest |= (uint32_t)0 - (dest & 0x01000000);
			put_unaligned_le32(dest, buf + i + 1);
			i += 4;
		} else {
			prev_mask = (prev_mask << 1) | 1;
		}
	}

	prev_pos = i - prev_pos;
	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
	return i;
}
#endif

#ifdef XZ_DEC_POWERPC
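/*
 * PowerPC BCJ filter: (instr & 0xFC000003) == 0x48000001 matches the "bl"
 * instruction (primary opcode 18 with AA = 0 and LK = 1). The encoder turned
 * its relative branch displacement into an absolute address, so subtracting
 * the current position restores the original relative form.
 */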
static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t instr;

	for (i = 0; i + 4 <= size; i += 4) {
		instr = get_unaligned_be32(buf + i);
		if ((instr & 0xFC000003) == 0x48000001) {
			instr &= 0x03FFFFFC;
			instr -= s->pos + (uint32_t)i;
			instr &= 0x03FFFFFC;
			instr |= 0x48000001;
			put_unaligned_be32(instr, buf + i);
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_IA64
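/*
 * IA-64 BCJ filter: each 16-byte instruction bundle starts with a 5-bit
 * template field; branch_table maps the template to a bit mask telling which
 * of the three 41-bit instruction slots may hold a branch whose displacement
 * needs to be converted. The displacement is counted in 16-byte bundles,
 * hence the shifts by four around the subtraction of the position.
 */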
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	static const uint8_t branch_table[32] = {
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		4, 4, 6, 6, 0, 0, 7, 7,
		4, 4, 0, 0, 4, 4, 0, 0
	};

	/*
	 * The local variables take a little bit of stack space, but it's
	 * less than what the LZMA2 decoder takes, so it doesn't make sense
	 * to reduce stack usage here without doing that for the LZMA2
	 * decoder too.
	 */

	/* Loop counters */
	size_t i;
	size_t j;

	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
	uint32_t slot;

	/* Bitwise offset of the instruction indicated by slot */
	uint32_t bit_pos;

	/* bit_pos split into byte and bit parts */
	uint32_t byte_pos;
	uint32_t bit_res;

	/* Address part of an instruction */
	uint32_t addr;

	/* Mask used to detect which instructions to convert */
	uint32_t mask;

	/* 41-bit instruction stored somewhere in the lowest 48 bits */
	uint64_t instr;

	/* Instruction normalized with bit_res for easier manipulation */
	uint64_t norm;

	for (i = 0; i + 16 <= size; i += 16) {
		mask = branch_table[buf[i] & 0x1F];
		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
			if (((mask >> slot) & 1) == 0)
				continue;

			byte_pos = bit_pos >> 3;
			bit_res = bit_pos & 7;
			instr = 0;
			for (j = 0; j < 6; ++j)
				instr |= (uint64_t)(buf[i + j + byte_pos])
						<< (8 * j);

			norm = instr >> bit_res;

			if (((norm >> 37) & 0x0F) == 0x05
					&& ((norm >> 9) & 0x07) == 0) {
				addr = (norm >> 13) & 0x0FFFFF;
				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
				addr <<= 4;
				addr -= s->pos + (uint32_t)i;
				addr >>= 4;

				norm &= ~((uint64_t)0x8FFFFF << 13);
				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
				norm |= (uint64_t)(addr & 0x100000)
						<< (36 - 20);

				instr &= (1 << bit_res) - 1;
				instr |= norm << bit_res;

				for (j = 0; j < 6; j++)
					buf[i + j + byte_pos]
						= (uint8_t)(instr >> (8 * j));
			}
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_ARM
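/*
 * ARM BCJ filter: a value of 0xEB in the most significant byte of a 32-bit
 * little endian word marks a BL instruction with the "always" condition.
 * Its 24-bit displacement is in 4-byte units and is relative to the
 * instruction address plus eight, which explains the "+ 8" below.
 */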
static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t addr;

	for (i = 0; i + 4 <= size; i += 4) {
		if (buf[i + 3] == 0xEB) {
			addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
					| ((uint32_t)buf[i + 2] << 16);
			addr <<= 2;
			addr -= s->pos + (uint32_t)i + 8;
			addr >>= 2;
			buf[i] = (uint8_t)addr;
			buf[i + 1] = (uint8_t)(addr >> 8);
			buf[i + 2] = (uint8_t)(addr >> 16);
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_ARMTHUMB
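/*
 * ARM-Thumb BCJ filter: a Thumb BL instruction is a pair of 16-bit
 * halfwords whose high bytes (at buf[i + 1] and buf[i + 3] because of
 * little endian storage) have the forms 11110xxx and 11111xxx. Together
 * the halfwords carry a 22-bit displacement in 2-byte units, relative to
 * the instruction address plus four.
 */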
static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t addr;

	for (i = 0; i + 4 <= size; i += 2) {
		if ((buf[i + 1] & 0xF8) == 0xF0
				&& (buf[i + 3] & 0xF8) == 0xF8) {
			addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
					| ((uint32_t)buf[i] << 11)
					| (((uint32_t)buf[i + 3] & 0x07) << 8)
					| (uint32_t)buf[i + 2];
			addr <<= 1;
			addr -= s->pos + (uint32_t)i + 4;
			addr >>= 1;
			buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
			buf[i] = (uint8_t)(addr >> 11);
			buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
			buf[i + 2] = (uint8_t)addr;
			i += 2;
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_SPARC
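/*
 * SPARC BCJ filter: a CALL instruction has its two most significant bits
 * set to 01 followed by a 30-bit word displacement. Only calls whose upper
 * displacement bits are all zeros or all ones (instr >> 22 equal to 0x100
 * or 0x1FF) are converted.
 */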
static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t instr;

	for (i = 0; i + 4 <= size; i += 4) {
		instr = get_unaligned_be32(buf + i);
		if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
			instr <<= 2;
			instr -= s->pos + (uint32_t)i;
			instr >>= 2;
			instr = ((uint32_t)0x40000000 - (instr & 0x400000))
					| 0x40000000 | (instr & 0x3FFFFF);
			put_unaligned_be32(instr, buf + i);
		}
	}

	return i;
}
#endif

/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, where
 * pointers to static data must be avoided (at least on x86).
 */
static void bcj_apply(struct xz_dec_bcj *s,
		      uint8_t *buf, size_t *pos, size_t size)
{
	size_t filtered;

	buf += *pos;
	size -= *pos;

	switch (s->type) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
		filtered = bcj_x86(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
		filtered = bcj_powerpc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
		filtered = bcj_ia64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
		filtered = bcj_arm(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
		filtered = bcj_armthumb(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
		filtered = bcj_sparc(s, buf, size);
		break;
#endif
	default:
		/* Never reached but silence compiler warnings. */
		filtered = 0;
		break;
	}

	*pos += filtered;
	s->pos += filtered;
}

/*
 * Flush pending filtered data from temp to the output buffer.
 * Move the remaining mixture of possibly filtered and unfiltered
 * data to the beginning of temp.
 */
static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
{
	size_t copy_size;

	copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
	memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
	b->out_pos += copy_size;

	s->temp.filtered -= copy_size;
	s->temp.size -= copy_size;
	memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
}

/*
 * The BCJ filter functions are primitive in the sense that they process
 * the data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 */
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
				     struct xz_dec_lzma2 *lzma2,
				     struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 *
	 * This needs to be always run when temp.size == 0 to handle a special
	 * case where the output buffer is full and the next filter has no
	 * more output coming but hasn't returned XZ_STREAM_END yet.
	 */
	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

		/*
		 * If there wasn't enough input to the next filter to fill
		 * the output buffer with unfiltered data, there's no point
		 * in trying to decode more data into temp.
		 */
		if (b->out_pos + s->temp.size < b->out_size)
			return XZ_OK;
	}

	/*
	 * We have unfiltered data in temp. If the output buffer isn't full
	 * yet, try to fill the temp buffer by decoding more data from the
	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
	 * fill the actual output buffer by copying filtered data from temp.
	 * A mix of filtered and unfiltered data may be left in temp; it
	 * will be taken care of on the next call to this function.
	 */
	if (b->out_pos < b->out_size) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
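
/*
 * Note: when this file is built outside the kernel, the kmalloc() used
 * below is expected to be mapped to a plain malloc() by the configuration
 * headers pulled in through xz_private.h; inside the kernel it is the
 * regular kmalloc().
 */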
XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
{
	struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s != NULL)
		s->single_call = single_call;

	return s;
}

XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
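
/*
 * Illustrative call sequence (a sketch only; in this tree the functions are
 * driven by xz_dec_stream.c):
 *
 *	struct xz_dec_bcj *bcj = xz_dec_bcj_create(false);
 *
 *	if (bcj != NULL && xz_dec_bcj_reset(bcj, BCJ_X86) == XZ_OK) {
 *		// Repeatedly call xz_dec_bcj_run(bcj, lzma2, &buf) until it
 *		// returns XZ_STREAM_END or an error code.
 *	}
 */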

#endif