CoCalc -- file

GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmliblzma/liblzma/common/file_info.c
3153 views
1
// SPDX-License-Identifier: 0BSD
2

3
///////////////////////////////////////////////////////////////////////////////
4
//
5
/// \file       file_info.c
6
/// \brief      Decode .xz file information into a lzma_index structure
7
//
8
//  Author:     Lasse Collin
9
//
10
///////////////////////////////////////////////////////////////////////////////
11

12
#include "index_decoder.h"
13

14

15
typedef struct {
16
	enum {
17
		SEQ_MAGIC_BYTES,
18
		SEQ_PADDING_SEEK,
19
		SEQ_PADDING_DECODE,
20
		SEQ_FOOTER,
21
		SEQ_INDEX_INIT,
22
		SEQ_INDEX_DECODE,
23
		SEQ_HEADER_DECODE,
24
		SEQ_HEADER_COMPARE,
25
	} sequence;
26

27
	/// Absolute position of in[*in_pos] in the file. All code that
28
	/// modifies *in_pos also updates this. seek_to_pos() needs this
29
	/// to determine if we need to request the application to seek for
30
	/// us or if we can do the seeking internally by adjusting *in_pos.
31
	uint64_t file_cur_pos;
32

33
	/// This refers to absolute positions of interesting parts of the
34
	/// input file. Sometimes it points to the *beginning* of a specific
35
	/// field and sometimes to the *end* of a field. The current target
36
	/// position at each moment is explained in the comments.
37
	uint64_t file_target_pos;
38

39
	/// Size of the .xz file (from the application).
40
	uint64_t file_size;
41

42
	/// Index decoder
43
	lzma_next_coder index_decoder;
44

45
	/// Number of bytes remaining in the Index field that is currently
46
	/// being decoded.
47
	lzma_vli index_remaining;
48

49
	/// The Index decoder will store the decoded Index in this pointer.
50
	lzma_index *this_index;
51

52
	/// Amount of Stream Padding in the current Stream.
53
	lzma_vli stream_padding;
54

55
	/// The final combined index is collected here.
56
	lzma_index *combined_index;
57

58
	/// Pointer from the application where to store the index information
59
	/// after successful decoding.
60
	lzma_index **dest_index;
61

62
	/// Pointer to lzma_stream.seek_pos to be used when returning
63
	/// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
64
	uint64_t *external_seek_pos;
65

66
	/// Memory usage limit
67
	uint64_t memlimit;
68

69
	/// Stream Flags from the very beginning of the file.
70
	lzma_stream_flags first_header_flags;
71

72
	/// Stream Flags from Stream Header of the current Stream.
73
	lzma_stream_flags header_flags;
74

75
	/// Stream Flags from Stream Footer of the current Stream.
76
	lzma_stream_flags footer_flags;
77

78
	size_t temp_pos;
79
	size_t temp_size;
80
	uint8_t temp[8192];
81

82
} lzma_file_info_coder;
83

84

85
/// Copies data from in[*in_pos] into coder->temp until
86
/// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
87
/// in sync with *in_pos. Returns true if more input is needed.
88
static bool
89
fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
90
		size_t *restrict in_pos, size_t in_size)
91
{
92
	coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
93
			coder->temp, &coder->temp_pos, coder->temp_size);
94
	return coder->temp_pos < coder->temp_size;
95
}
96

97

98
/// Seeks to the absolute file position specified by target_pos.
99
/// This tries to do the seeking by only modifying *in_pos, if possible.
100
/// The main benefit of this is that if one passes the whole file at once
101
/// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
102
/// as all the seeking can be done by adjusting *in_pos in this function.
103
///
104
/// Returns true if an external seek is needed and the caller must return
105
/// LZMA_SEEK_NEEDED.
106
static bool
107
seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
108
		size_t in_start, size_t *in_pos, size_t in_size)
109
{
110
	// The input buffer doesn't extend beyond the end of the file.
111
	// This has been checked by file_info_decode() already.
112
	assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
113

114
	const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
115
	const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
116

117
	bool external_seek_needed;
118

119
	if (target_pos >= pos_min && target_pos <= pos_max) {
120
		// The requested position is available in the current input
121
		// buffer or right after it. That is, in a corner case we
122
		// end up setting *in_pos == in_size and thus will immediately
123
		// need new input bytes from the application.
124
		*in_pos += (size_t)(target_pos - coder->file_cur_pos);
125
		external_seek_needed = false;
126
	} else {
127
		// Ask the application to seek the input file.
128
		*coder->external_seek_pos = target_pos;
129
		external_seek_needed = true;
130

131
		// Mark the whole input buffer as used. This way
132
		// lzma_stream.total_in will have a better estimate
133
		// of the amount of data read. It still won't be perfect
134
		// as the value will depend on the input buffer size that
135
		// the application uses, but it should be good enough for
136
		// those few who want an estimate.
137
		*in_pos = in_size;
138
	}
139

140
	// After seeking (internal or external) the current position
141
	// will match the requested target position.
142
	coder->file_cur_pos = target_pos;
143

144
	return external_seek_needed;
145
}
146

147

148
/// The caller sets coder->file_target_pos so that it points to the *end*
149
/// of the desired file position. This function then determines how far
150
/// backwards from that position we can seek. After seeking fill_temp()
151
/// can be used to read data into coder->temp. When fill_temp() has finished,
152
/// coder->temp[coder->temp_size] will match coder->file_target_pos.
153
///
154
/// This also validates that coder->target_file_pos is sane in sense that
155
/// we aren't trying to seek too far backwards (too close or beyond the
156
/// beginning of the file).
157
static lzma_ret
158
reverse_seek(lzma_file_info_coder *coder,
159
		size_t in_start, size_t *in_pos, size_t in_size)
160
{
161
	// Check that there is enough data before the target position
162
	// to contain at least Stream Header and Stream Footer. If there
163
	// isn't, the file cannot be valid.
164
	if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
165
		return LZMA_DATA_ERROR;
166

167
	coder->temp_pos = 0;
168

169
	// The Stream Header at the very beginning of the file gets handled
170
	// specially in SEQ_MAGIC_BYTES and thus we will never need to seek
171
	// there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
172
	// we avoid a useless external seek after SEQ_MAGIC_BYTES if the
173
	// application uses an extremely small input buffer and the input
174
	// file is very small.
175
	if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
176
			< sizeof(coder->temp))
177
		coder->temp_size = (size_t)(coder->file_target_pos
178
				- LZMA_STREAM_HEADER_SIZE);
179
	else
180
		coder->temp_size = sizeof(coder->temp);
181

182
	// The above if-statements guarantee this. This is important because
183
	// the Stream Header/Footer decoders assume that there's at least
184
	// LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
185
	assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
186

187
	if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
188
			in_start, in_pos, in_size))
189
		return LZMA_SEEK_NEEDED;
190

191
	return LZMA_OK;
192
}
193

194

195
/// Gets the number of zero-bytes at the end of the buffer.
///
/// \param      buf       Buffer to scan
/// \param      buf_size  Number of bytes in buf
///
/// \return     Length of the run of 0x00 bytes that ends at
///             buf[buf_size - 1]. This is zero if buf_size is zero or
///             the last byte is non-zero, and buf_size if every byte
///             is zero.
static size_t
get_padding_size(const uint8_t *buf, size_t buf_size)
{
	size_t padding = 0;

	// Scan backwards. buf_size is tested before it is decremented so
	// buf[] is never read when there are no bytes left.
	while (buf_size > 0 && buf[--buf_size] == 0x00)
		++padding;

	return padding;
}
205

206

207
/// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
208
/// is used to tell the application that Magic Bytes didn't match. In other
209
/// Stream Header/Footer fields (in the middle/end of the file) it could be
210
/// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
211
/// is a valid Stream Header at the beginning of the file. For those cases
212
/// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
213
static lzma_ret
214
hide_format_error(lzma_ret ret)
215
{
216
	if (ret == LZMA_FORMAT_ERROR)
217
		ret = LZMA_DATA_ERROR;
218

219
	return ret;
220
}
221

222

223
/// Calls the Index decoder and updates coder->index_remaining.
224
/// This is a separate function because the input can be either directly
225
/// from the application or from coder->temp.
226
static lzma_ret
227
decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
228
		const uint8_t *restrict in, size_t *restrict in_pos,
229
		size_t in_size, bool update_file_cur_pos)
230
{
231
	const size_t in_start = *in_pos;
232

233
	const lzma_ret ret = coder->index_decoder.code(
234
			coder->index_decoder.coder,
235
			allocator, in, in_pos, in_size,
236
			NULL, NULL, 0, LZMA_RUN);
237

238
	coder->index_remaining -= *in_pos - in_start;
239

240
	if (update_file_cur_pos)
241
		coder->file_cur_pos += *in_pos - in_start;
242

243
	return ret;
244
}
245

246

247
static lzma_ret
248
file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
249
		const uint8_t *restrict in, size_t *restrict in_pos,
250
		size_t in_size,
251
		uint8_t *restrict out lzma_attribute((__unused__)),
252
		size_t *restrict out_pos lzma_attribute((__unused__)),
253
		size_t out_size lzma_attribute((__unused__)),
254
		lzma_action action lzma_attribute((__unused__)))
255
{
256
	lzma_file_info_coder *coder = coder_ptr;
257
	const size_t in_start = *in_pos;
258

259
	// If the caller provides input past the end of the file, trim
260
	// the extra bytes from the buffer so that we won't read too far.
261
	assert(coder->file_size >= coder->file_cur_pos);
262
	if (coder->file_size - coder->file_cur_pos < in_size - in_start)
263
		in_size = in_start
264
			+ (size_t)(coder->file_size - coder->file_cur_pos);
265

266
	while (true)
267
	switch (coder->sequence) {
268
	case SEQ_MAGIC_BYTES:
269
		// Decode the Stream Header at the beginning of the file
270
		// first to check if the Magic Bytes match. The flags
271
		// are stored in coder->first_header_flags so that we
272
		// don't need to seek to it again.
273
		//
274
		// Check that the file is big enough to contain at least
275
		// Stream Header.
276
		if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
277
			return LZMA_FORMAT_ERROR;
278

279
		// Read the Stream Header field into coder->temp.
280
		if (fill_temp(coder, in, in_pos, in_size))
281
			return LZMA_OK;
282

283
		// This is the only Stream Header/Footer decoding where we
284
		// want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
285
		// match. Elsewhere it will be converted to LZMA_DATA_ERROR.
286
		return_if_error(lzma_stream_header_decode(
287
				&coder->first_header_flags, coder->temp));
288

289
		// Now that we know that the Magic Bytes match, check the
290
		// file size. It's better to do this here after checking the
291
		// Magic Bytes since this way we can give LZMA_FORMAT_ERROR
292
		// instead of LZMA_DATA_ERROR when the Magic Bytes don't
293
		// match in a file that is too big or isn't a multiple of
294
		// four bytes.
295
		if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
296
			return LZMA_DATA_ERROR;
297

298
		// Start looking for Stream Padding and Stream Footer
299
		// at the end of the file.
300
		coder->file_target_pos = coder->file_size;
301

302
	// Fall through
303

304
	case SEQ_PADDING_SEEK:
305
		coder->sequence = SEQ_PADDING_DECODE;
306
		return_if_error(reverse_seek(
307
				coder, in_start, in_pos, in_size));
308

309
	// Fall through
310

311
	case SEQ_PADDING_DECODE: {
312
		// Copy to coder->temp first. This keeps the code simpler if
313
		// the application only provides input a few bytes at a time.
314
		if (fill_temp(coder, in, in_pos, in_size))
315
			return LZMA_OK;
316

317
		// Scan the buffer backwards to get the size of the
318
		// Stream Padding field (if any).
319
		const size_t new_padding = get_padding_size(
320
				coder->temp, coder->temp_size);
321
		coder->stream_padding += new_padding;
322

323
		// Set the target position to the beginning of Stream Padding
324
		// that has been observed so far. If all Stream Padding has
325
		// been seen, then the target position will be at the end
326
		// of the Stream Footer field.
327
		coder->file_target_pos -= new_padding;
328

329
		if (new_padding == coder->temp_size) {
330
			// The whole buffer was padding. Seek backwards in
331
			// the file to get more input.
332
			coder->sequence = SEQ_PADDING_SEEK;
333
			break;
334
		}
335

336
		// Size of Stream Padding must be a multiple of 4 bytes.
337
		if (coder->stream_padding & 3)
338
			return LZMA_DATA_ERROR;
339

340
		coder->sequence = SEQ_FOOTER;
341

342
		// Calculate the amount of non-padding data in coder->temp.
343
		coder->temp_size -= new_padding;
344
		coder->temp_pos = coder->temp_size;
345

346
		// We can avoid an external seek if the whole Stream Footer
347
		// is already in coder->temp. In that case SEQ_FOOTER won't
348
		// read more input and will find the Stream Footer from
349
		// coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
350
		//
351
		// Otherwise we will need to seek. The seeking is done so
352
		// that Stream Footer will be at the end of coder->temp.
353
		// This way it's likely that we also get a complete Index
354
		// field into coder->temp without needing a separate seek
355
		// for that (unless the Index field is big).
356
		if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
357
			return_if_error(reverse_seek(
358
					coder, in_start, in_pos, in_size));
359
	}
360

361
	// Fall through
362

363
	case SEQ_FOOTER:
364
		// Copy the Stream Footer field into coder->temp.
365
		// If Stream Footer was already available in coder->temp
366
		// in SEQ_PADDING_DECODE, then this does nothing.
367
		if (fill_temp(coder, in, in_pos, in_size))
368
			return LZMA_OK;
369

370
		// Make coder->file_target_pos and coder->temp_size point
371
		// to the beginning of Stream Footer and thus to the end
372
		// of the Index field. coder->temp_pos will be updated
373
		// a bit later.
374
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
375
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
376

377
		// Decode Stream Footer.
378
		return_if_error(hide_format_error(lzma_stream_footer_decode(
379
				&coder->footer_flags,
380
				coder->temp + coder->temp_size)));
381

382
		// Check that we won't seek past the beginning of the file.
383
		//
384
		// LZMA_STREAM_HEADER_SIZE is added because there must be
385
		// space for Stream Header too even though we won't seek
386
		// there before decoding the Index field.
387
		//
388
		// There's no risk of integer overflow here because
389
		// Backward Size cannot be greater than 2^34.
390
		if (coder->file_target_pos < coder->footer_flags.backward_size
391
				+ LZMA_STREAM_HEADER_SIZE)
392
			return LZMA_DATA_ERROR;
393

394
		// Set the target position to the beginning of the Index field.
395
		coder->file_target_pos -= coder->footer_flags.backward_size;
396
		coder->sequence = SEQ_INDEX_INIT;
397

398
		// We can avoid an external seek if the whole Index field is
399
		// already available in coder->temp.
400
		if (coder->temp_size >= coder->footer_flags.backward_size) {
401
			// Set coder->temp_pos to point to the beginning
402
			// of the Index.
403
			coder->temp_pos = coder->temp_size
404
					- coder->footer_flags.backward_size;
405
		} else {
406
			// These are set to zero to indicate that there's no
407
			// useful data (Index or anything else) in coder->temp.
408
			coder->temp_pos = 0;
409
			coder->temp_size = 0;
410

411
			// Seek to the beginning of the Index field.
412
			if (seek_to_pos(coder, coder->file_target_pos,
413
					in_start, in_pos, in_size))
414
				return LZMA_SEEK_NEEDED;
415
		}
416

417
	// Fall through
418

419
	case SEQ_INDEX_INIT: {
420
		// Calculate the amount of memory already used by the earlier
421
		// Indexes so that we know how big memory limit to pass to
422
		// the Index decoder.
423
		//
424
		// NOTE: When there are multiple Streams, the separate
425
		// lzma_index structures can use more RAM (as measured by
426
		// lzma_index_memused()) than the final combined lzma_index.
427
		// Thus memlimit may need to be slightly higher than the final
428
		// calculated memory usage will be. This is perhaps a bit
429
		// confusing to the application, but I think it shouldn't
430
		// cause problems in practice.
431
		uint64_t memused = 0;
432
		if (coder->combined_index != NULL) {
433
			memused = lzma_index_memused(coder->combined_index);
434
			assert(memused <= coder->memlimit);
435
			if (memused > coder->memlimit) // Extra sanity check
436
				return LZMA_PROG_ERROR;
437
		}
438

439
		// Initialize the Index decoder.
440
		return_if_error(lzma_index_decoder_init(
441
				&coder->index_decoder, allocator,
442
				&coder->this_index,
443
				coder->memlimit - memused));
444

445
		coder->index_remaining = coder->footer_flags.backward_size;
446
		coder->sequence = SEQ_INDEX_DECODE;
447
	}
448

449
	// Fall through
450

451
	case SEQ_INDEX_DECODE: {
452
		// Decode (a part of) the Index. If the whole Index is already
453
		// in coder->temp, read it from there. Otherwise read from
454
		// in[*in_pos] onwards. Note that index_decode() updates
455
		// coder->index_remaining and optionally coder->file_cur_pos.
456
		lzma_ret ret;
457
		if (coder->temp_size != 0) {
458
			assert(coder->temp_size - coder->temp_pos
459
					== coder->index_remaining);
460
			ret = decode_index(coder, allocator, coder->temp,
461
					&coder->temp_pos, coder->temp_size,
462
					false);
463
		} else {
464
			// Don't give the decoder more input than the known
465
			// remaining size of the Index field.
466
			size_t in_stop = in_size;
467
			if (in_size - *in_pos > coder->index_remaining)
468
				in_stop = *in_pos
469
					+ (size_t)(coder->index_remaining);
470

471
			ret = decode_index(coder, allocator,
472
					in, in_pos, in_stop, true);
473
		}
474

475
		switch (ret) {
476
		case LZMA_OK:
477
			// If the Index docoder asks for more input when we
478
			// have already given it as much input as Backward Size
479
			// indicated, the file is invalid.
480
			if (coder->index_remaining == 0)
481
				return LZMA_DATA_ERROR;
482

483
			// We cannot get here if we were reading Index from
484
			// coder->temp because when reading from coder->temp
485
			// we give the Index decoder exactly
486
			// coder->index_remaining bytes of input.
487
			assert(coder->temp_size == 0);
488

489
			return LZMA_OK;
490

491
		case LZMA_STREAM_END:
492
			// If the decoding seems to be successful, check also
493
			// that the Index decoder consumed as much input as
494
			// indicated by the Backward Size field.
495
			if (coder->index_remaining != 0)
496
				return LZMA_DATA_ERROR;
497

498
			break;
499

500
		default:
501
			return ret;
502
		}
503

504
		// Calculate how much the Index tells us to seek backwards
505
		// (relative to the beginning of the Index): Total size of
506
		// all Blocks plus the size of the Stream Header field.
507
		// No integer overflow here because lzma_index_total_size()
508
		// cannot return a value greater than LZMA_VLI_MAX.
509
		const uint64_t seek_amount
510
				= lzma_index_total_size(coder->this_index)
511
					+ LZMA_STREAM_HEADER_SIZE;
512

513
		// Check that Index is sane in sense that seek_amount won't
514
		// make us seek past the beginning of the file when locating
515
		// the Stream Header.
516
		//
517
		// coder->file_target_pos still points to the beginning of
518
		// the Index field.
519
		if (coder->file_target_pos < seek_amount)
520
			return LZMA_DATA_ERROR;
521

522
		// Set the target to the beginning of Stream Header.
523
		coder->file_target_pos -= seek_amount;
524

525
		if (coder->file_target_pos == 0) {
526
			// We would seek to the beginning of the file, but
527
			// since we already decoded that Stream Header in
528
			// SEQ_MAGIC_BYTES, we can use the cached value from
529
			// coder->first_header_flags to avoid the seek.
530
			coder->header_flags = coder->first_header_flags;
531
			coder->sequence = SEQ_HEADER_COMPARE;
532
			break;
533
		}
534

535
		coder->sequence = SEQ_HEADER_DECODE;
536

537
		// Make coder->file_target_pos point to the end of
538
		// the Stream Header field.
539
		coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
540

541
		// If coder->temp_size is non-zero, it points to the end
542
		// of the Index field. Then the beginning of the Index
543
		// field is at coder->temp[coder->temp_size
544
		// - coder->footer_flags.backward_size].
545
		assert(coder->temp_size == 0 || coder->temp_size
546
				>= coder->footer_flags.backward_size);
547

548
		// If coder->temp contained the whole Index, see if it has
549
		// enough data to contain also the Stream Header. If so,
550
		// we avoid an external seek.
551
		//
552
		// NOTE: This can happen only with small .xz files and only
553
		// for the non-first Stream as the Stream Flags of the first
554
		// Stream are cached and already handled a few lines above.
555
		// So this isn't as useful as the other seek-avoidance cases.
556
		if (coder->temp_size != 0 && coder->temp_size
557
				- coder->footer_flags.backward_size
558
				>= seek_amount) {
559
			// Make temp_pos and temp_size point to the *end* of
560
			// Stream Header so that SEQ_HEADER_DECODE will find
561
			// the start of Stream Header from coder->temp[
562
			// coder->temp_size - LZMA_STREAM_HEADER_SIZE].
563
			coder->temp_pos = coder->temp_size
564
					- coder->footer_flags.backward_size
565
					- seek_amount
566
					+ LZMA_STREAM_HEADER_SIZE;
567
			coder->temp_size = coder->temp_pos;
568
		} else {
569
			// Seek so that Stream Header will be at the end of
570
			// coder->temp. With typical multi-Stream files we
571
			// will usually also get the Stream Footer and Index
572
			// of the *previous* Stream in coder->temp and thus
573
			// won't need a separate seek for them.
574
			return_if_error(reverse_seek(coder,
575
					in_start, in_pos, in_size));
576
		}
577
	}
578

579
	// Fall through
580

581
	case SEQ_HEADER_DECODE:
582
		// Copy the Stream Header field into coder->temp.
583
		// If Stream Header was already available in coder->temp
584
		// in SEQ_INDEX_DECODE, then this does nothing.
585
		if (fill_temp(coder, in, in_pos, in_size))
586
			return LZMA_OK;
587

588
		// Make all these point to the beginning of Stream Header.
589
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
590
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
591
		coder->temp_pos = coder->temp_size;
592

593
		// Decode the Stream Header.
594
		return_if_error(hide_format_error(lzma_stream_header_decode(
595
				&coder->header_flags,
596
				coder->temp + coder->temp_size)));
597

598
		coder->sequence = SEQ_HEADER_COMPARE;
599

600
	// Fall through
601

602
	case SEQ_HEADER_COMPARE:
603
		// Compare Stream Header against Stream Footer. They must
604
		// match.
605
		return_if_error(lzma_stream_flags_compare(
606
				&coder->header_flags, &coder->footer_flags));
607

608
		// Store the decoded Stream Flags into the Index. Use the
609
		// Footer Flags because it contains Backward Size, although
610
		// it shouldn't matter in practice.
611
		if (lzma_index_stream_flags(coder->this_index,
612
				&coder->footer_flags) != LZMA_OK)
613
			return LZMA_PROG_ERROR;
614

615
		// Store also the size of the Stream Padding field. It is
616
		// needed to calculate the offsets of the Streams correctly.
617
		if (lzma_index_stream_padding(coder->this_index,
618
				coder->stream_padding) != LZMA_OK)
619
			return LZMA_PROG_ERROR;
620

621
		// Reset it so that it's ready for the next Stream.
622
		coder->stream_padding = 0;
623

624
		// Append the earlier decoded Indexes after this_index.
625
		if (coder->combined_index != NULL)
626
			return_if_error(lzma_index_cat(coder->this_index,
627
					coder->combined_index, allocator));
628

629
		coder->combined_index = coder->this_index;
630
		coder->this_index = NULL;
631

632
		// If the whole file was decoded, tell the caller that we
633
		// are finished.
634
		if (coder->file_target_pos == 0) {
635
			// The combined index must indicate the same file
636
			// size as was told to us at initialization.
637
			assert(lzma_index_file_size(coder->combined_index)
638
					== coder->file_size);
639

640
			// Make the combined index available to
641
			// the application.
642
			*coder->dest_index = coder->combined_index;
643
			coder->combined_index = NULL;
644

645
			// Mark the input buffer as used since we may have
646
			// done internal seeking and thus don't know how
647
			// many input bytes were actually used. This way
648
			// lzma_stream.total_in gets a slightly better
649
			// estimate of the amount of input used.
650
			*in_pos = in_size;
651
			return LZMA_STREAM_END;
652
		}
653

654
		// We didn't hit the beginning of the file yet, so continue
655
		// reading backwards in the file. If we have unprocessed
656
		// data in coder->temp, use it before requesting more data
657
		// from the application.
658
		//
659
		// coder->file_target_pos, coder->temp_size, and
660
		// coder->temp_pos all point to the beginning of Stream Header
661
		// and thus the end of the previous Stream in the file.
662
		coder->sequence = coder->temp_size > 0
663
				? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
664
		break;
665

666
	default:
667
		assert(0);
668
		return LZMA_PROG_ERROR;
669
	}
670
}
671

672

673
static lzma_ret
674
file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
675
		uint64_t *old_memlimit, uint64_t new_memlimit)
676
{
677
	lzma_file_info_coder *coder = coder_ptr;
678

679
	// The memory usage calculation comes from three things:
680
	//
681
	// (1) The Indexes that have already been decoded and processed into
682
	//     coder->combined_index.
683
	//
684
	// (2) The latest Index in coder->this_index that has been decoded but
685
	//     not yet put into coder->combined_index.
686
	//
687
	// (3) The latest Index that we have started decoding but haven't
688
	//     finished and thus isn't available in coder->this_index yet.
689
	//     Memory usage and limit information needs to be communicated
690
	//     from/to coder->index_decoder.
691
	//
692
	// Care has to be taken to not do both (2) and (3) when calculating
693
	// the memory usage.
694
	uint64_t combined_index_memusage = 0;
695
	uint64_t this_index_memusage = 0;
696

697
	// (1) If we have already successfully decoded one or more Indexes,
698
	// get their memory usage.
699
	if (coder->combined_index != NULL)
700
		combined_index_memusage = lzma_index_memused(
701
				coder->combined_index);
702

703
	// Choose between (2), (3), or neither.
704
	if (coder->this_index != NULL) {
705
		// (2) The latest Index is available. Use its memory usage.
706
		this_index_memusage = lzma_index_memused(coder->this_index);
707

708
	} else if (coder->sequence == SEQ_INDEX_DECODE) {
709
		// (3) The Index decoder is activate and hasn't yet stored
710
		// the new index in coder->this_index. Get the memory usage
711
		// information from the Index decoder.
712
		//
713
		// NOTE: If the Index decoder doesn't yet know how much memory
714
		// it will eventually need, it will return a tiny value here.
715
		uint64_t dummy;
716
		if (coder->index_decoder.memconfig(coder->index_decoder.coder,
717
					&this_index_memusage, &dummy, 0)
718
				!= LZMA_OK) {
719
			assert(0);
720
			return LZMA_PROG_ERROR;
721
		}
722
	}
723

724
	// Now we know the total memory usage/requirement. If we had neither
725
	// old Indexes nor a new Index, this will be zero which isn't
726
	// acceptable as lzma_memusage() has to return non-zero on success
727
	// and even with an empty .xz file we will end up with a lzma_index
728
	// that takes some memory.
729
	*memusage = combined_index_memusage + this_index_memusage;
730
	if (*memusage == 0)
731
		*memusage = lzma_index_memusage(1, 0);
732

733
	*old_memlimit = coder->memlimit;
734

735
	// If requested, set a new memory usage limit.
736
	if (new_memlimit != 0) {
737
		if (new_memlimit < *memusage)
738
			return LZMA_MEMLIMIT_ERROR;
739

740
		// In the condition (3) we need to tell the Index decoder
741
		// its new memory usage limit.
742
		if (coder->this_index == NULL
743
				&& coder->sequence == SEQ_INDEX_DECODE) {
744
			const uint64_t idec_new_memlimit = new_memlimit
745
					- combined_index_memusage;
746

747
			assert(this_index_memusage > 0);
748
			assert(idec_new_memlimit > 0);
749

750
			uint64_t dummy1;
751
			uint64_t dummy2;
752

753
			if (coder->index_decoder.memconfig(
754
					coder->index_decoder.coder,
755
					&dummy1, &dummy2, idec_new_memlimit)
756
					!= LZMA_OK) {
757
				assert(0);
758
				return LZMA_PROG_ERROR;
759
			}
760
		}
761

762
		coder->memlimit = new_memlimit;
763
	}
764

765
	return LZMA_OK;
766
}
767

768

769
static void
770
file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
771
{
772
	lzma_file_info_coder *coder = coder_ptr;
773

774
	lzma_next_end(&coder->index_decoder, allocator);
775
	lzma_index_end(coder->this_index, allocator);
776
	lzma_index_end(coder->combined_index, allocator);
777

778
	lzma_free(coder, allocator);
779
	return;
780
}
781

782

783
static lzma_ret
784
lzma_file_info_decoder_init(lzma_next_coder *next,
785
		const lzma_allocator *allocator, uint64_t *seek_pos,
786
		lzma_index **dest_index,
787
		uint64_t memlimit, uint64_t file_size)
788
{
789
	lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
790

791
	if (dest_index == NULL)
792
		return LZMA_PROG_ERROR;
793

794
	lzma_file_info_coder *coder = next->coder;
795
	if (coder == NULL) {
796
		coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
797
		if (coder == NULL)
798
			return LZMA_MEM_ERROR;
799

800
		next->coder = coder;
801
		next->code = &file_info_decode;
802
		next->end = &file_info_decoder_end;
803
		next->memconfig = &file_info_decoder_memconfig;
804

805
		coder->index_decoder = LZMA_NEXT_CODER_INIT;
806
		coder->this_index = NULL;
807
		coder->combined_index = NULL;
808
	}
809

810
	coder->sequence = SEQ_MAGIC_BYTES;
811
	coder->file_cur_pos = 0;
812
	coder->file_target_pos = 0;
813
	coder->file_size = file_size;
814

815
	lzma_index_end(coder->this_index, allocator);
816
	coder->this_index = NULL;
817

818
	lzma_index_end(coder->combined_index, allocator);
819
	coder->combined_index = NULL;
820

821
	coder->stream_padding = 0;
822

823
	coder->dest_index = dest_index;
824
	coder->external_seek_pos = seek_pos;
825

826
	// If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
827
	// won't return 0 (which would indicate an error).
828
	coder->memlimit = my_max(1, memlimit);
829

830
	// Prepare these for reading the first Stream Header into coder->temp.
831
	coder->temp_pos = 0;
832
	coder->temp_size = LZMA_STREAM_HEADER_SIZE;
833

834
	return LZMA_OK;
835
}
836

837

838
extern LZMA_API(lzma_ret)
839
lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
840
		uint64_t memlimit, uint64_t file_size)
841
{
842
	lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
843
			dest_index, memlimit, file_size);
844

845
	// We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
846
	// lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
847
	// combination in a sane way. Applications still need to be careful
848
	// if they use LZMA_FINISH so that they remember to reset it back
849
	// to LZMA_RUN after seeking if needed.
850
	strm->internal->supported_actions[LZMA_RUN] = true;
851
	strm->internal->supported_actions[LZMA_FINISH] = true;
852

853
	return LZMA_OK;
854
}
855

856
Product

Resources

Company