CoCalc -- index

GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmliblzma/liblzma/common/index_hash.c
³¹⁵³ views
1
// SPDX-License-Identifier: 0BSD
2

3
///////////////////////////////////////////////////////////////////////////////
4
//
5
/// \file       index_hash.c
6
/// \brief      Validates Index by using a hash function
7
//
8
//  Author:     Lasse Collin
9
//
10
///////////////////////////////////////////////////////////////////////////////
11

12
#include "common.h"
13
#include "index.h"
14
#include "check.h"
15

16

17
typedef struct {
18
	/// Sum of the Block sizes (including Block Padding)
19
	lzma_vli blocks_size;
20

21
	/// Sum of the Uncompressed Size fields
22
	lzma_vli uncompressed_size;
23

24
	/// Number of Records
25
	lzma_vli count;
26

27
	/// Size of the List of Index Records as bytes
28
	lzma_vli index_list_size;
29

30
	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
31
	lzma_check_state check;
32

33
} lzma_index_hash_info;
34

35

36
struct lzma_index_hash_s {
37
	enum {
38
		SEQ_BLOCK,
39
		SEQ_COUNT,
40
		SEQ_UNPADDED,
41
		SEQ_UNCOMPRESSED,
42
		SEQ_PADDING_INIT,
43
		SEQ_PADDING,
44
		SEQ_CRC32,
45
	} sequence;
46

47
	/// Information collected while decoding the actual Blocks.
48
	lzma_index_hash_info blocks;
49

50
	/// Information collected from the Index field.
51
	lzma_index_hash_info records;
52

53
	/// Number of Records not fully decoded
54
	lzma_vli remaining;
55

56
	/// Unpadded Size currently being read from an Index Record.
57
	lzma_vli unpadded_size;
58

59
	/// Uncompressed Size currently being read from an Index Record.
60
	lzma_vli uncompressed_size;
61

62
	/// Position in variable-length integers when decoding them from
63
	/// the List of Records.
64
	size_t pos;
65

66
	/// CRC32 of the Index
67
	uint32_t crc32;
68
};
69

70

71
extern LZMA_API(lzma_index_hash *)
72
lzma_index_hash_init(lzma_index_hash *index_hash,
73
		const lzma_allocator *allocator)
74
{
75
	if (index_hash == NULL) {
76
		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
77
		if (index_hash == NULL)
78
			return NULL;
79
	}
80

81
	index_hash->sequence = SEQ_BLOCK;
82
	index_hash->blocks.blocks_size = 0;
83
	index_hash->blocks.uncompressed_size = 0;
84
	index_hash->blocks.count = 0;
85
	index_hash->blocks.index_list_size = 0;
86
	index_hash->records.blocks_size = 0;
87
	index_hash->records.uncompressed_size = 0;
88
	index_hash->records.count = 0;
89
	index_hash->records.index_list_size = 0;
90
	index_hash->unpadded_size = 0;
91
	index_hash->uncompressed_size = 0;
92
	index_hash->pos = 0;
93
	index_hash->crc32 = 0;
94

95
	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
96
	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
97
	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
98

99
	return index_hash;
100
}
101

102

103
extern LZMA_API(void)
104
lzma_index_hash_end(lzma_index_hash *index_hash,
105
		const lzma_allocator *allocator)
106
{
107
	lzma_free(index_hash, allocator);
108
	return;
109
}
110

111

112
extern LZMA_API(lzma_vli)
113
lzma_index_hash_size(const lzma_index_hash *index_hash)
114
{
115
	// Get the size of the Index from ->blocks instead of ->records for
116
	// cases where application wants to know the Index Size before
117
	// decoding the Index.
118
	return index_size(index_hash->blocks.count,
119
			index_hash->blocks.index_list_size);
120
}
121

122

123
/// Updates the sizes and the hash without any validation.
124
static void
125
hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
126
		lzma_vli uncompressed_size)
127
{
128
	info->blocks_size += vli_ceil4(unpadded_size);
129
	info->uncompressed_size += uncompressed_size;
130
	info->index_list_size += lzma_vli_size(unpadded_size)
131
			+ lzma_vli_size(uncompressed_size);
132
	++info->count;
133

134
	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
135
	lzma_check_update(&info->check, LZMA_CHECK_BEST,
136
			(const uint8_t *)(sizes), sizeof(sizes));
137

138
	return;
139
}
140

141

142
extern LZMA_API(lzma_ret)
143
lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
144
		lzma_vli uncompressed_size)
145
{
146
	// Validate the arguments.
147
	if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
148
			|| unpadded_size < UNPADDED_SIZE_MIN
149
			|| unpadded_size > UNPADDED_SIZE_MAX
150
			|| uncompressed_size > LZMA_VLI_MAX)
151
		return LZMA_PROG_ERROR;
152

153
	// Update the hash.
154
	hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
155

156
	// Validate the properties of *info are still in allowed limits.
157
	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
158
			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
159
			|| index_size(index_hash->blocks.count,
160
					index_hash->blocks.index_list_size)
161
				> LZMA_BACKWARD_SIZE_MAX
162
			|| index_stream_size(index_hash->blocks.blocks_size,
163
					index_hash->blocks.count,
164
					index_hash->blocks.index_list_size)
165
				> LZMA_VLI_MAX)
166
		return LZMA_DATA_ERROR;
167

168
	return LZMA_OK;
169
}
170

171

172
extern LZMA_API(lzma_ret)
173
lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
174
		size_t *in_pos, size_t in_size)
175
{
176
	// Catch zero input buffer here, because in contrast to Index encoder
177
	// and decoder functions, applications call this function directly
178
	// instead of via lzma_code(), which does the buffer checking.
179
	if (*in_pos >= in_size)
180
		return LZMA_BUF_ERROR;
181

182
	// NOTE: This function has many similarities to index_encode() and
183
	// index_decode() functions found from index_encoder.c and
184
	// index_decoder.c. See the comments especially in index_encoder.c.
185
	const size_t in_start = *in_pos;
186
	lzma_ret ret = LZMA_OK;
187

188
	while (*in_pos < in_size)
189
	switch (index_hash->sequence) {
190
	case SEQ_BLOCK:
191
		// Check the Index Indicator is present.
192
		if (in[(*in_pos)++] != INDEX_INDICATOR)
193
			return LZMA_DATA_ERROR;
194

195
		index_hash->sequence = SEQ_COUNT;
196
		break;
197

198
	case SEQ_COUNT: {
199
		ret = lzma_vli_decode(&index_hash->remaining,
200
				&index_hash->pos, in, in_pos, in_size);
201
		if (ret != LZMA_STREAM_END)
202
			goto out;
203

204
		// The count must match the count of the Blocks decoded.
205
		if (index_hash->remaining != index_hash->blocks.count)
206
			return LZMA_DATA_ERROR;
207

208
		ret = LZMA_OK;
209
		index_hash->pos = 0;
210

211
		// Handle the special case when there are no Blocks.
212
		index_hash->sequence = index_hash->remaining == 0
213
				? SEQ_PADDING_INIT : SEQ_UNPADDED;
214
		break;
215
	}
216

217
	case SEQ_UNPADDED:
218
	case SEQ_UNCOMPRESSED: {
219
		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
220
				? &index_hash->unpadded_size
221
				: &index_hash->uncompressed_size;
222

223
		ret = lzma_vli_decode(size, &index_hash->pos,
224
				in, in_pos, in_size);
225
		if (ret != LZMA_STREAM_END)
226
			goto out;
227

228
		ret = LZMA_OK;
229
		index_hash->pos = 0;
230

231
		if (index_hash->sequence == SEQ_UNPADDED) {
232
			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
233
					|| index_hash->unpadded_size
234
						> UNPADDED_SIZE_MAX)
235
				return LZMA_DATA_ERROR;
236

237
			index_hash->sequence = SEQ_UNCOMPRESSED;
238
		} else {
239
			// Update the hash.
240
			hash_append(&index_hash->records,
241
					index_hash->unpadded_size,
242
					index_hash->uncompressed_size);
243

244
			// Verify that we don't go over the known sizes. Note
245
			// that this validation is simpler than the one used
246
			// in lzma_index_hash_append(), because here we know
247
			// that values in index_hash->blocks are already
248
			// validated and we are fine as long as we don't
249
			// exceed them in index_hash->records.
250
			if (index_hash->blocks.blocks_size
251
					< index_hash->records.blocks_size
252
					|| index_hash->blocks.uncompressed_size
253
					< index_hash->records.uncompressed_size
254
					|| index_hash->blocks.index_list_size
255
					< index_hash->records.index_list_size)
256
				return LZMA_DATA_ERROR;
257

258
			// Check if this was the last Record.
259
			index_hash->sequence = --index_hash->remaining == 0
260
					? SEQ_PADDING_INIT : SEQ_UNPADDED;
261
		}
262

263
		break;
264
	}
265

266
	case SEQ_PADDING_INIT:
267
		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
268
				index_hash->records.count,
269
				index_hash->records.index_list_size)) & 3;
270
		index_hash->sequence = SEQ_PADDING;
271

272
	// Fall through
273

274
	case SEQ_PADDING:
275
		if (index_hash->pos > 0) {
276
			--index_hash->pos;
277
			if (in[(*in_pos)++] != 0x00)
278
				return LZMA_DATA_ERROR;
279

280
			break;
281
		}
282

283
		// Compare the sizes.
284
		if (index_hash->blocks.blocks_size
285
				!= index_hash->records.blocks_size
286
				|| index_hash->blocks.uncompressed_size
287
				!= index_hash->records.uncompressed_size
288
				|| index_hash->blocks.index_list_size
289
				!= index_hash->records.index_list_size)
290
			return LZMA_DATA_ERROR;
291

292
		// Finish the hashes and compare them.
293
		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
294
		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
295
		if (memcmp(index_hash->blocks.check.buffer.u8,
296
				index_hash->records.check.buffer.u8,
297
				lzma_check_size(LZMA_CHECK_BEST)) != 0)
298
			return LZMA_DATA_ERROR;
299

300
		// Finish the CRC32 calculation.
301
		index_hash->crc32 = lzma_crc32(in + in_start,
302
				*in_pos - in_start, index_hash->crc32);
303

304
		index_hash->sequence = SEQ_CRC32;
305

306
	// Fall through
307

308
	case SEQ_CRC32:
309
		do {
310
			if (*in_pos == in_size)
311
				return LZMA_OK;
312

313
			if (((index_hash->crc32 >> (index_hash->pos * 8))
314
					& 0xFF) != in[(*in_pos)++]) {
315
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
316
				return LZMA_DATA_ERROR;
317
#endif
318
			}
319

320
		} while (++index_hash->pos < 4);
321

322
		return LZMA_STREAM_END;
323

324
	default:
325
		assert(0);
326
		return LZMA_PROG_ERROR;
327
	}
328

329
out:
330
	// Update the CRC32.
331
	//
332
	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
333
	// In such a case we had no input and thus in_used == 0.
334
	{
335
		const size_t in_used = *in_pos - in_start;
336
		if (in_used > 0)
337
			index_hash->crc32 = lzma_crc32(in + in_start,
338
					in_used, index_hash->crc32);
339
	}
340

341
	return ret;
342
}
343

344
Product

Resources

Company