CoCalc -- zstd_decompress.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/zstd/doc/educational_decoder/zstd_decompress.c
⁴⁸³⁷⁸ views
1
/*
2
 * Copyright (c) Facebook, Inc.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10

11
/// Zstandard educational decoder implementation
12
/// See https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
13

14
#include <stdint.h>   // uint8_t, etc.
15
#include <stdlib.h>   // malloc, free, exit
16
#include <stdio.h>    // fprintf
17
#include <string.h>   // memset, memcpy
18
#include "zstd_decompress.h"
19

20

21
/******* IMPORTANT CONSTANTS *********************************************/
22

23
// Zstandard frame
24
// "Magic_Number
25
// 4 Bytes, little-endian format. Value : 0xFD2FB528"
26
#define ZSTD_MAGIC_NUMBER 0xFD2FB528U
27

28
// The size of `Block_Content` is limited by `Block_Maximum_Size`,
29
#define ZSTD_BLOCK_SIZE_MAX ((size_t)128 * 1024)
30

31
// literal blocks can't be larger than their block
32
#define MAX_LITERALS_SIZE ZSTD_BLOCK_SIZE_MAX
33

34

35
/******* UTILITY MACROS AND TYPES *********************************************/
36
#define MAX(a, b) ((a) > (b) ? (a) : (b))
37
#define MIN(a, b) ((a) < (b) ? (a) : (b))
38

39
#if defined(ZDEC_NO_MESSAGE)
40
#define MESSAGE(...)
41
#else
42
#define MESSAGE(...)  fprintf(stderr, "" __VA_ARGS__)
43
#endif
44

45
/// This decoder calls exit(1) when it encounters an error, however a production
46
/// library should propagate error codes
47
#define ERROR(s)                                                               \
48
    do {                                                                       \
49
        MESSAGE("Error: %s\n", s);                                     \
50
        exit(1);                                                               \
51
    } while (0)
52
#define INP_SIZE()                                                             \
53
    ERROR("Input buffer smaller than it should be or input is "                \
54
          "corrupted")
55
#define OUT_SIZE() ERROR("Output buffer too small for output")
56
#define CORRUPTION() ERROR("Corruption detected while decompressing")
57
#define BAD_ALLOC() ERROR("Memory allocation error")
58
#define IMPOSSIBLE() ERROR("An impossibility has occurred")
59

60
typedef uint8_t  u8;
61
typedef uint16_t u16;
62
typedef uint32_t u32;
63
typedef uint64_t u64;
64

65
typedef int8_t  i8;
66
typedef int16_t i16;
67
typedef int32_t i32;
68
typedef int64_t i64;
69
/******* END UTILITY MACROS AND TYPES *****************************************/
70

71
/******* IMPLEMENTATION PRIMITIVE PROTOTYPES **********************************/
72
/// The implementations for these functions can be found at the bottom of this
73
/// file.  They implement low-level functionality needed for the higher level
74
/// decompression functions.
75

76
/*** IO STREAM OPERATIONS *************/
77

78
/// ostream_t/istream_t are used to wrap the pointers/length data passed into
79
/// ZSTD_decompress, so that all IO operations are safely bounds checked
80
/// They are written/read forward, and reads are treated as little-endian
81
/// They should be used opaquely to ensure safety
82
typedef struct {
83
    u8 *ptr;
84
    size_t len;
85
} ostream_t;
86

87
typedef struct {
88
    const u8 *ptr;
89
    size_t len;
90

91
    // Input often reads a few bits at a time, so maintain an internal offset
92
    int bit_offset;
93
} istream_t;
94

95
/// The following two functions are the only ones that allow the istream to be
96
/// non-byte aligned
97

98
/// Reads `num` bits from a bitstream, and updates the internal offset
99
static inline u64 IO_read_bits(istream_t *const in, const int num_bits);
100
/// Backs-up the stream by `num` bits so they can be read again
101
static inline void IO_rewind_bits(istream_t *const in, const int num_bits);
102
/// If the remaining bits in a byte will be unused, advance to the end of the
103
/// byte
104
static inline void IO_align_stream(istream_t *const in);
105

106
/// Write the given byte into the output stream
107
static inline void IO_write_byte(ostream_t *const out, u8 symb);
108

109
/// Returns the number of bytes left to be read in this stream.  The stream must
110
/// be byte aligned.
111
static inline size_t IO_istream_len(const istream_t *const in);
112

113
/// Advances the stream by `len` bytes, and returns a pointer to the chunk that
114
/// was skipped.  The stream must be byte aligned.
115
static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len);
116
/// Advances the stream by `len` bytes, and returns a pointer to the chunk that
117
/// was skipped so it can be written to.
118
static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len);
119

120
/// Advance the inner state by `len` bytes.  The stream must be byte aligned.
121
static inline void IO_advance_input(istream_t *const in, size_t len);
122

123
/// Returns an `ostream_t` constructed from the given pointer and length.
124
static inline ostream_t IO_make_ostream(u8 *out, size_t len);
125
/// Returns an `istream_t` constructed from the given pointer and length.
126
static inline istream_t IO_make_istream(const u8 *in, size_t len);
127

128
/// Returns an `istream_t` with the same base as `in`, and length `len`.
129
/// Then, advance `in` to account for the consumed bytes.
130
/// `in` must be byte aligned.
131
static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len);
132
/*** END IO STREAM OPERATIONS *********/
133

134
/*** BITSTREAM OPERATIONS *************/
135
/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits,
136
/// and return them interpreted as a little-endian unsigned integer.
137
static inline u64 read_bits_LE(const u8 *src, const int num_bits,
138
                               const size_t offset);
139

140
/// Read bits from the end of a HUF or FSE bitstream.  `offset` is in bits, so
141
/// it updates `offset` to `offset - bits`, and then reads `bits` bits from
142
/// `src + offset`.  If the offset becomes negative, the extra bits at the
143
/// bottom are filled in with `0` bits instead of reading from before `src`.
144
static inline u64 STREAM_read_bits(const u8 *src, const int bits,
145
                                   i64 *const offset);
146
/*** END BITSTREAM OPERATIONS *********/
147

148
/*** BIT COUNTING OPERATIONS **********/
149
/// Returns the index of the highest set bit in `num`, or `-1` if `num == 0`
150
static inline int highest_set_bit(const u64 num);
151
/*** END BIT COUNTING OPERATIONS ******/
152

153
/*** HUFFMAN PRIMITIVES ***************/
154
// Table decode method uses exponential memory, so we need to limit depth
155
#define HUF_MAX_BITS (16)
156

157
// Limit the maximum number of symbols to 256 so we can store a symbol in a byte
158
#define HUF_MAX_SYMBS (256)
159

160
/// Structure containing all tables necessary for efficient Huffman decoding
161
typedef struct {
162
    u8 *symbols;
163
    u8 *num_bits;
164
    int max_bits;
165
} HUF_dtable;
166

167
/// Decode a single symbol and read in enough bits to refresh the state
168
static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable,
169
                                   u16 *const state, const u8 *const src,
170
                                   i64 *const offset);
171
/// Read in a full state's worth of bits to initialize it
172
static inline void HUF_init_state(const HUF_dtable *const dtable,
173
                                  u16 *const state, const u8 *const src,
174
                                  i64 *const offset);
175

176
/// Decompresses a single Huffman stream, returns the number of bytes decoded.
177
/// `src_len` must be the exact length of the Huffman-coded block.
178
static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
179
                                     ostream_t *const out, istream_t *const in);
180
/// Same as previous but decodes 4 streams, formatted as in the Zstandard
181
/// specification.
182
/// `src_len` must be the exact length of the Huffman-coded block.
183
static size_t HUF_decompress_4stream(const HUF_dtable *const dtable,
184
                                     ostream_t *const out, istream_t *const in);
185

186
/// Initialize a Huffman decoding table using the table of bit counts provided
187
static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits,
188
                            const int num_symbs);
189
/// Initialize a Huffman decoding table using the table of weights provided
190
/// Weights follow the definition provided in the Zstandard specification
191
static void HUF_init_dtable_usingweights(HUF_dtable *const table,
192
                                         const u8 *const weights,
193
                                         const int num_symbs);
194

195
/// Free the malloc'ed parts of a decoding table
196
static void HUF_free_dtable(HUF_dtable *const dtable);
197
/*** END HUFFMAN PRIMITIVES ***********/
198

199
/*** FSE PRIMITIVES *******************/
200
/// For more description of FSE see
201
/// https://github.com/Cyan4973/FiniteStateEntropy/
202

203
// FSE table decoding uses exponential memory, so limit the maximum accuracy
204
#define FSE_MAX_ACCURACY_LOG (15)
205
// Limit the maximum number of symbols so they can be stored in a single byte
206
#define FSE_MAX_SYMBS (256)
207

208
/// The tables needed to decode FSE encoded streams
209
typedef struct {
210
    u8 *symbols;
211
    u8 *num_bits;
212
    u16 *new_state_base;
213
    int accuracy_log;
214
} FSE_dtable;
215

216
/// Return the symbol for the current state
217
static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable,
218
                                 const u16 state);
219
/// Read the number of bits necessary to update state, update, and shift offset
220
/// back to reflect the bits read
221
static inline void FSE_update_state(const FSE_dtable *const dtable,
222
                                    u16 *const state, const u8 *const src,
223
                                    i64 *const offset);
224

225
/// Combine peek and update: decode a symbol and update the state
226
static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable,
227
                                   u16 *const state, const u8 *const src,
228
                                   i64 *const offset);
229

230
/// Read bits from the stream to initialize the state and shift offset back
231
static inline void FSE_init_state(const FSE_dtable *const dtable,
232
                                  u16 *const state, const u8 *const src,
233
                                  i64 *const offset);
234

235
/// Decompress two interleaved bitstreams (e.g. compressed Huffman weights)
236
/// using an FSE decoding table.  `src_len` must be the exact length of the
237
/// block.
238
static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,
239
                                          ostream_t *const out,
240
                                          istream_t *const in);
241

242
/// Initialize a decoding table using normalized frequencies.
243
static void FSE_init_dtable(FSE_dtable *const dtable,
244
                            const i16 *const norm_freqs, const int num_symbs,
245
                            const int accuracy_log);
246

247
/// Decode an FSE header as defined in the Zstandard format specification and
248
/// use the decoded frequencies to initialize a decoding table.
249
static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,
250
                                const int max_accuracy_log);
251

252
/// Initialize an FSE table that will always return the same symbol and consume
253
/// 0 bits per symbol, to be used for RLE mode in sequence commands
254
static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb);
255

256
/// Free the malloc'ed parts of a decoding table
257
static void FSE_free_dtable(FSE_dtable *const dtable);
258
/*** END FSE PRIMITIVES ***************/
259

260
/******* END IMPLEMENTATION PRIMITIVE PROTOTYPES ******************************/
261

262
/******* ZSTD HELPER STRUCTS AND PROTOTYPES ***********************************/
263

264
/// A small structure that can be reused in various places that need to access
265
/// frame header information
266
typedef struct {
267
    // The size of window that we need to be able to contiguously store for
268
    // references
269
    size_t window_size;
270
    // The total output size of this compressed frame
271
    size_t frame_content_size;
272

273
    // The dictionary id if this frame uses one
274
    u32 dictionary_id;
275

276
    // Whether or not the content of this frame has a checksum
277
    int content_checksum_flag;
278
    // Whether or not the output for this frame is in a single segment
279
    int single_segment_flag;
280
} frame_header_t;
281

282
/// The context needed to decode blocks in a frame
283
typedef struct {
284
    frame_header_t header;
285

286
    // The total amount of data available for backreferences, to determine if an
287
    // offset too large to be correct
288
    size_t current_total_output;
289

290
    const u8 *dict_content;
291
    size_t dict_content_len;
292

293
    // Entropy encoding tables so they can be repeated by future blocks instead
294
    // of retransmitting
295
    HUF_dtable literals_dtable;
296
    FSE_dtable ll_dtable;
297
    FSE_dtable ml_dtable;
298
    FSE_dtable of_dtable;
299

300
    // The last 3 offsets for the special "repeat offsets".
301
    u64 previous_offsets[3];
302
} frame_context_t;
303

304
/// The decoded contents of a dictionary so that it doesn't have to be repeated
305
/// for each frame that uses it
306
struct dictionary_s {
307
    // Entropy tables
308
    HUF_dtable literals_dtable;
309
    FSE_dtable ll_dtable;
310
    FSE_dtable ml_dtable;
311
    FSE_dtable of_dtable;
312

313
    // Raw content for backreferences
314
    u8 *content;
315
    size_t content_size;
316

317
    // Offset history to prepopulate the frame's history
318
    u64 previous_offsets[3];
319

320
    u32 dictionary_id;
321
};
322

323
/// A tuple containing the parts necessary to decode and execute a ZSTD sequence
324
/// command
325
typedef struct {
326
    u32 literal_length;
327
    u32 match_length;
328
    u32 offset;
329
} sequence_command_t;
330

331
/// The decoder works top-down, starting at the high level like Zstd frames, and
332
/// working down to lower more technical levels such as blocks, literals, and
333
/// sequences.  The high-level functions roughly follow the outline of the
334
/// format specification:
335
/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
336

337
/// Before the implementation of each high-level function declared here, the
338
/// prototypes for their helper functions are defined and explained
339

340
/// Decode a single Zstd frame, or error if the input is not a valid frame.
341
/// Accepts a dict argument, which may be NULL indicating no dictionary.
342
/// See
343
/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame-concatenation
344
static void decode_frame(ostream_t *const out, istream_t *const in,
345
                         const dictionary_t *const dict);
346

347
// Decode data in a compressed block
348
static void decompress_block(frame_context_t *const ctx, ostream_t *const out,
349
                             istream_t *const in);
350

351
// Decode the literals section of a block
352
static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
353
                              u8 **const literals);
354

355
// Decode the sequences part of a block
356
static size_t decode_sequences(frame_context_t *const ctx, istream_t *const in,
357
                               sequence_command_t **const sequences);
358

359
// Execute the decoded sequences on the literals block
360
static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
361
                              const u8 *const literals,
362
                              const size_t literals_len,
363
                              const sequence_command_t *const sequences,
364
                              const size_t num_sequences);
365

366
// Copies literals and returns the total literal length that was copied
367
static u32 copy_literals(const size_t seq, istream_t *litstream,
368
                         ostream_t *const out);
369

370
// Given an offset code from a sequence command (either an actual offset value
371
// or an index for previous offset), computes the correct offset and updates
372
// the offset history
373
static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist);
374

375
// Given an offset, match length, and total output, as well as the frame
376
// context for the dictionary, determines if the dictionary is used and
377
// executes the copy operation
378
static void execute_match_copy(frame_context_t *const ctx, size_t offset,
379
                              size_t match_length, size_t total_output,
380
                              ostream_t *const out);
381

382
/******* END ZSTD HELPER STRUCTS AND PROTOTYPES *******************************/
383

384
size_t ZSTD_decompress(void *const dst, const size_t dst_len,
385
                       const void *const src, const size_t src_len) {
386
    dictionary_t* const uninit_dict = create_dictionary();
387
    size_t const decomp_size = ZSTD_decompress_with_dict(dst, dst_len, src,
388
                                                         src_len, uninit_dict);
389
    free_dictionary(uninit_dict);
390
    return decomp_size;
391
}
392

393
size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
394
                                 const void *const src, const size_t src_len,
395
                                 dictionary_t* parsed_dict) {
396

397
    istream_t in = IO_make_istream(src, src_len);
398
    ostream_t out = IO_make_ostream(dst, dst_len);
399

400
    // "A content compressed by Zstandard is transformed into a Zstandard frame.
401
    // Multiple frames can be appended into a single file or stream. A frame is
402
    // totally independent, has a defined beginning and end, and a set of
403
    // parameters which tells the decoder how to decompress it."
404

405
    /* this decoder assumes decompression of a single frame */
406
    decode_frame(&out, &in, parsed_dict);
407

408
    return (size_t)(out.ptr - (u8 *)dst);
409
}
410

411
/******* FRAME DECODING ******************************************************/
412

413
static void decode_data_frame(ostream_t *const out, istream_t *const in,
414
                              const dictionary_t *const dict);
415
static void init_frame_context(frame_context_t *const context,
416
                               istream_t *const in,
417
                               const dictionary_t *const dict);
418
static void free_frame_context(frame_context_t *const context);
419
static void parse_frame_header(frame_header_t *const header,
420
                               istream_t *const in);
421
static void frame_context_apply_dict(frame_context_t *const ctx,
422
                                     const dictionary_t *const dict);
423

424
static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
425
                            istream_t *const in);
426

427
static void decode_frame(ostream_t *const out, istream_t *const in,
428
                         const dictionary_t *const dict) {
429
    const u32 magic_number = (u32)IO_read_bits(in, 32);
430
    if (magic_number == ZSTD_MAGIC_NUMBER) {
431
        // ZSTD frame
432
        decode_data_frame(out, in, dict);
433

434
        return;
435
    }
436

437
    // not a real frame or a skippable frame
438
    ERROR("Tried to decode non-ZSTD frame");
439
}
440

441
/// Decode a frame that contains compressed data.  Not all frames do as there
442
/// are skippable frames.
443
/// See
444
/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#general-structure-of-zstandard-frame-format
445
static void decode_data_frame(ostream_t *const out, istream_t *const in,
446
                              const dictionary_t *const dict) {
447
    frame_context_t ctx;
448

449
    // Initialize the context that needs to be carried from block to block
450
    init_frame_context(&ctx, in, dict);
451

452
    if (ctx.header.frame_content_size != 0 &&
453
        ctx.header.frame_content_size > out->len) {
454
        OUT_SIZE();
455
    }
456

457
    decompress_data(&ctx, out, in);
458

459
    free_frame_context(&ctx);
460
}
461

462
/// Takes the information provided in the header and dictionary, and initializes
463
/// the context for this frame
464
static void init_frame_context(frame_context_t *const context,
465
                               istream_t *const in,
466
                               const dictionary_t *const dict) {
467
    // Most fields in context are correct when initialized to 0
468
    memset(context, 0, sizeof(frame_context_t));
469

470
    // Parse data from the frame header
471
    parse_frame_header(&context->header, in);
472

473
    // Set up the offset history for the repeat offset commands
474
    context->previous_offsets[0] = 1;
475
    context->previous_offsets[1] = 4;
476
    context->previous_offsets[2] = 8;
477

478
    // Apply details from the dict if it exists
479
    frame_context_apply_dict(context, dict);
480
}
481

482
static void free_frame_context(frame_context_t *const context) {
483
    HUF_free_dtable(&context->literals_dtable);
484

485
    FSE_free_dtable(&context->ll_dtable);
486
    FSE_free_dtable(&context->ml_dtable);
487
    FSE_free_dtable(&context->of_dtable);
488

489
    memset(context, 0, sizeof(frame_context_t));
490
}
491

492
static void parse_frame_header(frame_header_t *const header,
493
                               istream_t *const in) {
494
    // "The first header's byte is called the Frame_Header_Descriptor. It tells
495
    // which other fields are present. Decoding this byte is enough to tell the
496
    // size of Frame_Header.
497
    //
498
    // Bit number   Field name
499
    // 7-6  Frame_Content_Size_flag
500
    // 5    Single_Segment_flag
501
    // 4    Unused_bit
502
    // 3    Reserved_bit
503
    // 2    Content_Checksum_flag
504
    // 1-0  Dictionary_ID_flag"
505
    const u8 descriptor = (u8)IO_read_bits(in, 8);
506

507
    // decode frame header descriptor into flags
508
    const u8 frame_content_size_flag = descriptor >> 6;
509
    const u8 single_segment_flag = (descriptor >> 5) & 1;
510
    const u8 reserved_bit = (descriptor >> 3) & 1;
511
    const u8 content_checksum_flag = (descriptor >> 2) & 1;
512
    const u8 dictionary_id_flag = descriptor & 3;
513

514
    if (reserved_bit != 0) {
515
        CORRUPTION();
516
    }
517

518
    header->single_segment_flag = single_segment_flag;
519
    header->content_checksum_flag = content_checksum_flag;
520

521
    // decode window size
522
    if (!single_segment_flag) {
523
        // "Provides guarantees on maximum back-reference distance that will be
524
        // used within compressed data. This information is important for
525
        // decoders to allocate enough memory.
526
        //
527
        // Bit numbers  7-3         2-0
528
        // Field name   Exponent    Mantissa"
529
        u8 window_descriptor = (u8)IO_read_bits(in, 8);
530
        u8 exponent = window_descriptor >> 3;
531
        u8 mantissa = window_descriptor & 7;
532

533
        // Use the algorithm from the specification to compute window size
534
        // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
535
        size_t window_base = (size_t)1 << (10 + exponent);
536
        size_t window_add = (window_base / 8) * mantissa;
537
        header->window_size = window_base + window_add;
538
    }
539

540
    // decode dictionary id if it exists
541
    if (dictionary_id_flag) {
542
        // "This is a variable size field, which contains the ID of the
543
        // dictionary required to properly decode the frame. Note that this
544
        // field is optional. When it's not present, it's up to the caller to
545
        // make sure it uses the correct dictionary. Format is little-endian."
546
        const int bytes_array[] = {0, 1, 2, 4};
547
        const int bytes = bytes_array[dictionary_id_flag];
548

549
        header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
550
    } else {
551
        header->dictionary_id = 0;
552
    }
553

554
    // decode frame content size if it exists
555
    if (single_segment_flag || frame_content_size_flag) {
556
        // "This is the original (uncompressed) size. This information is
557
        // optional. The Field_Size is provided according to value of
558
        // Frame_Content_Size_flag. The Field_Size can be equal to 0 (not
559
        // present), 1, 2, 4 or 8 bytes. Format is little-endian."
560
        //
561
        // if frame_content_size_flag == 0 but single_segment_flag is set, we
562
        // still have a 1 byte field
563
        const int bytes_array[] = {1, 2, 4, 8};
564
        const int bytes = bytes_array[frame_content_size_flag];
565

566
        header->frame_content_size = IO_read_bits(in, bytes * 8);
567
        if (bytes == 2) {
568
            // "When Field_Size is 2, the offset of 256 is added."
569
            header->frame_content_size += 256;
570
        }
571
    } else {
572
        header->frame_content_size = 0;
573
    }
574

575
    if (single_segment_flag) {
576
        // "The Window_Descriptor byte is optional. It is absent when
577
        // Single_Segment_flag is set. In this case, the maximum back-reference
578
        // distance is the content size itself, which can be any value from 1 to
579
        // 2^64-1 bytes (16 EB)."
580
        header->window_size = header->frame_content_size;
581
    }
582
}
583

584
/// Decompress the data from a frame block by block
585
static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
586
                            istream_t *const in) {
587
    // "A frame encapsulates one or multiple blocks. Each block can be
588
    // compressed or not, and has a guaranteed maximum content size, which
589
    // depends on frame parameters. Unlike frames, each block depends on
590
    // previous blocks for proper decoding. However, each block can be
591
    // decompressed without waiting for its successor, allowing streaming
592
    // operations."
593
    int last_block = 0;
594
    do {
595
        // "Last_Block
596
        //
597
        // The lowest bit signals if this block is the last one. Frame ends
598
        // right after this block.
599
        //
600
        // Block_Type and Block_Size
601
        //
602
        // The next 2 bits represent the Block_Type, while the remaining 21 bits
603
        // represent the Block_Size. Format is little-endian."
604
        last_block = (int)IO_read_bits(in, 1);
605
        const int block_type = (int)IO_read_bits(in, 2);
606
        const size_t block_len = IO_read_bits(in, 21);
607

608
        switch (block_type) {
609
        case 0: {
610
            // "Raw_Block - this is an uncompressed block. Block_Size is the
611
            // number of bytes to read and copy."
612
            const u8 *const read_ptr = IO_get_read_ptr(in, block_len);
613
            u8 *const write_ptr = IO_get_write_ptr(out, block_len);
614

615
            // Copy the raw data into the output
616
            memcpy(write_ptr, read_ptr, block_len);
617

618
            ctx->current_total_output += block_len;
619
            break;
620
        }
621
        case 1: {
622
            // "RLE_Block - this is a single byte, repeated N times. In which
623
            // case, Block_Size is the size to regenerate, while the
624
            // "compressed" block is just 1 byte (the byte to repeat)."
625
            const u8 *const read_ptr = IO_get_read_ptr(in, 1);
626
            u8 *const write_ptr = IO_get_write_ptr(out, block_len);
627

628
            // Copy `block_len` copies of `read_ptr[0]` to the output
629
            memset(write_ptr, read_ptr[0], block_len);
630

631
            ctx->current_total_output += block_len;
632
            break;
633
        }
634
        case 2: {
635
            // "Compressed_Block - this is a Zstandard compressed block,
636
            // detailed in another section of this specification. Block_Size is
637
            // the compressed size.
638

639
            // Create a sub-stream for the block
640
            istream_t block_stream = IO_make_sub_istream(in, block_len);
641
            decompress_block(ctx, out, &block_stream);
642
            break;
643
        }
644
        case 3:
645
            // "Reserved - this is not a block. This value cannot be used with
646
            // current version of this specification."
647
            CORRUPTION();
648
            break;
649
        default:
650
            IMPOSSIBLE();
651
        }
652
    } while (!last_block);
653

654
    if (ctx->header.content_checksum_flag) {
655
        // This program does not support checking the checksum, so skip over it
656
        // if it's present
657
        IO_advance_input(in, 4);
658
    }
659
}
660
/******* END FRAME DECODING ***************************************************/
661

662
/******* BLOCK DECOMPRESSION **************************************************/
663
static void decompress_block(frame_context_t *const ctx, ostream_t *const out,
664
                             istream_t *const in) {
665
    // "A compressed block consists of 2 sections :
666
    //
667
    // Literals_Section
668
    // Sequences_Section"
669

670

671
    // Part 1: decode the literals block
672
    u8 *literals = NULL;
673
    const size_t literals_size = decode_literals(ctx, in, &literals);
674

675
    // Part 2: decode the sequences block
676
    sequence_command_t *sequences = NULL;
677
    const size_t num_sequences =
678
        decode_sequences(ctx, in, &sequences);
679

680
    // Part 3: combine literals and sequence commands to generate output
681
    execute_sequences(ctx, out, literals, literals_size, sequences,
682
                      num_sequences);
683
    free(literals);
684
    free(sequences);
685
}
686
/******* END BLOCK DECOMPRESSION **********************************************/
687

688
/******* LITERALS DECODING ****************************************************/
689
static size_t decode_literals_simple(istream_t *const in, u8 **const literals,
690
                                     const int block_type,
691
                                     const int size_format);
692
static size_t decode_literals_compressed(frame_context_t *const ctx,
693
                                         istream_t *const in,
694
                                         u8 **const literals,
695
                                         const int block_type,
696
                                         const int size_format);
697
static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in);
698
static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,
699
                                    int *const num_symbs);
700

701
static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
702
                              u8 **const literals) {
703
    // "Literals can be stored uncompressed or compressed using Huffman prefix
704
    // codes. When compressed, an optional tree description can be present,
705
    // followed by 1 or 4 streams."
706
    //
707
    // "Literals_Section_Header
708
    //
709
    // Header is in charge of describing how literals are packed. It's a
710
    // byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, using
711
    // little-endian convention."
712
    //
713
    // "Literals_Block_Type
714
    //
715
    // This field uses 2 lowest bits of first byte, describing 4 different block
716
    // types"
717
    //
718
    // size_format takes between 1 and 2 bits
719
    int block_type = (int)IO_read_bits(in, 2);
720
    int size_format = (int)IO_read_bits(in, 2);
721

722
    if (block_type <= 1) {
723
        // Raw or RLE literals block
724
        return decode_literals_simple(in, literals, block_type,
725
                                      size_format);
726
    } else {
727
        // Huffman compressed literals
728
        return decode_literals_compressed(ctx, in, literals, block_type,
729
                                          size_format);
730
    }
731
}
732

733
/// Decodes literals blocks in raw or RLE form
734
static size_t decode_literals_simple(istream_t *const in, u8 **const literals,
735
                                     const int block_type,
736
                                     const int size_format) {
737
    size_t size;
738
    switch (size_format) {
739
    // These cases are in the form ?0
740
    // In this case, the ? bit is actually part of the size field
741
    case 0:
742
    case 2:
743
        // "Size_Format uses 1 bit. Regenerated_Size uses 5 bits (0-31)."
744
        IO_rewind_bits(in, 1);
745
        size = IO_read_bits(in, 5);
746
        break;
747
    case 1:
748
        // "Size_Format uses 2 bits. Regenerated_Size uses 12 bits (0-4095)."
749
        size = IO_read_bits(in, 12);
750
        break;
751
    case 3:
752
        // "Size_Format uses 2 bits. Regenerated_Size uses 20 bits (0-1048575)."
753
        size = IO_read_bits(in, 20);
754
        break;
755
    default:
756
        // Size format is in range 0-3
757
        IMPOSSIBLE();
758
    }
759

760
    if (size > MAX_LITERALS_SIZE) {
761
        CORRUPTION();
762
    }
763

764
    *literals = malloc(size);
765
    if (!*literals) {
766
        BAD_ALLOC();
767
    }
768

769
    switch (block_type) {
770
    case 0: {
771
        // "Raw_Literals_Block - Literals are stored uncompressed."
772
        const u8 *const read_ptr = IO_get_read_ptr(in, size);
773
        memcpy(*literals, read_ptr, size);
774
        break;
775
    }
776
    case 1: {
777
        // "RLE_Literals_Block - Literals consist of a single byte value repeated N times."
778
        const u8 *const read_ptr = IO_get_read_ptr(in, 1);
779
        memset(*literals, read_ptr[0], size);
780
        break;
781
    }
782
    default:
783
        IMPOSSIBLE();
784
    }
785

786
    return size;
787
}
788

789
/// Decodes Huffman compressed literals
790
static size_t decode_literals_compressed(frame_context_t *const ctx,
791
                                         istream_t *const in,
792
                                         u8 **const literals,
793
                                         const int block_type,
794
                                         const int size_format) {
795
    size_t regenerated_size, compressed_size;
796
    // Only size_format=0 has 1 stream, so default to 4
797
    int num_streams = 4;
798
    switch (size_format) {
799
    case 0:
800
        // "A single stream. Both Compressed_Size and Regenerated_Size use 10
801
        // bits (0-1023)."
802
        num_streams = 1;
803
    // Fall through as it has the same size format
804
        /* fallthrough */
805
    case 1:
806
        // "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
807
        // (0-1023)."
808
        regenerated_size = IO_read_bits(in, 10);
809
        compressed_size = IO_read_bits(in, 10);
810
        break;
811
    case 2:
812
        // "4 streams. Both Compressed_Size and Regenerated_Size use 14 bits
813
        // (0-16383)."
814
        regenerated_size = IO_read_bits(in, 14);
815
        compressed_size = IO_read_bits(in, 14);
816
        break;
817
    case 3:
818
        // "4 streams. Both Compressed_Size and Regenerated_Size use 18 bits
819
        // (0-262143)."
820
        regenerated_size = IO_read_bits(in, 18);
821
        compressed_size = IO_read_bits(in, 18);
822
        break;
823
    default:
824
        // Impossible
825
        IMPOSSIBLE();
826
    }
827
    if (regenerated_size > MAX_LITERALS_SIZE) {
828
        CORRUPTION();
829
    }
830

831
    *literals = malloc(regenerated_size);
832
    if (!*literals) {
833
        BAD_ALLOC();
834
    }
835

836
    ostream_t lit_stream = IO_make_ostream(*literals, regenerated_size);
837
    istream_t huf_stream = IO_make_sub_istream(in, compressed_size);
838

839
    if (block_type == 2) {
840
        // Decode the provided Huffman table
841
        // "This section is only present when Literals_Block_Type type is
842
        // Compressed_Literals_Block (2)."
843

844
        HUF_free_dtable(&ctx->literals_dtable);
845
        decode_huf_table(&ctx->literals_dtable, &huf_stream);
846
    } else {
847
        // If the previous Huffman table is being repeated, ensure it exists
848
        if (!ctx->literals_dtable.symbols) {
849
            CORRUPTION();
850
        }
851
    }
852

853
    size_t symbols_decoded;
854
    if (num_streams == 1) {
855
        symbols_decoded = HUF_decompress_1stream(&ctx->literals_dtable, &lit_stream, &huf_stream);
856
    } else {
857
        symbols_decoded = HUF_decompress_4stream(&ctx->literals_dtable, &lit_stream, &huf_stream);
858
    }
859

860
    if (symbols_decoded != regenerated_size) {
861
        CORRUPTION();
862
    }
863

864
    return regenerated_size;
865
}
866

867
// Decode the Huffman table description
868
static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in) {
869
    // "All literal values from zero (included) to last present one (excluded)
870
    // are represented by Weight with values from 0 to Max_Number_of_Bits."
871

872
    // "This is a single byte value (0-255), which describes how to decode the list of weights."
873
    const u8 header = IO_read_bits(in, 8);
874

875
    u8 weights[HUF_MAX_SYMBS];
876
    memset(weights, 0, sizeof(weights));
877

878
    int num_symbs;
879

880
    if (header >= 128) {
881
        // "This is a direct representation, where each Weight is written
882
        // directly as a 4 bits field (0-15). The full representation occupies
883
        // ((Number_of_Symbols+1)/2) bytes, meaning it uses a last full byte
884
        // even if Number_of_Symbols is odd. Number_of_Symbols = headerByte -
885
        // 127"
886
        num_symbs = header - 127;
887
        const size_t bytes = (num_symbs + 1) / 2;
888

889
        const u8 *const weight_src = IO_get_read_ptr(in, bytes);
890

891
        for (int i = 0; i < num_symbs; i++) {
892
            // "They are encoded forward, 2
893
            // weights to a byte with the first weight taking the top four bits
894
            // and the second taking the bottom four (e.g. the following
895
            // operations could be used to read the weights: Weight[0] =
896
            // (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf), etc.)."
897
            if (i % 2 == 0) {
898
                weights[i] = weight_src[i / 2] >> 4;
899
            } else {
900
                weights[i] = weight_src[i / 2] & 0xf;
901
            }
902
        }
903
    } else {
904
        // The weights are FSE encoded, decode them before we can construct the
905
        // table
906
        istream_t fse_stream = IO_make_sub_istream(in, header);
907
        ostream_t weight_stream = IO_make_ostream(weights, HUF_MAX_SYMBS);
908
        fse_decode_hufweights(&weight_stream, &fse_stream, &num_symbs);
909
    }
910

911
    // Construct the table using the decoded weights
912
    HUF_init_dtable_usingweights(dtable, weights, num_symbs);
913
}
914

915
static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,
916
                                    int *const num_symbs) {
917
    const int MAX_ACCURACY_LOG = 7;
918

919
    FSE_dtable dtable;
920

921
    // "An FSE bitstream starts by a header, describing probabilities
922
    // distribution. It will create a Decoding Table. For a list of Huffman
923
    // weights, maximum accuracy is 7 bits."
924
    FSE_decode_header(&dtable, in, MAX_ACCURACY_LOG);
925

926
    // Decode the weights
927
    *num_symbs = FSE_decompress_interleaved2(&dtable, weights, in);
928

929
    FSE_free_dtable(&dtable);
930
}
931
/******* END LITERALS DECODING ************************************************/
932

933
/******* SEQUENCE DECODING ****************************************************/
934
/// The combination of FSE states needed to decode sequences
935
typedef struct {
936
    FSE_dtable ll_table;
937
    FSE_dtable of_table;
938
    FSE_dtable ml_table;
939

940
    u16 ll_state;
941
    u16 of_state;
942
    u16 ml_state;
943
} sequence_states_t;
944

945
/// Different modes to signal to decode_seq_tables what to do
946
typedef enum {
947
    seq_literal_length = 0,
948
    seq_offset = 1,
949
    seq_match_length = 2,
950
} seq_part_t;
951

952
typedef enum {
953
    seq_predefined = 0,
954
    seq_rle = 1,
955
    seq_fse = 2,
956
    seq_repeat = 3,
957
} seq_mode_t;
958

959
/// The predefined FSE distribution tables for `seq_predefined` mode
960
static const i16 SEQ_LITERAL_LENGTH_DEFAULT_DIST[36] = {
961
    4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1,  1,  2,  2,
962
    2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};
963
static const i16 SEQ_OFFSET_DEFAULT_DIST[29] = {
964
    1, 1, 1, 1, 1, 1, 2, 2, 2, 1,  1,  1,  1,  1, 1,
965
    1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
966
static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {
967
    1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1,  1,  1,  1,  1,  1,  1, 1,
968
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  1,  1,  1,  1,  1,  1, 1,
969
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};
970

971
/// The sequence decoding baseline and number of additional bits to read/add
972
/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets
973
static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
974
    0,  1,  2,   3,   4,   5,    6,    7,    8,    9,     10,    11,
975
    12, 13, 14,  15,  16,  18,   20,   22,   24,   28,    32,    40,
976
    48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
977
static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
978
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  1,  1,
979
    1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
980

981
static const u32 SEQ_MATCH_LENGTH_BASELINES[53] = {
982
    3,  4,   5,   6,   7,    8,    9,    10,   11,    12,    13,   14, 15, 16,
983
    17, 18,  19,  20,  21,   22,   23,   24,   25,    26,    27,   28, 29, 30,
984
    31, 32,  33,  34,  35,   37,   39,   41,   43,    47,    51,   59, 67, 83,
985
    99, 131, 259, 515, 1027, 2051, 4099, 8195, 16387, 32771, 65539};
986
static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {
987
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0, 0,
988
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  1,  1,  1, 1,
989
    2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
990

991
/// Offset decoding is simpler so we just need a maximum code value
992
static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};
993

994
static void decompress_sequences(frame_context_t *const ctx,
995
                                 istream_t *const in,
996
                                 sequence_command_t *const sequences,
997
                                 const size_t num_sequences);
998
static sequence_command_t decode_sequence(sequence_states_t *const state,
999
                                          const u8 *const src,
1000
                                          i64 *const offset);
1001
static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
1002
                               const seq_part_t type, const seq_mode_t mode);
1003

1004
static size_t decode_sequences(frame_context_t *const ctx, istream_t *in,
1005
                               sequence_command_t **const sequences) {
1006
    // "A compressed block is a succession of sequences . A sequence is a
1007
    // literal copy command, followed by a match copy command. A literal copy
1008
    // command specifies a length. It is the number of bytes to be copied (or
1009
    // extracted) from the literal section. A match copy command specifies an
1010
    // offset and a length. The offset gives the position to copy from, which
1011
    // can be within a previous block."
1012

1013
    size_t num_sequences;
1014

1015
    // "Number_of_Sequences
1016
    //
1017
    // This is a variable size field using between 1 and 3 bytes. Let's call its
1018
    // first byte byte0."
1019
    u8 header = IO_read_bits(in, 8);
1020
    if (header == 0) {
1021
        // "There are no sequences. The sequence section stops there.
1022
        // Regenerated content is defined entirely by literals section."
1023
        *sequences = NULL;
1024
        return 0;
1025
    } else if (header < 128) {
1026
        // "Number_of_Sequences = byte0 . Uses 1 byte."
1027
        num_sequences = header;
1028
    } else if (header < 255) {
1029
        // "Number_of_Sequences = ((byte0-128) << 8) + byte1 . Uses 2 bytes."
1030
        num_sequences = ((header - 128) << 8) + IO_read_bits(in, 8);
1031
    } else {
1032
        // "Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00 . Uses 3 bytes."
1033
        num_sequences = IO_read_bits(in, 16) + 0x7F00;
1034
    }
1035

1036
    *sequences = malloc(num_sequences * sizeof(sequence_command_t));
1037
    if (!*sequences) {
1038
        BAD_ALLOC();
1039
    }
1040

1041
    decompress_sequences(ctx, in, *sequences, num_sequences);
1042
    return num_sequences;
1043
}
1044

1045
/// Decompress the FSE encoded sequence commands
1046
static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
1047
                                 sequence_command_t *const sequences,
1048
                                 const size_t num_sequences) {
1049
    // "The Sequences_Section regroup all symbols required to decode commands.
1050
    // There are 3 symbol types : literals lengths, offsets and match lengths.
1051
    // They are encoded together, interleaved, in a single bitstream."
1052

1053
    // "Symbol compression modes
1054
    //
1055
    // This is a single byte, defining the compression mode of each symbol
1056
    // type."
1057
    //
1058
    // Bit number : Field name
1059
    // 7-6        : Literals_Lengths_Mode
1060
    // 5-4        : Offsets_Mode
1061
    // 3-2        : Match_Lengths_Mode
1062
    // 1-0        : Reserved
1063
    u8 compression_modes = IO_read_bits(in, 8);
1064

1065
    if ((compression_modes & 3) != 0) {
1066
        // Reserved bits set
1067
        CORRUPTION();
1068
    }
1069

1070
    // "Following the header, up to 3 distribution tables can be described. When
1071
    // present, they are in this order :
1072
    //
1073
    // Literals lengths
1074
    // Offsets
1075
    // Match Lengths"
1076
    // Update the tables we have stored in the context
1077
    decode_seq_table(&ctx->ll_dtable, in, seq_literal_length,
1078
                     (compression_modes >> 6) & 3);
1079

1080
    decode_seq_table(&ctx->of_dtable, in, seq_offset,
1081
                     (compression_modes >> 4) & 3);
1082

1083
    decode_seq_table(&ctx->ml_dtable, in, seq_match_length,
1084
                     (compression_modes >> 2) & 3);
1085

1086

1087
    sequence_states_t states;
1088

1089
    // Initialize the decoding tables
1090
    {
1091
        states.ll_table = ctx->ll_dtable;
1092
        states.of_table = ctx->of_dtable;
1093
        states.ml_table = ctx->ml_dtable;
1094
    }
1095

1096
    const size_t len = IO_istream_len(in);
1097
    const u8 *const src = IO_get_read_ptr(in, len);
1098

1099
    // "After writing the last bit containing information, the compressor writes
1100
    // a single 1-bit and then fills the byte with 0-7 0 bits of padding."
1101
    const int padding = 8 - highest_set_bit(src[len - 1]);
1102
    // The offset starts at the end because FSE streams are read backwards
1103
    i64 bit_offset = (i64)(len * 8 - (size_t)padding);
1104

1105
    // "The bitstream starts with initial state values, each using the required
1106
    // number of bits in their respective accuracy, decoded previously from
1107
    // their normalized distribution.
1108
    //
1109
    // It starts by Literals_Length_State, followed by Offset_State, and finally
1110
    // Match_Length_State."
1111
    FSE_init_state(&states.ll_table, &states.ll_state, src, &bit_offset);
1112
    FSE_init_state(&states.of_table, &states.of_state, src, &bit_offset);
1113
    FSE_init_state(&states.ml_table, &states.ml_state, src, &bit_offset);
1114

1115
    for (size_t i = 0; i < num_sequences; i++) {
1116
        // Decode sequences one by one
1117
        sequences[i] = decode_sequence(&states, src, &bit_offset);
1118
    }
1119

1120
    if (bit_offset != 0) {
1121
        CORRUPTION();
1122
    }
1123
}
1124

1125
// Decode a single sequence and update the state
1126
static sequence_command_t decode_sequence(sequence_states_t *const states,
1127
                                          const u8 *const src,
1128
                                          i64 *const offset) {
1129
    // "Each symbol is a code in its own context, which specifies Baseline and
1130
    // Number_of_Bits to add. Codes are FSE compressed, and interleaved with raw
1131
    // additional bits in the same bitstream."
1132

1133
    // Decode symbols, but don't update states
1134
    const u8 of_code = FSE_peek_symbol(&states->of_table, states->of_state);
1135
    const u8 ll_code = FSE_peek_symbol(&states->ll_table, states->ll_state);
1136
    const u8 ml_code = FSE_peek_symbol(&states->ml_table, states->ml_state);
1137

1138
    // Offset doesn't need a max value as it's not decoded using a table
1139
    if (ll_code > SEQ_MAX_CODES[seq_literal_length] ||
1140
        ml_code > SEQ_MAX_CODES[seq_match_length]) {
1141
        CORRUPTION();
1142
    }
1143

1144
    // Read the interleaved bits
1145
    sequence_command_t seq;
1146
    // "Decoding starts by reading the Number_of_Bits required to decode Offset.
1147
    // It then does the same for Match_Length, and then for Literals_Length."
1148
    seq.offset = ((u32)1 << of_code) + STREAM_read_bits(src, of_code, offset);
1149

1150
    seq.match_length =
1151
        SEQ_MATCH_LENGTH_BASELINES[ml_code] +
1152
        STREAM_read_bits(src, SEQ_MATCH_LENGTH_EXTRA_BITS[ml_code], offset);
1153

1154
    seq.literal_length =
1155
        SEQ_LITERAL_LENGTH_BASELINES[ll_code] +
1156
        STREAM_read_bits(src, SEQ_LITERAL_LENGTH_EXTRA_BITS[ll_code], offset);
1157

1158
    // "If it is not the last sequence in the block, the next operation is to
1159
    // update states. Using the rules pre-calculated in the decoding tables,
1160
    // Literals_Length_State is updated, followed by Match_Length_State, and
1161
    // then Offset_State."
1162
    // If the stream is complete don't read bits to update state
1163
    if (*offset != 0) {
1164
        FSE_update_state(&states->ll_table, &states->ll_state, src, offset);
1165
        FSE_update_state(&states->ml_table, &states->ml_state, src, offset);
1166
        FSE_update_state(&states->of_table, &states->of_state, src, offset);
1167
    }
1168

1169
    return seq;
1170
}
1171

1172
/// Given a sequence part and table mode, decode the FSE distribution
1173
/// Errors if the mode is `seq_repeat` without a pre-existing table in `table`
1174
static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
1175
                             const seq_part_t type, const seq_mode_t mode) {
1176
    // Constant arrays indexed by seq_part_t
1177
    const i16 *const default_distributions[] = {SEQ_LITERAL_LENGTH_DEFAULT_DIST,
1178
                                                SEQ_OFFSET_DEFAULT_DIST,
1179
                                                SEQ_MATCH_LENGTH_DEFAULT_DIST};
1180
    const size_t default_distribution_lengths[] = {36, 29, 53};
1181
    const size_t default_distribution_accuracies[] = {6, 5, 6};
1182

1183
    const size_t max_accuracies[] = {9, 8, 9};
1184

1185
    if (mode != seq_repeat) {
1186
        // Free old one before overwriting
1187
        FSE_free_dtable(table);
1188
    }
1189

1190
    switch (mode) {
1191
    case seq_predefined: {
1192
        // "Predefined_Mode : uses a predefined distribution table."
1193
        const i16 *distribution = default_distributions[type];
1194
        const size_t symbs = default_distribution_lengths[type];
1195
        const size_t accuracy_log = default_distribution_accuracies[type];
1196

1197
        FSE_init_dtable(table, distribution, symbs, accuracy_log);
1198
        break;
1199
    }
1200
    case seq_rle: {
1201
        // "RLE_Mode : it's a single code, repeated Number_of_Sequences times."
1202
        const u8 symb = IO_get_read_ptr(in, 1)[0];
1203
        FSE_init_dtable_rle(table, symb);
1204
        break;
1205
    }
1206
    case seq_fse: {
1207
        // "FSE_Compressed_Mode : standard FSE compression. A distribution table
1208
        // will be present "
1209
        FSE_decode_header(table, in, max_accuracies[type]);
1210
        break;
1211
    }
1212
    case seq_repeat:
1213
        // "Repeat_Mode : re-use distribution table from previous compressed
1214
        // block."
1215
        // Nothing to do here, table will be unchanged
1216
        if (!table->symbols) {
1217
            // This mode is invalid if we don't already have a table
1218
            CORRUPTION();
1219
        }
1220
        break;
1221
    default:
1222
        // Impossible, as mode is from 0-3
1223
        IMPOSSIBLE();
1224
        break;
1225
    }
1226

1227
}
1228
/******* END SEQUENCE DECODING ************************************************/
1229

1230
/******* SEQUENCE EXECUTION ***************************************************/
1231
static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,
1232
                              const u8 *const literals,
1233
                              const size_t literals_len,
1234
                              const sequence_command_t *const sequences,
1235
                              const size_t num_sequences) {
1236
    istream_t litstream = IO_make_istream(literals, literals_len);
1237

1238
    u64 *const offset_hist = ctx->previous_offsets;
1239
    size_t total_output = ctx->current_total_output;
1240

1241
    for (size_t i = 0; i < num_sequences; i++) {
1242
        const sequence_command_t seq = sequences[i];
1243
        {
1244
            const u32 literals_size = copy_literals(seq.literal_length, &litstream, out);
1245
            total_output += literals_size;
1246
        }
1247

1248
        size_t const offset = compute_offset(seq, offset_hist);
1249

1250
        size_t const match_length = seq.match_length;
1251

1252
        execute_match_copy(ctx, offset, match_length, total_output, out);
1253

1254
        total_output += match_length;
1255
    }
1256

1257
    // Copy any leftover literals
1258
    {
1259
        size_t len = IO_istream_len(&litstream);
1260
        copy_literals(len, &litstream, out);
1261
        total_output += len;
1262
    }
1263

1264
    ctx->current_total_output = total_output;
1265
}
1266

1267
static u32 copy_literals(const size_t literal_length, istream_t *litstream,
1268
                         ostream_t *const out) {
1269
    // If the sequence asks for more literals than are left, the
1270
    // sequence must be corrupted
1271
    if (literal_length > IO_istream_len(litstream)) {
1272
        CORRUPTION();
1273
    }
1274

1275
    u8 *const write_ptr = IO_get_write_ptr(out, literal_length);
1276
    const u8 *const read_ptr =
1277
         IO_get_read_ptr(litstream, literal_length);
1278
    // Copy literals to output
1279
    memcpy(write_ptr, read_ptr, literal_length);
1280

1281
    return literal_length;
1282
}
1283

1284
static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist) {
1285
    size_t offset;
1286
    // Offsets are special, we need to handle the repeat offsets
1287
    if (seq.offset <= 3) {
1288
        // "The first 3 values define a repeated offset and we will call
1289
        // them Repeated_Offset1, Repeated_Offset2, and Repeated_Offset3.
1290
        // They are sorted in recency order, with Repeated_Offset1 meaning
1291
        // 'most recent one'".
1292

1293
        // Use 0 indexing for the array
1294
        u32 idx = seq.offset - 1;
1295
        if (seq.literal_length == 0) {
1296
            // "There is an exception though, when current sequence's
1297
            // literals length is 0. In this case, repeated offsets are
1298
            // shifted by one, so Repeated_Offset1 becomes Repeated_Offset2,
1299
            // Repeated_Offset2 becomes Repeated_Offset3, and
1300
            // Repeated_Offset3 becomes Repeated_Offset1 - 1_byte."
1301
            idx++;
1302
        }
1303

1304
        if (idx == 0) {
1305
            offset = offset_hist[0];
1306
        } else {
1307
            // If idx == 3 then literal length was 0 and the offset was 3,
1308
            // as per the exception listed above
1309
            offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1;
1310

1311
            // If idx == 1 we don't need to modify offset_hist[2], since
1312
            // we're using the second-most recent code
1313
            if (idx > 1) {
1314
                offset_hist[2] = offset_hist[1];
1315
            }
1316
            offset_hist[1] = offset_hist[0];
1317
            offset_hist[0] = offset;
1318
        }
1319
    } else {
1320
        // When it's not a repeat offset:
1321
        // "if (Offset_Value > 3) offset = Offset_Value - 3;"
1322
        offset = seq.offset - 3;
1323

1324
        // Shift back history
1325
        offset_hist[2] = offset_hist[1];
1326
        offset_hist[1] = offset_hist[0];
1327
        offset_hist[0] = offset;
1328
    }
1329
    return offset;
1330
}
1331

1332
static void execute_match_copy(frame_context_t *const ctx, size_t offset,
1333
                              size_t match_length, size_t total_output,
1334
                              ostream_t *const out) {
1335
    u8 *write_ptr = IO_get_write_ptr(out, match_length);
1336
    if (total_output <= ctx->header.window_size) {
1337
        // In this case offset might go back into the dictionary
1338
        if (offset > total_output + ctx->dict_content_len) {
1339
            // The offset goes beyond even the dictionary
1340
            CORRUPTION();
1341
        }
1342

1343
        if (offset > total_output) {
1344
            // "The rest of the dictionary is its content. The content act
1345
            // as a "past" in front of data to compress or decompress, so it
1346
            // can be referenced in sequence commands."
1347
            const size_t dict_copy =
1348
                MIN(offset - total_output, match_length);
1349
            const size_t dict_offset =
1350
                ctx->dict_content_len - (offset - total_output);
1351

1352
            memcpy(write_ptr, ctx->dict_content + dict_offset, dict_copy);
1353
            write_ptr += dict_copy;
1354
            match_length -= dict_copy;
1355
        }
1356
    } else if (offset > ctx->header.window_size) {
1357
        CORRUPTION();
1358
    }
1359

1360
    // We must copy byte by byte because the match length might be larger
1361
    // than the offset
1362
    // ex: if the output so far was "abc", a command with offset=3 and
1363
    // match_length=6 would produce "abcabcabc" as the new output
1364
    for (size_t j = 0; j < match_length; j++) {
1365
        *write_ptr = *(write_ptr - offset);
1366
        write_ptr++;
1367
    }
1368
}
1369
/******* END SEQUENCE EXECUTION ***********************************************/
1370

1371
/******* OUTPUT SIZE COUNTING *************************************************/
1372
/// Get the decompressed size of an input stream so memory can be allocated in
1373
/// advance.
1374
/// This implementation assumes `src` points to a single ZSTD-compressed frame
1375
size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
1376
    istream_t in = IO_make_istream(src, src_len);
1377

1378
    // get decompressed size from ZSTD frame header
1379
    {
1380
        const u32 magic_number = (u32)IO_read_bits(&in, 32);
1381

1382
        if (magic_number == ZSTD_MAGIC_NUMBER) {
1383
            // ZSTD frame
1384
            frame_header_t header;
1385
            parse_frame_header(&header, &in);
1386

1387
            if (header.frame_content_size == 0 && !header.single_segment_flag) {
1388
                // Content size not provided, we can't tell
1389
                return (size_t)-1;
1390
            }
1391

1392
            return header.frame_content_size;
1393
        } else {
1394
            // not a real frame or skippable frame
1395
            ERROR("ZSTD frame magic number did not match");
1396
        }
1397
    }
1398
}
1399
/******* END OUTPUT SIZE COUNTING *********************************************/
1400

1401
/******* DICTIONARY PARSING ***************************************************/
1402
dictionary_t* create_dictionary() {
1403
    dictionary_t* const dict = calloc(1, sizeof(dictionary_t));
1404
    if (!dict) {
1405
        BAD_ALLOC();
1406
    }
1407
    return dict;
1408
}
1409

1410
/// Free an allocated dictionary
1411
void free_dictionary(dictionary_t *const dict) {
1412
    HUF_free_dtable(&dict->literals_dtable);
1413
    FSE_free_dtable(&dict->ll_dtable);
1414
    FSE_free_dtable(&dict->of_dtable);
1415
    FSE_free_dtable(&dict->ml_dtable);
1416

1417
    free(dict->content);
1418

1419
    memset(dict, 0, sizeof(dictionary_t));
1420

1421
    free(dict);
1422
}
1423

1424

1425
#if !defined(ZDEC_NO_DICTIONARY)
1426
#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes")
1427
#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src");
1428

1429
static void init_dictionary_content(dictionary_t *const dict,
1430
                                    istream_t *const in);
1431

1432
void parse_dictionary(dictionary_t *const dict, const void *src,
1433
                             size_t src_len) {
1434
    const u8 *byte_src = (const u8 *)src;
1435
    memset(dict, 0, sizeof(dictionary_t));
1436
    if (src == NULL) { /* cannot initialize dictionary with null src */
1437
        NULL_SRC();
1438
    }
1439
    if (src_len < 8) {
1440
        DICT_SIZE_ERROR();
1441
    }
1442

1443
    istream_t in = IO_make_istream(byte_src, src_len);
1444

1445
    const u32 magic_number = IO_read_bits(&in, 32);
1446
    if (magic_number != 0xEC30A437) {
1447
        // raw content dict
1448
        IO_rewind_bits(&in, 32);
1449
        init_dictionary_content(dict, &in);
1450
        return;
1451
    }
1452

1453
    dict->dictionary_id = IO_read_bits(&in, 32);
1454

1455
    // "Entropy_Tables : following the same format as the tables in compressed
1456
    // blocks. They are stored in following order : Huffman tables for literals,
1457
    // FSE table for offsets, FSE table for match lengths, and FSE table for
1458
    // literals lengths. It's finally followed by 3 offset values, populating
1459
    // recent offsets (instead of using {1,4,8}), stored in order, 4-bytes
1460
    // little-endian each, for a total of 12 bytes. Each recent offset must have
1461
    // a value < dictionary size."
1462
    decode_huf_table(&dict->literals_dtable, &in);
1463
    decode_seq_table(&dict->of_dtable, &in, seq_offset, seq_fse);
1464
    decode_seq_table(&dict->ml_dtable, &in, seq_match_length, seq_fse);
1465
    decode_seq_table(&dict->ll_dtable, &in, seq_literal_length, seq_fse);
1466

1467
    // Read in the previous offset history
1468
    dict->previous_offsets[0] = IO_read_bits(&in, 32);
1469
    dict->previous_offsets[1] = IO_read_bits(&in, 32);
1470
    dict->previous_offsets[2] = IO_read_bits(&in, 32);
1471

1472
    // Ensure the provided offsets aren't too large
1473
    // "Each recent offset must have a value < dictionary size."
1474
    for (int i = 0; i < 3; i++) {
1475
        if (dict->previous_offsets[i] > src_len) {
1476
            ERROR("Dictionary corrupted");
1477
        }
1478
    }
1479

1480
    // "Content : The rest of the dictionary is its content. The content act as
1481
    // a "past" in front of data to compress or decompress, so it can be
1482
    // referenced in sequence commands."
1483
    init_dictionary_content(dict, &in);
1484
}
1485

1486
static void init_dictionary_content(dictionary_t *const dict,
1487
                                    istream_t *const in) {
1488
    // Copy in the content
1489
    dict->content_size = IO_istream_len(in);
1490
    dict->content = malloc(dict->content_size);
1491
    if (!dict->content) {
1492
        BAD_ALLOC();
1493
    }
1494

1495
    const u8 *const content = IO_get_read_ptr(in, dict->content_size);
1496

1497
    memcpy(dict->content, content, dict->content_size);
1498
}
1499

1500
static void HUF_copy_dtable(HUF_dtable *const dst,
1501
                            const HUF_dtable *const src) {
1502
    if (src->max_bits == 0) {
1503
        memset(dst, 0, sizeof(HUF_dtable));
1504
        return;
1505
    }
1506

1507
    const size_t size = (size_t)1 << src->max_bits;
1508
    dst->max_bits = src->max_bits;
1509

1510
    dst->symbols = malloc(size);
1511
    dst->num_bits = malloc(size);
1512
    if (!dst->symbols || !dst->num_bits) {
1513
        BAD_ALLOC();
1514
    }
1515

1516
    memcpy(dst->symbols, src->symbols, size);
1517
    memcpy(dst->num_bits, src->num_bits, size);
1518
}
1519

1520
static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) {
1521
    if (src->accuracy_log == 0) {
1522
        memset(dst, 0, sizeof(FSE_dtable));
1523
        return;
1524
    }
1525

1526
    size_t size = (size_t)1 << src->accuracy_log;
1527
    dst->accuracy_log = src->accuracy_log;
1528

1529
    dst->symbols = malloc(size);
1530
    dst->num_bits = malloc(size);
1531
    dst->new_state_base = malloc(size * sizeof(u16));
1532
    if (!dst->symbols || !dst->num_bits || !dst->new_state_base) {
1533
        BAD_ALLOC();
1534
    }
1535

1536
    memcpy(dst->symbols, src->symbols, size);
1537
    memcpy(dst->num_bits, src->num_bits, size);
1538
    memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16));
1539
}
1540

1541
/// A dictionary acts as initializing values for the frame context before
1542
/// decompression, so we implement it by applying it's predetermined
1543
/// tables and content to the context before beginning decompression
1544
static void frame_context_apply_dict(frame_context_t *const ctx,
1545
                                     const dictionary_t *const dict) {
1546
    // If the content pointer is NULL then it must be an empty dict
1547
    if (!dict || !dict->content)
1548
        return;
1549

1550
    // If the requested dictionary_id is non-zero, the correct dictionary must
1551
    // be present
1552
    if (ctx->header.dictionary_id != 0 &&
1553
        ctx->header.dictionary_id != dict->dictionary_id) {
1554
        ERROR("Wrong dictionary provided");
1555
    }
1556

1557
    // Copy the dict content to the context for references during sequence
1558
    // execution
1559
    ctx->dict_content = dict->content;
1560
    ctx->dict_content_len = dict->content_size;
1561

1562
    // If it's a formatted dict copy the precomputed tables in so they can
1563
    // be used in the table repeat modes
1564
    if (dict->dictionary_id != 0) {
1565
        // Deep copy the entropy tables so they can be freed independently of
1566
        // the dictionary struct
1567
        HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable);
1568
        FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable);
1569
        FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
1570
        FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
1571

1572
        // Copy the repeated offsets
1573
        memcpy(ctx->previous_offsets, dict->previous_offsets,
1574
               sizeof(ctx->previous_offsets));
1575
    }
1576
}
1577

1578
#else  // ZDEC_NO_DICTIONARY is defined
1579

1580
static void frame_context_apply_dict(frame_context_t *const ctx,
1581
                                     const dictionary_t *const dict) {
1582
    (void)ctx;
1583
    if (dict && dict->content) ERROR("dictionary not supported");
1584
}
1585

1586
#endif
1587
/******* END DICTIONARY PARSING ***********************************************/
1588

1589
/******* IO STREAM OPERATIONS *************************************************/
1590

1591
/// Reads `num` bits from a bitstream, and updates the internal offset
1592
static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {
1593
    if (num_bits > 64 || num_bits <= 0) {
1594
        ERROR("Attempt to read an invalid number of bits");
1595
    }
1596

1597
    const size_t bytes = (num_bits + in->bit_offset + 7) / 8;
1598
    const size_t full_bytes = (num_bits + in->bit_offset) / 8;
1599
    if (bytes > in->len) {
1600
        INP_SIZE();
1601
    }
1602

1603
    const u64 result = read_bits_LE(in->ptr, num_bits, in->bit_offset);
1604

1605
    in->bit_offset = (num_bits + in->bit_offset) % 8;
1606
    in->ptr += full_bytes;
1607
    in->len -= full_bytes;
1608

1609
    return result;
1610
}
1611

1612
/// If a non-zero number of bits have been read from the current byte, advance
1613
/// the offset to the next byte
1614
static inline void IO_rewind_bits(istream_t *const in, int num_bits) {
1615
    if (num_bits < 0) {
1616
        ERROR("Attempting to rewind stream by a negative number of bits");
1617
    }
1618

1619
    // move the offset back by `num_bits` bits
1620
    const int new_offset = in->bit_offset - num_bits;
1621
    // determine the number of whole bytes we have to rewind, rounding up to an
1622
    // integer number (e.g. if `new_offset == -5`, `bytes == 1`)
1623
    const i64 bytes = -(new_offset - 7) / 8;
1624

1625
    in->ptr -= bytes;
1626
    in->len += bytes;
1627
    // make sure the resulting `bit_offset` is positive, as mod in C does not
1628
    // convert numbers from negative to positive (e.g. -22 % 8 == -6)
1629
    in->bit_offset = ((new_offset % 8) + 8) % 8;
1630
}
1631

1632
/// If the remaining bits in a byte will be unused, advance to the end of the
1633
/// byte
1634
static inline void IO_align_stream(istream_t *const in) {
1635
    if (in->bit_offset != 0) {
1636
        if (in->len == 0) {
1637
            INP_SIZE();
1638
        }
1639
        in->ptr++;
1640
        in->len--;
1641
        in->bit_offset = 0;
1642
    }
1643
}
1644

1645
/// Write the given byte into the output stream
1646
static inline void IO_write_byte(ostream_t *const out, u8 symb) {
1647
    if (out->len == 0) {
1648
        OUT_SIZE();
1649
    }
1650

1651
    out->ptr[0] = symb;
1652
    out->ptr++;
1653
    out->len--;
1654
}
1655

1656
/// Returns the number of bytes left to be read in this stream.  The stream must
1657
/// be byte aligned.
1658
static inline size_t IO_istream_len(const istream_t *const in) {
1659
    return in->len;
1660
}
1661

1662
/// Returns a pointer where `len` bytes can be read, and advances the internal
1663
/// state.  The stream must be byte aligned.
1664
static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len) {
1665
    if (len > in->len) {
1666
        INP_SIZE();
1667
    }
1668
    if (in->bit_offset != 0) {
1669
        ERROR("Attempting to operate on a non-byte aligned stream");
1670
    }
1671
    const u8 *const ptr = in->ptr;
1672
    in->ptr += len;
1673
    in->len -= len;
1674

1675
    return ptr;
1676
}
1677
/// Returns a pointer to write `len` bytes to, and advances the internal state
1678
static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len) {
1679
    if (len > out->len) {
1680
        OUT_SIZE();
1681
    }
1682
    u8 *const ptr = out->ptr;
1683
    out->ptr += len;
1684
    out->len -= len;
1685

1686
    return ptr;
1687
}
1688

1689
/// Advance the inner state by `len` bytes
1690
static inline void IO_advance_input(istream_t *const in, size_t len) {
1691
    if (len > in->len) {
1692
         INP_SIZE();
1693
    }
1694
    if (in->bit_offset != 0) {
1695
        ERROR("Attempting to operate on a non-byte aligned stream");
1696
    }
1697

1698
    in->ptr += len;
1699
    in->len -= len;
1700
}
1701

1702
/// Returns an `ostream_t` constructed from the given pointer and length
1703
static inline ostream_t IO_make_ostream(u8 *out, size_t len) {
1704
    return (ostream_t) { out, len };
1705
}
1706

1707
/// Returns an `istream_t` constructed from the given pointer and length
1708
static inline istream_t IO_make_istream(const u8 *in, size_t len) {
1709
    return (istream_t) { in, len, 0 };
1710
}
1711

1712
/// Returns an `istream_t` with the same base as `in`, and length `len`
1713
/// Then, advance `in` to account for the consumed bytes
1714
/// `in` must be byte aligned
1715
static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) {
1716
    // Consume `len` bytes of the parent stream
1717
    const u8 *const ptr = IO_get_read_ptr(in, len);
1718

1719
    // Make a substream using the pointer to those `len` bytes
1720
    return IO_make_istream(ptr, len);
1721
}
1722
/******* END IO STREAM OPERATIONS *********************************************/
1723

1724
/******* BITSTREAM OPERATIONS *************************************************/
1725
/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits
1726
static inline u64 read_bits_LE(const u8 *src, const int num_bits,
1727
                               const size_t offset) {
1728
    if (num_bits > 64) {
1729
        ERROR("Attempt to read an invalid number of bits");
1730
    }
1731

1732
    // Skip over bytes that aren't in range
1733
    src += offset / 8;
1734
    size_t bit_offset = offset % 8;
1735
    u64 res = 0;
1736

1737
    int shift = 0;
1738
    int left = num_bits;
1739
    while (left > 0) {
1740
        u64 mask = left >= 8 ? 0xff : (((u64)1 << left) - 1);
1741
        // Read the next byte, shift it to account for the offset, and then mask
1742
        // out the top part if we don't need all the bits
1743
        res += (((u64)*src++ >> bit_offset) & mask) << shift;
1744
        shift += 8 - bit_offset;
1745
        left -= 8 - bit_offset;
1746
        bit_offset = 0;
1747
    }
1748

1749
    return res;
1750
}
1751

1752
/// Read bits from the end of a HUF or FSE bitstream.  `offset` is in bits, so
1753
/// it updates `offset` to `offset - bits`, and then reads `bits` bits from
1754
/// `src + offset`.  If the offset becomes negative, the extra bits at the
1755
/// bottom are filled in with `0` bits instead of reading from before `src`.
1756
static inline u64 STREAM_read_bits(const u8 *const src, const int bits,
1757
                                   i64 *const offset) {
1758
    *offset = *offset - bits;
1759
    size_t actual_off = *offset;
1760
    size_t actual_bits = bits;
1761
    // Don't actually read bits from before the start of src, so if `*offset <
1762
    // 0` fix actual_off and actual_bits to reflect the quantity to read
1763
    if (*offset < 0) {
1764
        actual_bits += *offset;
1765
        actual_off = 0;
1766
    }
1767
    u64 res = read_bits_LE(src, actual_bits, actual_off);
1768

1769
    if (*offset < 0) {
1770
        // Fill in the bottom "overflowed" bits with 0's
1771
        res = -*offset >= 64 ? 0 : (res << -*offset);
1772
    }
1773
    return res;
1774
}
1775
/******* END BITSTREAM OPERATIONS *********************************************/
1776

1777
/******* BIT COUNTING OPERATIONS **********************************************/
1778
/// Returns `x`, where `2^x` is the largest power of 2 less than or equal to
1779
/// `num`, or `-1` if `num == 0`.
1780
static inline int highest_set_bit(const u64 num) {
1781
    for (int i = 63; i >= 0; i--) {
1782
        if (((u64)1 << i) <= num) {
1783
            return i;
1784
        }
1785
    }
1786
    return -1;
1787
}
1788
/******* END BIT COUNTING OPERATIONS ******************************************/
1789

1790
/******* HUFFMAN PRIMITIVES ***************************************************/
1791
static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable,
1792
                                   u16 *const state, const u8 *const src,
1793
                                   i64 *const offset) {
1794
    // Look up the symbol and number of bits to read
1795
    const u8 symb = dtable->symbols[*state];
1796
    const u8 bits = dtable->num_bits[*state];
1797
    const u16 rest = STREAM_read_bits(src, bits, offset);
1798
    // Shift `bits` bits out of the state, keeping the low order bits that
1799
    // weren't necessary to determine this symbol.  Then add in the new bits
1800
    // read from the stream.
1801
    *state = ((*state << bits) + rest) & (((u16)1 << dtable->max_bits) - 1);
1802

1803
    return symb;
1804
}
1805

1806
static inline void HUF_init_state(const HUF_dtable *const dtable,
1807
                                  u16 *const state, const u8 *const src,
1808
                                  i64 *const offset) {
1809
    // Read in a full `dtable->max_bits` bits to initialize the state
1810
    const u8 bits = dtable->max_bits;
1811
    *state = STREAM_read_bits(src, bits, offset);
1812
}
1813

1814
static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,
1815
                                     ostream_t *const out,
1816
                                     istream_t *const in) {
1817
    const size_t len = IO_istream_len(in);
1818
    if (len == 0) {
1819
        INP_SIZE();
1820
    }
1821
    const u8 *const src = IO_get_read_ptr(in, len);
1822

1823
    // "Each bitstream must be read backward, that is starting from the end down
1824
    // to the beginning. Therefore it's necessary to know the size of each
1825
    // bitstream.
1826
    //
1827
    // It's also necessary to know exactly which bit is the latest. This is
1828
    // detected by a final bit flag : the highest bit of latest byte is a
1829
    // final-bit-flag. Consequently, a last byte of 0 is not possible. And the
1830
    // final-bit-flag itself is not part of the useful bitstream. Hence, the
1831
    // last byte contains between 0 and 7 useful bits."
1832
    const int padding = 8 - highest_set_bit(src[len - 1]);
1833

1834
    // Offset starts at the end because HUF streams are read backwards
1835
    i64 bit_offset = len * 8 - padding;
1836
    u16 state;
1837

1838
    HUF_init_state(dtable, &state, src, &bit_offset);
1839

1840
    size_t symbols_written = 0;
1841
    while (bit_offset > -dtable->max_bits) {
1842
        // Iterate over the stream, decoding one symbol at a time
1843
        IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &bit_offset));
1844
        symbols_written++;
1845
    }
1846
    // "The process continues up to reading the required number of symbols per
1847
    // stream. If a bitstream is not entirely and exactly consumed, hence
1848
    // reaching exactly its beginning position with all bits consumed, the
1849
    // decoding process is considered faulty."
1850

1851
    // When all symbols have been decoded, the final state value shouldn't have
1852
    // any data from the stream, so it should have "read" dtable->max_bits from
1853
    // before the start of `src`
1854
    // Therefore `offset`, the edge to start reading new bits at, should be
1855
    // dtable->max_bits before the start of the stream
1856
    if (bit_offset != -dtable->max_bits) {
1857
        CORRUPTION();
1858
    }
1859

1860
    return symbols_written;
1861
}
1862

1863
static size_t HUF_decompress_4stream(const HUF_dtable *const dtable,
1864
                                     ostream_t *const out, istream_t *const in) {
1865
    // "Compressed size is provided explicitly : in the 4-streams variant,
1866
    // bitstreams are preceded by 3 unsigned little-endian 16-bits values. Each
1867
    // value represents the compressed size of one stream, in order. The last
1868
    // stream size is deducted from total compressed size and from previously
1869
    // decoded stream sizes"
1870
    const size_t csize1 = IO_read_bits(in, 16);
1871
    const size_t csize2 = IO_read_bits(in, 16);
1872
    const size_t csize3 = IO_read_bits(in, 16);
1873

1874
    istream_t in1 = IO_make_sub_istream(in, csize1);
1875
    istream_t in2 = IO_make_sub_istream(in, csize2);
1876
    istream_t in3 = IO_make_sub_istream(in, csize3);
1877
    istream_t in4 = IO_make_sub_istream(in, IO_istream_len(in));
1878

1879
    size_t total_output = 0;
1880
    // Decode each stream independently for simplicity
1881
    // If we wanted to we could decode all 4 at the same time for speed,
1882
    // utilizing more execution units
1883
    total_output += HUF_decompress_1stream(dtable, out, &in1);
1884
    total_output += HUF_decompress_1stream(dtable, out, &in2);
1885
    total_output += HUF_decompress_1stream(dtable, out, &in3);
1886
    total_output += HUF_decompress_1stream(dtable, out, &in4);
1887

1888
    return total_output;
1889
}
1890

1891
/// Initializes a Huffman table using canonical Huffman codes
1892
/// For more explanation on canonical Huffman codes see
1893
/// http://www.cs.uofs.edu/~mccloske/courses/cmps340/huff_canonical_dec2015.html
1894
/// Codes within a level are allocated in symbol order (i.e. smaller symbols get
1895
/// earlier codes)
1896
static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits,
1897
                            const int num_symbs) {
1898
    memset(table, 0, sizeof(HUF_dtable));
1899
    if (num_symbs > HUF_MAX_SYMBS) {
1900
        ERROR("Too many symbols for Huffman");
1901
    }
1902

1903
    u8 max_bits = 0;
1904
    u16 rank_count[HUF_MAX_BITS + 1];
1905
    memset(rank_count, 0, sizeof(rank_count));
1906

1907
    // Count the number of symbols for each number of bits, and determine the
1908
    // depth of the tree
1909
    for (int i = 0; i < num_symbs; i++) {
1910
        if (bits[i] > HUF_MAX_BITS) {
1911
            ERROR("Huffman table depth too large");
1912
        }
1913
        max_bits = MAX(max_bits, bits[i]);
1914
        rank_count[bits[i]]++;
1915
    }
1916

1917
    const size_t table_size = 1 << max_bits;
1918
    table->max_bits = max_bits;
1919
    table->symbols = malloc(table_size);
1920
    table->num_bits = malloc(table_size);
1921

1922
    if (!table->symbols || !table->num_bits) {
1923
        free(table->symbols);
1924
        free(table->num_bits);
1925
        BAD_ALLOC();
1926
    }
1927

1928
    // "Symbols are sorted by Weight. Within same Weight, symbols keep natural
1929
    // order. Symbols with a Weight of zero are removed. Then, starting from
1930
    // lowest weight, prefix codes are distributed in order."
1931

1932
    u32 rank_idx[HUF_MAX_BITS + 1];
1933
    // Initialize the starting codes for each rank (number of bits)
1934
    rank_idx[max_bits] = 0;
1935
    for (int i = max_bits; i >= 1; i--) {
1936
        rank_idx[i - 1] = rank_idx[i] + rank_count[i] * (1 << (max_bits - i));
1937
        // The entire range takes the same number of bits so we can memset it
1938
        memset(&table->num_bits[rank_idx[i]], i, rank_idx[i - 1] - rank_idx[i]);
1939
    }
1940

1941
    if (rank_idx[0] != table_size) {
1942
        CORRUPTION();
1943
    }
1944

1945
    // Allocate codes and fill in the table
1946
    for (int i = 0; i < num_symbs; i++) {
1947
        if (bits[i] != 0) {
1948
            // Allocate a code for this symbol and set its range in the table
1949
            const u16 code = rank_idx[bits[i]];
1950
            // Since the code doesn't care about the bottom `max_bits - bits[i]`
1951
            // bits of state, it gets a range that spans all possible values of
1952
            // the lower bits
1953
            const u16 len = 1 << (max_bits - bits[i]);
1954
            memset(&table->symbols[code], i, len);
1955
            rank_idx[bits[i]] += len;
1956
        }
1957
    }
1958
}
1959

1960
static void HUF_init_dtable_usingweights(HUF_dtable *const table,
1961
                                         const u8 *const weights,
1962
                                         const int num_symbs) {
1963
    // +1 because the last weight is not transmitted in the header
1964
    if (num_symbs + 1 > HUF_MAX_SYMBS) {
1965
        ERROR("Too many symbols for Huffman");
1966
    }
1967

1968
    u8 bits[HUF_MAX_SYMBS];
1969

1970
    u64 weight_sum = 0;
1971
    for (int i = 0; i < num_symbs; i++) {
1972
        // Weights are in the same range as bit count
1973
        if (weights[i] > HUF_MAX_BITS) {
1974
            CORRUPTION();
1975
        }
1976
        weight_sum += weights[i] > 0 ? (u64)1 << (weights[i] - 1) : 0;
1977
    }
1978

1979
    // Find the first power of 2 larger than the sum
1980
    const int max_bits = highest_set_bit(weight_sum) + 1;
1981
    const u64 left_over = ((u64)1 << max_bits) - weight_sum;
1982
    // If the left over isn't a power of 2, the weights are invalid
1983
    if (left_over & (left_over - 1)) {
1984
        CORRUPTION();
1985
    }
1986

1987
    // left_over is used to find the last weight as it's not transmitted
1988
    // by inverting 2^(weight - 1) we can determine the value of last_weight
1989
    const int last_weight = highest_set_bit(left_over) + 1;
1990

1991
    for (int i = 0; i < num_symbs; i++) {
1992
        // "Number_of_Bits = Number_of_Bits ? Max_Number_of_Bits + 1 - Weight : 0"
1993
        bits[i] = weights[i] > 0 ? (max_bits + 1 - weights[i]) : 0;
1994
    }
1995
    bits[num_symbs] =
1996
        max_bits + 1 - last_weight; // Last weight is always non-zero
1997

1998
    HUF_init_dtable(table, bits, num_symbs + 1);
1999
}
2000

2001
static void HUF_free_dtable(HUF_dtable *const dtable) {
2002
    free(dtable->symbols);
2003
    free(dtable->num_bits);
2004
    memset(dtable, 0, sizeof(HUF_dtable));
2005
}
2006
/******* END HUFFMAN PRIMITIVES ***********************************************/
2007

2008
/******* FSE PRIMITIVES *******************************************************/
2009
/// For more description of FSE see
2010
/// https://github.com/Cyan4973/FiniteStateEntropy/
2011

2012
/// Allow a symbol to be decoded without updating state
2013
static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable,
2014
                                 const u16 state) {
2015
    return dtable->symbols[state];
2016
}
2017

2018
/// Consumes bits from the input and uses the current state to determine the
2019
/// next state
2020
static inline void FSE_update_state(const FSE_dtable *const dtable,
2021
                                    u16 *const state, const u8 *const src,
2022
                                    i64 *const offset) {
2023
    const u8 bits = dtable->num_bits[*state];
2024
    const u16 rest = STREAM_read_bits(src, bits, offset);
2025
    *state = dtable->new_state_base[*state] + rest;
2026
}
2027

2028
/// Decodes a single FSE symbol and updates the offset
2029
static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable,
2030
                                   u16 *const state, const u8 *const src,
2031
                                   i64 *const offset) {
2032
    const u8 symb = FSE_peek_symbol(dtable, *state);
2033
    FSE_update_state(dtable, state, src, offset);
2034
    return symb;
2035
}
2036

2037
static inline void FSE_init_state(const FSE_dtable *const dtable,
2038
                                  u16 *const state, const u8 *const src,
2039
                                  i64 *const offset) {
2040
    // Read in a full `accuracy_log` bits to initialize the state
2041
    const u8 bits = dtable->accuracy_log;
2042
    *state = STREAM_read_bits(src, bits, offset);
2043
}
2044

2045
static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,
2046
                                          ostream_t *const out,
2047
                                          istream_t *const in) {
2048
    const size_t len = IO_istream_len(in);
2049
    if (len == 0) {
2050
        INP_SIZE();
2051
    }
2052
    const u8 *const src = IO_get_read_ptr(in, len);
2053

2054
    // "Each bitstream must be read backward, that is starting from the end down
2055
    // to the beginning. Therefore it's necessary to know the size of each
2056
    // bitstream.
2057
    //
2058
    // It's also necessary to know exactly which bit is the latest. This is
2059
    // detected by a final bit flag : the highest bit of latest byte is a
2060
    // final-bit-flag. Consequently, a last byte of 0 is not possible. And the
2061
    // final-bit-flag itself is not part of the useful bitstream. Hence, the
2062
    // last byte contains between 0 and 7 useful bits."
2063
    const int padding = 8 - highest_set_bit(src[len - 1]);
2064
    i64 offset = len * 8 - padding;
2065

2066
    u16 state1, state2;
2067
    // "The first state (State1) encodes the even indexed symbols, and the
2068
    // second (State2) encodes the odd indexes. State1 is initialized first, and
2069
    // then State2, and they take turns decoding a single symbol and updating
2070
    // their state."
2071
    FSE_init_state(dtable, &state1, src, &offset);
2072
    FSE_init_state(dtable, &state2, src, &offset);
2073

2074
    // Decode until we overflow the stream
2075
    // Since we decode in reverse order, overflowing the stream is offset going
2076
    // negative
2077
    size_t symbols_written = 0;
2078
    while (1) {
2079
        // "The number of symbols to decode is determined by tracking bitStream
2080
        // overflow condition: If updating state after decoding a symbol would
2081
        // require more bits than remain in the stream, it is assumed the extra
2082
        // bits are 0. Then, the symbols for each of the final states are
2083
        // decoded and the process is complete."
2084
        IO_write_byte(out, FSE_decode_symbol(dtable, &state1, src, &offset));
2085
        symbols_written++;
2086
        if (offset < 0) {
2087
            // There's still a symbol to decode in state2
2088
            IO_write_byte(out, FSE_peek_symbol(dtable, state2));
2089
            symbols_written++;
2090
            break;
2091
        }
2092

2093
        IO_write_byte(out, FSE_decode_symbol(dtable, &state2, src, &offset));
2094
        symbols_written++;
2095
        if (offset < 0) {
2096
            // There's still a symbol to decode in state1
2097
            IO_write_byte(out, FSE_peek_symbol(dtable, state1));
2098
            symbols_written++;
2099
            break;
2100
        }
2101
    }
2102

2103
    return symbols_written;
2104
}
2105

2106
static void FSE_init_dtable(FSE_dtable *const dtable,
2107
                            const i16 *const norm_freqs, const int num_symbs,
2108
                            const int accuracy_log) {
2109
    if (accuracy_log > FSE_MAX_ACCURACY_LOG) {
2110
        ERROR("FSE accuracy too large");
2111
    }
2112
    if (num_symbs > FSE_MAX_SYMBS) {
2113
        ERROR("Too many symbols for FSE");
2114
    }
2115

2116
    dtable->accuracy_log = accuracy_log;
2117

2118
    const size_t size = (size_t)1 << accuracy_log;
2119
    dtable->symbols = malloc(size * sizeof(u8));
2120
    dtable->num_bits = malloc(size * sizeof(u8));
2121
    dtable->new_state_base = malloc(size * sizeof(u16));
2122

2123
    if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) {
2124
        BAD_ALLOC();
2125
    }
2126

2127
    // Used to determine how many bits need to be read for each state,
2128
    // and where the destination range should start
2129
    // Needs to be u16 because max value is 2 * max number of symbols,
2130
    // which can be larger than a byte can store
2131
    u16 state_desc[FSE_MAX_SYMBS];
2132

2133
    // "Symbols are scanned in their natural order for "less than 1"
2134
    // probabilities. Symbols with this probability are being attributed a
2135
    // single cell, starting from the end of the table. These symbols define a
2136
    // full state reset, reading Accuracy_Log bits."
2137
    int high_threshold = size;
2138
    for (int s = 0; s < num_symbs; s++) {
2139
        // Scan for low probability symbols to put at the top
2140
        if (norm_freqs[s] == -1) {
2141
            dtable->symbols[--high_threshold] = s;
2142
            state_desc[s] = 1;
2143
        }
2144
    }
2145

2146
    // "All remaining symbols are sorted in their natural order. Starting from
2147
    // symbol 0 and table position 0, each symbol gets attributed as many cells
2148
    // as its probability. Cell allocation is spread, not linear."
2149
    // Place the rest in the table
2150
    const u16 step = (size >> 1) + (size >> 3) + 3;
2151
    const u16 mask = size - 1;
2152
    u16 pos = 0;
2153
    for (int s = 0; s < num_symbs; s++) {
2154
        if (norm_freqs[s] <= 0) {
2155
            continue;
2156
        }
2157

2158
        state_desc[s] = norm_freqs[s];
2159

2160
        for (int i = 0; i < norm_freqs[s]; i++) {
2161
            // Give `norm_freqs[s]` states to symbol s
2162
            dtable->symbols[pos] = s;
2163
            // "A position is skipped if already occupied, typically by a "less
2164
            // than 1" probability symbol."
2165
            do {
2166
                pos = (pos + step) & mask;
2167
            } while (pos >=
2168
                     high_threshold);
2169
            // Note: no other collision checking is necessary as `step` is
2170
            // coprime to `size`, so the cycle will visit each position exactly
2171
            // once
2172
        }
2173
    }
2174
    if (pos != 0) {
2175
        CORRUPTION();
2176
    }
2177

2178
    // Now we can fill baseline and num bits
2179
    for (size_t i = 0; i < size; i++) {
2180
        u8 symbol = dtable->symbols[i];
2181
        u16 next_state_desc = state_desc[symbol]++;
2182
        // Fills in the table appropriately, next_state_desc increases by symbol
2183
        // over time, decreasing number of bits
2184
        dtable->num_bits[i] = (u8)(accuracy_log - highest_set_bit(next_state_desc));
2185
        // Baseline increases until the bit threshold is passed, at which point
2186
        // it resets to 0
2187
        dtable->new_state_base[i] =
2188
            ((u16)next_state_desc << dtable->num_bits[i]) - size;
2189
    }
2190
}
2191

2192
/// Decode an FSE header as defined in the Zstandard format specification and
2193
/// use the decoded frequencies to initialize a decoding table.
2194
static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,
2195
                                const int max_accuracy_log) {
2196
    // "An FSE distribution table describes the probabilities of all symbols
2197
    // from 0 to the last present one (included) on a normalized scale of 1 <<
2198
    // Accuracy_Log .
2199
    //
2200
    // It's a bitstream which is read forward, in little-endian fashion. It's
2201
    // not necessary to know its exact size, since it will be discovered and
2202
    // reported by the decoding process.
2203
    if (max_accuracy_log > FSE_MAX_ACCURACY_LOG) {
2204
        ERROR("FSE accuracy too large");
2205
    }
2206

2207
    // The bitstream starts by reporting on which scale it operates.
2208
    // Accuracy_Log = low4bits + 5. Note that maximum Accuracy_Log for literal
2209
    // and match lengths is 9, and for offsets is 8. Higher values are
2210
    // considered errors."
2211
    const int accuracy_log = 5 + IO_read_bits(in, 4);
2212
    if (accuracy_log > max_accuracy_log) {
2213
        ERROR("FSE accuracy too large");
2214
    }
2215

2216
    // "Then follows each symbol value, from 0 to last present one. The number
2217
    // of bits used by each field is variable. It depends on :
2218
    //
2219
    // Remaining probabilities + 1 : example : Presuming an Accuracy_Log of 8,
2220
    // and presuming 100 probabilities points have already been distributed, the
2221
    // decoder may read any value from 0 to 255 - 100 + 1 == 156 (inclusive).
2222
    // Therefore, it must read log2sup(156) == 8 bits.
2223
    //
2224
    // Value decoded : small values use 1 less bit : example : Presuming values
2225
    // from 0 to 156 (inclusive) are possible, 255-156 = 99 values are remaining
2226
    // in an 8-bits field. They are used this way : first 99 values (hence from
2227
    // 0 to 98) use only 7 bits, values from 99 to 156 use 8 bits. "
2228

2229
    i32 remaining = 1 << accuracy_log;
2230
    i16 frequencies[FSE_MAX_SYMBS];
2231

2232
    int symb = 0;
2233
    while (remaining > 0 && symb < FSE_MAX_SYMBS) {
2234
        // Log of the number of possible values we could read
2235
        int bits = highest_set_bit(remaining + 1) + 1;
2236

2237
        u16 val = IO_read_bits(in, bits);
2238

2239
        // Try to mask out the lower bits to see if it qualifies for the "small
2240
        // value" threshold
2241
        const u16 lower_mask = ((u16)1 << (bits - 1)) - 1;
2242
        const u16 threshold = ((u16)1 << bits) - 1 - (remaining + 1);
2243

2244
        if ((val & lower_mask) < threshold) {
2245
            IO_rewind_bits(in, 1);
2246
            val = val & lower_mask;
2247
        } else if (val > lower_mask) {
2248
            val = val - threshold;
2249
        }
2250

2251
        // "Probability is obtained from Value decoded by following formula :
2252
        // Proba = value - 1"
2253
        const i16 proba = (i16)val - 1;
2254

2255
        // "It means value 0 becomes negative probability -1. -1 is a special
2256
        // probability, which means "less than 1". Its effect on distribution
2257
        // table is described in next paragraph. For the purpose of calculating
2258
        // cumulated distribution, it counts as one."
2259
        remaining -= proba < 0 ? -proba : proba;
2260

2261
        frequencies[symb] = proba;
2262
        symb++;
2263

2264
        // "When a symbol has a probability of zero, it is followed by a 2-bits
2265
        // repeat flag. This repeat flag tells how many probabilities of zeroes
2266
        // follow the current one. It provides a number ranging from 0 to 3. If
2267
        // it is a 3, another 2-bits repeat flag follows, and so on."
2268
        if (proba == 0) {
2269
            // Read the next two bits to see how many more 0s
2270
            int repeat = IO_read_bits(in, 2);
2271

2272
            while (1) {
2273
                for (int i = 0; i < repeat && symb < FSE_MAX_SYMBS; i++) {
2274
                    frequencies[symb++] = 0;
2275
                }
2276
                if (repeat == 3) {
2277
                    repeat = IO_read_bits(in, 2);
2278
                } else {
2279
                    break;
2280
                }
2281
            }
2282
        }
2283
    }
2284
    IO_align_stream(in);
2285

2286
    // "When last symbol reaches cumulated total of 1 << Accuracy_Log, decoding
2287
    // is complete. If the last symbol makes cumulated total go above 1 <<
2288
    // Accuracy_Log, distribution is considered corrupted."
2289
    if (remaining != 0 || symb >= FSE_MAX_SYMBS) {
2290
        CORRUPTION();
2291
    }
2292

2293
    // Initialize the decoding table using the determined weights
2294
    FSE_init_dtable(dtable, frequencies, symb, accuracy_log);
2295
}
2296

2297
static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb) {
2298
    dtable->symbols = malloc(sizeof(u8));
2299
    dtable->num_bits = malloc(sizeof(u8));
2300
    dtable->new_state_base = malloc(sizeof(u16));
2301

2302
    if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) {
2303
        BAD_ALLOC();
2304
    }
2305

2306
    // This setup will always have a state of 0, always return symbol `symb`,
2307
    // and never consume any bits
2308
    dtable->symbols[0] = symb;
2309
    dtable->num_bits[0] = 0;
2310
    dtable->new_state_base[0] = 0;
2311
    dtable->accuracy_log = 0;
2312
}
2313

2314
static void FSE_free_dtable(FSE_dtable *const dtable) {
2315
    free(dtable->symbols);
2316
    free(dtable->num_bits);
2317
    free(dtable->new_state_base);
2318
    memset(dtable, 0, sizeof(FSE_dtable));
2319
}
2320
/******* END FSE PRIMITIVES ***************************************************/
2321

2322
Product

Resources

Company