Path: blob/main/sys/contrib/zstd/doc/educational_decoder/zstd_decompress.c
48378 views
/*1* Copyright (c) Facebook, Inc.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/910/// Zstandard educational decoder implementation11/// See https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md1213#include <stdint.h> // uint8_t, etc.14#include <stdlib.h> // malloc, free, exit15#include <stdio.h> // fprintf16#include <string.h> // memset, memcpy17#include "zstd_decompress.h"181920/******* IMPORTANT CONSTANTS *********************************************/2122// Zstandard frame23// "Magic_Number24// 4 Bytes, little-endian format. Value : 0xFD2FB528"25#define ZSTD_MAGIC_NUMBER 0xFD2FB528U2627// The size of `Block_Content` is limited by `Block_Maximum_Size`,28#define ZSTD_BLOCK_SIZE_MAX ((size_t)128 * 1024)2930// literal blocks can't be larger than their block31#define MAX_LITERALS_SIZE ZSTD_BLOCK_SIZE_MAX323334/******* UTILITY MACROS AND TYPES *********************************************/35#define MAX(a, b) ((a) > (b) ? (a) : (b))36#define MIN(a, b) ((a) < (b) ? (a) : (b))3738#if defined(ZDEC_NO_MESSAGE)39#define MESSAGE(...)40#else41#define MESSAGE(...) fprintf(stderr, "" __VA_ARGS__)42#endif4344/// This decoder calls exit(1) when it encounters an error, however a production45/// library should propagate error codes46#define ERROR(s) \47do { \48MESSAGE("Error: %s\n", s); \49exit(1); \50} while (0)51#define INP_SIZE() \52ERROR("Input buffer smaller than it should be or input is " \53"corrupted")54#define OUT_SIZE() ERROR("Output buffer too small for output")55#define CORRUPTION() ERROR("Corruption detected while decompressing")56#define BAD_ALLOC() ERROR("Memory allocation error")57#define IMPOSSIBLE() ERROR("An impossibility has occurred")5859typedef uint8_t u8;60typedef uint16_t u16;61typedef uint32_t u32;62typedef uint64_t u64;6364typedef int8_t i8;65typedef int16_t i16;66typedef int32_t i32;67typedef int64_t i64;68/******* END UTILITY MACROS AND TYPES *****************************************/6970/******* IMPLEMENTATION PRIMITIVE PROTOTYPES **********************************/71/// The implementations for these functions can be found at the bottom of this72/// file. They implement low-level functionality needed for the higher level73/// decompression functions.7475/*** IO STREAM OPERATIONS *************/7677/// ostream_t/istream_t are used to wrap the pointers/length data passed into78/// ZSTD_decompress, so that all IO operations are safely bounds checked79/// They are written/read forward, and reads are treated as little-endian80/// They should be used opaquely to ensure safety81typedef struct {82u8 *ptr;83size_t len;84} ostream_t;8586typedef struct {87const u8 *ptr;88size_t len;8990// Input often reads a few bits at a time, so maintain an internal offset91int bit_offset;92} istream_t;9394/// The following two functions are the only ones that allow the istream to be95/// non-byte aligned9697/// Reads `num` bits from a bitstream, and updates the internal offset98static inline u64 IO_read_bits(istream_t *const in, const int num_bits);99/// Backs-up the stream by `num` bits so they can be read again100static inline void IO_rewind_bits(istream_t *const in, const int num_bits);101/// If the remaining bits in a byte will be unused, advance to the end of the102/// byte103static inline void IO_align_stream(istream_t *const in);104105/// Write the given byte into the output stream106static inline void IO_write_byte(ostream_t *const out, u8 symb);107108/// Returns the number of bytes left to be read in this stream. The stream must109/// be byte aligned.110static inline size_t IO_istream_len(const istream_t *const in);111112/// Advances the stream by `len` bytes, and returns a pointer to the chunk that113/// was skipped. The stream must be byte aligned.114static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len);115/// Advances the stream by `len` bytes, and returns a pointer to the chunk that116/// was skipped so it can be written to.117static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len);118119/// Advance the inner state by `len` bytes. The stream must be byte aligned.120static inline void IO_advance_input(istream_t *const in, size_t len);121122/// Returns an `ostream_t` constructed from the given pointer and length.123static inline ostream_t IO_make_ostream(u8 *out, size_t len);124/// Returns an `istream_t` constructed from the given pointer and length.125static inline istream_t IO_make_istream(const u8 *in, size_t len);126127/// Returns an `istream_t` with the same base as `in`, and length `len`.128/// Then, advance `in` to account for the consumed bytes.129/// `in` must be byte aligned.130static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len);131/*** END IO STREAM OPERATIONS *********/132133/*** BITSTREAM OPERATIONS *************/134/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits,135/// and return them interpreted as a little-endian unsigned integer.136static inline u64 read_bits_LE(const u8 *src, const int num_bits,137const size_t offset);138139/// Read bits from the end of a HUF or FSE bitstream. `offset` is in bits, so140/// it updates `offset` to `offset - bits`, and then reads `bits` bits from141/// `src + offset`. If the offset becomes negative, the extra bits at the142/// bottom are filled in with `0` bits instead of reading from before `src`.143static inline u64 STREAM_read_bits(const u8 *src, const int bits,144i64 *const offset);145/*** END BITSTREAM OPERATIONS *********/146147/*** BIT COUNTING OPERATIONS **********/148/// Returns the index of the highest set bit in `num`, or `-1` if `num == 0`149static inline int highest_set_bit(const u64 num);150/*** END BIT COUNTING OPERATIONS ******/151152/*** HUFFMAN PRIMITIVES ***************/153// Table decode method uses exponential memory, so we need to limit depth154#define HUF_MAX_BITS (16)155156// Limit the maximum number of symbols to 256 so we can store a symbol in a byte157#define HUF_MAX_SYMBS (256)158159/// Structure containing all tables necessary for efficient Huffman decoding160typedef struct {161u8 *symbols;162u8 *num_bits;163int max_bits;164} HUF_dtable;165166/// Decode a single symbol and read in enough bits to refresh the state167static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable,168u16 *const state, const u8 *const src,169i64 *const offset);170/// Read in a full state's worth of bits to initialize it171static inline void HUF_init_state(const HUF_dtable *const dtable,172u16 *const state, const u8 *const src,173i64 *const offset);174175/// Decompresses a single Huffman stream, returns the number of bytes decoded.176/// `src_len` must be the exact length of the Huffman-coded block.177static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,178ostream_t *const out, istream_t *const in);179/// Same as previous but decodes 4 streams, formatted as in the Zstandard180/// specification.181/// `src_len` must be the exact length of the Huffman-coded block.182static size_t HUF_decompress_4stream(const HUF_dtable *const dtable,183ostream_t *const out, istream_t *const in);184185/// Initialize a Huffman decoding table using the table of bit counts provided186static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits,187const int num_symbs);188/// Initialize a Huffman decoding table using the table of weights provided189/// Weights follow the definition provided in the Zstandard specification190static void HUF_init_dtable_usingweights(HUF_dtable *const table,191const u8 *const weights,192const int num_symbs);193194/// Free the malloc'ed parts of a decoding table195static void HUF_free_dtable(HUF_dtable *const dtable);196/*** END HUFFMAN PRIMITIVES ***********/197198/*** FSE PRIMITIVES *******************/199/// For more description of FSE see200/// https://github.com/Cyan4973/FiniteStateEntropy/201202// FSE table decoding uses exponential memory, so limit the maximum accuracy203#define FSE_MAX_ACCURACY_LOG (15)204// Limit the maximum number of symbols so they can be stored in a single byte205#define FSE_MAX_SYMBS (256)206207/// The tables needed to decode FSE encoded streams208typedef struct {209u8 *symbols;210u8 *num_bits;211u16 *new_state_base;212int accuracy_log;213} FSE_dtable;214215/// Return the symbol for the current state216static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable,217const u16 state);218/// Read the number of bits necessary to update state, update, and shift offset219/// back to reflect the bits read220static inline void FSE_update_state(const FSE_dtable *const dtable,221u16 *const state, const u8 *const src,222i64 *const offset);223224/// Combine peek and update: decode a symbol and update the state225static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable,226u16 *const state, const u8 *const src,227i64 *const offset);228229/// Read bits from the stream to initialize the state and shift offset back230static inline void FSE_init_state(const FSE_dtable *const dtable,231u16 *const state, const u8 *const src,232i64 *const offset);233234/// Decompress two interleaved bitstreams (e.g. compressed Huffman weights)235/// using an FSE decoding table. `src_len` must be the exact length of the236/// block.237static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,238ostream_t *const out,239istream_t *const in);240241/// Initialize a decoding table using normalized frequencies.242static void FSE_init_dtable(FSE_dtable *const dtable,243const i16 *const norm_freqs, const int num_symbs,244const int accuracy_log);245246/// Decode an FSE header as defined in the Zstandard format specification and247/// use the decoded frequencies to initialize a decoding table.248static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,249const int max_accuracy_log);250251/// Initialize an FSE table that will always return the same symbol and consume252/// 0 bits per symbol, to be used for RLE mode in sequence commands253static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb);254255/// Free the malloc'ed parts of a decoding table256static void FSE_free_dtable(FSE_dtable *const dtable);257/*** END FSE PRIMITIVES ***************/258259/******* END IMPLEMENTATION PRIMITIVE PROTOTYPES ******************************/260261/******* ZSTD HELPER STRUCTS AND PROTOTYPES ***********************************/262263/// A small structure that can be reused in various places that need to access264/// frame header information265typedef struct {266// The size of window that we need to be able to contiguously store for267// references268size_t window_size;269// The total output size of this compressed frame270size_t frame_content_size;271272// The dictionary id if this frame uses one273u32 dictionary_id;274275// Whether or not the content of this frame has a checksum276int content_checksum_flag;277// Whether or not the output for this frame is in a single segment278int single_segment_flag;279} frame_header_t;280281/// The context needed to decode blocks in a frame282typedef struct {283frame_header_t header;284285// The total amount of data available for backreferences, to determine if an286// offset too large to be correct287size_t current_total_output;288289const u8 *dict_content;290size_t dict_content_len;291292// Entropy encoding tables so they can be repeated by future blocks instead293// of retransmitting294HUF_dtable literals_dtable;295FSE_dtable ll_dtable;296FSE_dtable ml_dtable;297FSE_dtable of_dtable;298299// The last 3 offsets for the special "repeat offsets".300u64 previous_offsets[3];301} frame_context_t;302303/// The decoded contents of a dictionary so that it doesn't have to be repeated304/// for each frame that uses it305struct dictionary_s {306// Entropy tables307HUF_dtable literals_dtable;308FSE_dtable ll_dtable;309FSE_dtable ml_dtable;310FSE_dtable of_dtable;311312// Raw content for backreferences313u8 *content;314size_t content_size;315316// Offset history to prepopulate the frame's history317u64 previous_offsets[3];318319u32 dictionary_id;320};321322/// A tuple containing the parts necessary to decode and execute a ZSTD sequence323/// command324typedef struct {325u32 literal_length;326u32 match_length;327u32 offset;328} sequence_command_t;329330/// The decoder works top-down, starting at the high level like Zstd frames, and331/// working down to lower more technical levels such as blocks, literals, and332/// sequences. The high-level functions roughly follow the outline of the333/// format specification:334/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md335336/// Before the implementation of each high-level function declared here, the337/// prototypes for their helper functions are defined and explained338339/// Decode a single Zstd frame, or error if the input is not a valid frame.340/// Accepts a dict argument, which may be NULL indicating no dictionary.341/// See342/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame-concatenation343static void decode_frame(ostream_t *const out, istream_t *const in,344const dictionary_t *const dict);345346// Decode data in a compressed block347static void decompress_block(frame_context_t *const ctx, ostream_t *const out,348istream_t *const in);349350// Decode the literals section of a block351static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,352u8 **const literals);353354// Decode the sequences part of a block355static size_t decode_sequences(frame_context_t *const ctx, istream_t *const in,356sequence_command_t **const sequences);357358// Execute the decoded sequences on the literals block359static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,360const u8 *const literals,361const size_t literals_len,362const sequence_command_t *const sequences,363const size_t num_sequences);364365// Copies literals and returns the total literal length that was copied366static u32 copy_literals(const size_t seq, istream_t *litstream,367ostream_t *const out);368369// Given an offset code from a sequence command (either an actual offset value370// or an index for previous offset), computes the correct offset and updates371// the offset history372static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist);373374// Given an offset, match length, and total output, as well as the frame375// context for the dictionary, determines if the dictionary is used and376// executes the copy operation377static void execute_match_copy(frame_context_t *const ctx, size_t offset,378size_t match_length, size_t total_output,379ostream_t *const out);380381/******* END ZSTD HELPER STRUCTS AND PROTOTYPES *******************************/382383size_t ZSTD_decompress(void *const dst, const size_t dst_len,384const void *const src, const size_t src_len) {385dictionary_t* const uninit_dict = create_dictionary();386size_t const decomp_size = ZSTD_decompress_with_dict(dst, dst_len, src,387src_len, uninit_dict);388free_dictionary(uninit_dict);389return decomp_size;390}391392size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,393const void *const src, const size_t src_len,394dictionary_t* parsed_dict) {395396istream_t in = IO_make_istream(src, src_len);397ostream_t out = IO_make_ostream(dst, dst_len);398399// "A content compressed by Zstandard is transformed into a Zstandard frame.400// Multiple frames can be appended into a single file or stream. A frame is401// totally independent, has a defined beginning and end, and a set of402// parameters which tells the decoder how to decompress it."403404/* this decoder assumes decompression of a single frame */405decode_frame(&out, &in, parsed_dict);406407return (size_t)(out.ptr - (u8 *)dst);408}409410/******* FRAME DECODING ******************************************************/411412static void decode_data_frame(ostream_t *const out, istream_t *const in,413const dictionary_t *const dict);414static void init_frame_context(frame_context_t *const context,415istream_t *const in,416const dictionary_t *const dict);417static void free_frame_context(frame_context_t *const context);418static void parse_frame_header(frame_header_t *const header,419istream_t *const in);420static void frame_context_apply_dict(frame_context_t *const ctx,421const dictionary_t *const dict);422423static void decompress_data(frame_context_t *const ctx, ostream_t *const out,424istream_t *const in);425426static void decode_frame(ostream_t *const out, istream_t *const in,427const dictionary_t *const dict) {428const u32 magic_number = (u32)IO_read_bits(in, 32);429if (magic_number == ZSTD_MAGIC_NUMBER) {430// ZSTD frame431decode_data_frame(out, in, dict);432433return;434}435436// not a real frame or a skippable frame437ERROR("Tried to decode non-ZSTD frame");438}439440/// Decode a frame that contains compressed data. Not all frames do as there441/// are skippable frames.442/// See443/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#general-structure-of-zstandard-frame-format444static void decode_data_frame(ostream_t *const out, istream_t *const in,445const dictionary_t *const dict) {446frame_context_t ctx;447448// Initialize the context that needs to be carried from block to block449init_frame_context(&ctx, in, dict);450451if (ctx.header.frame_content_size != 0 &&452ctx.header.frame_content_size > out->len) {453OUT_SIZE();454}455456decompress_data(&ctx, out, in);457458free_frame_context(&ctx);459}460461/// Takes the information provided in the header and dictionary, and initializes462/// the context for this frame463static void init_frame_context(frame_context_t *const context,464istream_t *const in,465const dictionary_t *const dict) {466// Most fields in context are correct when initialized to 0467memset(context, 0, sizeof(frame_context_t));468469// Parse data from the frame header470parse_frame_header(&context->header, in);471472// Set up the offset history for the repeat offset commands473context->previous_offsets[0] = 1;474context->previous_offsets[1] = 4;475context->previous_offsets[2] = 8;476477// Apply details from the dict if it exists478frame_context_apply_dict(context, dict);479}480481static void free_frame_context(frame_context_t *const context) {482HUF_free_dtable(&context->literals_dtable);483484FSE_free_dtable(&context->ll_dtable);485FSE_free_dtable(&context->ml_dtable);486FSE_free_dtable(&context->of_dtable);487488memset(context, 0, sizeof(frame_context_t));489}490491static void parse_frame_header(frame_header_t *const header,492istream_t *const in) {493// "The first header's byte is called the Frame_Header_Descriptor. It tells494// which other fields are present. Decoding this byte is enough to tell the495// size of Frame_Header.496//497// Bit number Field name498// 7-6 Frame_Content_Size_flag499// 5 Single_Segment_flag500// 4 Unused_bit501// 3 Reserved_bit502// 2 Content_Checksum_flag503// 1-0 Dictionary_ID_flag"504const u8 descriptor = (u8)IO_read_bits(in, 8);505506// decode frame header descriptor into flags507const u8 frame_content_size_flag = descriptor >> 6;508const u8 single_segment_flag = (descriptor >> 5) & 1;509const u8 reserved_bit = (descriptor >> 3) & 1;510const u8 content_checksum_flag = (descriptor >> 2) & 1;511const u8 dictionary_id_flag = descriptor & 3;512513if (reserved_bit != 0) {514CORRUPTION();515}516517header->single_segment_flag = single_segment_flag;518header->content_checksum_flag = content_checksum_flag;519520// decode window size521if (!single_segment_flag) {522// "Provides guarantees on maximum back-reference distance that will be523// used within compressed data. This information is important for524// decoders to allocate enough memory.525//526// Bit numbers 7-3 2-0527// Field name Exponent Mantissa"528u8 window_descriptor = (u8)IO_read_bits(in, 8);529u8 exponent = window_descriptor >> 3;530u8 mantissa = window_descriptor & 7;531532// Use the algorithm from the specification to compute window size533// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor534size_t window_base = (size_t)1 << (10 + exponent);535size_t window_add = (window_base / 8) * mantissa;536header->window_size = window_base + window_add;537}538539// decode dictionary id if it exists540if (dictionary_id_flag) {541// "This is a variable size field, which contains the ID of the542// dictionary required to properly decode the frame. Note that this543// field is optional. When it's not present, it's up to the caller to544// make sure it uses the correct dictionary. Format is little-endian."545const int bytes_array[] = {0, 1, 2, 4};546const int bytes = bytes_array[dictionary_id_flag];547548header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);549} else {550header->dictionary_id = 0;551}552553// decode frame content size if it exists554if (single_segment_flag || frame_content_size_flag) {555// "This is the original (uncompressed) size. This information is556// optional. The Field_Size is provided according to value of557// Frame_Content_Size_flag. The Field_Size can be equal to 0 (not558// present), 1, 2, 4 or 8 bytes. Format is little-endian."559//560// if frame_content_size_flag == 0 but single_segment_flag is set, we561// still have a 1 byte field562const int bytes_array[] = {1, 2, 4, 8};563const int bytes = bytes_array[frame_content_size_flag];564565header->frame_content_size = IO_read_bits(in, bytes * 8);566if (bytes == 2) {567// "When Field_Size is 2, the offset of 256 is added."568header->frame_content_size += 256;569}570} else {571header->frame_content_size = 0;572}573574if (single_segment_flag) {575// "The Window_Descriptor byte is optional. It is absent when576// Single_Segment_flag is set. In this case, the maximum back-reference577// distance is the content size itself, which can be any value from 1 to578// 2^64-1 bytes (16 EB)."579header->window_size = header->frame_content_size;580}581}582583/// Decompress the data from a frame block by block584static void decompress_data(frame_context_t *const ctx, ostream_t *const out,585istream_t *const in) {586// "A frame encapsulates one or multiple blocks. Each block can be587// compressed or not, and has a guaranteed maximum content size, which588// depends on frame parameters. Unlike frames, each block depends on589// previous blocks for proper decoding. However, each block can be590// decompressed without waiting for its successor, allowing streaming591// operations."592int last_block = 0;593do {594// "Last_Block595//596// The lowest bit signals if this block is the last one. Frame ends597// right after this block.598//599// Block_Type and Block_Size600//601// The next 2 bits represent the Block_Type, while the remaining 21 bits602// represent the Block_Size. Format is little-endian."603last_block = (int)IO_read_bits(in, 1);604const int block_type = (int)IO_read_bits(in, 2);605const size_t block_len = IO_read_bits(in, 21);606607switch (block_type) {608case 0: {609// "Raw_Block - this is an uncompressed block. Block_Size is the610// number of bytes to read and copy."611const u8 *const read_ptr = IO_get_read_ptr(in, block_len);612u8 *const write_ptr = IO_get_write_ptr(out, block_len);613614// Copy the raw data into the output615memcpy(write_ptr, read_ptr, block_len);616617ctx->current_total_output += block_len;618break;619}620case 1: {621// "RLE_Block - this is a single byte, repeated N times. In which622// case, Block_Size is the size to regenerate, while the623// "compressed" block is just 1 byte (the byte to repeat)."624const u8 *const read_ptr = IO_get_read_ptr(in, 1);625u8 *const write_ptr = IO_get_write_ptr(out, block_len);626627// Copy `block_len` copies of `read_ptr[0]` to the output628memset(write_ptr, read_ptr[0], block_len);629630ctx->current_total_output += block_len;631break;632}633case 2: {634// "Compressed_Block - this is a Zstandard compressed block,635// detailed in another section of this specification. Block_Size is636// the compressed size.637638// Create a sub-stream for the block639istream_t block_stream = IO_make_sub_istream(in, block_len);640decompress_block(ctx, out, &block_stream);641break;642}643case 3:644// "Reserved - this is not a block. This value cannot be used with645// current version of this specification."646CORRUPTION();647break;648default:649IMPOSSIBLE();650}651} while (!last_block);652653if (ctx->header.content_checksum_flag) {654// This program does not support checking the checksum, so skip over it655// if it's present656IO_advance_input(in, 4);657}658}659/******* END FRAME DECODING ***************************************************/660661/******* BLOCK DECOMPRESSION **************************************************/662static void decompress_block(frame_context_t *const ctx, ostream_t *const out,663istream_t *const in) {664// "A compressed block consists of 2 sections :665//666// Literals_Section667// Sequences_Section"668669670// Part 1: decode the literals block671u8 *literals = NULL;672const size_t literals_size = decode_literals(ctx, in, &literals);673674// Part 2: decode the sequences block675sequence_command_t *sequences = NULL;676const size_t num_sequences =677decode_sequences(ctx, in, &sequences);678679// Part 3: combine literals and sequence commands to generate output680execute_sequences(ctx, out, literals, literals_size, sequences,681num_sequences);682free(literals);683free(sequences);684}685/******* END BLOCK DECOMPRESSION **********************************************/686687/******* LITERALS DECODING ****************************************************/688static size_t decode_literals_simple(istream_t *const in, u8 **const literals,689const int block_type,690const int size_format);691static size_t decode_literals_compressed(frame_context_t *const ctx,692istream_t *const in,693u8 **const literals,694const int block_type,695const int size_format);696static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in);697static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,698int *const num_symbs);699700static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,701u8 **const literals) {702// "Literals can be stored uncompressed or compressed using Huffman prefix703// codes. When compressed, an optional tree description can be present,704// followed by 1 or 4 streams."705//706// "Literals_Section_Header707//708// Header is in charge of describing how literals are packed. It's a709// byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, using710// little-endian convention."711//712// "Literals_Block_Type713//714// This field uses 2 lowest bits of first byte, describing 4 different block715// types"716//717// size_format takes between 1 and 2 bits718int block_type = (int)IO_read_bits(in, 2);719int size_format = (int)IO_read_bits(in, 2);720721if (block_type <= 1) {722// Raw or RLE literals block723return decode_literals_simple(in, literals, block_type,724size_format);725} else {726// Huffman compressed literals727return decode_literals_compressed(ctx, in, literals, block_type,728size_format);729}730}731732/// Decodes literals blocks in raw or RLE form733static size_t decode_literals_simple(istream_t *const in, u8 **const literals,734const int block_type,735const int size_format) {736size_t size;737switch (size_format) {738// These cases are in the form ?0739// In this case, the ? bit is actually part of the size field740case 0:741case 2:742// "Size_Format uses 1 bit. Regenerated_Size uses 5 bits (0-31)."743IO_rewind_bits(in, 1);744size = IO_read_bits(in, 5);745break;746case 1:747// "Size_Format uses 2 bits. Regenerated_Size uses 12 bits (0-4095)."748size = IO_read_bits(in, 12);749break;750case 3:751// "Size_Format uses 2 bits. Regenerated_Size uses 20 bits (0-1048575)."752size = IO_read_bits(in, 20);753break;754default:755// Size format is in range 0-3756IMPOSSIBLE();757}758759if (size > MAX_LITERALS_SIZE) {760CORRUPTION();761}762763*literals = malloc(size);764if (!*literals) {765BAD_ALLOC();766}767768switch (block_type) {769case 0: {770// "Raw_Literals_Block - Literals are stored uncompressed."771const u8 *const read_ptr = IO_get_read_ptr(in, size);772memcpy(*literals, read_ptr, size);773break;774}775case 1: {776// "RLE_Literals_Block - Literals consist of a single byte value repeated N times."777const u8 *const read_ptr = IO_get_read_ptr(in, 1);778memset(*literals, read_ptr[0], size);779break;780}781default:782IMPOSSIBLE();783}784785return size;786}787788/// Decodes Huffman compressed literals789static size_t decode_literals_compressed(frame_context_t *const ctx,790istream_t *const in,791u8 **const literals,792const int block_type,793const int size_format) {794size_t regenerated_size, compressed_size;795// Only size_format=0 has 1 stream, so default to 4796int num_streams = 4;797switch (size_format) {798case 0:799// "A single stream. Both Compressed_Size and Regenerated_Size use 10800// bits (0-1023)."801num_streams = 1;802// Fall through as it has the same size format803/* fallthrough */804case 1:805// "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits806// (0-1023)."807regenerated_size = IO_read_bits(in, 10);808compressed_size = IO_read_bits(in, 10);809break;810case 2:811// "4 streams. Both Compressed_Size and Regenerated_Size use 14 bits812// (0-16383)."813regenerated_size = IO_read_bits(in, 14);814compressed_size = IO_read_bits(in, 14);815break;816case 3:817// "4 streams. Both Compressed_Size and Regenerated_Size use 18 bits818// (0-262143)."819regenerated_size = IO_read_bits(in, 18);820compressed_size = IO_read_bits(in, 18);821break;822default:823// Impossible824IMPOSSIBLE();825}826if (regenerated_size > MAX_LITERALS_SIZE) {827CORRUPTION();828}829830*literals = malloc(regenerated_size);831if (!*literals) {832BAD_ALLOC();833}834835ostream_t lit_stream = IO_make_ostream(*literals, regenerated_size);836istream_t huf_stream = IO_make_sub_istream(in, compressed_size);837838if (block_type == 2) {839// Decode the provided Huffman table840// "This section is only present when Literals_Block_Type type is841// Compressed_Literals_Block (2)."842843HUF_free_dtable(&ctx->literals_dtable);844decode_huf_table(&ctx->literals_dtable, &huf_stream);845} else {846// If the previous Huffman table is being repeated, ensure it exists847if (!ctx->literals_dtable.symbols) {848CORRUPTION();849}850}851852size_t symbols_decoded;853if (num_streams == 1) {854symbols_decoded = HUF_decompress_1stream(&ctx->literals_dtable, &lit_stream, &huf_stream);855} else {856symbols_decoded = HUF_decompress_4stream(&ctx->literals_dtable, &lit_stream, &huf_stream);857}858859if (symbols_decoded != regenerated_size) {860CORRUPTION();861}862863return regenerated_size;864}865866// Decode the Huffman table description867static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in) {868// "All literal values from zero (included) to last present one (excluded)869// are represented by Weight with values from 0 to Max_Number_of_Bits."870871// "This is a single byte value (0-255), which describes how to decode the list of weights."872const u8 header = IO_read_bits(in, 8);873874u8 weights[HUF_MAX_SYMBS];875memset(weights, 0, sizeof(weights));876877int num_symbs;878879if (header >= 128) {880// "This is a direct representation, where each Weight is written881// directly as a 4 bits field (0-15). The full representation occupies882// ((Number_of_Symbols+1)/2) bytes, meaning it uses a last full byte883// even if Number_of_Symbols is odd. Number_of_Symbols = headerByte -884// 127"885num_symbs = header - 127;886const size_t bytes = (num_symbs + 1) / 2;887888const u8 *const weight_src = IO_get_read_ptr(in, bytes);889890for (int i = 0; i < num_symbs; i++) {891// "They are encoded forward, 2892// weights to a byte with the first weight taking the top four bits893// and the second taking the bottom four (e.g. the following894// operations could be used to read the weights: Weight[0] =895// (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf), etc.)."896if (i % 2 == 0) {897weights[i] = weight_src[i / 2] >> 4;898} else {899weights[i] = weight_src[i / 2] & 0xf;900}901}902} else {903// The weights are FSE encoded, decode them before we can construct the904// table905istream_t fse_stream = IO_make_sub_istream(in, header);906ostream_t weight_stream = IO_make_ostream(weights, HUF_MAX_SYMBS);907fse_decode_hufweights(&weight_stream, &fse_stream, &num_symbs);908}909910// Construct the table using the decoded weights911HUF_init_dtable_usingweights(dtable, weights, num_symbs);912}913914static void fse_decode_hufweights(ostream_t *weights, istream_t *const in,915int *const num_symbs) {916const int MAX_ACCURACY_LOG = 7;917918FSE_dtable dtable;919920// "An FSE bitstream starts by a header, describing probabilities921// distribution. It will create a Decoding Table. For a list of Huffman922// weights, maximum accuracy is 7 bits."923FSE_decode_header(&dtable, in, MAX_ACCURACY_LOG);924925// Decode the weights926*num_symbs = FSE_decompress_interleaved2(&dtable, weights, in);927928FSE_free_dtable(&dtable);929}930/******* END LITERALS DECODING ************************************************/931932/******* SEQUENCE DECODING ****************************************************/933/// The combination of FSE states needed to decode sequences934typedef struct {935FSE_dtable ll_table;936FSE_dtable of_table;937FSE_dtable ml_table;938939u16 ll_state;940u16 of_state;941u16 ml_state;942} sequence_states_t;943944/// Different modes to signal to decode_seq_tables what to do945typedef enum {946seq_literal_length = 0,947seq_offset = 1,948seq_match_length = 2,949} seq_part_t;950951typedef enum {952seq_predefined = 0,953seq_rle = 1,954seq_fse = 2,955seq_repeat = 3,956} seq_mode_t;957958/// The predefined FSE distribution tables for `seq_predefined` mode959static const i16 SEQ_LITERAL_LENGTH_DEFAULT_DIST[36] = {9604, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2,9612, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};962static const i16 SEQ_OFFSET_DEFAULT_DIST[29] = {9631, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1,9641, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};965static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {9661, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,9671, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,9681, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};969970/// The sequence decoding baseline and number of additional bits to read/add971/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets972static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {9730, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,97412, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40,97548, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};976static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {9770, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,9781, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};979980static const u32 SEQ_MATCH_LENGTH_BASELINES[53] = {9813, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,98217, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,98331, 32, 33, 34, 35, 37, 39, 41, 43, 47, 51, 59, 67, 83,98499, 131, 259, 515, 1027, 2051, 4099, 8195, 16387, 32771, 65539};985static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {9860, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,9870, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,9882, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};989990/// Offset decoding is simpler so we just need a maximum code value991static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};992993static void decompress_sequences(frame_context_t *const ctx,994istream_t *const in,995sequence_command_t *const sequences,996const size_t num_sequences);997static sequence_command_t decode_sequence(sequence_states_t *const state,998const u8 *const src,999i64 *const offset);1000static void decode_seq_table(FSE_dtable *const table, istream_t *const in,1001const seq_part_t type, const seq_mode_t mode);10021003static size_t decode_sequences(frame_context_t *const ctx, istream_t *in,1004sequence_command_t **const sequences) {1005// "A compressed block is a succession of sequences . A sequence is a1006// literal copy command, followed by a match copy command. A literal copy1007// command specifies a length. It is the number of bytes to be copied (or1008// extracted) from the literal section. A match copy command specifies an1009// offset and a length. The offset gives the position to copy from, which1010// can be within a previous block."10111012size_t num_sequences;10131014// "Number_of_Sequences1015//1016// This is a variable size field using between 1 and 3 bytes. Let's call its1017// first byte byte0."1018u8 header = IO_read_bits(in, 8);1019if (header == 0) {1020// "There are no sequences. The sequence section stops there.1021// Regenerated content is defined entirely by literals section."1022*sequences = NULL;1023return 0;1024} else if (header < 128) {1025// "Number_of_Sequences = byte0 . Uses 1 byte."1026num_sequences = header;1027} else if (header < 255) {1028// "Number_of_Sequences = ((byte0-128) << 8) + byte1 . Uses 2 bytes."1029num_sequences = ((header - 128) << 8) + IO_read_bits(in, 8);1030} else {1031// "Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00 . Uses 3 bytes."1032num_sequences = IO_read_bits(in, 16) + 0x7F00;1033}10341035*sequences = malloc(num_sequences * sizeof(sequence_command_t));1036if (!*sequences) {1037BAD_ALLOC();1038}10391040decompress_sequences(ctx, in, *sequences, num_sequences);1041return num_sequences;1042}10431044/// Decompress the FSE encoded sequence commands1045static void decompress_sequences(frame_context_t *const ctx, istream_t *in,1046sequence_command_t *const sequences,1047const size_t num_sequences) {1048// "The Sequences_Section regroup all symbols required to decode commands.1049// There are 3 symbol types : literals lengths, offsets and match lengths.1050// They are encoded together, interleaved, in a single bitstream."10511052// "Symbol compression modes1053//1054// This is a single byte, defining the compression mode of each symbol1055// type."1056//1057// Bit number : Field name1058// 7-6 : Literals_Lengths_Mode1059// 5-4 : Offsets_Mode1060// 3-2 : Match_Lengths_Mode1061// 1-0 : Reserved1062u8 compression_modes = IO_read_bits(in, 8);10631064if ((compression_modes & 3) != 0) {1065// Reserved bits set1066CORRUPTION();1067}10681069// "Following the header, up to 3 distribution tables can be described. When1070// present, they are in this order :1071//1072// Literals lengths1073// Offsets1074// Match Lengths"1075// Update the tables we have stored in the context1076decode_seq_table(&ctx->ll_dtable, in, seq_literal_length,1077(compression_modes >> 6) & 3);10781079decode_seq_table(&ctx->of_dtable, in, seq_offset,1080(compression_modes >> 4) & 3);10811082decode_seq_table(&ctx->ml_dtable, in, seq_match_length,1083(compression_modes >> 2) & 3);108410851086sequence_states_t states;10871088// Initialize the decoding tables1089{1090states.ll_table = ctx->ll_dtable;1091states.of_table = ctx->of_dtable;1092states.ml_table = ctx->ml_dtable;1093}10941095const size_t len = IO_istream_len(in);1096const u8 *const src = IO_get_read_ptr(in, len);10971098// "After writing the last bit containing information, the compressor writes1099// a single 1-bit and then fills the byte with 0-7 0 bits of padding."1100const int padding = 8 - highest_set_bit(src[len - 1]);1101// The offset starts at the end because FSE streams are read backwards1102i64 bit_offset = (i64)(len * 8 - (size_t)padding);11031104// "The bitstream starts with initial state values, each using the required1105// number of bits in their respective accuracy, decoded previously from1106// their normalized distribution.1107//1108// It starts by Literals_Length_State, followed by Offset_State, and finally1109// Match_Length_State."1110FSE_init_state(&states.ll_table, &states.ll_state, src, &bit_offset);1111FSE_init_state(&states.of_table, &states.of_state, src, &bit_offset);1112FSE_init_state(&states.ml_table, &states.ml_state, src, &bit_offset);11131114for (size_t i = 0; i < num_sequences; i++) {1115// Decode sequences one by one1116sequences[i] = decode_sequence(&states, src, &bit_offset);1117}11181119if (bit_offset != 0) {1120CORRUPTION();1121}1122}11231124// Decode a single sequence and update the state1125static sequence_command_t decode_sequence(sequence_states_t *const states,1126const u8 *const src,1127i64 *const offset) {1128// "Each symbol is a code in its own context, which specifies Baseline and1129// Number_of_Bits to add. Codes are FSE compressed, and interleaved with raw1130// additional bits in the same bitstream."11311132// Decode symbols, but don't update states1133const u8 of_code = FSE_peek_symbol(&states->of_table, states->of_state);1134const u8 ll_code = FSE_peek_symbol(&states->ll_table, states->ll_state);1135const u8 ml_code = FSE_peek_symbol(&states->ml_table, states->ml_state);11361137// Offset doesn't need a max value as it's not decoded using a table1138if (ll_code > SEQ_MAX_CODES[seq_literal_length] ||1139ml_code > SEQ_MAX_CODES[seq_match_length]) {1140CORRUPTION();1141}11421143// Read the interleaved bits1144sequence_command_t seq;1145// "Decoding starts by reading the Number_of_Bits required to decode Offset.1146// It then does the same for Match_Length, and then for Literals_Length."1147seq.offset = ((u32)1 << of_code) + STREAM_read_bits(src, of_code, offset);11481149seq.match_length =1150SEQ_MATCH_LENGTH_BASELINES[ml_code] +1151STREAM_read_bits(src, SEQ_MATCH_LENGTH_EXTRA_BITS[ml_code], offset);11521153seq.literal_length =1154SEQ_LITERAL_LENGTH_BASELINES[ll_code] +1155STREAM_read_bits(src, SEQ_LITERAL_LENGTH_EXTRA_BITS[ll_code], offset);11561157// "If it is not the last sequence in the block, the next operation is to1158// update states. Using the rules pre-calculated in the decoding tables,1159// Literals_Length_State is updated, followed by Match_Length_State, and1160// then Offset_State."1161// If the stream is complete don't read bits to update state1162if (*offset != 0) {1163FSE_update_state(&states->ll_table, &states->ll_state, src, offset);1164FSE_update_state(&states->ml_table, &states->ml_state, src, offset);1165FSE_update_state(&states->of_table, &states->of_state, src, offset);1166}11671168return seq;1169}11701171/// Given a sequence part and table mode, decode the FSE distribution1172/// Errors if the mode is `seq_repeat` without a pre-existing table in `table`1173static void decode_seq_table(FSE_dtable *const table, istream_t *const in,1174const seq_part_t type, const seq_mode_t mode) {1175// Constant arrays indexed by seq_part_t1176const i16 *const default_distributions[] = {SEQ_LITERAL_LENGTH_DEFAULT_DIST,1177SEQ_OFFSET_DEFAULT_DIST,1178SEQ_MATCH_LENGTH_DEFAULT_DIST};1179const size_t default_distribution_lengths[] = {36, 29, 53};1180const size_t default_distribution_accuracies[] = {6, 5, 6};11811182const size_t max_accuracies[] = {9, 8, 9};11831184if (mode != seq_repeat) {1185// Free old one before overwriting1186FSE_free_dtable(table);1187}11881189switch (mode) {1190case seq_predefined: {1191// "Predefined_Mode : uses a predefined distribution table."1192const i16 *distribution = default_distributions[type];1193const size_t symbs = default_distribution_lengths[type];1194const size_t accuracy_log = default_distribution_accuracies[type];11951196FSE_init_dtable(table, distribution, symbs, accuracy_log);1197break;1198}1199case seq_rle: {1200// "RLE_Mode : it's a single code, repeated Number_of_Sequences times."1201const u8 symb = IO_get_read_ptr(in, 1)[0];1202FSE_init_dtable_rle(table, symb);1203break;1204}1205case seq_fse: {1206// "FSE_Compressed_Mode : standard FSE compression. A distribution table1207// will be present "1208FSE_decode_header(table, in, max_accuracies[type]);1209break;1210}1211case seq_repeat:1212// "Repeat_Mode : re-use distribution table from previous compressed1213// block."1214// Nothing to do here, table will be unchanged1215if (!table->symbols) {1216// This mode is invalid if we don't already have a table1217CORRUPTION();1218}1219break;1220default:1221// Impossible, as mode is from 0-31222IMPOSSIBLE();1223break;1224}12251226}1227/******* END SEQUENCE DECODING ************************************************/12281229/******* SEQUENCE EXECUTION ***************************************************/1230static void execute_sequences(frame_context_t *const ctx, ostream_t *const out,1231const u8 *const literals,1232const size_t literals_len,1233const sequence_command_t *const sequences,1234const size_t num_sequences) {1235istream_t litstream = IO_make_istream(literals, literals_len);12361237u64 *const offset_hist = ctx->previous_offsets;1238size_t total_output = ctx->current_total_output;12391240for (size_t i = 0; i < num_sequences; i++) {1241const sequence_command_t seq = sequences[i];1242{1243const u32 literals_size = copy_literals(seq.literal_length, &litstream, out);1244total_output += literals_size;1245}12461247size_t const offset = compute_offset(seq, offset_hist);12481249size_t const match_length = seq.match_length;12501251execute_match_copy(ctx, offset, match_length, total_output, out);12521253total_output += match_length;1254}12551256// Copy any leftover literals1257{1258size_t len = IO_istream_len(&litstream);1259copy_literals(len, &litstream, out);1260total_output += len;1261}12621263ctx->current_total_output = total_output;1264}12651266static u32 copy_literals(const size_t literal_length, istream_t *litstream,1267ostream_t *const out) {1268// If the sequence asks for more literals than are left, the1269// sequence must be corrupted1270if (literal_length > IO_istream_len(litstream)) {1271CORRUPTION();1272}12731274u8 *const write_ptr = IO_get_write_ptr(out, literal_length);1275const u8 *const read_ptr =1276IO_get_read_ptr(litstream, literal_length);1277// Copy literals to output1278memcpy(write_ptr, read_ptr, literal_length);12791280return literal_length;1281}12821283static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist) {1284size_t offset;1285// Offsets are special, we need to handle the repeat offsets1286if (seq.offset <= 3) {1287// "The first 3 values define a repeated offset and we will call1288// them Repeated_Offset1, Repeated_Offset2, and Repeated_Offset3.1289// They are sorted in recency order, with Repeated_Offset1 meaning1290// 'most recent one'".12911292// Use 0 indexing for the array1293u32 idx = seq.offset - 1;1294if (seq.literal_length == 0) {1295// "There is an exception though, when current sequence's1296// literals length is 0. In this case, repeated offsets are1297// shifted by one, so Repeated_Offset1 becomes Repeated_Offset2,1298// Repeated_Offset2 becomes Repeated_Offset3, and1299// Repeated_Offset3 becomes Repeated_Offset1 - 1_byte."1300idx++;1301}13021303if (idx == 0) {1304offset = offset_hist[0];1305} else {1306// If idx == 3 then literal length was 0 and the offset was 3,1307// as per the exception listed above1308offset = idx < 3 ? offset_hist[idx] : offset_hist[0] - 1;13091310// If idx == 1 we don't need to modify offset_hist[2], since1311// we're using the second-most recent code1312if (idx > 1) {1313offset_hist[2] = offset_hist[1];1314}1315offset_hist[1] = offset_hist[0];1316offset_hist[0] = offset;1317}1318} else {1319// When it's not a repeat offset:1320// "if (Offset_Value > 3) offset = Offset_Value - 3;"1321offset = seq.offset - 3;13221323// Shift back history1324offset_hist[2] = offset_hist[1];1325offset_hist[1] = offset_hist[0];1326offset_hist[0] = offset;1327}1328return offset;1329}13301331static void execute_match_copy(frame_context_t *const ctx, size_t offset,1332size_t match_length, size_t total_output,1333ostream_t *const out) {1334u8 *write_ptr = IO_get_write_ptr(out, match_length);1335if (total_output <= ctx->header.window_size) {1336// In this case offset might go back into the dictionary1337if (offset > total_output + ctx->dict_content_len) {1338// The offset goes beyond even the dictionary1339CORRUPTION();1340}13411342if (offset > total_output) {1343// "The rest of the dictionary is its content. The content act1344// as a "past" in front of data to compress or decompress, so it1345// can be referenced in sequence commands."1346const size_t dict_copy =1347MIN(offset - total_output, match_length);1348const size_t dict_offset =1349ctx->dict_content_len - (offset - total_output);13501351memcpy(write_ptr, ctx->dict_content + dict_offset, dict_copy);1352write_ptr += dict_copy;1353match_length -= dict_copy;1354}1355} else if (offset > ctx->header.window_size) {1356CORRUPTION();1357}13581359// We must copy byte by byte because the match length might be larger1360// than the offset1361// ex: if the output so far was "abc", a command with offset=3 and1362// match_length=6 would produce "abcabcabc" as the new output1363for (size_t j = 0; j < match_length; j++) {1364*write_ptr = *(write_ptr - offset);1365write_ptr++;1366}1367}1368/******* END SEQUENCE EXECUTION ***********************************************/13691370/******* OUTPUT SIZE COUNTING *************************************************/1371/// Get the decompressed size of an input stream so memory can be allocated in1372/// advance.1373/// This implementation assumes `src` points to a single ZSTD-compressed frame1374size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {1375istream_t in = IO_make_istream(src, src_len);13761377// get decompressed size from ZSTD frame header1378{1379const u32 magic_number = (u32)IO_read_bits(&in, 32);13801381if (magic_number == ZSTD_MAGIC_NUMBER) {1382// ZSTD frame1383frame_header_t header;1384parse_frame_header(&header, &in);13851386if (header.frame_content_size == 0 && !header.single_segment_flag) {1387// Content size not provided, we can't tell1388return (size_t)-1;1389}13901391return header.frame_content_size;1392} else {1393// not a real frame or skippable frame1394ERROR("ZSTD frame magic number did not match");1395}1396}1397}1398/******* END OUTPUT SIZE COUNTING *********************************************/13991400/******* DICTIONARY PARSING ***************************************************/1401dictionary_t* create_dictionary() {1402dictionary_t* const dict = calloc(1, sizeof(dictionary_t));1403if (!dict) {1404BAD_ALLOC();1405}1406return dict;1407}14081409/// Free an allocated dictionary1410void free_dictionary(dictionary_t *const dict) {1411HUF_free_dtable(&dict->literals_dtable);1412FSE_free_dtable(&dict->ll_dtable);1413FSE_free_dtable(&dict->of_dtable);1414FSE_free_dtable(&dict->ml_dtable);14151416free(dict->content);14171418memset(dict, 0, sizeof(dictionary_t));14191420free(dict);1421}142214231424#if !defined(ZDEC_NO_DICTIONARY)1425#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes")1426#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src");14271428static void init_dictionary_content(dictionary_t *const dict,1429istream_t *const in);14301431void parse_dictionary(dictionary_t *const dict, const void *src,1432size_t src_len) {1433const u8 *byte_src = (const u8 *)src;1434memset(dict, 0, sizeof(dictionary_t));1435if (src == NULL) { /* cannot initialize dictionary with null src */1436NULL_SRC();1437}1438if (src_len < 8) {1439DICT_SIZE_ERROR();1440}14411442istream_t in = IO_make_istream(byte_src, src_len);14431444const u32 magic_number = IO_read_bits(&in, 32);1445if (magic_number != 0xEC30A437) {1446// raw content dict1447IO_rewind_bits(&in, 32);1448init_dictionary_content(dict, &in);1449return;1450}14511452dict->dictionary_id = IO_read_bits(&in, 32);14531454// "Entropy_Tables : following the same format as the tables in compressed1455// blocks. They are stored in following order : Huffman tables for literals,1456// FSE table for offsets, FSE table for match lengths, and FSE table for1457// literals lengths. It's finally followed by 3 offset values, populating1458// recent offsets (instead of using {1,4,8}), stored in order, 4-bytes1459// little-endian each, for a total of 12 bytes. Each recent offset must have1460// a value < dictionary size."1461decode_huf_table(&dict->literals_dtable, &in);1462decode_seq_table(&dict->of_dtable, &in, seq_offset, seq_fse);1463decode_seq_table(&dict->ml_dtable, &in, seq_match_length, seq_fse);1464decode_seq_table(&dict->ll_dtable, &in, seq_literal_length, seq_fse);14651466// Read in the previous offset history1467dict->previous_offsets[0] = IO_read_bits(&in, 32);1468dict->previous_offsets[1] = IO_read_bits(&in, 32);1469dict->previous_offsets[2] = IO_read_bits(&in, 32);14701471// Ensure the provided offsets aren't too large1472// "Each recent offset must have a value < dictionary size."1473for (int i = 0; i < 3; i++) {1474if (dict->previous_offsets[i] > src_len) {1475ERROR("Dictionary corrupted");1476}1477}14781479// "Content : The rest of the dictionary is its content. The content act as1480// a "past" in front of data to compress or decompress, so it can be1481// referenced in sequence commands."1482init_dictionary_content(dict, &in);1483}14841485static void init_dictionary_content(dictionary_t *const dict,1486istream_t *const in) {1487// Copy in the content1488dict->content_size = IO_istream_len(in);1489dict->content = malloc(dict->content_size);1490if (!dict->content) {1491BAD_ALLOC();1492}14931494const u8 *const content = IO_get_read_ptr(in, dict->content_size);14951496memcpy(dict->content, content, dict->content_size);1497}14981499static void HUF_copy_dtable(HUF_dtable *const dst,1500const HUF_dtable *const src) {1501if (src->max_bits == 0) {1502memset(dst, 0, sizeof(HUF_dtable));1503return;1504}15051506const size_t size = (size_t)1 << src->max_bits;1507dst->max_bits = src->max_bits;15081509dst->symbols = malloc(size);1510dst->num_bits = malloc(size);1511if (!dst->symbols || !dst->num_bits) {1512BAD_ALLOC();1513}15141515memcpy(dst->symbols, src->symbols, size);1516memcpy(dst->num_bits, src->num_bits, size);1517}15181519static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) {1520if (src->accuracy_log == 0) {1521memset(dst, 0, sizeof(FSE_dtable));1522return;1523}15241525size_t size = (size_t)1 << src->accuracy_log;1526dst->accuracy_log = src->accuracy_log;15271528dst->symbols = malloc(size);1529dst->num_bits = malloc(size);1530dst->new_state_base = malloc(size * sizeof(u16));1531if (!dst->symbols || !dst->num_bits || !dst->new_state_base) {1532BAD_ALLOC();1533}15341535memcpy(dst->symbols, src->symbols, size);1536memcpy(dst->num_bits, src->num_bits, size);1537memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16));1538}15391540/// A dictionary acts as initializing values for the frame context before1541/// decompression, so we implement it by applying it's predetermined1542/// tables and content to the context before beginning decompression1543static void frame_context_apply_dict(frame_context_t *const ctx,1544const dictionary_t *const dict) {1545// If the content pointer is NULL then it must be an empty dict1546if (!dict || !dict->content)1547return;15481549// If the requested dictionary_id is non-zero, the correct dictionary must1550// be present1551if (ctx->header.dictionary_id != 0 &&1552ctx->header.dictionary_id != dict->dictionary_id) {1553ERROR("Wrong dictionary provided");1554}15551556// Copy the dict content to the context for references during sequence1557// execution1558ctx->dict_content = dict->content;1559ctx->dict_content_len = dict->content_size;15601561// If it's a formatted dict copy the precomputed tables in so they can1562// be used in the table repeat modes1563if (dict->dictionary_id != 0) {1564// Deep copy the entropy tables so they can be freed independently of1565// the dictionary struct1566HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable);1567FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable);1568FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);1569FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);15701571// Copy the repeated offsets1572memcpy(ctx->previous_offsets, dict->previous_offsets,1573sizeof(ctx->previous_offsets));1574}1575}15761577#else // ZDEC_NO_DICTIONARY is defined15781579static void frame_context_apply_dict(frame_context_t *const ctx,1580const dictionary_t *const dict) {1581(void)ctx;1582if (dict && dict->content) ERROR("dictionary not supported");1583}15841585#endif1586/******* END DICTIONARY PARSING ***********************************************/15871588/******* IO STREAM OPERATIONS *************************************************/15891590/// Reads `num` bits from a bitstream, and updates the internal offset1591static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {1592if (num_bits > 64 || num_bits <= 0) {1593ERROR("Attempt to read an invalid number of bits");1594}15951596const size_t bytes = (num_bits + in->bit_offset + 7) / 8;1597const size_t full_bytes = (num_bits + in->bit_offset) / 8;1598if (bytes > in->len) {1599INP_SIZE();1600}16011602const u64 result = read_bits_LE(in->ptr, num_bits, in->bit_offset);16031604in->bit_offset = (num_bits + in->bit_offset) % 8;1605in->ptr += full_bytes;1606in->len -= full_bytes;16071608return result;1609}16101611/// If a non-zero number of bits have been read from the current byte, advance1612/// the offset to the next byte1613static inline void IO_rewind_bits(istream_t *const in, int num_bits) {1614if (num_bits < 0) {1615ERROR("Attempting to rewind stream by a negative number of bits");1616}16171618// move the offset back by `num_bits` bits1619const int new_offset = in->bit_offset - num_bits;1620// determine the number of whole bytes we have to rewind, rounding up to an1621// integer number (e.g. if `new_offset == -5`, `bytes == 1`)1622const i64 bytes = -(new_offset - 7) / 8;16231624in->ptr -= bytes;1625in->len += bytes;1626// make sure the resulting `bit_offset` is positive, as mod in C does not1627// convert numbers from negative to positive (e.g. -22 % 8 == -6)1628in->bit_offset = ((new_offset % 8) + 8) % 8;1629}16301631/// If the remaining bits in a byte will be unused, advance to the end of the1632/// byte1633static inline void IO_align_stream(istream_t *const in) {1634if (in->bit_offset != 0) {1635if (in->len == 0) {1636INP_SIZE();1637}1638in->ptr++;1639in->len--;1640in->bit_offset = 0;1641}1642}16431644/// Write the given byte into the output stream1645static inline void IO_write_byte(ostream_t *const out, u8 symb) {1646if (out->len == 0) {1647OUT_SIZE();1648}16491650out->ptr[0] = symb;1651out->ptr++;1652out->len--;1653}16541655/// Returns the number of bytes left to be read in this stream. The stream must1656/// be byte aligned.1657static inline size_t IO_istream_len(const istream_t *const in) {1658return in->len;1659}16601661/// Returns a pointer where `len` bytes can be read, and advances the internal1662/// state. The stream must be byte aligned.1663static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len) {1664if (len > in->len) {1665INP_SIZE();1666}1667if (in->bit_offset != 0) {1668ERROR("Attempting to operate on a non-byte aligned stream");1669}1670const u8 *const ptr = in->ptr;1671in->ptr += len;1672in->len -= len;16731674return ptr;1675}1676/// Returns a pointer to write `len` bytes to, and advances the internal state1677static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len) {1678if (len > out->len) {1679OUT_SIZE();1680}1681u8 *const ptr = out->ptr;1682out->ptr += len;1683out->len -= len;16841685return ptr;1686}16871688/// Advance the inner state by `len` bytes1689static inline void IO_advance_input(istream_t *const in, size_t len) {1690if (len > in->len) {1691INP_SIZE();1692}1693if (in->bit_offset != 0) {1694ERROR("Attempting to operate on a non-byte aligned stream");1695}16961697in->ptr += len;1698in->len -= len;1699}17001701/// Returns an `ostream_t` constructed from the given pointer and length1702static inline ostream_t IO_make_ostream(u8 *out, size_t len) {1703return (ostream_t) { out, len };1704}17051706/// Returns an `istream_t` constructed from the given pointer and length1707static inline istream_t IO_make_istream(const u8 *in, size_t len) {1708return (istream_t) { in, len, 0 };1709}17101711/// Returns an `istream_t` with the same base as `in`, and length `len`1712/// Then, advance `in` to account for the consumed bytes1713/// `in` must be byte aligned1714static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) {1715// Consume `len` bytes of the parent stream1716const u8 *const ptr = IO_get_read_ptr(in, len);17171718// Make a substream using the pointer to those `len` bytes1719return IO_make_istream(ptr, len);1720}1721/******* END IO STREAM OPERATIONS *********************************************/17221723/******* BITSTREAM OPERATIONS *************************************************/1724/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits1725static inline u64 read_bits_LE(const u8 *src, const int num_bits,1726const size_t offset) {1727if (num_bits > 64) {1728ERROR("Attempt to read an invalid number of bits");1729}17301731// Skip over bytes that aren't in range1732src += offset / 8;1733size_t bit_offset = offset % 8;1734u64 res = 0;17351736int shift = 0;1737int left = num_bits;1738while (left > 0) {1739u64 mask = left >= 8 ? 0xff : (((u64)1 << left) - 1);1740// Read the next byte, shift it to account for the offset, and then mask1741// out the top part if we don't need all the bits1742res += (((u64)*src++ >> bit_offset) & mask) << shift;1743shift += 8 - bit_offset;1744left -= 8 - bit_offset;1745bit_offset = 0;1746}17471748return res;1749}17501751/// Read bits from the end of a HUF or FSE bitstream. `offset` is in bits, so1752/// it updates `offset` to `offset - bits`, and then reads `bits` bits from1753/// `src + offset`. If the offset becomes negative, the extra bits at the1754/// bottom are filled in with `0` bits instead of reading from before `src`.1755static inline u64 STREAM_read_bits(const u8 *const src, const int bits,1756i64 *const offset) {1757*offset = *offset - bits;1758size_t actual_off = *offset;1759size_t actual_bits = bits;1760// Don't actually read bits from before the start of src, so if `*offset <1761// 0` fix actual_off and actual_bits to reflect the quantity to read1762if (*offset < 0) {1763actual_bits += *offset;1764actual_off = 0;1765}1766u64 res = read_bits_LE(src, actual_bits, actual_off);17671768if (*offset < 0) {1769// Fill in the bottom "overflowed" bits with 0's1770res = -*offset >= 64 ? 0 : (res << -*offset);1771}1772return res;1773}1774/******* END BITSTREAM OPERATIONS *********************************************/17751776/******* BIT COUNTING OPERATIONS **********************************************/1777/// Returns `x`, where `2^x` is the largest power of 2 less than or equal to1778/// `num`, or `-1` if `num == 0`.1779static inline int highest_set_bit(const u64 num) {1780for (int i = 63; i >= 0; i--) {1781if (((u64)1 << i) <= num) {1782return i;1783}1784}1785return -1;1786}1787/******* END BIT COUNTING OPERATIONS ******************************************/17881789/******* HUFFMAN PRIMITIVES ***************************************************/1790static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable,1791u16 *const state, const u8 *const src,1792i64 *const offset) {1793// Look up the symbol and number of bits to read1794const u8 symb = dtable->symbols[*state];1795const u8 bits = dtable->num_bits[*state];1796const u16 rest = STREAM_read_bits(src, bits, offset);1797// Shift `bits` bits out of the state, keeping the low order bits that1798// weren't necessary to determine this symbol. Then add in the new bits1799// read from the stream.1800*state = ((*state << bits) + rest) & (((u16)1 << dtable->max_bits) - 1);18011802return symb;1803}18041805static inline void HUF_init_state(const HUF_dtable *const dtable,1806u16 *const state, const u8 *const src,1807i64 *const offset) {1808// Read in a full `dtable->max_bits` bits to initialize the state1809const u8 bits = dtable->max_bits;1810*state = STREAM_read_bits(src, bits, offset);1811}18121813static size_t HUF_decompress_1stream(const HUF_dtable *const dtable,1814ostream_t *const out,1815istream_t *const in) {1816const size_t len = IO_istream_len(in);1817if (len == 0) {1818INP_SIZE();1819}1820const u8 *const src = IO_get_read_ptr(in, len);18211822// "Each bitstream must be read backward, that is starting from the end down1823// to the beginning. Therefore it's necessary to know the size of each1824// bitstream.1825//1826// It's also necessary to know exactly which bit is the latest. This is1827// detected by a final bit flag : the highest bit of latest byte is a1828// final-bit-flag. Consequently, a last byte of 0 is not possible. And the1829// final-bit-flag itself is not part of the useful bitstream. Hence, the1830// last byte contains between 0 and 7 useful bits."1831const int padding = 8 - highest_set_bit(src[len - 1]);18321833// Offset starts at the end because HUF streams are read backwards1834i64 bit_offset = len * 8 - padding;1835u16 state;18361837HUF_init_state(dtable, &state, src, &bit_offset);18381839size_t symbols_written = 0;1840while (bit_offset > -dtable->max_bits) {1841// Iterate over the stream, decoding one symbol at a time1842IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &bit_offset));1843symbols_written++;1844}1845// "The process continues up to reading the required number of symbols per1846// stream. If a bitstream is not entirely and exactly consumed, hence1847// reaching exactly its beginning position with all bits consumed, the1848// decoding process is considered faulty."18491850// When all symbols have been decoded, the final state value shouldn't have1851// any data from the stream, so it should have "read" dtable->max_bits from1852// before the start of `src`1853// Therefore `offset`, the edge to start reading new bits at, should be1854// dtable->max_bits before the start of the stream1855if (bit_offset != -dtable->max_bits) {1856CORRUPTION();1857}18581859return symbols_written;1860}18611862static size_t HUF_decompress_4stream(const HUF_dtable *const dtable,1863ostream_t *const out, istream_t *const in) {1864// "Compressed size is provided explicitly : in the 4-streams variant,1865// bitstreams are preceded by 3 unsigned little-endian 16-bits values. Each1866// value represents the compressed size of one stream, in order. The last1867// stream size is deducted from total compressed size and from previously1868// decoded stream sizes"1869const size_t csize1 = IO_read_bits(in, 16);1870const size_t csize2 = IO_read_bits(in, 16);1871const size_t csize3 = IO_read_bits(in, 16);18721873istream_t in1 = IO_make_sub_istream(in, csize1);1874istream_t in2 = IO_make_sub_istream(in, csize2);1875istream_t in3 = IO_make_sub_istream(in, csize3);1876istream_t in4 = IO_make_sub_istream(in, IO_istream_len(in));18771878size_t total_output = 0;1879// Decode each stream independently for simplicity1880// If we wanted to we could decode all 4 at the same time for speed,1881// utilizing more execution units1882total_output += HUF_decompress_1stream(dtable, out, &in1);1883total_output += HUF_decompress_1stream(dtable, out, &in2);1884total_output += HUF_decompress_1stream(dtable, out, &in3);1885total_output += HUF_decompress_1stream(dtable, out, &in4);18861887return total_output;1888}18891890/// Initializes a Huffman table using canonical Huffman codes1891/// For more explanation on canonical Huffman codes see1892/// http://www.cs.uofs.edu/~mccloske/courses/cmps340/huff_canonical_dec2015.html1893/// Codes within a level are allocated in symbol order (i.e. smaller symbols get1894/// earlier codes)1895static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits,1896const int num_symbs) {1897memset(table, 0, sizeof(HUF_dtable));1898if (num_symbs > HUF_MAX_SYMBS) {1899ERROR("Too many symbols for Huffman");1900}19011902u8 max_bits = 0;1903u16 rank_count[HUF_MAX_BITS + 1];1904memset(rank_count, 0, sizeof(rank_count));19051906// Count the number of symbols for each number of bits, and determine the1907// depth of the tree1908for (int i = 0; i < num_symbs; i++) {1909if (bits[i] > HUF_MAX_BITS) {1910ERROR("Huffman table depth too large");1911}1912max_bits = MAX(max_bits, bits[i]);1913rank_count[bits[i]]++;1914}19151916const size_t table_size = 1 << max_bits;1917table->max_bits = max_bits;1918table->symbols = malloc(table_size);1919table->num_bits = malloc(table_size);19201921if (!table->symbols || !table->num_bits) {1922free(table->symbols);1923free(table->num_bits);1924BAD_ALLOC();1925}19261927// "Symbols are sorted by Weight. Within same Weight, symbols keep natural1928// order. Symbols with a Weight of zero are removed. Then, starting from1929// lowest weight, prefix codes are distributed in order."19301931u32 rank_idx[HUF_MAX_BITS + 1];1932// Initialize the starting codes for each rank (number of bits)1933rank_idx[max_bits] = 0;1934for (int i = max_bits; i >= 1; i--) {1935rank_idx[i - 1] = rank_idx[i] + rank_count[i] * (1 << (max_bits - i));1936// The entire range takes the same number of bits so we can memset it1937memset(&table->num_bits[rank_idx[i]], i, rank_idx[i - 1] - rank_idx[i]);1938}19391940if (rank_idx[0] != table_size) {1941CORRUPTION();1942}19431944// Allocate codes and fill in the table1945for (int i = 0; i < num_symbs; i++) {1946if (bits[i] != 0) {1947// Allocate a code for this symbol and set its range in the table1948const u16 code = rank_idx[bits[i]];1949// Since the code doesn't care about the bottom `max_bits - bits[i]`1950// bits of state, it gets a range that spans all possible values of1951// the lower bits1952const u16 len = 1 << (max_bits - bits[i]);1953memset(&table->symbols[code], i, len);1954rank_idx[bits[i]] += len;1955}1956}1957}19581959static void HUF_init_dtable_usingweights(HUF_dtable *const table,1960const u8 *const weights,1961const int num_symbs) {1962// +1 because the last weight is not transmitted in the header1963if (num_symbs + 1 > HUF_MAX_SYMBS) {1964ERROR("Too many symbols for Huffman");1965}19661967u8 bits[HUF_MAX_SYMBS];19681969u64 weight_sum = 0;1970for (int i = 0; i < num_symbs; i++) {1971// Weights are in the same range as bit count1972if (weights[i] > HUF_MAX_BITS) {1973CORRUPTION();1974}1975weight_sum += weights[i] > 0 ? (u64)1 << (weights[i] - 1) : 0;1976}19771978// Find the first power of 2 larger than the sum1979const int max_bits = highest_set_bit(weight_sum) + 1;1980const u64 left_over = ((u64)1 << max_bits) - weight_sum;1981// If the left over isn't a power of 2, the weights are invalid1982if (left_over & (left_over - 1)) {1983CORRUPTION();1984}19851986// left_over is used to find the last weight as it's not transmitted1987// by inverting 2^(weight - 1) we can determine the value of last_weight1988const int last_weight = highest_set_bit(left_over) + 1;19891990for (int i = 0; i < num_symbs; i++) {1991// "Number_of_Bits = Number_of_Bits ? Max_Number_of_Bits + 1 - Weight : 0"1992bits[i] = weights[i] > 0 ? (max_bits + 1 - weights[i]) : 0;1993}1994bits[num_symbs] =1995max_bits + 1 - last_weight; // Last weight is always non-zero19961997HUF_init_dtable(table, bits, num_symbs + 1);1998}19992000static void HUF_free_dtable(HUF_dtable *const dtable) {2001free(dtable->symbols);2002free(dtable->num_bits);2003memset(dtable, 0, sizeof(HUF_dtable));2004}2005/******* END HUFFMAN PRIMITIVES ***********************************************/20062007/******* FSE PRIMITIVES *******************************************************/2008/// For more description of FSE see2009/// https://github.com/Cyan4973/FiniteStateEntropy/20102011/// Allow a symbol to be decoded without updating state2012static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable,2013const u16 state) {2014return dtable->symbols[state];2015}20162017/// Consumes bits from the input and uses the current state to determine the2018/// next state2019static inline void FSE_update_state(const FSE_dtable *const dtable,2020u16 *const state, const u8 *const src,2021i64 *const offset) {2022const u8 bits = dtable->num_bits[*state];2023const u16 rest = STREAM_read_bits(src, bits, offset);2024*state = dtable->new_state_base[*state] + rest;2025}20262027/// Decodes a single FSE symbol and updates the offset2028static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable,2029u16 *const state, const u8 *const src,2030i64 *const offset) {2031const u8 symb = FSE_peek_symbol(dtable, *state);2032FSE_update_state(dtable, state, src, offset);2033return symb;2034}20352036static inline void FSE_init_state(const FSE_dtable *const dtable,2037u16 *const state, const u8 *const src,2038i64 *const offset) {2039// Read in a full `accuracy_log` bits to initialize the state2040const u8 bits = dtable->accuracy_log;2041*state = STREAM_read_bits(src, bits, offset);2042}20432044static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable,2045ostream_t *const out,2046istream_t *const in) {2047const size_t len = IO_istream_len(in);2048if (len == 0) {2049INP_SIZE();2050}2051const u8 *const src = IO_get_read_ptr(in, len);20522053// "Each bitstream must be read backward, that is starting from the end down2054// to the beginning. Therefore it's necessary to know the size of each2055// bitstream.2056//2057// It's also necessary to know exactly which bit is the latest. This is2058// detected by a final bit flag : the highest bit of latest byte is a2059// final-bit-flag. Consequently, a last byte of 0 is not possible. And the2060// final-bit-flag itself is not part of the useful bitstream. Hence, the2061// last byte contains between 0 and 7 useful bits."2062const int padding = 8 - highest_set_bit(src[len - 1]);2063i64 offset = len * 8 - padding;20642065u16 state1, state2;2066// "The first state (State1) encodes the even indexed symbols, and the2067// second (State2) encodes the odd indexes. State1 is initialized first, and2068// then State2, and they take turns decoding a single symbol and updating2069// their state."2070FSE_init_state(dtable, &state1, src, &offset);2071FSE_init_state(dtable, &state2, src, &offset);20722073// Decode until we overflow the stream2074// Since we decode in reverse order, overflowing the stream is offset going2075// negative2076size_t symbols_written = 0;2077while (1) {2078// "The number of symbols to decode is determined by tracking bitStream2079// overflow condition: If updating state after decoding a symbol would2080// require more bits than remain in the stream, it is assumed the extra2081// bits are 0. Then, the symbols for each of the final states are2082// decoded and the process is complete."2083IO_write_byte(out, FSE_decode_symbol(dtable, &state1, src, &offset));2084symbols_written++;2085if (offset < 0) {2086// There's still a symbol to decode in state22087IO_write_byte(out, FSE_peek_symbol(dtable, state2));2088symbols_written++;2089break;2090}20912092IO_write_byte(out, FSE_decode_symbol(dtable, &state2, src, &offset));2093symbols_written++;2094if (offset < 0) {2095// There's still a symbol to decode in state12096IO_write_byte(out, FSE_peek_symbol(dtable, state1));2097symbols_written++;2098break;2099}2100}21012102return symbols_written;2103}21042105static void FSE_init_dtable(FSE_dtable *const dtable,2106const i16 *const norm_freqs, const int num_symbs,2107const int accuracy_log) {2108if (accuracy_log > FSE_MAX_ACCURACY_LOG) {2109ERROR("FSE accuracy too large");2110}2111if (num_symbs > FSE_MAX_SYMBS) {2112ERROR("Too many symbols for FSE");2113}21142115dtable->accuracy_log = accuracy_log;21162117const size_t size = (size_t)1 << accuracy_log;2118dtable->symbols = malloc(size * sizeof(u8));2119dtable->num_bits = malloc(size * sizeof(u8));2120dtable->new_state_base = malloc(size * sizeof(u16));21212122if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) {2123BAD_ALLOC();2124}21252126// Used to determine how many bits need to be read for each state,2127// and where the destination range should start2128// Needs to be u16 because max value is 2 * max number of symbols,2129// which can be larger than a byte can store2130u16 state_desc[FSE_MAX_SYMBS];21312132// "Symbols are scanned in their natural order for "less than 1"2133// probabilities. Symbols with this probability are being attributed a2134// single cell, starting from the end of the table. These symbols define a2135// full state reset, reading Accuracy_Log bits."2136int high_threshold = size;2137for (int s = 0; s < num_symbs; s++) {2138// Scan for low probability symbols to put at the top2139if (norm_freqs[s] == -1) {2140dtable->symbols[--high_threshold] = s;2141state_desc[s] = 1;2142}2143}21442145// "All remaining symbols are sorted in their natural order. Starting from2146// symbol 0 and table position 0, each symbol gets attributed as many cells2147// as its probability. Cell allocation is spread, not linear."2148// Place the rest in the table2149const u16 step = (size >> 1) + (size >> 3) + 3;2150const u16 mask = size - 1;2151u16 pos = 0;2152for (int s = 0; s < num_symbs; s++) {2153if (norm_freqs[s] <= 0) {2154continue;2155}21562157state_desc[s] = norm_freqs[s];21582159for (int i = 0; i < norm_freqs[s]; i++) {2160// Give `norm_freqs[s]` states to symbol s2161dtable->symbols[pos] = s;2162// "A position is skipped if already occupied, typically by a "less2163// than 1" probability symbol."2164do {2165pos = (pos + step) & mask;2166} while (pos >=2167high_threshold);2168// Note: no other collision checking is necessary as `step` is2169// coprime to `size`, so the cycle will visit each position exactly2170// once2171}2172}2173if (pos != 0) {2174CORRUPTION();2175}21762177// Now we can fill baseline and num bits2178for (size_t i = 0; i < size; i++) {2179u8 symbol = dtable->symbols[i];2180u16 next_state_desc = state_desc[symbol]++;2181// Fills in the table appropriately, next_state_desc increases by symbol2182// over time, decreasing number of bits2183dtable->num_bits[i] = (u8)(accuracy_log - highest_set_bit(next_state_desc));2184// Baseline increases until the bit threshold is passed, at which point2185// it resets to 02186dtable->new_state_base[i] =2187((u16)next_state_desc << dtable->num_bits[i]) - size;2188}2189}21902191/// Decode an FSE header as defined in the Zstandard format specification and2192/// use the decoded frequencies to initialize a decoding table.2193static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in,2194const int max_accuracy_log) {2195// "An FSE distribution table describes the probabilities of all symbols2196// from 0 to the last present one (included) on a normalized scale of 1 <<2197// Accuracy_Log .2198//2199// It's a bitstream which is read forward, in little-endian fashion. It's2200// not necessary to know its exact size, since it will be discovered and2201// reported by the decoding process.2202if (max_accuracy_log > FSE_MAX_ACCURACY_LOG) {2203ERROR("FSE accuracy too large");2204}22052206// The bitstream starts by reporting on which scale it operates.2207// Accuracy_Log = low4bits + 5. Note that maximum Accuracy_Log for literal2208// and match lengths is 9, and for offsets is 8. Higher values are2209// considered errors."2210const int accuracy_log = 5 + IO_read_bits(in, 4);2211if (accuracy_log > max_accuracy_log) {2212ERROR("FSE accuracy too large");2213}22142215// "Then follows each symbol value, from 0 to last present one. The number2216// of bits used by each field is variable. It depends on :2217//2218// Remaining probabilities + 1 : example : Presuming an Accuracy_Log of 8,2219// and presuming 100 probabilities points have already been distributed, the2220// decoder may read any value from 0 to 255 - 100 + 1 == 156 (inclusive).2221// Therefore, it must read log2sup(156) == 8 bits.2222//2223// Value decoded : small values use 1 less bit : example : Presuming values2224// from 0 to 156 (inclusive) are possible, 255-156 = 99 values are remaining2225// in an 8-bits field. They are used this way : first 99 values (hence from2226// 0 to 98) use only 7 bits, values from 99 to 156 use 8 bits. "22272228i32 remaining = 1 << accuracy_log;2229i16 frequencies[FSE_MAX_SYMBS];22302231int symb = 0;2232while (remaining > 0 && symb < FSE_MAX_SYMBS) {2233// Log of the number of possible values we could read2234int bits = highest_set_bit(remaining + 1) + 1;22352236u16 val = IO_read_bits(in, bits);22372238// Try to mask out the lower bits to see if it qualifies for the "small2239// value" threshold2240const u16 lower_mask = ((u16)1 << (bits - 1)) - 1;2241const u16 threshold = ((u16)1 << bits) - 1 - (remaining + 1);22422243if ((val & lower_mask) < threshold) {2244IO_rewind_bits(in, 1);2245val = val & lower_mask;2246} else if (val > lower_mask) {2247val = val - threshold;2248}22492250// "Probability is obtained from Value decoded by following formula :2251// Proba = value - 1"2252const i16 proba = (i16)val - 1;22532254// "It means value 0 becomes negative probability -1. -1 is a special2255// probability, which means "less than 1". Its effect on distribution2256// table is described in next paragraph. For the purpose of calculating2257// cumulated distribution, it counts as one."2258remaining -= proba < 0 ? -proba : proba;22592260frequencies[symb] = proba;2261symb++;22622263// "When a symbol has a probability of zero, it is followed by a 2-bits2264// repeat flag. This repeat flag tells how many probabilities of zeroes2265// follow the current one. It provides a number ranging from 0 to 3. If2266// it is a 3, another 2-bits repeat flag follows, and so on."2267if (proba == 0) {2268// Read the next two bits to see how many more 0s2269int repeat = IO_read_bits(in, 2);22702271while (1) {2272for (int i = 0; i < repeat && symb < FSE_MAX_SYMBS; i++) {2273frequencies[symb++] = 0;2274}2275if (repeat == 3) {2276repeat = IO_read_bits(in, 2);2277} else {2278break;2279}2280}2281}2282}2283IO_align_stream(in);22842285// "When last symbol reaches cumulated total of 1 << Accuracy_Log, decoding2286// is complete. If the last symbol makes cumulated total go above 1 <<2287// Accuracy_Log, distribution is considered corrupted."2288if (remaining != 0 || symb >= FSE_MAX_SYMBS) {2289CORRUPTION();2290}22912292// Initialize the decoding table using the determined weights2293FSE_init_dtable(dtable, frequencies, symb, accuracy_log);2294}22952296static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb) {2297dtable->symbols = malloc(sizeof(u8));2298dtable->num_bits = malloc(sizeof(u8));2299dtable->new_state_base = malloc(sizeof(u16));23002301if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) {2302BAD_ALLOC();2303}23042305// This setup will always have a state of 0, always return symbol `symb`,2306// and never consume any bits2307dtable->symbols[0] = symb;2308dtable->num_bits[0] = 0;2309dtable->new_state_base[0] = 0;2310dtable->accuracy_log = 0;2311}23122313static void FSE_free_dtable(FSE_dtable *const dtable) {2314free(dtable->symbols);2315free(dtable->num_bits);2316free(dtable->new_state_base);2317memset(dtable, 0, sizeof(FSE_dtable));2318}2319/******* END FSE PRIMITIVES ***************************************************/232023212322