Path: blob/master/Utilities/cmliblzma/liblzma/lz/lz_decoder.h
3156 views
// SPDX-License-Identifier: 0BSD12///////////////////////////////////////////////////////////////////////////////3//4/// \file lz_decoder.h5/// \brief LZ out window6///7// Authors: Igor Pavlov8// Lasse Collin9//10///////////////////////////////////////////////////////////////////////////////1112#ifndef LZMA_LZ_DECODER_H13#define LZMA_LZ_DECODER_H1415#include "common.h"161718/// Maximum length of a match rounded up to a nice power of 2 which is19/// a good size for aligned memcpy(). The allocated dictionary buffer will20/// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:21///22/// (1) Every time the decoder reaches the end of the dictionary buffer,23/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.24/// This way dict_repeat() will only need to copy from one place,25/// never from both the end and beginning of the buffer.26///27/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between28/// the oldest byte still in the dictionary and the current write29/// position. This way dict_repeat(dict, dict->size - 1, &len)30/// won't need memmove() as the copying cannot overlap.31///32/// Note that memcpy() still cannot be used if distance < len.33///34/// LZMA's longest match length is 273 so pick a multiple of 16 above that.35#define LZ_DICT_REPEAT_MAX 288363738typedef struct {39/// Pointer to the dictionary buffer.40uint8_t *buf;4142/// Write position in dictionary. The next byte will be written to43/// buf[pos].44size_t pos;4546/// Indicates how full the dictionary is. This is used by47/// dict_is_distance_valid() to detect corrupt files that would48/// read beyond the beginning of the dictionary.49size_t full;5051/// Write limit52size_t limit;5354/// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes55/// larger than the actual dictionary size. This is enforced by56/// how the value for "full" is set; it can be at most57/// "size - 2 * LZ_DICT_REPEAT_MAX".58size_t size;5960/// True once the dictionary has become full and the writing position61/// has been wrapped in decode_buffer() in lz_decoder.c.62bool has_wrapped;6364/// True when dictionary should be reset before decoding more data.65bool need_reset;6667} lzma_dict;686970typedef struct {71size_t dict_size;72const uint8_t *preset_dict;73size_t preset_dict_size;74} lzma_lz_options;757677typedef struct {78/// Data specific to the LZ-based decoder79void *coder;8081/// Function to decode from in[] to *dict82lzma_ret (*code)(void *coder,83lzma_dict *restrict dict, const uint8_t *restrict in,84size_t *restrict in_pos, size_t in_size);8586void (*reset)(void *coder, const void *options);8788/// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN89/// then allow_eopm will always be true.90void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,91bool allow_eopm);9293/// Free allocated resources94void (*end)(void *coder, const lzma_allocator *allocator);9596} lzma_lz_decoder;979899#define LZMA_LZ_DECODER_INIT \100(lzma_lz_decoder){ \101.coder = NULL, \102.code = NULL, \103.reset = NULL, \104.set_uncompressed = NULL, \105.end = NULL, \106}107108109extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,110const lzma_allocator *allocator,111const lzma_filter_info *filters,112lzma_ret (*lz_init)(lzma_lz_decoder *lz,113const lzma_allocator *allocator,114lzma_vli id, const void *options,115lzma_lz_options *lz_options));116117extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);118119120//////////////////////121// Inline functions //122//////////////////////123124/// Get a byte from the history buffer.125static inline uint8_t126dict_get(const lzma_dict *const dict, const uint32_t distance)127{128return dict->buf[dict->pos - distance - 1129+ (distance < dict->pos130? 0 : dict->size - LZ_DICT_REPEAT_MAX)];131}132133134/// Optimized version of dict_get(dict, 0)135static inline uint8_t136dict_get0(const lzma_dict *const dict)137{138return dict->buf[dict->pos - 1];139}140141142/// Test if dictionary is empty.143static inline bool144dict_is_empty(const lzma_dict *const dict)145{146return dict->full == 0;147}148149150/// Validate the match distance151static inline bool152dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)153{154return dict->full > distance;155}156157158/// Repeat *len bytes at distance.159static inline bool160dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)161{162// Don't write past the end of the dictionary.163const size_t dict_avail = dict->limit - dict->pos;164uint32_t left = my_min(dict_avail, *len);165*len -= left;166167size_t back = dict->pos - distance - 1;168if (distance >= dict->pos)169back += dict->size - LZ_DICT_REPEAT_MAX;170171// Repeat a block of data from the history. Because memcpy() is faster172// than copying byte by byte in a loop, the copying process gets split173// into two cases.174if (distance < left) {175// Source and target areas overlap, thus we can't use176// memcpy() nor even memmove() safely.177do {178dict->buf[dict->pos++] = dict->buf[back++];179} while (--left > 0);180} else {181memcpy(dict->buf + dict->pos, dict->buf + back, left);182dict->pos += left;183}184185// Update how full the dictionary is.186if (!dict->has_wrapped)187dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;188189return *len != 0;190}191192193static inline void194dict_put(lzma_dict *dict, uint8_t byte)195{196dict->buf[dict->pos++] = byte;197198if (!dict->has_wrapped)199dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;200}201202203/// Puts one byte into the dictionary. Returns true if the dictionary was204/// already full and the byte couldn't be added.205static inline bool206dict_put_safe(lzma_dict *dict, uint8_t byte)207{208if (unlikely(dict->pos == dict->limit))209return true;210211dict_put(dict, byte);212return false;213}214215216/// Copies arbitrary amount of data into the dictionary.217static inline void218dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,219size_t *restrict in_pos, size_t in_size,220size_t *restrict left)221{222// NOTE: If we are being given more data than the size of the223// dictionary, it could be possible to optimize the LZ decoder224// so that not everything needs to go through the dictionary.225// This shouldn't be very common thing in practice though, and226// the slowdown of one extra memcpy() isn't bad compared to how227// much time it would have taken if the data were compressed.228229if (in_size - *in_pos > *left)230in_size = *in_pos + *left;231232*left -= lzma_bufcpy(in, in_pos, in_size,233dict->buf, &dict->pos, dict->limit);234235if (!dict->has_wrapped)236dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;237238return;239}240241242static inline void243dict_reset(lzma_dict *dict)244{245dict->need_reset = true;246return;247}248249#endif250251252