Path: blob/master/Utilities/cmliblzma/liblzma/lz/lz_decoder.c
3156 views
// SPDX-License-Identifier: 0BSD12///////////////////////////////////////////////////////////////////////////////3//4/// \file lz_decoder.c5/// \brief LZ out window6///7// Authors: Igor Pavlov8// Lasse Collin9//10///////////////////////////////////////////////////////////////////////////////1112// liblzma supports multiple LZ77-based filters. The LZ part is shared13// between these filters. The LZ code takes care of dictionary handling14// and passing the data between filters in the chain. The filter-specific15// part decodes from the input buffer to the dictionary.161718#include "lz_decoder.h"192021typedef struct {22/// Dictionary (history buffer)23lzma_dict dict;2425/// The actual LZ-based decoder e.g. LZMA26lzma_lz_decoder lz;2728/// Next filter in the chain, if any. Note that LZMA and LZMA2 are29/// only allowed as the last filter, but the long-range filter in30/// future can be in the middle of the chain.31lzma_next_coder next;3233/// True if the next filter in the chain has returned LZMA_STREAM_END.34bool next_finished;3536/// True if the LZ decoder (e.g. LZMA) has detected end of payload37/// marker. This may become true before next_finished becomes true.38bool this_finished;3940/// Temporary buffer needed when the LZ-based filter is not the last41/// filter in the chain. The output of the next filter is first42/// decoded into buffer[], which is then used as input for the actual43/// LZ-based decoder.44struct {45size_t pos;46size_t size;47uint8_t buffer[LZMA_BUFFER_SIZE];48} temp;49} lzma_coder;505152static void53lz_decoder_reset(lzma_coder *coder)54{55coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX;56coder->dict.full = 0;57coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0';58coder->dict.has_wrapped = false;59coder->dict.need_reset = false;60return;61}626364static lzma_ret65decode_buffer(lzma_coder *coder,66const uint8_t *restrict in, size_t *restrict in_pos,67size_t in_size, uint8_t *restrict out,68size_t *restrict out_pos, size_t out_size)69{70while (true) {71// Wrap the dictionary if needed.72if (coder->dict.pos == coder->dict.size) {73// See the comment of #define LZ_DICT_REPEAT_MAX.74coder->dict.pos = LZ_DICT_REPEAT_MAX;75coder->dict.has_wrapped = true;76memcpy(coder->dict.buf, coder->dict.buf77+ coder->dict.size78- LZ_DICT_REPEAT_MAX,79LZ_DICT_REPEAT_MAX);80}8182// Store the current dictionary position. It is needed to know83// where to start copying to the out[] buffer.84const size_t dict_start = coder->dict.pos;8586// Calculate how much we allow coder->lz.code() to decode.87// It must not decode past the end of the dictionary88// buffer, and we don't want it to decode more than is89// actually needed to fill the out[] buffer.90coder->dict.limit = coder->dict.pos91+ my_min(out_size - *out_pos,92coder->dict.size - coder->dict.pos);9394// Call the coder->lz.code() to do the actual decoding.95const lzma_ret ret = coder->lz.code(96coder->lz.coder, &coder->dict,97in, in_pos, in_size);9899// Copy the decoded data from the dictionary to the out[]100// buffer. Do it conditionally because out can be NULL101// (in which case copy_size is always 0). Calling memcpy()102// with a null-pointer is undefined even if the third103// argument is 0.104const size_t copy_size = coder->dict.pos - dict_start;105assert(copy_size <= out_size - *out_pos);106107if (copy_size > 0)108memcpy(out + *out_pos, coder->dict.buf + dict_start,109copy_size);110111*out_pos += copy_size;112113// Reset the dictionary if so requested by coder->lz.code().114if (coder->dict.need_reset) {115lz_decoder_reset(coder);116117// Since we reset dictionary, we don't check if118// dictionary became full.119if (ret != LZMA_OK || *out_pos == out_size)120return ret;121} else {122// Return if everything got decoded or an error123// occurred, or if there's no more data to decode.124//125// Note that detecting if there's something to decode126// is done by looking if dictionary become full127// instead of looking if *in_pos == in_size. This128// is because it is possible that all the input was129// consumed already but some data is pending to be130// written to the dictionary.131if (ret != LZMA_OK || *out_pos == out_size132|| coder->dict.pos < coder->dict.size)133return ret;134}135}136}137138139static lzma_ret140lz_decode(void *coder_ptr, const lzma_allocator *allocator,141const uint8_t *restrict in, size_t *restrict in_pos,142size_t in_size, uint8_t *restrict out,143size_t *restrict out_pos, size_t out_size,144lzma_action action)145{146lzma_coder *coder = coder_ptr;147148if (coder->next.code == NULL)149return decode_buffer(coder, in, in_pos, in_size,150out, out_pos, out_size);151152// We aren't the last coder in the chain, we need to decode153// our input to a temporary buffer.154while (*out_pos < out_size) {155// Fill the temporary buffer if it is empty.156if (!coder->next_finished157&& coder->temp.pos == coder->temp.size) {158coder->temp.pos = 0;159coder->temp.size = 0;160161const lzma_ret ret = coder->next.code(162coder->next.coder,163allocator, in, in_pos, in_size,164coder->temp.buffer, &coder->temp.size,165LZMA_BUFFER_SIZE, action);166167if (ret == LZMA_STREAM_END)168coder->next_finished = true;169else if (ret != LZMA_OK || coder->temp.size == 0)170return ret;171}172173if (coder->this_finished) {174if (coder->temp.size != 0)175return LZMA_DATA_ERROR;176177if (coder->next_finished)178return LZMA_STREAM_END;179180return LZMA_OK;181}182183const lzma_ret ret = decode_buffer(coder, coder->temp.buffer,184&coder->temp.pos, coder->temp.size,185out, out_pos, out_size);186187if (ret == LZMA_STREAM_END)188coder->this_finished = true;189else if (ret != LZMA_OK)190return ret;191else if (coder->next_finished && *out_pos < out_size)192return LZMA_DATA_ERROR;193}194195return LZMA_OK;196}197198199static void200lz_decoder_end(void *coder_ptr, const lzma_allocator *allocator)201{202lzma_coder *coder = coder_ptr;203204lzma_next_end(&coder->next, allocator);205lzma_free(coder->dict.buf, allocator);206207if (coder->lz.end != NULL)208coder->lz.end(coder->lz.coder, allocator);209else210lzma_free(coder->lz.coder, allocator);211212lzma_free(coder, allocator);213return;214}215216217extern lzma_ret218lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,219const lzma_filter_info *filters,220lzma_ret (*lz_init)(lzma_lz_decoder *lz,221const lzma_allocator *allocator,222lzma_vli id, const void *options,223lzma_lz_options *lz_options))224{225// Allocate the base structure if it isn't already allocated.226lzma_coder *coder = next->coder;227if (coder == NULL) {228coder = lzma_alloc(sizeof(lzma_coder), allocator);229if (coder == NULL)230return LZMA_MEM_ERROR;231232next->coder = coder;233next->code = &lz_decode;234next->end = &lz_decoder_end;235236coder->dict.buf = NULL;237coder->dict.size = 0;238coder->lz = LZMA_LZ_DECODER_INIT;239coder->next = LZMA_NEXT_CODER_INIT;240}241242// Allocate and initialize the LZ-based decoder. It will also give243// us the dictionary size.244lzma_lz_options lz_options;245return_if_error(lz_init(&coder->lz, allocator,246filters[0].id, filters[0].options, &lz_options));247248// If the dictionary size is very small, increase it to 4096 bytes.249// This is to prevent constant wrapping of the dictionary, which250// would slow things down. The downside is that since we don't check251// separately for the real dictionary size, we may happily accept252// corrupt files.253if (lz_options.dict_size < 4096)254lz_options.dict_size = 4096;255256// Make dictionary size a multiple of 16. Some LZ-based decoders like257// LZMA use the lowest bits lzma_dict.pos to know the alignment of the258// data. Aligned buffer is also good when memcpying from the259// dictionary to the output buffer, since applications are260// recommended to give aligned buffers to liblzma.261//262// Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is263// needed for alloc_size.264//265// Avoid integer overflow.266if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX)267return LZMA_MEM_ERROR;268269lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));270271// Reserve extra space as explained in the comment272// of #define LZ_DICT_REPEAT_MAX.273const size_t alloc_size274= lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX;275276// Allocate and initialize the dictionary.277if (coder->dict.size != alloc_size) {278lzma_free(coder->dict.buf, allocator);279coder->dict.buf = lzma_alloc(alloc_size, allocator);280if (coder->dict.buf == NULL)281return LZMA_MEM_ERROR;282283// NOTE: Yes, alloc_size, not lz_options.dict_size. The way284// coder->dict.full is updated will take care that we will285// still reject distances larger than lz_options.dict_size.286coder->dict.size = alloc_size;287}288289lz_decoder_reset(next->coder);290291// Use the preset dictionary if it was given to us.292if (lz_options.preset_dict != NULL293&& lz_options.preset_dict_size > 0) {294// If the preset dictionary is bigger than the actual295// dictionary, copy only the tail.296const size_t copy_size = my_min(lz_options.preset_dict_size,297lz_options.dict_size);298const size_t offset = lz_options.preset_dict_size - copy_size;299memcpy(coder->dict.buf + coder->dict.pos,300lz_options.preset_dict + offset,301copy_size);302303// dict.pos isn't zero after lz_decoder_reset().304coder->dict.pos += copy_size;305coder->dict.full = copy_size;306}307308// Miscellaneous initializations309coder->next_finished = false;310coder->this_finished = false;311coder->temp.pos = 0;312coder->temp.size = 0;313314// Initialize the next filter in the chain, if any.315return lzma_next_filter_init(&coder->next, allocator, filters + 1);316}317318319extern uint64_t320lzma_lz_decoder_memusage(size_t dictionary_size)321{322return sizeof(lzma_coder) + (uint64_t)(dictionary_size);323}324325326