Path: blob/master/Utilities/cmliblzma/liblzma/lz/lz_encoder.c
3156 views
// SPDX-License-Identifier: 0BSD12///////////////////////////////////////////////////////////////////////////////3//4/// \file lz_encoder.c5/// \brief LZ in window6///7// Authors: Igor Pavlov8// Lasse Collin9//10///////////////////////////////////////////////////////////////////////////////1112#include "lz_encoder.h"13#include "lz_encoder_hash.h"1415// See lz_encoder_hash.h. This is a bit hackish but avoids making16// endianness a conditional in makefiles.17#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)18# include "lz_encoder_hash_table.h"19#endif2021#include "memcmplen.h"222324typedef struct {25/// LZ-based encoder e.g. LZMA26lzma_lz_encoder lz;2728/// History buffer and match finder29lzma_mf mf;3031/// Next coder in the chain32lzma_next_coder next;33} lzma_coder;343536/// \brief Moves the data in the input window to free space for new data37///38/// mf->buffer is a sliding input window, which keeps mf->keep_size_before39/// bytes of input history available all the time. Now and then we need to40/// "slide" the buffer to make space for the new data to the end of the41/// buffer. At the same time, data older than keep_size_before is dropped.42///43static void44move_window(lzma_mf *mf)45{46// Align the move to a multiple of 16 bytes. Some LZ-based encoders47// like LZMA use the lowest bits of mf->read_pos to know the48// alignment of the uncompressed data. We also get better speed49// for memmove() with aligned buffers.50assert(mf->read_pos > mf->keep_size_before);51const uint32_t move_offset52= (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);5354assert(mf->write_pos > move_offset);55const size_t move_size = mf->write_pos - move_offset;5657assert(move_offset + move_size <= mf->size);5859memmove(mf->buffer, mf->buffer + move_offset, move_size);6061mf->offset += move_offset;62mf->read_pos -= move_offset;63mf->read_limit -= move_offset;64mf->write_pos -= move_offset;6566return;67}686970/// \brief Tries to fill the input window (mf->buffer)71///72/// If we are the last encoder in the chain, our input data is in in[].73/// Otherwise we call the next filter in the chain to process in[] and74/// write its output to mf->buffer.75///76/// This function must not be called once it has returned LZMA_STREAM_END.77///78static lzma_ret79fill_window(lzma_coder *coder, const lzma_allocator *allocator,80const uint8_t *in, size_t *in_pos, size_t in_size,81lzma_action action)82{83assert(coder->mf.read_pos <= coder->mf.write_pos);8485// Move the sliding window if needed.86if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)87move_window(&coder->mf);8889// Maybe this is ugly, but lzma_mf uses uint32_t for most things90// (which I find cleanest), but we need size_t here when filling91// the history window.92size_t write_pos = coder->mf.write_pos;93lzma_ret ret;94if (coder->next.code == NULL) {95// Not using a filter, simply memcpy() as much as possible.96lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,97&write_pos, coder->mf.size);9899ret = action != LZMA_RUN && *in_pos == in_size100? LZMA_STREAM_END : LZMA_OK;101102} else {103ret = coder->next.code(coder->next.coder, allocator,104in, in_pos, in_size,105coder->mf.buffer, &write_pos,106coder->mf.size, action);107}108109coder->mf.write_pos = write_pos;110111// Silence Valgrind. lzma_memcmplen() can read extra bytes112// and Valgrind will give warnings if those bytes are uninitialized113// because Valgrind cannot see that the values of the uninitialized114// bytes are eventually ignored.115memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA);116117// If end of stream has been reached or flushing completed, we allow118// the encoder to process all the input (that is, read_pos is allowed119// to reach write_pos). Otherwise we keep keep_size_after bytes120// available as prebuffer.121if (ret == LZMA_STREAM_END) {122assert(*in_pos == in_size);123ret = LZMA_OK;124coder->mf.action = action;125coder->mf.read_limit = coder->mf.write_pos;126127} else if (coder->mf.write_pos > coder->mf.keep_size_after) {128// This needs to be done conditionally, because if we got129// only little new input, there may be too little input130// to do any encoding yet.131coder->mf.read_limit = coder->mf.write_pos132- coder->mf.keep_size_after;133}134135// Restart the match finder after finished LZMA_SYNC_FLUSH.136if (coder->mf.pending > 0137&& coder->mf.read_pos < coder->mf.read_limit) {138// Match finder may update coder->pending and expects it to139// start from zero, so use a temporary variable.140const uint32_t pending = coder->mf.pending;141coder->mf.pending = 0;142143// Rewind read_pos so that the match finder can hash144// the pending bytes.145assert(coder->mf.read_pos >= pending);146coder->mf.read_pos -= pending;147148// Call the skip function directly instead of using149// mf_skip(), since we don't want to touch mf->read_ahead.150coder->mf.skip(&coder->mf, pending);151}152153return ret;154}155156157static lzma_ret158lz_encode(void *coder_ptr, const lzma_allocator *allocator,159const uint8_t *restrict in, size_t *restrict in_pos,160size_t in_size,161uint8_t *restrict out, size_t *restrict out_pos,162size_t out_size, lzma_action action)163{164lzma_coder *coder = coder_ptr;165166while (*out_pos < out_size167&& (*in_pos < in_size || action != LZMA_RUN)) {168// Read more data to coder->mf.buffer if needed.169if (coder->mf.action == LZMA_RUN && coder->mf.read_pos170>= coder->mf.read_limit)171return_if_error(fill_window(coder, allocator,172in, in_pos, in_size, action));173174// Encode175const lzma_ret ret = coder->lz.code(coder->lz.coder,176&coder->mf, out, out_pos, out_size);177if (ret != LZMA_OK) {178// Setting this to LZMA_RUN for cases when we are179// flushing. It doesn't matter when finishing or if180// an error occurred.181coder->mf.action = LZMA_RUN;182return ret;183}184}185186return LZMA_OK;187}188189190static bool191lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,192const lzma_lz_options *lz_options)193{194// For now, the dictionary size is limited to 1.5 GiB. This may grow195// in the future if needed, but it needs a little more work than just196// changing this check.197if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size)198|| lz_options->nice_len > lz_options->match_len_max)199return true;200201mf->keep_size_before = lz_options->before_size + lz_options->dict_size;202203mf->keep_size_after = lz_options->after_size204+ lz_options->match_len_max;205206// To avoid constant memmove()s, allocate some extra space. Since207// memmove()s become more expensive when the size of the buffer208// increases, we reserve more space when a large dictionary is209// used to make the memmove() calls rarer.210//211// This works with dictionaries up to about 3 GiB. If bigger212// dictionary is wanted, some extra work is needed:213// - Several variables in lzma_mf have to be changed from uint32_t214// to size_t.215// - Memory usage calculation needs something too, e.g. use uint64_t216// for mf->size.217uint32_t reserve = lz_options->dict_size / 2;218if (reserve > (UINT32_C(1) << 30))219reserve /= 2;220221reserve += (lz_options->before_size + lz_options->match_len_max222+ lz_options->after_size) / 2 + (UINT32_C(1) << 19);223224const uint32_t old_size = mf->size;225mf->size = mf->keep_size_before + reserve + mf->keep_size_after;226227// Deallocate the old history buffer if it exists but has different228// size than what is needed now.229if (mf->buffer != NULL && old_size != mf->size) {230lzma_free(mf->buffer, allocator);231mf->buffer = NULL;232}233234// Match finder options235mf->match_len_max = lz_options->match_len_max;236mf->nice_len = lz_options->nice_len;237238// cyclic_size has to stay smaller than 2 Gi. Note that this doesn't239// mean limiting dictionary size to less than 2 GiB. With a match240// finder that uses multibyte resolution (hashes start at e.g. every241// fourth byte), cyclic_size would stay below 2 Gi even when242// dictionary size is greater than 2 GiB.243//244// It would be possible to allow cyclic_size >= 2 Gi, but then we245// would need to be careful to use 64-bit types in various places246// (size_t could do since we would need bigger than 32-bit address247// space anyway). It would also require either zeroing a multigigabyte248// buffer at initialization (waste of time and RAM) or allow249// normalization in lz_encoder_mf.c to access uninitialized250// memory to keep the code simpler. The current way is simple and251// still allows pretty big dictionaries, so I don't expect these252// limits to change.253mf->cyclic_size = lz_options->dict_size + 1;254255// Validate the match finder ID and setup the function pointers.256switch (lz_options->match_finder) {257#ifdef HAVE_MF_HC3258case LZMA_MF_HC3:259mf->find = &lzma_mf_hc3_find;260mf->skip = &lzma_mf_hc3_skip;261break;262#endif263#ifdef HAVE_MF_HC4264case LZMA_MF_HC4:265mf->find = &lzma_mf_hc4_find;266mf->skip = &lzma_mf_hc4_skip;267break;268#endif269#ifdef HAVE_MF_BT2270case LZMA_MF_BT2:271mf->find = &lzma_mf_bt2_find;272mf->skip = &lzma_mf_bt2_skip;273break;274#endif275#ifdef HAVE_MF_BT3276case LZMA_MF_BT3:277mf->find = &lzma_mf_bt3_find;278mf->skip = &lzma_mf_bt3_skip;279break;280#endif281#ifdef HAVE_MF_BT4282case LZMA_MF_BT4:283mf->find = &lzma_mf_bt4_find;284mf->skip = &lzma_mf_bt4_skip;285break;286#endif287288default:289return true;290}291292// Calculate the sizes of mf->hash and mf->son.293//294// NOTE: Since 5.3.5beta the LZMA encoder ensures that nice_len295// is big enough for the selected match finder. This makes it296// easier for applications as nice_len = 2 will always be accepted297// even though the effective value can be slightly bigger.298const uint32_t hash_bytes299= mf_get_hash_bytes(lz_options->match_finder);300assert(hash_bytes <= mf->nice_len);301302const bool is_bt = (lz_options->match_finder & 0x10) != 0;303uint32_t hs;304305if (hash_bytes == 2) {306hs = 0xFFFF;307} else {308// Round dictionary size up to the next 2^n - 1 so it can309// be used as a hash mask.310hs = lz_options->dict_size - 1;311hs |= hs >> 1;312hs |= hs >> 2;313hs |= hs >> 4;314hs |= hs >> 8;315hs >>= 1;316hs |= 0xFFFF;317318if (hs > (UINT32_C(1) << 24)) {319if (hash_bytes == 3)320hs = (UINT32_C(1) << 24) - 1;321else322hs >>= 1;323}324}325326mf->hash_mask = hs;327328++hs;329if (hash_bytes > 2)330hs += HASH_2_SIZE;331if (hash_bytes > 3)332hs += HASH_3_SIZE;333/*334No match finder uses this at the moment.335if (mf->hash_bytes > 4)336hs += HASH_4_SIZE;337*/338339const uint32_t old_hash_count = mf->hash_count;340const uint32_t old_sons_count = mf->sons_count;341mf->hash_count = hs;342mf->sons_count = mf->cyclic_size;343if (is_bt)344mf->sons_count *= 2;345346// Deallocate the old hash array if it exists and has different size347// than what is needed now.348if (old_hash_count != mf->hash_count349|| old_sons_count != mf->sons_count) {350lzma_free(mf->hash, allocator);351mf->hash = NULL;352353lzma_free(mf->son, allocator);354mf->son = NULL;355}356357// Maximum number of match finder cycles358mf->depth = lz_options->depth;359if (mf->depth == 0) {360if (is_bt)361mf->depth = 16 + mf->nice_len / 2;362else363mf->depth = 4 + mf->nice_len / 4;364}365366return false;367}368369370static bool371lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator,372const lzma_lz_options *lz_options)373{374// Allocate the history buffer.375if (mf->buffer == NULL) {376// lzma_memcmplen() is used for the dictionary buffer377// so we need to allocate a few extra bytes to prevent378// it from reading past the end of the buffer.379mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,380allocator);381if (mf->buffer == NULL)382return true;383384// Keep Valgrind happy with lzma_memcmplen() and initialize385// the extra bytes whose value may get read but which will386// effectively get ignored.387memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);388}389390// Use cyclic_size as initial mf->offset. This allows391// avoiding a few branches in the match finders. The downside is392// that match finder needs to be normalized more often, which may393// hurt performance with huge dictionaries.394mf->offset = mf->cyclic_size;395mf->read_pos = 0;396mf->read_ahead = 0;397mf->read_limit = 0;398mf->write_pos = 0;399mf->pending = 0;400401#if UINT32_MAX >= SIZE_MAX / 4402// Check for integer overflow. (Huge dictionaries are not403// possible on 32-bit CPU.)404if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)405|| mf->sons_count > SIZE_MAX / sizeof(uint32_t))406return true;407#endif408409// Allocate and initialize the hash table. Since EMPTY_HASH_VALUE410// is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.411//412// We don't need to initialize mf->son, but not doing that may413// make Valgrind complain in normalization (see normalize() in414// lz_encoder_mf.c). Skipping the initialization is *very* good415// when big dictionary is used but only small amount of data gets416// actually compressed: most of the mf->son won't get actually417// allocated by the kernel, so we avoid wasting RAM and improve418// initialization speed a lot.419if (mf->hash == NULL) {420mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),421allocator);422mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),423allocator);424425if (mf->hash == NULL || mf->son == NULL) {426lzma_free(mf->hash, allocator);427mf->hash = NULL;428429lzma_free(mf->son, allocator);430mf->son = NULL;431432return true;433}434} else {435/*436for (uint32_t i = 0; i < mf->hash_count; ++i)437mf->hash[i] = EMPTY_HASH_VALUE;438*/439memzero(mf->hash, mf->hash_count * sizeof(uint32_t));440}441442mf->cyclic_pos = 0;443444// Handle preset dictionary.445if (lz_options->preset_dict != NULL446&& lz_options->preset_dict_size > 0) {447// If the preset dictionary is bigger than the actual448// dictionary, use only the tail.449mf->write_pos = my_min(lz_options->preset_dict_size, mf->size);450memcpy(mf->buffer, lz_options->preset_dict451+ lz_options->preset_dict_size - mf->write_pos,452mf->write_pos);453mf->action = LZMA_SYNC_FLUSH;454mf->skip(mf, mf->write_pos);455}456457mf->action = LZMA_RUN;458459return false;460}461462463extern uint64_t464lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)465{466// Old buffers must not exist when calling lz_encoder_prepare().467lzma_mf mf = {468.buffer = NULL,469.hash = NULL,470.son = NULL,471.hash_count = 0,472.sons_count = 0,473};474475// Setup the size information into mf.476if (lz_encoder_prepare(&mf, NULL, lz_options))477return UINT64_MAX;478479// Calculate the memory usage.480return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)481+ mf.size + sizeof(lzma_coder);482}483484485static void486lz_encoder_end(void *coder_ptr, const lzma_allocator *allocator)487{488lzma_coder *coder = coder_ptr;489490lzma_next_end(&coder->next, allocator);491492lzma_free(coder->mf.son, allocator);493lzma_free(coder->mf.hash, allocator);494lzma_free(coder->mf.buffer, allocator);495496if (coder->lz.end != NULL)497coder->lz.end(coder->lz.coder, allocator);498else499lzma_free(coder->lz.coder, allocator);500501lzma_free(coder, allocator);502return;503}504505506static lzma_ret507lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator,508const lzma_filter *filters_null lzma_attribute((__unused__)),509const lzma_filter *reversed_filters)510{511lzma_coder *coder = coder_ptr;512513if (coder->lz.options_update == NULL)514return LZMA_PROG_ERROR;515516return_if_error(coder->lz.options_update(517coder->lz.coder, reversed_filters));518519return lzma_next_filter_update(520&coder->next, allocator, reversed_filters + 1);521}522523524static lzma_ret525lz_encoder_set_out_limit(void *coder_ptr, uint64_t *uncomp_size,526uint64_t out_limit)527{528lzma_coder *coder = coder_ptr;529530// This is supported only if there are no other filters chained.531if (coder->next.code == NULL && coder->lz.set_out_limit != NULL)532return coder->lz.set_out_limit(533coder->lz.coder, uncomp_size, out_limit);534535return LZMA_OPTIONS_ERROR;536}537538539extern lzma_ret540lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,541const lzma_filter_info *filters,542lzma_ret (*lz_init)(lzma_lz_encoder *lz,543const lzma_allocator *allocator,544lzma_vli id, const void *options,545lzma_lz_options *lz_options))546{547#if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)548// The CRC32 table must be initialized.549lzma_crc32_init();550#endif551552// Allocate and initialize the base data structure.553lzma_coder *coder = next->coder;554if (coder == NULL) {555coder = lzma_alloc(sizeof(lzma_coder), allocator);556if (coder == NULL)557return LZMA_MEM_ERROR;558559next->coder = coder;560next->code = &lz_encode;561next->end = &lz_encoder_end;562next->update = &lz_encoder_update;563next->set_out_limit = &lz_encoder_set_out_limit;564565coder->lz.coder = NULL;566coder->lz.code = NULL;567coder->lz.end = NULL;568coder->lz.options_update = NULL;569coder->lz.set_out_limit = NULL;570571// mf.size is initialized to silence Valgrind572// when used on optimized binaries (GCC may reorder573// code in a way that Valgrind gets unhappy).574coder->mf.buffer = NULL;575coder->mf.size = 0;576coder->mf.hash = NULL;577coder->mf.son = NULL;578coder->mf.hash_count = 0;579coder->mf.sons_count = 0;580581coder->next = LZMA_NEXT_CODER_INIT;582}583584// Initialize the LZ-based encoder.585lzma_lz_options lz_options;586return_if_error(lz_init(&coder->lz, allocator,587filters[0].id, filters[0].options, &lz_options));588589// Setup the size information into coder->mf and deallocate590// old buffers if they have wrong size.591if (lz_encoder_prepare(&coder->mf, allocator, &lz_options))592return LZMA_OPTIONS_ERROR;593594// Allocate new buffers if needed, and do the rest of595// the initialization.596if (lz_encoder_init(&coder->mf, allocator, &lz_options))597return LZMA_MEM_ERROR;598599// Initialize the next filter in the chain, if any.600return lzma_next_filter_init(&coder->next, allocator, filters + 1);601}602603604extern LZMA_API(lzma_bool)605lzma_mf_is_supported(lzma_match_finder mf)606{607switch (mf) {608#ifdef HAVE_MF_HC3609case LZMA_MF_HC3:610return true;611#endif612#ifdef HAVE_MF_HC4613case LZMA_MF_HC4:614return true;615#endif616#ifdef HAVE_MF_BT2617case LZMA_MF_BT2:618return true;619#endif620#ifdef HAVE_MF_BT3621case LZMA_MF_BT3:622return true;623#endif624#ifdef HAVE_MF_BT4625case LZMA_MF_BT4:626return true;627#endif628default:629return false;630}631}632633634