/* Source: sys/contrib/xz-embedded/linux/lib/xz/xz_dec_lzma2.c */
/*
 * LZMA2 decoder
 *
 * Authors: Lasse Collin <[email protected]>
 *          Igor Pavlov <https://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"
#include "xz_lzma2.h"

/*
 * Range decoder initialization eats the first five bytes of each LZMA chunk.
 */
#define RC_INIT_BYTES 5

/*
 * Minimum number of usable input buffer to safely decode one LZMA symbol.
 * The worst case is that we decode 22 bits using probabilities and 26
 * direct bits. This may decode at maximum of 20 bytes of input. However,
 * lzma_main() does an extra normalization before returning, thus we
 * need to put 21 here.
 */
#define LZMA_IN_REQUIRED 21

/*
 * Dictionary (history buffer)
 *
 * These are always true:
 *    start <= pos <= full <= end
 *    pos <= limit <= end
 *
 * In multi-call mode, also these are true:
 *    end == size
 *    size <= size_max
 *    allocated <= size
 *
 * Most of these variables are size_t to support single-call mode,
 * in which the dictionary variables address the actual output
 * buffer directly.
 */
struct dictionary {
	/* Beginning of the history buffer */
	uint8_t *buf;

	/* Old position in buf (before decoding more data) */
	size_t start;

	/* Position in buf */
	size_t pos;

	/*
	 * How full dictionary is. This is used to detect corrupt input that
	 * would read beyond the beginning of the uncompressed stream.
	 */
	size_t full;

	/* Write limit; we don't write to buf[limit] or later bytes. */
	size_t limit;

	/*
	 * End of the dictionary buffer. In multi-call mode, this is
	 * the same as the dictionary size. In single-call mode, this
	 * indicates the size of the output buffer.
	 */
	size_t end;

	/*
	 * Size of the dictionary as specified in Block Header. This is used
	 * together with "full" to detect corrupt input that would make us
	 * read beyond the beginning of the uncompressed stream.
	 */
	uint32_t size;

	/*
	 * Maximum allowed dictionary size in multi-call mode.
	 * This is ignored in single-call mode.
	 */
	uint32_t size_max;

	/*
	 * Amount of memory currently allocated for the dictionary.
	 * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
	 * size_max is always the same as the allocated size.)
	 */
	uint32_t allocated;

	/* Operation mode */
	enum xz_mode mode;
};

/* Range decoder */
struct rc_dec {
	uint32_t range;
	uint32_t code;

	/*
	 * Number of initializing bytes remaining to be read
	 * by rc_read_init().
	 */
	uint32_t init_bytes_left;

	/*
	 * Buffer from which we read our input. It can be either
	 * temp.buf or the caller-provided input buffer.
	 */
	const uint8_t *in;
	size_t in_pos;
	size_t in_limit;
};

/* Probabilities for a length decoder. */
struct lzma_len_dec {
	/* Probability of match length being at least 10 */
	uint16_t choice;

	/* Probability of match length being at least 18 */
	uint16_t choice2;

	/* Probabilities for match lengths 2-9 */
	uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

	/* Probabilities for match lengths 10-17 */
	uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

	/* Probabilities for match lengths 18-273 */
	uint16_t high[LEN_HIGH_SYMBOLS];
};

struct lzma_dec {
	/* Distances of latest four matches */
	uint32_t rep0;
	uint32_t rep1;
	uint32_t rep2;
	uint32_t rep3;

	/* Types of the most recently seen LZMA symbols */
	enum lzma_state state;

	/*
	 * Length of a match. This is updated so that dict_repeat can
	 * be called again to finish repeating the whole match.
	 */
	uint32_t len;

	/*
	 * LZMA properties or related bit masks (number of literal
	 * context bits, a mask derived from the number of literal
	 * position bits, and a mask derived from the number of
	 * position bits)
	 */
	uint32_t lc;
	uint32_t literal_pos_mask; /* (1 << lp) - 1 */
	uint32_t pos_mask;         /* (1 << pb) - 1 */

	/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
	uint16_t is_match[STATES][POS_STATES_MAX];

	/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
	uint16_t is_rep[STATES];

	/*
	 * If 0, distance of a repeated match is rep0.
	 * Otherwise check is_rep1.
	 */
	uint16_t is_rep0[STATES];

	/*
	 * If 0, distance of a repeated match is rep1.
	 * Otherwise check is_rep2.
	 */
	uint16_t is_rep1[STATES];

	/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
	uint16_t is_rep2[STATES];

	/*
	 * If 1, the repeated match has length of one byte. Otherwise
	 * the length is decoded from rep_len_decoder.
	 */
	uint16_t is_rep0_long[STATES][POS_STATES_MAX];

	/*
	 * Probability tree for the highest two bits of the match
	 * distance. There is a separate probability tree for match
	 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
	 */
	uint16_t dist_slot[DIST_STATES][DIST_SLOTS];

	/*
	 * Probability trees for additional bits for match distance
	 * when the distance is in the range [4, 127].
	 */
	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];

	/*
	 * Probability tree for the lowest four bits of a match
	 * distance that is equal to or greater than 128.
	 */
	uint16_t dist_align[ALIGN_SIZE];

	/* Length of a normal match */
	struct lzma_len_dec match_len_dec;

	/* Length of a repeated match */
	struct lzma_len_dec rep_len_dec;

	/* Probabilities of literals */
	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
};

struct lzma2_dec {
	/* Position in xz_dec_lzma2_run(). */
	enum lzma2_seq {
		SEQ_CONTROL,
		SEQ_UNCOMPRESSED_1,
		SEQ_UNCOMPRESSED_2,
		SEQ_COMPRESSED_0,
		SEQ_COMPRESSED_1,
		SEQ_PROPERTIES,
		SEQ_LZMA_PREPARE,
		SEQ_LZMA_RUN,
		SEQ_COPY
	} sequence;

	/* Next position after decoding the compressed size of the chunk. */
	enum lzma2_seq next_sequence;

	/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
	uint32_t uncompressed;

	/*
	 * Compressed size of LZMA chunk or compressed/uncompressed
	 * size of uncompressed chunk (64 KiB at maximum)
	 */
	uint32_t compressed;

	/*
	 * True if dictionary reset is needed. This is false before
	 * the first chunk (LZMA or uncompressed).
	 */
	bool need_dict_reset;

	/*
	 * True if new LZMA properties are needed. This is false
	 * before the first LZMA chunk.
	 */
	bool need_props;

#ifdef XZ_DEC_MICROLZMA
	bool pedantic_microlzma;
#endif
};

struct xz_dec_lzma2 {
	/*
	 * The order below is important on x86 to reduce code size and
	 * it shouldn't hurt on other platforms. Everything up to and
	 * including lzma.pos_mask are in the first 128 bytes on x86-32,
	 * which allows using smaller instructions to access those
	 * variables. On x86-64, fewer variables fit into the first 128
	 * bytes, but this is still the best order without sacrificing
	 * the readability by splitting the structures.
	 */
	struct rc_dec rc;
	struct dictionary dict;
	struct lzma2_dec lzma2;
	struct lzma_dec lzma;

	/*
	 * Temporary buffer which holds small number of input bytes between
	 * decoder calls. See lzma2_lzma() for details.
	 */
	struct {
		uint32_t size;
		uint8_t buf[3 * LZMA_IN_REQUIRED];
	} temp;
};

/**************
 * Dictionary *
 **************/

/*
 * Reset the dictionary state. When in single-call mode, set up the beginning
 * of the dictionary to point to the actual output buffer.
 */
static void dict_reset(struct dictionary *dict, struct xz_buf *b)
{
	if (DEC_IS_SINGLE(dict->mode)) {
		dict->buf = b->out + b->out_pos;
		dict->end = b->out_size - b->out_pos;
	}

	dict->start = 0;
	dict->pos = 0;
	dict->limit = 0;
	dict->full = 0;
}

/* Set dictionary write limit */
static void dict_limit(struct dictionary *dict, size_t out_max)
{
	if (dict->end - dict->pos <= out_max)
		dict->limit = dict->end;
	else
		dict->limit = dict->pos + out_max;
}

/* Return true if at least one byte can be written into the dictionary. */
static inline bool dict_has_space(const struct dictionary *dict)
{
	return dict->pos < dict->limit;
}

/*
 * Get a byte from the dictionary at the given distance. The distance is
 * assumed to be valid, or as a special case, zero when the dictionary is
 * still empty. This special case is needed for single-call decoding to
 * avoid writing a '\0' to the end of the destination buffer.
 */
static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
{
	size_t offset = dict->pos - dist - 1;

	if (dist >= dict->pos)
		offset += dict->end;

	return dict->full > 0 ? dict->buf[offset] : 0;
}

/*
 * Put one byte into the dictionary. It is assumed that there is space for it.
 */
static inline void dict_put(struct dictionary *dict, uint8_t byte)
{
	dict->buf[dict->pos++] = byte;

	if (dict->full < dict->pos)
		dict->full = dict->pos;
}

/*
 * Repeat given number of bytes from the given distance. If the distance is
 * invalid, false is returned. On success, true is returned and *len is
 * updated to indicate how many bytes were left to be repeated.
 */
static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
{
	size_t back;
	uint32_t left;

	/* A distance reaching beyond what has been decoded is corrupt. */
	if (dist >= dict->full || dist >= dict->size)
		return false;

	left = min_t(size_t, dict->limit - dict->pos, *len);
	*len -= left;

	back = dict->pos - dist - 1;
	if (dist >= dict->pos)
		back += dict->end;

	do {
		dict->buf[dict->pos++] = dict->buf[back++];
		if (back == dict->end)
			back = 0;
	} while (--left > 0);

	if (dict->full < dict->pos)
		dict->full = dict->pos;

	return true;
}

/* Copy uncompressed data as is from input to dictionary and output buffers. */
static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
			      uint32_t *left)
{
	size_t copy_size;

	while (*left > 0 && b->in_pos < b->in_size
			&& b->out_pos < b->out_size) {
		copy_size = min(b->in_size - b->in_pos,
				b->out_size - b->out_pos);
		if (copy_size > dict->end - dict->pos)
			copy_size = dict->end - dict->pos;
		if (copy_size > *left)
			copy_size = *left;

		*left -= copy_size;

		/*
		 * If doing in-place decompression in single-call mode and the
		 * uncompressed size of the file is larger than the caller
		 * thought (i.e. it is invalid input!), the buffers below may
		 * overlap and cause undefined behavior with memcpy().
		 * With valid inputs memcpy() would be fine here.
		 */
		memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
		dict->pos += copy_size;

		if (dict->full < dict->pos)
			dict->full = dict->pos;

		if (DEC_IS_MULTI(dict->mode)) {
			if (dict->pos == dict->end)
				dict->pos = 0;

			/*
			 * Like above but for multi-call mode: use memmove()
			 * to avoid undefined behavior with invalid input.
			 */
			memmove(b->out + b->out_pos, b->in + b->in_pos,
					copy_size);
		}

		dict->start = dict->pos;

		b->out_pos += copy_size;
		b->in_pos += copy_size;
	}
}

#ifdef XZ_DEC_MICROLZMA
#	define DICT_FLUSH_SUPPORTS_SKIPPING true
#else
#	define DICT_FLUSH_SUPPORTS_SKIPPING false
#endif

/*
 * Flush pending data from dictionary to b->out. It is assumed that there is
 * enough space in b->out. This is guaranteed because caller uses dict_limit()
 * before decoding data into the dictionary.
 */
static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
{
	size_t copy_size = dict->pos - dict->start;

	if (DEC_IS_MULTI(dict->mode)) {
		if (dict->pos == dict->end)
			dict->pos = 0;

		/*
		 * These buffers cannot overlap even if doing in-place
		 * decompression because in multi-call mode dict->buf
		 * has been allocated by us in this file; it's not
		 * provided by the caller like in single-call mode.
		 *
		 * With MicroLZMA, b->out can be NULL to skip bytes that
		 * the caller doesn't need. This cannot be done with XZ
		 * because it would break BCJ filters.
		 */
		if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL)
			memcpy(b->out + b->out_pos, dict->buf + dict->start,
					copy_size);
	}

	dict->start = dict->pos;
	b->out_pos += copy_size;
	return copy_size;
}

/*****************
 * Range decoder *
 *****************/

/* Reset the range decoder. */
static void rc_reset(struct rc_dec *rc)
{
	rc->range = (uint32_t)-1;
	rc->code = 0;
	rc->init_bytes_left = RC_INIT_BYTES;
}

/*
 * Read the first five initial bytes into rc->code if they haven't been
 * read already. (Yes, the first byte gets completely ignored.)
 */
static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b)
{
	while (rc->init_bytes_left > 0) {
		if (b->in_pos == b->in_size)
			return false;

		rc->code = (rc->code << 8) + b->in[b->in_pos++];
		--rc->init_bytes_left;
	}

	return true;
}

/* Return true if there may not be enough input for the next decoding loop. */
static inline bool rc_limit_exceeded(const struct rc_dec *rc)
{
	return rc->in_pos > rc->in_limit;
}

/*
 * Return true if it is possible (from point of view of range decoder) that
 * we have reached the end of the LZMA chunk.
 */
static inline bool rc_is_finished(const struct rc_dec *rc)
{
	return rc->code == 0;
}

/* Read the next input byte if needed. */
static __always_inline void rc_normalize(struct rc_dec *rc)
{
	if (rc->range < RC_TOP_VALUE) {
		rc->range <<= RC_SHIFT_BITS;
		rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
	}
}

/*
 * Decode one bit. In some versions, this function has been split in three
 * functions so that the compiler is supposed to be able to more easily avoid
 * an extra branch. In this particular version of the LZMA decoder, this
 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 * on x86). Using a non-split version results in nicer looking code too.
 *
 * NOTE: This must return an int. Do not make it return a bool or the speed
 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
 */
static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
{
	uint32_t bound;
	int bit;

	rc_normalize(rc);
	bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
	if (rc->code < bound) {
		rc->range = bound;
		/* Adapt the probability towards zero-bit. */
		*prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
		bit = 0;
	} else {
		rc->range -= bound;
		rc->code -= bound;
		/* Adapt the probability towards one-bit. */
		*prob -= *prob >> RC_MOVE_BITS;
		bit = 1;
	}

	return bit;
}

/* Decode a bittree starting from the most significant bit. */
static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
					   uint16_t *probs, uint32_t limit)
{
	uint32_t symbol = 1;

	do {
		if (rc_bit(rc, &probs[symbol]))
			symbol = (symbol << 1) + 1;
		else
			symbol <<= 1;
	} while (symbol < limit);

	return symbol;
}

/* Decode a bittree starting from the least significant bit. */
static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
					       uint16_t *probs,
					       uint32_t *dest, uint32_t limit)
{
	uint32_t symbol = 1;
	uint32_t i = 0;

	do {
		if (rc_bit(rc, &probs[symbol])) {
			symbol = (symbol << 1) + 1;
			*dest += 1 << i;
		} else {
			symbol <<= 1;
		}
	} while (++i < limit);
}

/* Decode direct bits (fixed fifty-fifty probability) */
static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
{
	uint32_t mask;

	do {
		rc_normalize(rc);
		rc->range >>= 1;
		rc->code -= rc->range;
		/* mask is all-ones if the decoded bit is 1, else all-zeros. */
		mask = (uint32_t)0 - (rc->code >> 31);
		rc->code += rc->range & mask;
		*dest = (*dest << 1) + (mask + 1);
	} while (--limit > 0);
}

/********
 * LZMA *
 ********/

/* Get pointer to literal coder probability array. */
static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
{
	uint32_t prev_byte = dict_get(&s->dict, 0);
	uint32_t low = prev_byte >> (8 - s->lzma.lc);
	uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;

	return s->lzma.literal[low + high];
}

/* Decode a literal (one 8-bit byte) */
static void lzma_literal(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	uint32_t symbol;
	uint32_t match_byte;
	uint32_t match_bit;
	uint32_t offset;
	uint32_t i;

	probs = lzma_literal_probs(s);

	if (lzma_state_is_literal(s->lzma.state)) {
		symbol = rc_bittree(&s->rc, probs, 0x100);
	} else {
		/*
		 * After a match, use the byte at distance rep0 as extra
		 * context until the decoded bits diverge from it: offset
		 * drops to zero at the first mismatching bit, after which
		 * plain literal probabilities are used.
		 */
		symbol = 1;
		match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
		offset = 0x100;

		do {
			match_bit = match_byte & offset;
			match_byte <<= 1;
			i = offset + match_bit + symbol;

			if (rc_bit(&s->rc, &probs[i])) {
				symbol = (symbol << 1) + 1;
				offset &= match_bit;
			} else {
				symbol <<= 1;
				offset &= ~match_bit;
			}
		} while (symbol < 0x100);
	}

	dict_put(&s->dict, (uint8_t)symbol);
	lzma_state_literal(&s->lzma.state);
}

/* Decode the length of the match into s->lzma.len. */
static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
		     uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t limit;

	if (!rc_bit(&s->rc, &l->choice)) {
		probs = l->low[pos_state];
		limit = LEN_LOW_SYMBOLS;
		s->lzma.len = MATCH_LEN_MIN;
	} else {
		if (!rc_bit(&s->rc, &l->choice2)) {
			probs = l->mid[pos_state];
			limit = LEN_MID_SYMBOLS;
			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
		} else {
			probs = l->high;
			limit = LEN_HIGH_SYMBOLS;
			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
					+ LEN_MID_SYMBOLS;
		}
	}

	s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
}

/* Decode a match. The distance will be stored in s->lzma.rep0. */
static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t dist_slot;
	uint32_t limit;

	lzma_state_match(&s->lzma.state);

	/* Shift the history of recent match distances. */
	s->lzma.rep3 = s->lzma.rep2;
	s->lzma.rep2 = s->lzma.rep1;
	s->lzma.rep1 = s->lzma.rep0;

	lzma_len(s, &s->lzma.match_len_dec, pos_state);

	probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
	dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;

	if (dist_slot < DIST_MODEL_START) {
		/* Small distances are encoded directly in the slot. */
		s->lzma.rep0 = dist_slot;
	} else {
		limit = (dist_slot >> 1) - 1;
		s->lzma.rep0 = 2 + (dist_slot & 1);

		if (dist_slot < DIST_MODEL_END) {
			/* Mid-range distance: remaining bits use probabilities. */
			s->lzma.rep0 <<= limit;
			probs = s->lzma.dist_special + s->lzma.rep0
					- dist_slot - 1;
			rc_bittree_reverse(&s->rc, probs,
					&s->lzma.rep0, limit);
		} else {
			/* Large distance: direct bits plus aligned low bits. */
			rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
			s->lzma.rep0 <<= ALIGN_BITS;
			rc_bittree_reverse(&s->rc, s->lzma.dist_align,
					&s->lzma.rep0, ALIGN_BITS);
		}
	}
}

/*
 * Decode a repeated match. The distance is one of the four most recently
 * seen matches. The distance will be stored in s->lzma.rep0.
 */
static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint32_t tmp;

	if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
		if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
				s->lzma.state][pos_state])) {
			/* Short rep: distance rep0, length exactly one byte. */
			lzma_state_short_rep(&s->lzma.state);
			s->lzma.len = 1;
			return;
		}
	} else {
		/* Pick rep1, rep2, or rep3 and rotate it to the front. */
		if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
			tmp = s->lzma.rep1;
		} else {
			if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
				tmp = s->lzma.rep2;
			} else {
				tmp = s->lzma.rep3;
				s->lzma.rep3 = s->lzma.rep2;
			}

			s->lzma.rep2 = s->lzma.rep1;
		}

		s->lzma.rep1 = s->lzma.rep0;
		s->lzma.rep0 = tmp;
	}

	lzma_state_long_rep(&s->lzma.state);
	lzma_len(s, &s->lzma.rep_len_dec, pos_state);
}

/* LZMA decoder core */
static bool lzma_main(struct xz_dec_lzma2 *s)
{
	uint32_t pos_state;

	/*
	 * If the dictionary was reached during the previous call, try to
	 * finish the possibly pending repeat in the dictionary.
	 */
	if (dict_has_space(&s->dict) && s->lzma.len > 0)
		dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);

	/*
	 * Decode more LZMA symbols. One iteration may consume up to
	 * LZMA_IN_REQUIRED - 1 bytes.
	 */
	while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
		pos_state = s->dict.pos & s->lzma.pos_mask;

		if (!rc_bit(&s->rc, &s->lzma.is_match[
				s->lzma.state][pos_state])) {
			lzma_literal(s);
		} else {
			if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
				lzma_rep_match(s, pos_state);
			else
				lzma_match(s, pos_state);

			/* False means the match distance was corrupt. */
			if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
				return false;
		}
	}

	/*
	 * Having the range decoder always normalized when we are outside
	 * this function makes it easier to correctly handle end of the chunk.
	 */
	rc_normalize(&s->rc);

	return true;
}

/*
 * Reset the LZMA decoder and range decoder state. Dictionary is not reset
 * here, because LZMA state may be reset without resetting the dictionary.
 */
static void lzma_reset(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	size_t i;

	s->lzma.state = STATE_LIT_LIT;
	s->lzma.rep0 = 0;
	s->lzma.rep1 = 0;
	s->lzma.rep2 = 0;
	s->lzma.rep3 = 0;
	s->lzma.len = 0;

	/*
	 * All probabilities are initialized to the same value. This hack
	 * makes the code smaller by avoiding a separate loop for each
	 * probability array.
	 *
	 * This could be optimized so that only that part of literal
	 * probabilities that are actually required would be initialized.
	 * In the common case we would write 12 KiB less.
	 */
	probs = s->lzma.is_match[0];
	for (i = 0; i < PROBS_TOTAL; ++i)
		probs[i] = RC_BIT_MODEL_TOTAL / 2;

	rc_reset(&s->rc);
}

/*
 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
 * from the decoded lp and pb values. On success, the LZMA decoder state is
 * reset and true is returned.
 */
static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
{
	/* props = (pb * 5 + lp) * 9 + lc; reject anything above the maximum. */
	if (props > (4 * 5 + 4) * 9 + 8)
		return false;

	s->lzma.pos_mask = 0;
	while (props >= 9 * 5) {
		props -= 9 * 5;
		++s->lzma.pos_mask;
	}

	s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;

	s->lzma.literal_pos_mask = 0;
	while (props >= 9) {
		props -= 9;
		++s->lzma.literal_pos_mask;
	}

	s->lzma.lc = props;

	/* At this point literal_pos_mask still holds lp (a count, not a mask). */
	if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
		return false;

	s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;

	lzma_reset(s);

	return true;
}

/*********
 * LZMA2 *
 *********/

/*
 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
 * wrapper function takes care of making the LZMA decoder's assumption safe.
 *
 * As long as there is plenty of input left to be decoded in the current LZMA
 * chunk, we decode directly from the caller-supplied input buffer until
 * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
 * s->temp.buf, which (hopefully) gets filled on the next call to this
 * function. We decode a few bytes from the temporary buffer so that we can
 * continue decoding from the caller-supplied input buffer again.
 */
This869* wrapper function takes care of making the LZMA decoder's assumption safe.870*871* As long as there is plenty of input left to be decoded in the current LZMA872* chunk, we decode directly from the caller-supplied input buffer until873* there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into874* s->temp.buf, which (hopefully) gets filled on the next call to this875* function. We decode a few bytes from the temporary buffer so that we can876* continue decoding from the caller-supplied input buffer again.877*/878static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)879{880size_t in_avail;881uint32_t tmp;882883in_avail = b->in_size - b->in_pos;884if (s->temp.size > 0 || s->lzma2.compressed == 0) {885tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;886if (tmp > s->lzma2.compressed - s->temp.size)887tmp = s->lzma2.compressed - s->temp.size;888if (tmp > in_avail)889tmp = in_avail;890891memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);892893if (s->temp.size + tmp == s->lzma2.compressed) {894memzero(s->temp.buf + s->temp.size + tmp,895sizeof(s->temp.buf)896- s->temp.size - tmp);897s->rc.in_limit = s->temp.size + tmp;898} else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {899s->temp.size += tmp;900b->in_pos += tmp;901return true;902} else {903s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;904}905906s->rc.in = s->temp.buf;907s->rc.in_pos = 0;908909if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)910return false;911912s->lzma2.compressed -= s->rc.in_pos;913914if (s->rc.in_pos < s->temp.size) {915s->temp.size -= s->rc.in_pos;916memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,917s->temp.size);918return true;919}920921b->in_pos += s->rc.in_pos - s->temp.size;922s->temp.size = 0;923}924925in_avail = b->in_size - b->in_pos;926if (in_avail >= LZMA_IN_REQUIRED) {927s->rc.in = b->in;928s->rc.in_pos = b->in_pos;929930if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)931s->rc.in_limit = b->in_pos + 
s->lzma2.compressed;932else933s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;934935if (!lzma_main(s))936return false;937938in_avail = s->rc.in_pos - b->in_pos;939if (in_avail > s->lzma2.compressed)940return false;941942s->lzma2.compressed -= in_avail;943b->in_pos = s->rc.in_pos;944}945946in_avail = b->in_size - b->in_pos;947if (in_avail < LZMA_IN_REQUIRED) {948if (in_avail > s->lzma2.compressed)949in_avail = s->lzma2.compressed;950951memcpy(s->temp.buf, b->in + b->in_pos, in_avail);952s->temp.size = in_avail;953b->in_pos += in_avail;954}955956return true;957}958959/*960* Take care of the LZMA2 control layer, and forward the job of actual LZMA961* decoding or copying of uncompressed chunks to other functions.962*/963XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,964struct xz_buf *b)965{966uint32_t tmp;967968while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {969switch (s->lzma2.sequence) {970case SEQ_CONTROL:971/*972* LZMA2 control byte973*974* Exact values:975* 0x00 End marker976* 0x01 Dictionary reset followed by977* an uncompressed chunk978* 0x02 Uncompressed chunk (no dictionary reset)979*980* Highest three bits (s->control & 0xE0):981* 0xE0 Dictionary reset, new properties and state982* reset, followed by LZMA compressed chunk983* 0xC0 New properties and state reset, followed984* by LZMA compressed chunk (no dictionary985* reset)986* 0xA0 State reset using old properties,987* followed by LZMA compressed chunk (no988* dictionary reset)989* 0x80 LZMA chunk (no dictionary or state reset)990*991* For LZMA compressed chunks, the lowest five bits992* (s->control & 1F) are the highest bits of the993* uncompressed size (bits 16-20).994*995* A new LZMA2 stream must begin with a dictionary996* reset. 
The first LZMA chunk must set new997* properties and reset the LZMA state.998*999* Values that don't match anything described above1000* are invalid and we return XZ_DATA_ERROR.1001*/1002tmp = b->in[b->in_pos++];10031004if (tmp == 0x00)1005return XZ_STREAM_END;10061007if (tmp >= 0xE0 || tmp == 0x01) {1008s->lzma2.need_props = true;1009s->lzma2.need_dict_reset = false;1010dict_reset(&s->dict, b);1011} else if (s->lzma2.need_dict_reset) {1012return XZ_DATA_ERROR;1013}10141015if (tmp >= 0x80) {1016s->lzma2.uncompressed = (tmp & 0x1F) << 16;1017s->lzma2.sequence = SEQ_UNCOMPRESSED_1;10181019if (tmp >= 0xC0) {1020/*1021* When there are new properties,1022* state reset is done at1023* SEQ_PROPERTIES.1024*/1025s->lzma2.need_props = false;1026s->lzma2.next_sequence1027= SEQ_PROPERTIES;10281029} else if (s->lzma2.need_props) {1030return XZ_DATA_ERROR;10311032} else {1033s->lzma2.next_sequence1034= SEQ_LZMA_PREPARE;1035if (tmp >= 0xA0)1036lzma_reset(s);1037}1038} else {1039if (tmp > 0x02)1040return XZ_DATA_ERROR;10411042s->lzma2.sequence = SEQ_COMPRESSED_0;1043s->lzma2.next_sequence = SEQ_COPY;1044}10451046break;10471048case SEQ_UNCOMPRESSED_1:1049s->lzma2.uncompressed1050+= (uint32_t)b->in[b->in_pos++] << 8;1051s->lzma2.sequence = SEQ_UNCOMPRESSED_2;1052break;10531054case SEQ_UNCOMPRESSED_2:1055s->lzma2.uncompressed1056+= (uint32_t)b->in[b->in_pos++] + 1;1057s->lzma2.sequence = SEQ_COMPRESSED_0;1058break;10591060case SEQ_COMPRESSED_0:1061s->lzma2.compressed1062= (uint32_t)b->in[b->in_pos++] << 8;1063s->lzma2.sequence = SEQ_COMPRESSED_1;1064break;10651066case SEQ_COMPRESSED_1:1067s->lzma2.compressed1068+= (uint32_t)b->in[b->in_pos++] + 1;1069s->lzma2.sequence = s->lzma2.next_sequence;1070break;10711072case SEQ_PROPERTIES:1073if (!lzma_props(s, b->in[b->in_pos++]))1074return XZ_DATA_ERROR;10751076s->lzma2.sequence = SEQ_LZMA_PREPARE;10771078/* Fall through */10791080case SEQ_LZMA_PREPARE:1081if (s->lzma2.compressed < RC_INIT_BYTES)1082return XZ_DATA_ERROR;10831084if 
(!rc_read_init(&s->rc, b))1085return XZ_OK;10861087s->lzma2.compressed -= RC_INIT_BYTES;1088s->lzma2.sequence = SEQ_LZMA_RUN;10891090/* Fall through */10911092case SEQ_LZMA_RUN:1093/*1094* Set dictionary limit to indicate how much we want1095* to be encoded at maximum. Decode new data into the1096* dictionary. Flush the new data from dictionary to1097* b->out. Check if we finished decoding this chunk.1098* In case the dictionary got full but we didn't fill1099* the output buffer yet, we may run this loop1100* multiple times without changing s->lzma2.sequence.1101*/1102dict_limit(&s->dict, min_t(size_t,1103b->out_size - b->out_pos,1104s->lzma2.uncompressed));1105if (!lzma2_lzma(s, b))1106return XZ_DATA_ERROR;11071108s->lzma2.uncompressed -= dict_flush(&s->dict, b);11091110if (s->lzma2.uncompressed == 0) {1111if (s->lzma2.compressed > 0 || s->lzma.len > 01112|| !rc_is_finished(&s->rc))1113return XZ_DATA_ERROR;11141115rc_reset(&s->rc);1116s->lzma2.sequence = SEQ_CONTROL;11171118} else if (b->out_pos == b->out_size1119|| (b->in_pos == b->in_size1120&& s->temp.size1121< s->lzma2.compressed)) {1122return XZ_OK;1123}11241125break;11261127case SEQ_COPY:1128dict_uncompressed(&s->dict, b, &s->lzma2.compressed);1129if (s->lzma2.compressed > 0)1130return XZ_OK;11311132s->lzma2.sequence = SEQ_CONTROL;1133break;1134}1135}11361137return XZ_OK;1138}11391140XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,1141uint32_t dict_max)1142{1143struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);1144if (s == NULL)1145return NULL;11461147s->dict.mode = mode;1148s->dict.size_max = dict_max;11491150if (DEC_IS_PREALLOC(mode)) {1151s->dict.buf = vmalloc(dict_max);1152if (s->dict.buf == NULL) {1153kfree(s);1154return NULL;1155}1156} else if (DEC_IS_DYNALLOC(mode)) {1157s->dict.buf = NULL;1158s->dict.allocated = 0;1159}11601161return s;1162}11631164XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)1165{1166/* This limits dictionary size to 3 
/* Free the decoder state and, in multi-call mode, the dictionary buffer. */
XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
{
	if (DEC_IS_MULTI(s->dict.mode))
		vfree(s->dict.buf);

	kfree(s);
}

#ifdef XZ_DEC_MICROLZMA
/* This is a wrapper struct to have a nice struct name in the public API. */
struct xz_dec_microlzma {
	struct xz_dec_lzma2 s;
};

enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
				 struct xz_buf *b)
{
	struct xz_dec_lzma2 *s = &s_ptr->s;

	/*
	 * sequence is SEQ_PROPERTIES before the first input byte,
	 * SEQ_LZMA_PREPARE until a total of five bytes have been read,
	 * and SEQ_LZMA_RUN for the rest of the input stream.
	 */
	if (s->lzma2.sequence != SEQ_LZMA_RUN) {
		if (s->lzma2.sequence == SEQ_PROPERTIES) {
			/* One byte is needed for the props. */
			if (b->in_pos >= b->in_size)
				return XZ_OK;

			/*
			 * Don't increment b->in_pos here. The same byte is
			 * also passed to rc_read_init() which will ignore it.
			 */
			if (!lzma_props(s, ~b->in[b->in_pos]))
				return XZ_DATA_ERROR;

			s->lzma2.sequence = SEQ_LZMA_PREPARE;
		}

		/*
		 * xz_dec_microlzma_reset() doesn't validate the compressed
		 * size so we do it here. We have to limit the maximum size
		 * to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice
		 * round number and much more than users of this code should
		 * ever need.
		 */
		if (s->lzma2.compressed < RC_INIT_BYTES
				|| s->lzma2.compressed > (3U << 30))
			return XZ_DATA_ERROR;

		if (!rc_read_init(&s->rc, b))
			return XZ_OK;

		s->lzma2.compressed -= RC_INIT_BYTES;
		s->lzma2.sequence = SEQ_LZMA_RUN;

		dict_reset(&s->dict, b);
	}

	/* This is to allow increasing b->out_size between calls. */
	if (DEC_IS_SINGLE(s->dict.mode))
		s->dict.end = b->out_size - b->out_pos;

	while (true) {
		dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos,
					   s->lzma2.uncompressed));

		if (!lzma2_lzma(s, b))
			return XZ_DATA_ERROR;

		s->lzma2.uncompressed -= dict_flush(&s->dict, b);

		if (s->lzma2.uncompressed == 0) {
			if (s->lzma2.pedantic_microlzma) {
				if (s->lzma2.compressed > 0 || s->lzma.len > 0
						|| !rc_is_finished(&s->rc))
					return XZ_DATA_ERROR;
			}

			return XZ_STREAM_END;
		}

		if (b->out_pos == b->out_size)
			return XZ_OK;

		if (b->in_pos == b->in_size
				&& s->temp.size < s->lzma2.compressed)
			return XZ_OK;
	}
}

struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
						uint32_t dict_size)
{
	struct xz_dec_microlzma *s;

	/* Restrict dict_size to the same range as in the LZMA2 code. */
	if (dict_size < 4096 || dict_size > (3U << 30))
		return NULL;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return NULL;

	s->s.dict.mode = mode;
	s->s.dict.size = dict_size;

	if (DEC_IS_MULTI(mode)) {
		s->s.dict.end = dict_size;

		s->s.dict.buf = vmalloc(dict_size);
		if (s->s.dict.buf == NULL) {
			kfree(s);
			return NULL;
		}
	}

	return s;
}

void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
			    uint32_t uncomp_size, int uncomp_size_is_exact)
{
	/*
	 * comp_size is validated in xz_dec_microlzma_run().
	 * uncomp_size can safely be anything.
	 */
	s->s.lzma2.compressed = comp_size;
	s->s.lzma2.uncompressed = uncomp_size;
	s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact;

	s->s.lzma2.sequence = SEQ_PROPERTIES;
	s->s.temp.size = 0;
}

void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
{
	if (DEC_IS_MULTI(s->s.dict.mode))
		vfree(s->s.dict.buf);

	kfree(s);
}
#endif