/* Xz.h - Xz interface12024-01-26 : Igor Pavlov : Public domain */23#ifndef ZIP7_INC_XZ_H4#define ZIP7_INC_XZ_H56#include "Sha256.h"7#include "Delta.h"89EXTERN_C_BEGIN1011#define XZ_ID_Subblock 112#define XZ_ID_Delta 313#define XZ_ID_X86 414#define XZ_ID_PPC 515#define XZ_ID_IA64 616#define XZ_ID_ARM 717#define XZ_ID_ARMT 818#define XZ_ID_SPARC 919#define XZ_ID_ARM64 0xa20#define XZ_ID_RISCV 0xb21#define XZ_ID_LZMA2 0x212223unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value);24unsigned Xz_WriteVarInt(Byte *buf, UInt64 v);2526/* ---------- xz block ---------- */2728#define XZ_BLOCK_HEADER_SIZE_MAX 10242930#define XZ_NUM_FILTERS_MAX 431#define XZ_BF_NUM_FILTERS_MASK 332#define XZ_BF_PACK_SIZE (1 << 6)33#define XZ_BF_UNPACK_SIZE (1 << 7)3435#define XZ_FILTER_PROPS_SIZE_MAX 203637typedef struct38{39UInt64 id;40UInt32 propsSize;41Byte props[XZ_FILTER_PROPS_SIZE_MAX];42} CXzFilter;4344typedef struct45{46UInt64 packSize;47UInt64 unpackSize;48Byte flags;49CXzFilter filters[XZ_NUM_FILTERS_MAX];50} CXzBlock;5152#define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)53#define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0)54#define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0)55#define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)5657SRes XzBlock_Parse(CXzBlock *p, const Byte *header);58SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes);5960/* ---------- xz stream ---------- */6162#define XZ_SIG_SIZE 663#define XZ_FOOTER_SIG_SIZE 26465extern const Byte XZ_SIG[XZ_SIG_SIZE];6667/*68extern const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE];69*/7071#define XZ_FOOTER_SIG_0 'Y'72#define XZ_FOOTER_SIG_1 'Z'7374#define XZ_STREAM_FLAGS_SIZE 275#define XZ_STREAM_CRC_SIZE 47677#define XZ_STREAM_HEADER_SIZE (XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE)78#define XZ_STREAM_FOOTER_SIZE (XZ_FOOTER_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE + 4)7980#define XZ_CHECK_MASK 0xF81#define XZ_CHECK_NO 082#define XZ_CHECK_CRC32 183#define XZ_CHECK_CRC64 484#define XZ_CHECK_SHA256 108586typedef struct87{88unsigned mode;89UInt32 crc;90UInt64 crc64;91CSha256 sha;92} CXzCheck;9394void XzCheck_Init(CXzCheck *p, unsigned mode);95void XzCheck_Update(CXzCheck *p, const void *data, size_t size);96int XzCheck_Final(CXzCheck *p, Byte *digest);9798typedef UInt16 CXzStreamFlags;99100#define XzFlags_IsSupported(f) ((f) <= XZ_CHECK_MASK)101#define XzFlags_GetCheckType(f) ((f) & XZ_CHECK_MASK)102#define XzFlags_HasDataCrc32(f) (Xz_GetCheckType(f) == XZ_CHECK_CRC32)103unsigned XzFlags_GetCheckSize(CXzStreamFlags f);104105SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf);106SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream);107108typedef struct109{110UInt64 unpackSize;111UInt64 totalSize;112} CXzBlockSizes;113114typedef struct115{116CXzStreamFlags flags;117// Byte _pad[6];118size_t numBlocks;119CXzBlockSizes *blocks;120UInt64 startOffset;121} CXzStream;122123void Xz_Construct(CXzStream *p);124void Xz_Free(CXzStream *p, ISzAllocPtr alloc);125126#define XZ_SIZE_OVERFLOW ((UInt64)(Int64)-1)127128UInt64 Xz_GetUnpackSize(const CXzStream *p);129UInt64 Xz_GetPackSize(const CXzStream *p);130131typedef struct132{133size_t num;134size_t numAllocated;135CXzStream *streams;136} CXzs;137138void Xzs_Construct(CXzs *p);139void Xzs_Free(CXzs *p, ISzAllocPtr alloc);140SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc);141142UInt64 Xzs_GetNumBlocks(const CXzs *p);143UInt64 Xzs_GetUnpackSize(const CXzs *p);144145146// ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder147148typedef enum149{150CODER_STATUS_NOT_SPECIFIED, /* use main error code instead */151CODER_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */152CODER_STATUS_NOT_FINISHED, /* stream was not finished */153CODER_STATUS_NEEDS_MORE_INPUT /* you must provide more input bytes */154} ECoderStatus;155156157// ECoderFinishMode values are identical to ELzmaFinishMode158159typedef enum160{161CODER_FINISH_ANY, /* finish at any point */162CODER_FINISH_END /* block must be finished at the end */163} ECoderFinishMode;164165166typedef struct167{168void *p; // state object;169void (*Free)(void *p, ISzAllocPtr alloc);170SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc);171void (*Init)(void *p);172SRes (*Code2)(void *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,173int srcWasFinished, ECoderFinishMode finishMode,174// int *wasFinished,175ECoderStatus *status);176SizeT (*Filter)(void *p, Byte *data, SizeT size);177} IStateCoder;178179180typedef struct181{182UInt32 methodId;183UInt32 delta;184UInt32 ip;185UInt32 X86_State;186Byte delta_State[DELTA_STATE_SIZE];187} CXzBcFilterStateBase;188189typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size);190191SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,192Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc);193194195#define MIXCODER_NUM_FILTERS_MAX 4196197typedef struct198{199ISzAllocPtr alloc;200Byte *buf;201unsigned numCoders;202203Byte *outBuf;204size_t outBufSize;205size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode)206BoolInt wasFinished;207SRes res;208ECoderStatus status;209// BoolInt SingleBufMode;210211int finished[MIXCODER_NUM_FILTERS_MAX - 1];212size_t pos[MIXCODER_NUM_FILTERS_MAX - 1];213size_t size[MIXCODER_NUM_FILTERS_MAX - 1];214UInt64 ids[MIXCODER_NUM_FILTERS_MAX];215SRes results[MIXCODER_NUM_FILTERS_MAX];216IStateCoder coders[MIXCODER_NUM_FILTERS_MAX];217} CMixCoder;218219220typedef enum221{222XZ_STATE_STREAM_HEADER,223XZ_STATE_STREAM_INDEX,224XZ_STATE_STREAM_INDEX_CRC,225XZ_STATE_STREAM_FOOTER,226XZ_STATE_STREAM_PADDING,227XZ_STATE_BLOCK_HEADER,228XZ_STATE_BLOCK,229XZ_STATE_BLOCK_FOOTER230} EXzState;231232233typedef struct234{235EXzState state;236unsigned pos;237unsigned alignPos;238unsigned indexPreSize;239240CXzStreamFlags streamFlags;241242unsigned blockHeaderSize;243UInt64 packSize;244UInt64 unpackSize;245246UInt64 numBlocks; // number of finished blocks in current stream247UInt64 indexSize;248UInt64 indexPos;249UInt64 padSize;250251UInt64 numStartedStreams;252UInt64 numFinishedStreams;253UInt64 numTotalBlocks;254255UInt32 crc;256CMixCoder decoder;257CXzBlock block;258CXzCheck check;259CSha256 sha;260261BoolInt parseMode;262BoolInt headerParsedOk;263BoolInt decodeToStreamSignature;264unsigned decodeOnlyOneBlock;265266Byte *outBuf;267size_t outBufSize;268size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked269270Byte shaDigest[SHA256_DIGEST_SIZE];271Byte buf[XZ_BLOCK_HEADER_SIZE_MAX];272} CXzUnpacker;273274/* alloc : aligned for cache line allocation is better */275void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc);276void XzUnpacker_Init(CXzUnpacker *p);277void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize);278void XzUnpacker_Free(CXzUnpacker *p);279280/*281XzUnpacker282The sequence for decoding functions:283{284XzUnpacker_Construct()285[Decoding_Calls]286XzUnpacker_Free()287}288289[Decoding_Calls]290291There are 3 types of interfaces for [Decoding_Calls] calls:292293Interface-1 : Partial output buffers:294{295XzUnpacker_Init()296for()297{298XzUnpacker_Code();299}300XzUnpacker_IsStreamWasFinished()301}302303Interface-2 : Direct output buffer:304Use it, if you know exact size of decoded data, and you need305whole xz unpacked data in one output buffer.306xz unpacker doesn't allocate additional buffer for lzma2 dictionary in that mode.307{308XzUnpacker_Init()309XzUnpacker_SetOutBufMode(); // to set output buffer and size310for()311{312XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code()313}314XzUnpacker_IsStreamWasFinished()315}316317Interface-3 : Direct output buffer : One call full decoding318It unpacks whole input buffer to output buffer in one call.319It uses Interface-2 internally.320{321XzUnpacker_CodeFull()322XzUnpacker_IsStreamWasFinished()323}324*/325326/*327finishMode:328It has meaning only if the decoding reaches output limit (*destLen).329CODER_FINISH_ANY - use smallest number of input bytes330CODER_FINISH_END - read EndOfStream marker after decoding331332Returns:333SZ_OK334status:335CODER_STATUS_NOT_FINISHED,336CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases:3371) it needs more input data to finish current xz stream3382) xz stream was finished successfully. But the decoder supports multiple339concatented xz streams. So it expects more input data for new xz streams.340Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully.341342SZ_ERROR_MEM - Memory allocation error343SZ_ERROR_DATA - Data error344SZ_ERROR_UNSUPPORTED - Unsupported method or method properties345SZ_ERROR_CRC - CRC error346// SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).347348SZ_ERROR_NO_ARCHIVE - the error with xz Stream Header with one of the following reasons:349- xz Stream Signature failure350- CRC32 of xz Stream Header is failed351- The size of Stream padding is not multiple of four bytes.352It's possible to get that error, if xz stream was finished and the stream353contains some another data. In that case you can call XzUnpacker_GetExtraSize()354function to get real size of xz stream.355*/356357358SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,359const Byte *src, SizeT *srcLen, int srcFinished,360ECoderFinishMode finishMode, ECoderStatus *status);361362SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,363const Byte *src, SizeT *srcLen,364ECoderFinishMode finishMode, ECoderStatus *status);365366/*367If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished()368after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code().369*/370371BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);372373/*374XzUnpacker_GetExtraSize() returns then number of unconfirmed bytes,375if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state.376These bytes can be some data after xz archive, or377it can be start of new xz stream.378379Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of380xz stream in two cases, if XzUnpacker_Code() returns:381res == SZ_OK && status == CODER_STATUS_NEEDS_MORE_INPUT382res == SZ_ERROR_NO_ARCHIVE383*/384385UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p);386387388/*389for random block decoding:390XzUnpacker_Init();391set CXzUnpacker::streamFlags392XzUnpacker_PrepareToRandomBlockDecoding()393loop394{395XzUnpacker_Code()396XzUnpacker_IsBlockFinished()397}398*/399400void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p);401BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p);402403#define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags))404405406407408409410/* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */411412/*413if (CXzDecMtProps::numThreads > 1), the decoder can try to use414Multi-Threading. The decoder analyses xz block header, and if415there are pack size and unpack size values stored in xz block header,416the decoder reads compressed data of block to internal buffers,417and then it can start parallel decoding, if there are another blocks.418The decoder can switch back to Single-Thread decoding after some conditions.419420The sequence of calls for xz decoding with in/out Streams:421{422XzDecMt_Create()423XzDecMtProps_Init(XzDecMtProps) to set default values of properties424// then you can change some XzDecMtProps parameters with required values425// here you can set the number of threads and (memUseMax) - the maximum426Memory usage for multithreading decoding.427for()428{429XzDecMt_Decode() // one call per one file430}431XzDecMt_Destroy()432}433*/434435436typedef struct437{438size_t inBufSize_ST; // size of input buffer for Single-Thread decoding439size_t outStep_ST; // size of output buffer for Single-Thread decoding440BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data.441442#ifndef Z7_ST443unsigned numThreads; // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding444size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created445size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding.446// it's recommended to set (memUseMax) manually to value that is smaller of total size of RAM in computer.447#endif448} CXzDecMtProps;449450void XzDecMtProps_Init(CXzDecMtProps *p);451452typedef struct CXzDecMt CXzDecMt;453typedef CXzDecMt * CXzDecMtHandle;454// Z7_DECLARE_HANDLE(CXzDecMtHandle)455456/*457alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).458allocMid : for big allocations, aligned allocation is better459*/460461CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid);462void XzDecMt_Destroy(CXzDecMtHandle p);463464465typedef struct466{467Byte UnpackSize_Defined;468Byte NumStreams_Defined;469Byte NumBlocks_Defined;470471Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream.472Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data473474UInt64 InSize; // pack size processed. That value doesn't include the data after475// end of xz stream, if that data was not correct476UInt64 OutSize;477478UInt64 NumStreams;479UInt64 NumBlocks;480481SRes DecodeRes; // the error code of xz streams data decoding482SRes ReadRes; // error code from ISeqInStream:Read()483SRes ProgressRes; // error code from ICompressProgress:Progress()484485SRes CombinedRes; // Combined result error code that shows main rusult486// = S_OK, if there is no error.487// but check also (DataAfterEnd) that can show additional minor errors.488489SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream490// = SZ_ERROR_PROGRESS, if error from ICompressProgress491// = SZ_ERROR_WRITE, if error from ISeqOutStream492// = SZ_ERROR_* codes for decoding493} CXzStatInfo;494495void XzStatInfo_Clear(CXzStatInfo *p);496497/*498499XzDecMt_Decode()500SRes: it's combined decoding result. It also is equal to stat->CombinedRes.501502SZ_OK - no error503check also output value in (stat->DataAfterEnd)504that can show additional possible error505506SZ_ERROR_MEM - Memory allocation error507SZ_ERROR_NO_ARCHIVE - is not xz archive508SZ_ERROR_ARCHIVE - Headers error509SZ_ERROR_DATA - Data Error510SZ_ERROR_UNSUPPORTED - Unsupported method or method properties511SZ_ERROR_CRC - CRC Error512SZ_ERROR_INPUT_EOF - it needs more input data513SZ_ERROR_WRITE - ISeqOutStream error514(SZ_ERROR_READ) - ISeqInStream errors515(SZ_ERROR_PROGRESS) - ICompressProgress errors516// SZ_ERROR_THREAD - error in multi-threading functions517MY_SRes_HRESULT_FROM_WRes(WRes_error) - error in multi-threading function518*/519520SRes XzDecMt_Decode(CXzDecMtHandle p,521const CXzDecMtProps *props,522const UInt64 *outDataSize, // NULL means undefined523int finishMode, // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished524ISeqOutStreamPtr outStream,525// Byte *outBuf, size_t *outBufSize,526ISeqInStreamPtr inStream,527// const Byte *inData, size_t inDataSize,528CXzStatInfo *stat, // out: decoding results and statistics529int *isMT, // out: 0 means that ST (Single-Thread) version was used530// 1 means that MT (Multi-Thread) version was used531ICompressProgressPtr progress);532533EXTERN_C_END534535#endif536537538