/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2)12023-03-02 : Igor Pavlov : Public domain */23#ifndef ZIP7_INC_BCJ2_H4#define ZIP7_INC_BCJ2_H56#include "7zTypes.h"78EXTERN_C_BEGIN910#define BCJ2_NUM_STREAMS 41112enum13{14BCJ2_STREAM_MAIN,15BCJ2_STREAM_CALL,16BCJ2_STREAM_JUMP,17BCJ2_STREAM_RC18};1920enum21{22BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS,23BCJ2_DEC_STATE_ORIG_1,24BCJ2_DEC_STATE_ORIG_2,25BCJ2_DEC_STATE_ORIG_3,2627BCJ2_DEC_STATE_ORIG,28BCJ2_DEC_STATE_ERROR /* after detected data error */29};3031enum32{33BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,34BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */35};363738/* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */39#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2)4041/*42CBcj2Dec / CBcj2Enc43bufs sizes:44BUF_SIZE(n) = lims[n] - bufs[n]45bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be multiply of 4:46(BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 047(BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 048*/4950// typedef UInt32 CBcj2Prob;51typedef UInt16 CBcj2Prob;5253/*54BCJ2 encoder / decoder internal requirements:55- If last bytes of stream contain marker (e8/e8/0f8x), then56there is also encoded symbol (0 : no conversion) in RC stream.57- One case of overlapped instructions is supported,58if last byte of converted instruction is (0f) and next byte is (8x):59marker [xx xx xx 0f] 8x60then the pair (0f 8x) is treated as marker.61*/6263/* ---------- BCJ2 Decoder ---------- */6465/*66CBcj2Dec:67(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:68bufs[BCJ2_STREAM_MAIN] >= dest &&69bufs[BCJ2_STREAM_MAIN] - dest >=70BUF_SIZE(BCJ2_STREAM_CALL) +71BUF_SIZE(BCJ2_STREAM_JUMP)72reserve = bufs[BCJ2_STREAM_MAIN] - dest -73( BUF_SIZE(BCJ2_STREAM_CALL) +74BUF_SIZE(BCJ2_STREAM_JUMP) )75and additional conditions:76if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init())77{78(reserve != 1) : if (ver < v23.00)79}80else // if there are more than one calls of Bcj2Dec_Decode() after Bcj2Dec_Init())81{82(reserve >= 6) : if (ver < v23.00)83(reserve >= 4) : if (ver >= v23.00)84We need that (reserve) because after first call of Bcj2Dec_Decode(),85CBcj2Dec::temp can contain up to 4 bytes for writing to (dest).86}87(reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode().88(reserve == 0) also is allowed in case of multi-call, if we use fixed buffers,89and (reserve) is calculated from full (final) sizes of all streams before first call.90*/9192typedef struct93{94const Byte *bufs[BCJ2_NUM_STREAMS];95const Byte *lims[BCJ2_NUM_STREAMS];96Byte *dest;97const Byte *destLim;9899unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */100101UInt32 ip; /* property of starting base for decoding */102UInt32 temp; /* Byte temp[4]; */103UInt32 range;104UInt32 code;105CBcj2Prob probs[2 + 256];106} CBcj2Dec;107108109/* Note:110Bcj2Dec_Init() sets (CBcj2Dec::ip = 0)111if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init()112*/113void Bcj2Dec_Init(CBcj2Dec *p);114115116/* Bcj2Dec_Decode():117returns:118SZ_OK119SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct120*/121SRes Bcj2Dec_Decode(CBcj2Dec *p);122123/* To check that decoding was finished you can compare124sizes of processed streams with sizes known from another sources.125You must do at least one mandatory check from the two following options:126- the check for size of processed output (ORIG) stream.127- the check for size of processed input (MAIN) stream.128additional optional checks:129- the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC)130- the checks Bcj2Dec_IsMaybeFinished*()131also before actual decoding you can check that the132following condition is met for stream sizes:133( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) )134*/135136/* (state == BCJ2_STREAM_MAIN) means that decoder is ready for137additional input data in BCJ2_STREAM_MAIN stream.138Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding.139*/140#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN)141142/* if the stream decoding was finished correctly, then range decoder143part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0).144Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding.145*/146#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0)147148/* use Bcj2Dec_IsMaybeFinished() only as additional check149after at least one mandatory check from the two following options:150- the check for size of processed output (ORIG) stream.151- the check for size of processed input (MAIN) stream.152*/153#define Bcj2Dec_IsMaybeFinished(_p_) ( \154Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \155Bcj2Dec_IsMaybeFinished_code(_p_))156157158159/* ---------- BCJ2 Encoder ---------- */160161typedef enum162{163BCJ2_ENC_FINISH_MODE_CONTINUE,164BCJ2_ENC_FINISH_MODE_END_BLOCK,165BCJ2_ENC_FINISH_MODE_END_STREAM166} EBcj2Enc_FinishMode;167168/*169BCJ2_ENC_FINISH_MODE_CONTINUE:170process non finished encoding.171It notifies the encoder that additional further calls172can provide more input data (src) than provided by current call.173In that case the CBcj2Enc encoder still can move (src) pointer174up to (srcLim), but CBcj2Enc encoder can store some of the last175processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer.176at return:177(CBcj2Enc::src will point to position that includes178processed data and data copied to (temp[]) buffer)179That data from (temp[]) buffer will be used in further calls.180181BCJ2_ENC_FINISH_MODE_END_BLOCK:182finish encoding of current block (ended at srcLim) without RC flushing.183at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG) &&184CBcj2Enc::src == CBcj2Enc::srcLim)185: it shows that block encoding was finished. And the encoder is186ready for new (src) data or for stream finish operation.187finished block means188{189CBcj2Enc has completed block encoding up to (srcLim).190(1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP cortages will191not cross block boundary at (srcLim).192temporary CBcj2Enc buffer for (ORIG) src data is empty.1933 output uncompressed streams (MAIN, CALL, JUMP) were flushed.194RC stream was not flushed. And RC stream will cross block boundary.195}196Note: some possible implementation of BCJ2 encoder could197write branch marker (e8/e8/0f8x) in one call of Bcj2Enc_Encode(),198and it could calculate symbol for RC in another call of Bcj2Enc_Encode().199BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol.200And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls.201So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK202to ensure that RC symbol is calculated and written in proper block.203204BCJ2_ENC_FINISH_MODE_END_STREAM205finish encoding of stream (ended at srcLim) fully including RC flushing.206at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED)207: it shows that stream encoding was finished fully,208and all output streams were flushed fully.209also Bcj2Enc_IsFinished() can be called.210*/211212213/*21432-bit relative offset in JUMP/CALL commands is215- (mod 4 GiB) for 32-bit x86 code216- signed Int32 for 64-bit x86-64 code217BCJ2 encoder also does internal relative to absolute address conversions.218And there are 2 possible ways to do it:219before v23: we used 32-bit variables and (mod 4 GiB) conversion220since v23: we use 64-bit variables and (signed Int32 offset) conversion.221The absolute address condition for conversion in v23:222((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64)223note that if (fileSize64 > 2 GiB). there is difference between224old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23).225And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases.226*/227228/*229// for old (v22) way for conversion:230typedef UInt32 CBcj2Enc_ip_unsigned;231typedef Int32 CBcj2Enc_ip_signed;232#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31)233*/234typedef UInt64 CBcj2Enc_ip_unsigned;235typedef Int64 CBcj2Enc_ip_signed;236237/* maximum size of file that can be used for conversion condition */238#define BCJ2_ENC_FileSize_MAX ((CBcj2Enc_ip_unsigned)0 - 2)239240/* default value of fileSize64_minus1 variable that means241that absolute address limitation will not be used */242#define BCJ2_ENC_FileSizeField_UNLIMITED ((CBcj2Enc_ip_unsigned)0 - 1)243244/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */245#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \246((CBcj2Enc_ip_unsigned)(fileSize) - 1)247248/* set CBcj2Enc::fileSize64_minus1 variable from size of file */249#define Bcj2Enc_SET_FileSize(p, fileSize) \250(p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize);251252253typedef struct254{255Byte *bufs[BCJ2_NUM_STREAMS];256const Byte *lims[BCJ2_NUM_STREAMS];257const Byte *src;258const Byte *srcLim;259260unsigned state;261EBcj2Enc_FinishMode finishMode;262263Byte context;264Byte flushRem;265Byte isFlushState;266267Byte cache;268UInt32 range;269UInt64 low;270UInt64 cacheSize;271272// UInt32 context; // for marker version, it can include marker flag.273274/* (ip64) and (fileIp64) correspond to virtual source stream position275that doesn't include data in temp[] */276CBcj2Enc_ip_unsigned ip64; /* current (ip) position */277CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */278CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */279UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */280// UInt32 relatExcludeBits;281282UInt32 tempTarget;283unsigned tempPos; /* the number of bytes that were copied to temp[] buffer284(tempPos <= 4) outside of Bcj2Enc_Encode() */285// Byte temp[4]; // for marker version286Byte temp[8];287CBcj2Prob probs[2 + 256];288} CBcj2Enc;289290void Bcj2Enc_Init(CBcj2Enc *p);291292293/*294Bcj2Enc_Encode(): at exit:295p->State < BCJ2_NUM_STREAMS : we need more buffer space for output stream296(bufs[p->State] == lims[p->State])297p->State == BCJ2_ENC_STATE_ORIG : we need more data in input src stream298(src == srcLim)299p->State == BCJ2_ENC_STATE_FINISHED : after fully encoded stream300*/301void Bcj2Enc_Encode(CBcj2Enc *p);302303/* Bcj2Enc encoder can look ahead for up 4 bytes of source stream.304CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer.305(CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after306fully processed data and after data copied to temp buffer.307So if the caller needs to get real number of fully processed input308bytes (without look ahead data in temp buffer),309the caller must subtruct (CBcj2Enc::tempPos) value from processed size310value that is calculated based on current (CBcj2Enc::src):311cur_processed_pos = Calc_Big_Processed_Pos(enc.src)) -312Bcj2Enc_Get_AvailInputSize_in_Temp(&enc);313*/314/* get the size of input data that was stored in temp[] buffer: */315#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos)316317#define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0)318319/* Note : the decoder supports overlapping of marker (0f 80).320But we can eliminate such overlapping cases by setting321the limit for relative offset conversion as322CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB)323*/324/* default value for CBcj2Enc::relatLimit */325#define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0f << 24)326#define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)1 << 31)327// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5328329EXTERN_C_END330331#endif332333334