/* Path: blob/master/libs/capstone/arch/X86/X86DisassemblerDecoder.h (4389 views) */
/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*1*2* The LLVM Compiler Infrastructure3*4* This file is distributed under the University of Illinois Open Source5* License. See LICENSE.TXT for details.6*7*===----------------------------------------------------------------------===*8*9* This file is part of the X86 Disassembler.10* It contains the public interface of the instruction decoder.11* Documentation for the disassembler can be found in X86Disassembler.h.12*13*===----------------------------------------------------------------------===*/1415/* Capstone Disassembly Engine */16/* By Nguyen Anh Quynh <[email protected]>, 2013-2019 */1718#ifndef CS_X86_DISASSEMBLERDECODER_H19#define CS_X86_DISASSEMBLERDECODER_H2021#if defined(CAPSTONE_HAS_OSXKERNEL)22#include <libkern/libkern.h>23#else24#include <stdio.h>25#endif2627#include "X86DisassemblerDecoderCommon.h"2829/*30* Accessor functions for various fields of an Intel instruction31*/32#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)33#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)34#define rmFromModRM(modRM) ((modRM) & 0x7)35#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)36#define indexFromSIB(sib) (((sib) & 0x38) >> 3)37#define baseFromSIB(sib) ((sib) & 0x7)38#define wFromREX(rex) (((rex) & 0x8) >> 3)39#define rFromREX(rex) (((rex) & 0x4) >> 2)40#define xFromREX(rex) (((rex) & 0x2) >> 1)41#define bFromREX(rex) ((rex) & 0x1)4243#define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7)44#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)45#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)46#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)47#define mmFromEVEX2of4(evex) ((evex) & 0x3)48#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)49#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)50#define ppFromEVEX3of4(evex) ((evex) & 0x3)51#define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7)52#define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6)53#define lFromEVEX4of4(evex) (((evex) 
& 0x20) >> 5)54#define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4)55#define v2FromEVEX4of4(evex) (((~evex) & 0x8) >> 3)56#define aaaFromEVEX4of4(evex) ((evex) & 0x7)5758#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)59#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)60#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)61#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)62#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)63#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)64#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)65#define ppFromVEX3of3(vex) ((vex) & 0x3)6667#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)68#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)69#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)70#define ppFromVEX2of2(vex) ((vex) & 0x3)7172#define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7)73#define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6)74#define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5)75#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)76#define wFromXOP3of3(xop) (((xop) & 0x80) >> 7)77#define vvvvFromXOP3of3(vex) (((~(vex)) & 0x78) >> 3)78#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)79#define ppFromXOP3of3(xop) ((xop) & 0x3)8081/*82* These enums represent Intel registers for use by the decoder.83*/8485#define REGS_8BIT \86ENTRY(AL) \87ENTRY(CL) \88ENTRY(DL) \89ENTRY(BL) \90ENTRY(AH) \91ENTRY(CH) \92ENTRY(DH) \93ENTRY(BH) \94ENTRY(R8B) \95ENTRY(R9B) \96ENTRY(R10B) \97ENTRY(R11B) \98ENTRY(R12B) \99ENTRY(R13B) \100ENTRY(R14B) \101ENTRY(R15B) \102ENTRY(SPL) \103ENTRY(BPL) \104ENTRY(SIL) \105ENTRY(DIL)106107#define EA_BASES_16BIT \108ENTRY(BX_SI) \109ENTRY(BX_DI) \110ENTRY(BP_SI) \111ENTRY(BP_DI) \112ENTRY(SI) \113ENTRY(DI) \114ENTRY(BP) \115ENTRY(BX) \116ENTRY(R8W) \117ENTRY(R9W) \118ENTRY(R10W) \119ENTRY(R11W) \120ENTRY(R12W) \121ENTRY(R13W) \122ENTRY(R14W) \123ENTRY(R15W)124125#define REGS_16BIT \126ENTRY(AX) \127ENTRY(CX) \128ENTRY(DX) \129ENTRY(BX) \130ENTRY(SP) \131ENTRY(BP) \132ENTRY(SI) \133ENTRY(DI) \134ENTRY(R8W) \135ENTRY(R9W) \136ENTRY(R10W) 
\137ENTRY(R11W) \138ENTRY(R12W) \139ENTRY(R13W) \140ENTRY(R14W) \141ENTRY(R15W)142143#define EA_BASES_32BIT \144ENTRY(EAX) \145ENTRY(ECX) \146ENTRY(EDX) \147ENTRY(EBX) \148ENTRY(sib) \149ENTRY(EBP) \150ENTRY(ESI) \151ENTRY(EDI) \152ENTRY(R8D) \153ENTRY(R9D) \154ENTRY(R10D) \155ENTRY(R11D) \156ENTRY(R12D) \157ENTRY(R13D) \158ENTRY(R14D) \159ENTRY(R15D)160161#define REGS_32BIT \162ENTRY(EAX) \163ENTRY(ECX) \164ENTRY(EDX) \165ENTRY(EBX) \166ENTRY(ESP) \167ENTRY(EBP) \168ENTRY(ESI) \169ENTRY(EDI) \170ENTRY(R8D) \171ENTRY(R9D) \172ENTRY(R10D) \173ENTRY(R11D) \174ENTRY(R12D) \175ENTRY(R13D) \176ENTRY(R14D) \177ENTRY(R15D)178179#define EA_BASES_64BIT \180ENTRY(RAX) \181ENTRY(RCX) \182ENTRY(RDX) \183ENTRY(RBX) \184ENTRY(sib64) \185ENTRY(RBP) \186ENTRY(RSI) \187ENTRY(RDI) \188ENTRY(R8) \189ENTRY(R9) \190ENTRY(R10) \191ENTRY(R11) \192ENTRY(R12) \193ENTRY(R13) \194ENTRY(R14) \195ENTRY(R15)196197#define REGS_64BIT \198ENTRY(RAX) \199ENTRY(RCX) \200ENTRY(RDX) \201ENTRY(RBX) \202ENTRY(RSP) \203ENTRY(RBP) \204ENTRY(RSI) \205ENTRY(RDI) \206ENTRY(R8) \207ENTRY(R9) \208ENTRY(R10) \209ENTRY(R11) \210ENTRY(R12) \211ENTRY(R13) \212ENTRY(R14) \213ENTRY(R15)214215#define REGS_MMX \216ENTRY(MM0) \217ENTRY(MM1) \218ENTRY(MM2) \219ENTRY(MM3) \220ENTRY(MM4) \221ENTRY(MM5) \222ENTRY(MM6) \223ENTRY(MM7)224225#define REGS_XMM \226ENTRY(XMM0) \227ENTRY(XMM1) \228ENTRY(XMM2) \229ENTRY(XMM3) \230ENTRY(XMM4) \231ENTRY(XMM5) \232ENTRY(XMM6) \233ENTRY(XMM7) \234ENTRY(XMM8) \235ENTRY(XMM9) \236ENTRY(XMM10) \237ENTRY(XMM11) \238ENTRY(XMM12) \239ENTRY(XMM13) \240ENTRY(XMM14) \241ENTRY(XMM15) \242ENTRY(XMM16) \243ENTRY(XMM17) \244ENTRY(XMM18) \245ENTRY(XMM19) \246ENTRY(XMM20) \247ENTRY(XMM21) \248ENTRY(XMM22) \249ENTRY(XMM23) \250ENTRY(XMM24) \251ENTRY(XMM25) \252ENTRY(XMM26) \253ENTRY(XMM27) \254ENTRY(XMM28) \255ENTRY(XMM29) \256ENTRY(XMM30) \257ENTRY(XMM31)258259260#define REGS_YMM \261ENTRY(YMM0) \262ENTRY(YMM1) \263ENTRY(YMM2) \264ENTRY(YMM3) \265ENTRY(YMM4) \266ENTRY(YMM5) \267ENTRY(YMM6) 
\268ENTRY(YMM7) \269ENTRY(YMM8) \270ENTRY(YMM9) \271ENTRY(YMM10) \272ENTRY(YMM11) \273ENTRY(YMM12) \274ENTRY(YMM13) \275ENTRY(YMM14) \276ENTRY(YMM15) \277ENTRY(YMM16) \278ENTRY(YMM17) \279ENTRY(YMM18) \280ENTRY(YMM19) \281ENTRY(YMM20) \282ENTRY(YMM21) \283ENTRY(YMM22) \284ENTRY(YMM23) \285ENTRY(YMM24) \286ENTRY(YMM25) \287ENTRY(YMM26) \288ENTRY(YMM27) \289ENTRY(YMM28) \290ENTRY(YMM29) \291ENTRY(YMM30) \292ENTRY(YMM31)293294#define REGS_ZMM \295ENTRY(ZMM0) \296ENTRY(ZMM1) \297ENTRY(ZMM2) \298ENTRY(ZMM3) \299ENTRY(ZMM4) \300ENTRY(ZMM5) \301ENTRY(ZMM6) \302ENTRY(ZMM7) \303ENTRY(ZMM8) \304ENTRY(ZMM9) \305ENTRY(ZMM10) \306ENTRY(ZMM11) \307ENTRY(ZMM12) \308ENTRY(ZMM13) \309ENTRY(ZMM14) \310ENTRY(ZMM15) \311ENTRY(ZMM16) \312ENTRY(ZMM17) \313ENTRY(ZMM18) \314ENTRY(ZMM19) \315ENTRY(ZMM20) \316ENTRY(ZMM21) \317ENTRY(ZMM22) \318ENTRY(ZMM23) \319ENTRY(ZMM24) \320ENTRY(ZMM25) \321ENTRY(ZMM26) \322ENTRY(ZMM27) \323ENTRY(ZMM28) \324ENTRY(ZMM29) \325ENTRY(ZMM30) \326ENTRY(ZMM31)327328#define REGS_MASKS \329ENTRY(K0) \330ENTRY(K1) \331ENTRY(K2) \332ENTRY(K3) \333ENTRY(K4) \334ENTRY(K5) \335ENTRY(K6) \336ENTRY(K7)337338#define REGS_SEGMENT \339ENTRY(ES) \340ENTRY(CS) \341ENTRY(SS) \342ENTRY(DS) \343ENTRY(FS) \344ENTRY(GS)345346#define REGS_DEBUG \347ENTRY(DR0) \348ENTRY(DR1) \349ENTRY(DR2) \350ENTRY(DR3) \351ENTRY(DR4) \352ENTRY(DR5) \353ENTRY(DR6) \354ENTRY(DR7) \355ENTRY(DR8) \356ENTRY(DR9) \357ENTRY(DR10) \358ENTRY(DR11) \359ENTRY(DR12) \360ENTRY(DR13) \361ENTRY(DR14) \362ENTRY(DR15)363364#define REGS_CONTROL \365ENTRY(CR0) \366ENTRY(CR1) \367ENTRY(CR2) \368ENTRY(CR3) \369ENTRY(CR4) \370ENTRY(CR5) \371ENTRY(CR6) \372ENTRY(CR7) \373ENTRY(CR8) \374ENTRY(CR9) \375ENTRY(CR10) \376ENTRY(CR11) \377ENTRY(CR12) \378ENTRY(CR13) \379ENTRY(CR14) \380ENTRY(CR15)381382#define REGS_BOUND \383ENTRY(BND0) \384ENTRY(BND1) \385ENTRY(BND2) \386ENTRY(BND3)387388#define ALL_EA_BASES \389EA_BASES_16BIT \390EA_BASES_32BIT \391EA_BASES_64BIT392393#define ALL_SIB_BASES \394REGS_32BIT 
\395REGS_64BIT396397#define ALL_REGS \398REGS_8BIT \399REGS_16BIT \400REGS_32BIT \401REGS_64BIT \402REGS_MMX \403REGS_XMM \404REGS_YMM \405REGS_ZMM \406REGS_MASKS \407REGS_SEGMENT \408REGS_DEBUG \409REGS_CONTROL \410REGS_BOUND \411ENTRY(RIP)412413/*414* EABase - All possible values of the base field for effective-address415* computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We416* distinguish between bases (EA_BASE_*) and registers that just happen to be417* referred to when Mod == 0b11 (EA_REG_*).418*/419typedef enum {420EA_BASE_NONE,421#define ENTRY(x) EA_BASE_##x,422ALL_EA_BASES423#undef ENTRY424#define ENTRY(x) EA_REG_##x,425ALL_REGS426#undef ENTRY427EA_max428} EABase;429430/*431* SIBIndex - All possible values of the SIB index field.432* Borrows entries from ALL_EA_BASES with the special case that433* sib is synonymous with NONE.434* Vector SIB: index can be XMM or YMM.435*/436typedef enum {437SIB_INDEX_NONE,438#define ENTRY(x) SIB_INDEX_##x,439ALL_EA_BASES440REGS_XMM441REGS_YMM442REGS_ZMM443#undef ENTRY444SIB_INDEX_max445} SIBIndex;446447/*448* SIBBase - All possible values of the SIB base field.449*/450typedef enum {451SIB_BASE_NONE,452#define ENTRY(x) SIB_BASE_##x,453ALL_SIB_BASES454#undef ENTRY455SIB_BASE_max456} SIBBase;457458/*459* EADisplacement - Possible displacement types for effective-address460* computations.461*/462typedef enum {463EA_DISP_NONE,464EA_DISP_8,465EA_DISP_16,466EA_DISP_32467} EADisplacement;468469/*470* Reg - All possible values of the reg field in the ModR/M byte.471*/472typedef enum {473#define ENTRY(x) MODRM_REG_##x,474ALL_REGS475#undef ENTRY476MODRM_REG_max477} Reg;478479/*480* SegmentOverride - All possible segment overrides.481*/482typedef enum {483SEG_OVERRIDE_NONE,484SEG_OVERRIDE_CS,485SEG_OVERRIDE_SS,486SEG_OVERRIDE_DS,487SEG_OVERRIDE_ES,488SEG_OVERRIDE_FS,489SEG_OVERRIDE_GS,490SEG_OVERRIDE_max491} SegmentOverride;492493/*494* VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field495*/496typedef enum 
{497VEX_LOB_0F = 0x1,498VEX_LOB_0F38 = 0x2,499VEX_LOB_0F3A = 0x3500} VEXLeadingOpcodeByte;501502typedef enum {503XOP_MAP_SELECT_8 = 0x8,504XOP_MAP_SELECT_9 = 0x9,505XOP_MAP_SELECT_A = 0xA506} XOPMapSelect;507508/*509* VEXPrefixCode - Possible values for the VEX.pp/EVEX.pp field510*/511typedef enum {512VEX_PREFIX_NONE = 0x0,513VEX_PREFIX_66 = 0x1,514VEX_PREFIX_F3 = 0x2,515VEX_PREFIX_F2 = 0x3516} VEXPrefixCode;517518typedef enum {519TYPE_NO_VEX_XOP = 0x0,520TYPE_VEX_2B = 0x1,521TYPE_VEX_3B = 0x2,522TYPE_EVEX = 0x3,523TYPE_XOP = 0x4524} VectorExtensionType;525526struct reader_info {527const uint8_t *code;528uint64_t size;529uint64_t offset;530};531532/*533* byteReader_t - Type for the byte reader that the consumer must provide to534* the decoder. Reads a single byte from the instruction's address space.535* @param arg - A baton that the consumer can associate with any internal536* state that it needs.537* @param byte - A pointer to a single byte in memory that should be set to538* contain the value at address.539* @param address - The address in the instruction's address space that should540* be read from.541* @return - -1 if the byte cannot be read for any reason; 0 otherwise.542*/543typedef int (*byteReader_t)(const struct reader_info *arg, uint8_t* byte, uint64_t address);544545/// The specification for how to extract and interpret a full instruction and546/// its operands.547struct InstructionSpecifier {548#ifdef CAPSTONE_X86_REDUCE549uint8_t operands;550#else551uint16_t operands;552#endif553};554555/*556* The x86 internal instruction, which is produced by the decoder.557*/558typedef struct InternalInstruction {559// from here, all members must be initialized to ZERO to work properly560uint8_t operandSize;561uint8_t prefix0, prefix1, prefix2, prefix3;562/* The value of the REX prefix, if present */563uint8_t rexPrefix;564/* The segment override type */565SegmentOverride segmentOverride;566bool consumedModRM;567uint8_t orgModRM; // save original modRM because we 
will modify modRM568/* The SIB byte, used for more complex 32- or 64-bit memory operands */569bool consumedSIB;570uint8_t sib;571/* The displacement, used for memory operands */572bool consumedDisplacement;573int64_t displacement;574/* The value of the two-byte escape prefix (usually 0x0f) */575uint8_t twoByteEscape;576/* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */577uint8_t threeByteEscape;578/* SIB state */579SIBIndex sibIndexBase;580SIBIndex sibIndex;581uint8_t sibScale;582SIBBase sibBase;583584// Embedded rounding control.585uint8_t RC;586587uint8_t numImmediatesConsumed;588/* 0xf2 or 0xf3 is xacquire or xrelease */589uint8_t xAcquireRelease;590591// Address-size override592bool hasAdSize;593// Operand-size override594bool hasOpSize;595// Lock prefix596bool hasLockPrefix;597// The repeat prefix if any598uint8_t repeatPrefix;599600// The possible mandatory prefix601uint8_t mandatoryPrefix;602603/* The value of the vector extension prefix(EVEX/VEX/XOP), if present */604uint8_t vectorExtensionPrefix[4];605606/* Offsets from the start of the instruction to the pieces of data, which is607needed to find relocation entries for adding symbolic operands */608uint8_t displacementOffset;609uint8_t immediateOffset;610uint8_t modRMOffset;611612// end-of-zero-members613614/* Reader interface (C) */615byteReader_t reader;616617/* Opaque value passed to the reader */618const void* readerArg;619/* The address of the next byte to read via the reader */620uint64_t readerCursor;621622/* General instruction information */623624/* The mode to disassemble for (64-bit, protected, real) */625DisassemblerMode mode;626/* The start of the instruction, usable with the reader */627uint64_t startLocation;628/* The length of the instruction, in bytes */629size_t length;630631/* Prefix state */632633/* The type of the vector extension prefix */634VectorExtensionType vectorExtensionType;635636/* Sizes of various critical pieces of data, in bytes */637uint8_t 
registerSize;638uint8_t addressSize;639uint8_t displacementSize;640uint8_t immediateSize;641642uint8_t immSize; // immediate size for X86_OP_IMM operand643644/* opcode state */645646/* The last byte of the opcode, not counting any ModR/M extension */647uint8_t opcode;648649/* decode state */650651/* The type of opcode, used for indexing into the array of decode tables */652OpcodeType opcodeType;653/* The instruction ID, extracted from the decode table */654uint16_t instructionID;655/* The specifier for the instruction, from the instruction info table */656const struct InstructionSpecifier *spec;657658/* state for additional bytes, consumed during operand decode. Pattern:659consumed___ indicates that the byte was already consumed and does not660need to be consumed again */661662/* The VEX.vvvv field, which contains a third register operand for some AVX663instructions */664Reg vvvv;665666/* The writemask for AVX-512 instructions which is contained in EVEX.aaa */667Reg writemask;668669/* The ModR/M byte, which contains most register operands and some portion of670all memory operands */671uint8_t modRM;672673// special data to handle MOVcr, MOVdr, MOVrc, MOVrd674uint8_t firstByte; // save the first byte in stream675676/* Immediates. There can be two in some cases */677uint8_t numImmediatesTranslated;678uint64_t immediates[2];679680/* A register or immediate operand encoded into the opcode */681Reg opcodeRegister;682683/* Portions of the ModR/M byte */684685/* These fields determine the allowable values for the ModR/M fields, which686depend on operand and address widths */687EABase eaRegBase;688Reg regBase;689690/* The Mod and R/M fields can encode a base for an effective address, or a691register. 
These are separated into two fields here */692EABase eaBase;693EADisplacement eaDisplacement;694/* The reg field always encodes a register */695Reg reg;696697const struct OperandSpecifier *operands;698} InternalInstruction;699700/* decodeInstruction - Decode one instruction and store the decoding results in701* a buffer provided by the consumer.702* @param insn - The buffer to store the instruction in. Allocated by the703* consumer.704* @param reader - The byteReader_t for the bytes to be read.705* @param readerArg - An argument to pass to the reader for storing context706* specific to the consumer. May be NULL.707* @param logger - The dlog_t to be used in printing status messages from the708* disassembler. May be NULL.709* @param loggerArg - An argument to pass to the logger for storing context710* specific to the logger. May be NULL.711* @param startLoc - The address (in the reader's address space) of the first712* byte in the instruction.713* @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.714* @return - Nonzero if there was an error during decode, 0 otherwise.715*/716int decodeInstruction(struct InternalInstruction* insn,717byteReader_t reader,718const void* readerArg,719uint64_t startLoc,720DisassemblerMode mode);721722//const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);723724#endif725726727