Path: blob/master/libs/capstone/arch/X86/X86DisassemblerDecoder.c
4389 views
/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*1*2* The LLVM Compiler Infrastructure3*4* This file is distributed under the University of Illinois Open Source5* License. See LICENSE.TXT for details.6*7*===----------------------------------------------------------------------===*8*9* This file is part of the X86 Disassembler.10* It contains the implementation of the instruction decoder.11* Documentation for the disassembler can be found in X86Disassembler.h.12*13*===----------------------------------------------------------------------===*/1415/* Capstone Disassembly Engine */16/* By Nguyen Anh Quynh <[email protected]>, 2013-2019 */1718#ifdef CAPSTONE_HAS_X861920#include <stdarg.h> /* for va_*() */21#if defined(CAPSTONE_HAS_OSXKERNEL)22#include <libkern/libkern.h>23#else24#include <stdlib.h> /* for exit() */25#endif2627#include <string.h>2829#include "../../cs_priv.h"30#include "../../utils.h"3132#include "X86DisassemblerDecoder.h"33#include "X86Mapping.h"3435/// Specifies whether a ModR/M byte is needed and (if so) which36/// instruction each possible value of the ModR/M byte corresponds to. Once37/// this information is known, we have narrowed down to a single instruction.38struct ModRMDecision {39uint8_t modrm_type;40uint16_t instructionIDs;41};4243/// Specifies which set of ModR/M->instruction tables to look at44/// given a particular opcode.45struct OpcodeDecision {46struct ModRMDecision modRMDecisions[256];47};4849/// Specifies which opcode->instruction tables to look at given50/// a particular context (set of attributes). Since there are many possible51/// contexts, the decoder first uses CONTEXTS_SYM to determine which context52/// applies given a specific set of attributes. Hence there are only IC_max53/// entries in this table, rather than 2^(ATTR_max).54struct ContextDecision {55struct OpcodeDecision opcodeDecisions[IC_max];56};5758#ifdef CAPSTONE_X86_REDUCE59#include "X86GenDisassemblerTables_reduce.inc"60#include "X86GenDisassemblerTables_reduce2.inc"61#include "X86Lookup16_reduce.inc"62#else63#include "X86GenDisassemblerTables.inc"64#include "X86GenDisassemblerTables2.inc"65#include "X86Lookup16.inc"66#endif6768/*69* contextForAttrs - Client for the instruction context table. Takes a set of70* attributes and returns the appropriate decode context.71*72* @param attrMask - Attributes, from the enumeration attributeBits.73* @return - The InstructionContext to use when looking up an74* an instruction with these attributes.75*/76static InstructionContext contextForAttrs(uint16_t attrMask)77{78return CONTEXTS_SYM[attrMask];79}8081/*82* modRMRequired - Reads the appropriate instruction table to determine whether83* the ModR/M byte is required to decode a particular instruction.84*85* @param type - The opcode type (i.e., how many bytes it has).86* @param insnContext - The context for the instruction, as returned by87* contextForAttrs.88* @param opcode - The last byte of the instruction's opcode, not counting89* ModR/M extensions and escapes.90* @return - true if the ModR/M byte is required, false otherwise.91*/92static int modRMRequired(OpcodeType type,93InstructionContext insnContext,94uint16_t opcode)95{96const struct OpcodeDecision *decision = NULL;97const uint8_t *indextable = NULL;98unsigned int index;99100switch (type) {101default: break;102case ONEBYTE:103decision = ONEBYTE_SYM;104indextable = index_x86DisassemblerOneByteOpcodes;105break;106case TWOBYTE:107decision = TWOBYTE_SYM;108indextable = index_x86DisassemblerTwoByteOpcodes;109break;110case THREEBYTE_38:111decision = THREEBYTE38_SYM;112indextable = index_x86DisassemblerThreeByte38Opcodes;113break;114case THREEBYTE_3A:115decision = THREEBYTE3A_SYM;116indextable = index_x86DisassemblerThreeByte3AOpcodes;117break;118#ifndef CAPSTONE_X86_REDUCE119case XOP8_MAP:120decision = XOP8_MAP_SYM;121indextable = index_x86DisassemblerXOP8Opcodes;122break;123case XOP9_MAP:124decision = XOP9_MAP_SYM;125indextable = index_x86DisassemblerXOP9Opcodes;126break;127case XOPA_MAP:128decision = XOPA_MAP_SYM;129indextable = index_x86DisassemblerXOPAOpcodes;130break;131case THREEDNOW_MAP:132// 3DNow instructions always have ModRM byte133return true;134#endif135}136137// return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].modrm_type != MODRM_ONEENTRY;138index = indextable[insnContext];139if (index)140return decision[index - 1].modRMDecisions[opcode].modrm_type != MODRM_ONEENTRY;141else142return false;143}144145/*146* decode - Reads the appropriate instruction table to obtain the unique ID of147* an instruction.148*149* @param type - See modRMRequired().150* @param insnContext - See modRMRequired().151* @param opcode - See modRMRequired().152* @param modRM - The ModR/M byte if required, or any value if not.153* @return - The UID of the instruction, or 0 on failure.154*/155static InstrUID decode(OpcodeType type,156InstructionContext insnContext,157uint8_t opcode,158uint8_t modRM)159{160const struct ModRMDecision *dec = NULL;161unsigned int index;162static const struct OpcodeDecision emptyDecision = { 0 };163164switch (type) {165default: break; // never reach166case ONEBYTE:167// dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];168index = index_x86DisassemblerOneByteOpcodes[insnContext];169if (index)170dec = &ONEBYTE_SYM[index - 1].modRMDecisions[opcode];171else172dec = &emptyDecision.modRMDecisions[opcode];173break;174case TWOBYTE:175//dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];176index = index_x86DisassemblerTwoByteOpcodes[insnContext];177if (index)178dec = &TWOBYTE_SYM[index - 1].modRMDecisions[opcode];179else180dec = &emptyDecision.modRMDecisions[opcode];181break;182case THREEBYTE_38:183// dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];184index = index_x86DisassemblerThreeByte38Opcodes[insnContext];185if (index)186dec = &THREEBYTE38_SYM[index - 1].modRMDecisions[opcode];187else188dec = &emptyDecision.modRMDecisions[opcode];189break;190case THREEBYTE_3A:191//dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];192index = index_x86DisassemblerThreeByte3AOpcodes[insnContext];193if (index)194dec = &THREEBYTE3A_SYM[index - 1].modRMDecisions[opcode];195else196dec = &emptyDecision.modRMDecisions[opcode];197break;198#ifndef CAPSTONE_X86_REDUCE199case XOP8_MAP:200// dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];201index = index_x86DisassemblerXOP8Opcodes[insnContext];202if (index)203dec = &XOP8_MAP_SYM[index - 1].modRMDecisions[opcode];204else205dec = &emptyDecision.modRMDecisions[opcode];206break;207case XOP9_MAP:208// dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];209index = index_x86DisassemblerXOP9Opcodes[insnContext];210if (index)211dec = &XOP9_MAP_SYM[index - 1].modRMDecisions[opcode];212else213dec = &emptyDecision.modRMDecisions[opcode];214break;215case XOPA_MAP:216// dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];217index = index_x86DisassemblerXOPAOpcodes[insnContext];218if (index)219dec = &XOPA_MAP_SYM[index - 1].modRMDecisions[opcode];220else221dec = &emptyDecision.modRMDecisions[opcode];222break;223case THREEDNOW_MAP:224// dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];225index = index_x86Disassembler3DNowOpcodes[insnContext];226if (index)227dec = &THREEDNOW_MAP_SYM[index - 1].modRMDecisions[opcode];228else229dec = &emptyDecision.modRMDecisions[opcode];230break;231#endif232}233234switch (dec->modrm_type) {235default:236// debug("Corrupt table! Unknown modrm_type");237return 0;238case MODRM_ONEENTRY:239return modRMTable[dec->instructionIDs];240case MODRM_SPLITRM:241if (modFromModRM(modRM) == 0x3)242return modRMTable[dec->instructionIDs + 1];243return modRMTable[dec->instructionIDs];244case MODRM_SPLITREG:245if (modFromModRM(modRM) == 0x3)246return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3) + 8];247return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];248case MODRM_SPLITMISC:249if (modFromModRM(modRM) == 0x3)250return modRMTable[dec->instructionIDs+(modRM & 0x3f) + 8];251return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];252case MODRM_FULL:253return modRMTable[dec->instructionIDs+modRM];254}255}256257/*258* specifierForUID - Given a UID, returns the name and operand specification for259* that instruction.260*261* @param uid - The unique ID for the instruction. This should be returned by262* decode(); specifierForUID will not check bounds.263* @return - A pointer to the specification for that instruction.264*/265static const struct InstructionSpecifier *specifierForUID(InstrUID uid)266{267return &INSTRUCTIONS_SYM[uid];268}269270/*271* consumeByte - Uses the reader function provided by the user to consume one272* byte from the instruction's memory and advance the cursor.273*274* @param insn - The instruction with the reader function to use. The cursor275* for this instruction is advanced.276* @param byte - A pointer to a pre-allocated memory buffer to be populated277* with the data read.278* @return - 0 if the read was successful; nonzero otherwise.279*/280static int consumeByte(struct InternalInstruction* insn, uint8_t* byte)281{282int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);283284if (!ret)285++(insn->readerCursor);286287return ret;288}289290/*291* lookAtByte - Like consumeByte, but does not advance the cursor.292*293* @param insn - See consumeByte().294* @param byte - See consumeByte().295* @return - See consumeByte().296*/297static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte)298{299return insn->reader(insn->readerArg, byte, insn->readerCursor);300}301302static void unconsumeByte(struct InternalInstruction* insn)303{304insn->readerCursor--;305}306307#define CONSUME_FUNC(name, type) \308static int name(struct InternalInstruction* insn, type* ptr) { \309type combined = 0; \310unsigned offset; \311for (offset = 0; offset < sizeof(type); ++offset) { \312uint8_t byte; \313int ret = insn->reader(insn->readerArg, \314&byte, \315insn->readerCursor + offset); \316if (ret) \317return ret; \318combined = combined | ((uint64_t)byte << (offset * 8)); \319} \320*ptr = combined; \321insn->readerCursor += sizeof(type); \322return 0; \323}324325/*326* consume* - Use the reader function provided by the user to consume data327* values of various sizes from the instruction's memory and advance the328* cursor appropriately. These readers perform endian conversion.329*330* @param insn - See consumeByte().331* @param ptr - A pointer to a pre-allocated memory of appropriate size to332* be populated with the data read.333* @return - See consumeByte().334*/335CONSUME_FUNC(consumeInt8, int8_t)336CONSUME_FUNC(consumeInt16, int16_t)337CONSUME_FUNC(consumeInt32, int32_t)338CONSUME_FUNC(consumeUInt16, uint16_t)339CONSUME_FUNC(consumeUInt32, uint32_t)340CONSUME_FUNC(consumeUInt64, uint64_t)341342static bool isREX(struct InternalInstruction *insn, uint8_t prefix)343{344if (insn->mode == MODE_64BIT)345return prefix >= 0x40 && prefix <= 0x4f;346347return false;348}349350/*351* setPrefixPresent - Marks that a particular prefix is present as mandatory352*353* @param insn - The instruction to be marked as having the prefix.354* @param prefix - The prefix that is present.355*/356static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix)357{358uint8_t nextByte;359360switch (prefix) {361case 0xf0: // LOCK362insn->hasLockPrefix = true;363insn->repeatPrefix = 0;364break;365366case 0xf2: // REPNE/REPNZ367case 0xf3: // REP or REPE/REPZ368if (lookAtByte(insn, &nextByte))369break;370// TODO:371// 1. There could be several 0x66372// 2. if (nextByte == 0x66) and nextNextByte != 0x0f then373// it's not mandatory prefix374// 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need375// 0x0f exactly after it to be mandatory prefix376if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)377// The last of 0xf2 /0xf3 is mandatory prefix378insn->mandatoryPrefix = prefix;379380insn->repeatPrefix = prefix;381insn->hasLockPrefix = false;382break;383384case 0x66:385if (lookAtByte(insn, &nextByte))386break;387// 0x66 can't overwrite existing mandatory prefix and should be ignored388if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))389insn->mandatoryPrefix = prefix;390break;391}392}393394/*395* readPrefixes - Consumes all of an instruction's prefix bytes, and marks the396* instruction as having them. Also sets the instruction's default operand,397* address, and other relevant data sizes to report operands correctly.398*399* @param insn - The instruction whose prefixes are to be read.400* @return - 0 if the instruction could be read until the end of the prefix401* bytes, and no prefixes conflicted; nonzero otherwise.402*/403static int readPrefixes(struct InternalInstruction* insn)404{405bool isPrefix = true;406uint8_t byte = 0;407uint8_t nextByte;408409while (isPrefix) {410if (insn->mode == MODE_64BIT) {411// eliminate consecutive redundant REX bytes in front412if (consumeByte(insn, &byte))413return -1;414415if ((byte & 0xf0) == 0x40) {416while(true) {417if (lookAtByte(insn, &byte)) // out of input code418return -1;419if ((byte & 0xf0) == 0x40) {420// another REX prefix, but we only remember the last one421if (consumeByte(insn, &byte))422return -1;423} else424break;425}426427// recover the last REX byte if next byte is not a legacy prefix428switch (byte) {429case 0xf2: /* REPNE/REPNZ */430case 0xf3: /* REP or REPE/REPZ */431case 0xf0: /* LOCK */432case 0x2e: /* CS segment override -OR- Branch not taken */433case 0x36: /* SS segment override -OR- Branch taken */434case 0x3e: /* DS segment override */435case 0x26: /* ES segment override */436case 0x64: /* FS segment override */437case 0x65: /* GS segment override */438case 0x66: /* Operand-size override */439case 0x67: /* Address-size override */440break;441default: /* Not a prefix byte */442unconsumeByte(insn);443break;444}445} else {446unconsumeByte(insn);447}448}449450/* If we fail reading prefixes, just stop here and let the opcode reader deal with it */451if (consumeByte(insn, &byte))452return -1;453454if (insn->readerCursor - 1 == insn->startLocation455&& (byte == 0xf2 || byte == 0xf3)) {456// prefix requires next byte457if (lookAtByte(insn, &nextByte))458return -1;459460/*461* If the byte is 0xf2 or 0xf3, and any of the following conditions are462* met:463* - it is followed by a LOCK (0xf0) prefix464* - it is followed by an xchg instruction465* then it should be disassembled as a xacquire/xrelease not repne/rep.466*/467if (((nextByte == 0xf0) ||468((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {469insn->xAcquireRelease = byte;470}471472/*473* Also if the byte is 0xf3, and the following condition is met:474* - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or475* "mov mem, imm" (opcode 0xc6/0xc7) instructions.476* then it should be disassembled as an xrelease not rep.477*/478if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||479nextByte == 0xc6 || nextByte == 0xc7)) {480insn->xAcquireRelease = byte;481}482483if (isREX(insn, nextByte)) {484uint8_t nnextByte;485486// Go to REX prefix after the current one487if (consumeByte(insn, &nnextByte))488return -1;489490// We should be able to read next byte after REX prefix491if (lookAtByte(insn, &nnextByte))492return -1;493494unconsumeByte(insn);495}496}497498switch (byte) {499case 0xf0: /* LOCK */500case 0xf2: /* REPNE/REPNZ */501case 0xf3: /* REP or REPE/REPZ */502// only accept the last prefix503setPrefixPresent(insn, byte);504insn->prefix0 = byte;505break;506507case 0x2e: /* CS segment override -OR- Branch not taken */508case 0x36: /* SS segment override -OR- Branch taken */509case 0x3e: /* DS segment override */510case 0x26: /* ES segment override */511case 0x64: /* FS segment override */512case 0x65: /* GS segment override */513switch (byte) {514case 0x2e:515insn->segmentOverride = SEG_OVERRIDE_CS;516insn->prefix1 = byte;517break;518case 0x36:519insn->segmentOverride = SEG_OVERRIDE_SS;520insn->prefix1 = byte;521break;522case 0x3e:523insn->segmentOverride = SEG_OVERRIDE_DS;524insn->prefix1 = byte;525break;526case 0x26:527insn->segmentOverride = SEG_OVERRIDE_ES;528insn->prefix1 = byte;529break;530case 0x64:531insn->segmentOverride = SEG_OVERRIDE_FS;532insn->prefix1 = byte;533break;534case 0x65:535insn->segmentOverride = SEG_OVERRIDE_GS;536insn->prefix1 = byte;537break;538default:539// debug("Unhandled override");540return -1;541}542setPrefixPresent(insn, byte);543break;544545case 0x66: /* Operand-size override */546insn->hasOpSize = true;547setPrefixPresent(insn, byte);548insn->prefix2 = byte;549break;550551case 0x67: /* Address-size override */552insn->hasAdSize = true;553setPrefixPresent(insn, byte);554insn->prefix3 = byte;555break;556default: /* Not a prefix byte */557isPrefix = false;558break;559}560}561562insn->vectorExtensionType = TYPE_NO_VEX_XOP;563564if (byte == 0x62) {565uint8_t byte1, byte2;566567if (consumeByte(insn, &byte1)) {568// dbgprintf(insn, "Couldn't read second byte of EVEX prefix");569return -1;570}571572if (lookAtByte(insn, &byte2)) {573// dbgprintf(insn, "Couldn't read third byte of EVEX prefix");574unconsumeByte(insn); /* unconsume byte1 */575unconsumeByte(insn); /* unconsume byte */576} else {577if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&578((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {579insn->vectorExtensionType = TYPE_EVEX;580} else {581unconsumeByte(insn); /* unconsume byte1 */582unconsumeByte(insn); /* unconsume byte */583}584}585586if (insn->vectorExtensionType == TYPE_EVEX) {587insn->vectorExtensionPrefix[0] = byte;588insn->vectorExtensionPrefix[1] = byte1;589if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {590// dbgprintf(insn, "Couldn't read third byte of EVEX prefix");591return -1;592}593594if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {595// dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");596return -1;597}598599/* We simulate the REX prefix for simplicity's sake */600if (insn->mode == MODE_64BIT) {601insn->rexPrefix = 0x40602| (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)603| (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)604| (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)605| (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);606}607608// dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",609// insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],610// insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);611}612} else if (byte == 0xc4) {613uint8_t byte1;614615if (lookAtByte(insn, &byte1)) {616// dbgprintf(insn, "Couldn't read second byte of VEX");617return -1;618}619620if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)621insn->vectorExtensionType = TYPE_VEX_3B;622else623unconsumeByte(insn);624625if (insn->vectorExtensionType == TYPE_VEX_3B) {626insn->vectorExtensionPrefix[0] = byte;627consumeByte(insn, &insn->vectorExtensionPrefix[1]);628consumeByte(insn, &insn->vectorExtensionPrefix[2]);629630/* We simulate the REX prefix for simplicity's sake */631if (insn->mode == MODE_64BIT)632insn->rexPrefix = 0x40633| (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)634| (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)635| (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)636| (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);637638// dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",639// insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],640// insn->vectorExtensionPrefix[2]);641}642} else if (byte == 0xc5) {643uint8_t byte1;644645if (lookAtByte(insn, &byte1)) {646// dbgprintf(insn, "Couldn't read second byte of VEX");647return -1;648}649650if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)651insn->vectorExtensionType = TYPE_VEX_2B;652else653unconsumeByte(insn);654655if (insn->vectorExtensionType == TYPE_VEX_2B) {656insn->vectorExtensionPrefix[0] = byte;657consumeByte(insn, &insn->vectorExtensionPrefix[1]);658659if (insn->mode == MODE_64BIT)660insn->rexPrefix = 0x40661| (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);662663switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {664default:665break;666case VEX_PREFIX_66:667insn->hasOpSize = true;668break;669}670671// dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",672// insn->vectorExtensionPrefix[0],673// insn->vectorExtensionPrefix[1]);674}675} else if (byte == 0x8f) {676uint8_t byte1;677678if (lookAtByte(insn, &byte1)) {679// dbgprintf(insn, "Couldn't read second byte of XOP");680return -1;681}682683if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */684insn->vectorExtensionType = TYPE_XOP;685else686unconsumeByte(insn);687688if (insn->vectorExtensionType == TYPE_XOP) {689insn->vectorExtensionPrefix[0] = byte;690consumeByte(insn, &insn->vectorExtensionPrefix[1]);691consumeByte(insn, &insn->vectorExtensionPrefix[2]);692693/* We simulate the REX prefix for simplicity's sake */694if (insn->mode == MODE_64BIT)695insn->rexPrefix = 0x40696| (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)697| (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)698| (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)699| (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);700701switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {702default:703break;704case VEX_PREFIX_66:705insn->hasOpSize = true;706break;707}708709// dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",710// insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],711// insn->vectorExtensionPrefix[2]);712}713} else if (isREX(insn, byte)) {714if (lookAtByte(insn, &nextByte))715return -1;716717insn->rexPrefix = byte;718// dbgprintf(insn, "Found REX prefix 0x%hhx", byte);719} else720unconsumeByte(insn);721722if (insn->mode == MODE_16BIT) {723insn->registerSize = (insn->hasOpSize ? 4 : 2);724insn->addressSize = (insn->hasAdSize ? 4 : 2);725insn->displacementSize = (insn->hasAdSize ? 4 : 2);726insn->immediateSize = (insn->hasOpSize ? 4 : 2);727insn->immSize = (insn->hasOpSize ? 4 : 2);728} else if (insn->mode == MODE_32BIT) {729insn->registerSize = (insn->hasOpSize ? 2 : 4);730insn->addressSize = (insn->hasAdSize ? 2 : 4);731insn->displacementSize = (insn->hasAdSize ? 2 : 4);732insn->immediateSize = (insn->hasOpSize ? 2 : 4);733insn->immSize = (insn->hasOpSize ? 2 : 4);734} else if (insn->mode == MODE_64BIT) {735if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {736insn->registerSize = 8;737insn->addressSize = (insn->hasAdSize ? 4 : 8);738insn->displacementSize = 4;739insn->immediateSize = 4;740insn->immSize = 4;741} else {742insn->registerSize = (insn->hasOpSize ? 2 : 4);743insn->addressSize = (insn->hasAdSize ? 4 : 8);744insn->displacementSize = (insn->hasOpSize ? 2 : 4);745insn->immediateSize = (insn->hasOpSize ? 2 : 4);746insn->immSize = (insn->hasOpSize ? 4 : 8);747}748}749750return 0;751}752753static int readModRM(struct InternalInstruction* insn);754755/*756* readOpcode - Reads the opcode (excepting the ModR/M byte in the case of757* extended or escape opcodes).758*759* @param insn - The instruction whose opcode is to be read.760* @return - 0 if the opcode could be read successfully; nonzero otherwise.761*/762static int readOpcode(struct InternalInstruction* insn)763{764uint8_t current;765766// dbgprintf(insn, "readOpcode()");767768insn->opcodeType = ONEBYTE;769770if (insn->vectorExtensionType == TYPE_EVEX) {771switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {772default:773// dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",774// mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));775return -1;776case VEX_LOB_0F:777insn->opcodeType = TWOBYTE;778return consumeByte(insn, &insn->opcode);779case VEX_LOB_0F38:780insn->opcodeType = THREEBYTE_38;781return consumeByte(insn, &insn->opcode);782case VEX_LOB_0F3A:783insn->opcodeType = THREEBYTE_3A;784return consumeByte(insn, &insn->opcode);785}786} else if (insn->vectorExtensionType == TYPE_VEX_3B) {787switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {788default:789// dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",790// mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));791return -1;792case VEX_LOB_0F:793//insn->twoByteEscape = 0x0f;794insn->opcodeType = TWOBYTE;795return consumeByte(insn, &insn->opcode);796case VEX_LOB_0F38:797//insn->twoByteEscape = 0x0f;798insn->opcodeType = THREEBYTE_38;799return consumeByte(insn, &insn->opcode);800case VEX_LOB_0F3A:801//insn->twoByteEscape = 0x0f;802insn->opcodeType = THREEBYTE_3A;803return consumeByte(insn, &insn->opcode);804}805} else if (insn->vectorExtensionType == TYPE_VEX_2B) {806//insn->twoByteEscape = 0x0f;807insn->opcodeType = TWOBYTE;808return consumeByte(insn, &insn->opcode);809} else if (insn->vectorExtensionType == TYPE_XOP) {810switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {811default:812// dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",813// mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));814return -1;815case XOP_MAP_SELECT_8:816insn->opcodeType = XOP8_MAP;817return consumeByte(insn, &insn->opcode);818case XOP_MAP_SELECT_9:819insn->opcodeType = XOP9_MAP;820return consumeByte(insn, &insn->opcode);821case XOP_MAP_SELECT_A:822insn->opcodeType = XOPA_MAP;823return consumeByte(insn, &insn->opcode);824}825}826827if (consumeByte(insn, ¤t))828return -1;829830// save this first byte for MOVcr, MOVdr, MOVrc, MOVrd831insn->firstByte = current;832833if (current == 0x0f) {834// dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);835insn->twoByteEscape = current;836837if (consumeByte(insn, ¤t))838return -1;839840if (current == 0x38) {841// dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);842if (consumeByte(insn, ¤t))843return -1;844845insn->opcodeType = THREEBYTE_38;846} else if (current == 0x3a) {847// dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);848if (consumeByte(insn, ¤t))849return -1;850851insn->opcodeType = THREEBYTE_3A;852} else if (current == 0x0f) {853// dbgprintf(insn, "Found a 3dnow escape prefix (0x%hhx)", current);854// Consume operands before the opcode to comply with the 3DNow encoding855if (readModRM(insn))856return -1;857858if (consumeByte(insn, ¤t))859return -1;860861insn->opcodeType = THREEDNOW_MAP;862} else {863// dbgprintf(insn, "Didn't find a three-byte escape prefix");864insn->opcodeType = TWOBYTE;865}866} else if (insn->mandatoryPrefix)867// The opcode with mandatory prefix must start with opcode escape.868// If not it's legacy repeat prefix869insn->mandatoryPrefix = 0;870871/*872* At this point we have consumed the full opcode.873* Anything we consume from here on must be unconsumed.874*/875876insn->opcode = current;877878return 0;879}880881// Hacky for FEMMS882#define GET_INSTRINFO_ENUM883#ifndef CAPSTONE_X86_REDUCE884#include "X86GenInstrInfo.inc"885#else886#include "X86GenInstrInfo_reduce.inc"887#endif888889/*890* getIDWithAttrMask - Determines the ID of an instruction, consuming891* the ModR/M byte as appropriate for extended and escape opcodes,892* and using a supplied attribute mask.893*894* @param instructionID - A pointer whose target is filled in with the ID of the895* instruction.896* @param insn - The instruction whose ID is to be determined.897* @param attrMask - The attribute mask to search.898* @return - 0 if the ModR/M could be read when needed or was not899* needed; nonzero otherwise.900*/901static int getIDWithAttrMask(uint16_t *instructionID,902struct InternalInstruction* insn,903uint16_t attrMask)904{905bool hasModRMExtension;906907InstructionContext instructionClass = contextForAttrs(attrMask);908909hasModRMExtension = modRMRequired(insn->opcodeType,910instructionClass,911insn->opcode);912913if (hasModRMExtension) {914if (readModRM(insn))915return -1;916917*instructionID = decode(insn->opcodeType,918instructionClass,919insn->opcode,920insn->modRM);921} else {922*instructionID = decode(insn->opcodeType,923instructionClass,924insn->opcode,9250);926}927928return 0;929}930931/*932* is16BitEquivalent - Determines whether two instruction names refer to933* equivalent instructions but one is 16-bit whereas the other is not.934*935* @param orig - The instruction ID that is not 16-bit936* @param equiv - The instruction ID that is 16-bit937*/938static bool is16BitEquivalent(unsigned orig, unsigned equiv)939{940size_t i;941uint16_t idx;942943if ((idx = x86_16_bit_eq_lookup[orig]) != 0) {944for (i = idx - 1; i < ARR_SIZE(x86_16_bit_eq_tbl) && x86_16_bit_eq_tbl[i].first == orig; i++) {945if (x86_16_bit_eq_tbl[i].second == equiv)946return true;947}948}949950return false;951}952953/*954* is64Bit - Determines whether this instruction is a 64-bit instruction.955*956* @param name - The instruction that is not 16-bit957*/958static bool is64Bit(uint16_t id)959{960unsigned int i = find_insn(id);961if (i != -1) {962return insns[i].is64bit;963}964965// not found??966return false;967}968969/*970* getID - Determines the ID of an instruction, consuming the ModR/M byte as971* appropriate for extended and escape opcodes. Determines the attributes and972* context for the instruction before doing so.973*974* @param insn - The instruction whose ID is to be determined.975* @return - 0 if the ModR/M could be read when needed or was not needed;976* nonzero otherwise.977*/978static int getID(struct InternalInstruction *insn)979{980uint16_t attrMask;981uint16_t instructionID;982983attrMask = ATTR_NONE;984985if (insn->mode == MODE_64BIT)986attrMask |= ATTR_64BIT;987988if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {989attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;990991if (insn->vectorExtensionType == TYPE_EVEX) {992switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {993case VEX_PREFIX_66:994attrMask |= ATTR_OPSIZE;995break;996case VEX_PREFIX_F3:997attrMask |= ATTR_XS;998break;999case VEX_PREFIX_F2:1000attrMask |= ATTR_XD;1001break;1002}10031004if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))1005attrMask |= ATTR_EVEXKZ;1006if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))1007attrMask |= ATTR_EVEXB;1008if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))1009attrMask |= ATTR_EVEXK;1010if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))1011attrMask |= ATTR_EVEXL;1012if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))1013attrMask |= ATTR_EVEXL2;1014} else if (insn->vectorExtensionType == TYPE_VEX_3B) {1015switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {1016case VEX_PREFIX_66:1017attrMask |= ATTR_OPSIZE;1018break;1019case VEX_PREFIX_F3:1020attrMask |= ATTR_XS;1021break;1022case VEX_PREFIX_F2:1023attrMask |= ATTR_XD;1024break;1025}10261027if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))1028attrMask |= ATTR_VEXL;1029} else if (insn->vectorExtensionType == TYPE_VEX_2B) {1030switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {1031case VEX_PREFIX_66:1032attrMask |= ATTR_OPSIZE;1033break;1034case VEX_PREFIX_F3:1035attrMask |= ATTR_XS;1036break;1037case VEX_PREFIX_F2:1038attrMask |= ATTR_XD;1039break;1040}10411042if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))1043attrMask |= ATTR_VEXL;1044} else if (insn->vectorExtensionType == TYPE_XOP) {1045switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {1046case VEX_PREFIX_66:1047attrMask |= ATTR_OPSIZE;1048break;1049case VEX_PREFIX_F3:1050attrMask |= ATTR_XS;1051break;1052case VEX_PREFIX_F2:1053attrMask |= ATTR_XD;1054break;1055}10561057if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))1058attrMask |= ATTR_VEXL;1059} else {1060return -1;1061}1062} else if (!insn->mandatoryPrefix) {1063// If we don't have mandatory prefix we should use legacy prefixes here1064if (insn->hasOpSize && (insn->mode != MODE_16BIT))1065attrMask |= ATTR_OPSIZE;1066if (insn->hasAdSize)1067attrMask |= ATTR_ADSIZE;1068if (insn->opcodeType == ONEBYTE) {1069if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))1070// Special support for PAUSE1071attrMask |= ATTR_XS;1072} else {1073if (insn->repeatPrefix == 0xf2)1074attrMask |= ATTR_XD;1075else if (insn->repeatPrefix == 0xf3)1076attrMask |= ATTR_XS;1077}1078} else {1079switch (insn->mandatoryPrefix) {1080case 0xf2:1081attrMask |= ATTR_XD;1082break;1083case 0xf3:1084attrMask |= ATTR_XS;1085break;1086case 0x66:1087if (insn->mode != MODE_16BIT)1088attrMask |= ATTR_OPSIZE;1089break;1090case 0x67:1091attrMask |= ATTR_ADSIZE;1092break;1093}10941095}10961097if (insn->rexPrefix & 0x08) {1098attrMask |= ATTR_REXW;1099attrMask &= ~ATTR_ADSIZE;1100}11011102/*1103* JCXZ/JECXZ need special handling for 16-bit mode because the meaning1104* of the AdSize prefix is inverted w.r.t. 32-bit mode.1105*/1106if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&1107insn->opcode == 0xE3)1108attrMask ^= ATTR_ADSIZE;11091110/*1111* In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix1112* CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes1113*/1114if ((insn->mode == MODE_64BIT) && insn->hasOpSize) {1115switch (insn->opcode) {1116case 0xE8:1117case 0xE9:1118// Take care of psubsb and other mmx instructions.1119if (insn->opcodeType == ONEBYTE) {1120attrMask ^= ATTR_OPSIZE;1121insn->immediateSize = 4;1122insn->displacementSize = 4;1123}1124break;1125case 0x82:1126case 0x83:1127case 0x84:1128case 0x85:1129case 0x86:1130case 0x87:1131case 0x88:1132case 0x89:1133case 0x8A:1134case 0x8B:1135case 0x8C:1136case 0x8D:1137case 0x8E:1138case 0x8F:1139// Take care of lea and three byte ops.1140if (insn->opcodeType == TWOBYTE) {1141attrMask ^= ATTR_OPSIZE;1142insn->immediateSize = 4;1143insn->displacementSize = 4;1144}1145break;1146}1147}11481149/* The following clauses compensate for limitations of the tables. */1150if (insn->mode != MODE_64BIT &&1151insn->vectorExtensionType != TYPE_NO_VEX_XOP) {1152if (getIDWithAttrMask(&instructionID, insn, attrMask)) {1153return -1;1154}11551156/*1157* The tables can't distinquish between cases where the W-bit is used to1158* select register size and cases where its a required part of the opcode.1159*/1160if ((insn->vectorExtensionType == TYPE_EVEX &&1161wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||1162(insn->vectorExtensionType == TYPE_VEX_3B &&1163wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||1164(insn->vectorExtensionType == TYPE_XOP &&1165wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {1166uint16_t instructionIDWithREXW;11671168if (getIDWithAttrMask(&instructionIDWithREXW,1169insn, attrMask | ATTR_REXW)) {1170insn->instructionID = instructionID;1171insn->spec = specifierForUID(instructionID);1172return 0;1173}11741175// If not a 64-bit instruction. Switch the opcode.1176if (!is64Bit(instructionIDWithREXW)) {1177insn->instructionID = instructionIDWithREXW;1178insn->spec = specifierForUID(instructionIDWithREXW);11791180return 0;1181}1182}1183}11841185/*1186* Absolute moves, umonitor, and movdir64b need special handling.1187* -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are1188* inverted w.r.t.1189* -For 32-bit mode we need to ensure the ADSIZE prefix is observed in1190* any position.1191*/1192if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||1193(insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||1194(insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {1195/* Make sure we observed the prefixes in any position. */1196if (insn->hasAdSize)1197attrMask |= ATTR_ADSIZE;11981199if (insn->hasOpSize)1200attrMask |= ATTR_OPSIZE;12011202/* In 16-bit, invert the attributes. */1203if (insn->mode == MODE_16BIT) {1204attrMask ^= ATTR_ADSIZE;12051206/* The OpSize attribute is only valid with the absolute moves. */1207if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))1208attrMask ^= ATTR_OPSIZE;1209}12101211if (getIDWithAttrMask(&instructionID, insn, attrMask)) {1212return -1;1213}12141215insn->instructionID = instructionID;1216insn->spec = specifierForUID(instructionID);12171218return 0;1219}1220if (getIDWithAttrMask(&instructionID, insn, attrMask)) {1221return -1;1222}12231224if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&1225!(attrMask & ATTR_OPSIZE)) {1226/*1227* The instruction tables make no distinction between instructions that1228* allow OpSize anywhere (i.e., 16-bit operations) and that need it in a1229* particular spot (i.e., many MMX operations). In general we're1230* conservative, but in the specific case where OpSize is present but not1231* in the right place we check if there's a 16-bit operation.1232*/1233const struct InstructionSpecifier *spec;1234uint16_t instructionIDWithOpsize;12351236spec = specifierForUID(instructionID);12371238if (getIDWithAttrMask(&instructionIDWithOpsize,1239insn,1240attrMask | ATTR_OPSIZE)) {1241/*1242* ModRM required with OpSize but not present; give up and return version1243* without OpSize set1244*/1245insn->instructionID = instructionID;1246insn->spec = spec;12471248return 0;1249}12501251if (is16BitEquivalent(instructionID, instructionIDWithOpsize) &&1252(insn->mode == MODE_16BIT) ^ insn->hasOpSize) {1253insn->instructionID = instructionIDWithOpsize;1254insn->spec = specifierForUID(instructionIDWithOpsize);1255} else {1256insn->instructionID = instructionID;1257insn->spec = spec;1258}12591260return 0;1261}12621263if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&1264insn->rexPrefix & 0x01) {1265/*1266* NOOP shouldn't decode as NOOP if REX.b is set. Instead1267* it should decode as XCHG %r8, %eax.1268*/1269const struct InstructionSpecifier *spec;1270uint16_t instructionIDWithNewOpcode;1271const struct InstructionSpecifier *specWithNewOpcode;12721273spec = specifierForUID(instructionID);12741275/* Borrow opcode from one of the other XCHGar opcodes */1276insn->opcode = 0x91;12771278if (getIDWithAttrMask(&instructionIDWithNewOpcode, insn, attrMask)) {1279insn->opcode = 0x90;12801281insn->instructionID = instructionID;1282insn->spec = spec;12831284return 0;1285}12861287specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);12881289/* Change back */1290insn->opcode = 0x90;12911292insn->instructionID = instructionIDWithNewOpcode;1293insn->spec = specWithNewOpcode;12941295return 0;1296}12971298insn->instructionID = instructionID;1299insn->spec = specifierForUID(insn->instructionID);13001301return 0;1302}13031304/*1305* readSIB - Consumes the SIB byte to determine addressing information for an1306* instruction.1307*1308* @param insn - The instruction whose SIB byte is to be read.1309* @return - 0 if the SIB byte was successfully read; nonzero otherwise.1310*/1311static int readSIB(struct InternalInstruction* insn)1312{1313SIBBase sibBaseBase = SIB_BASE_NONE;1314uint8_t index, base;13151316// dbgprintf(insn, "readSIB()");13171318if (insn->consumedSIB)1319return 0;13201321insn->consumedSIB = true;13221323switch (insn->addressSize) {1324case 2:1325// dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");1326return -1;1327case 4:1328insn->sibIndexBase = SIB_INDEX_EAX;1329sibBaseBase = SIB_BASE_EAX;1330break;1331case 8:1332insn->sibIndexBase = SIB_INDEX_RAX;1333sibBaseBase = SIB_BASE_RAX;1334break;1335}13361337if (consumeByte(insn, &insn->sib))1338return -1;13391340index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);13411342if (index == 0x4) {1343insn->sibIndex = SIB_INDEX_NONE;1344} else {1345insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);1346}13471348insn->sibScale = 1 << scaleFromSIB(insn->sib);13491350base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);13511352switch (base) {1353case 0x5:1354case 0xd:1355switch (modFromModRM(insn->modRM)) {1356case 0x0:1357insn->eaDisplacement = EA_DISP_32;1358insn->sibBase = SIB_BASE_NONE;1359break;1360case 0x1:1361insn->eaDisplacement = EA_DISP_8;1362insn->sibBase = (SIBBase)(sibBaseBase + base);1363break;1364case 0x2:1365insn->eaDisplacement = EA_DISP_32;1366insn->sibBase = (SIBBase)(sibBaseBase + base);1367break;1368case 0x3:1369// debug("Cannot have Mod = 0b11 and a SIB byte");1370return -1;1371}1372break;1373default:1374insn->sibBase = (SIBBase)(sibBaseBase + base);1375break;1376}13771378return 0;1379}13801381/*1382* readDisplacement - Consumes the displacement of an instruction.1383*1384* @param insn - The instruction whose displacement is to be read.1385* @return - 0 if the displacement byte was successfully read; nonzero1386* otherwise.1387*/1388static int readDisplacement(struct InternalInstruction* insn)1389{1390int8_t d8;1391int16_t d16;1392int32_t d32;13931394// dbgprintf(insn, "readDisplacement()");13951396if (insn->consumedDisplacement)1397return 0;13981399insn->consumedDisplacement = true;1400insn->displacementOffset = insn->readerCursor - insn->startLocation;14011402switch (insn->eaDisplacement) {1403case EA_DISP_NONE:1404insn->consumedDisplacement = false;1405break;1406case EA_DISP_8:1407if (consumeInt8(insn, &d8))1408return -1;1409insn->displacement = d8;1410break;1411case EA_DISP_16:1412if (consumeInt16(insn, &d16))1413return -1;1414insn->displacement = d16;1415break;1416case EA_DISP_32:1417if (consumeInt32(insn, &d32))1418return -1;1419insn->displacement = d32;1420break;1421}142214231424return 0;1425}14261427/*1428* readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and1429* displacement) for an instruction and interprets it.1430*1431* @param insn - The instruction whose addressing information is to be read.1432* @return - 0 if the information was successfully read; nonzero otherwise.1433*/1434static int readModRM(struct InternalInstruction* insn)1435{1436uint8_t mod, rm, reg, evexrm;14371438// dbgprintf(insn, "readModRM()");14391440if (insn->consumedModRM)1441return 0;14421443insn->modRMOffset = (uint8_t)(insn->readerCursor - insn->startLocation);14441445if (consumeByte(insn, &insn->modRM))1446return -1;14471448insn->consumedModRM = true;14491450// save original ModRM for later reference1451insn->orgModRM = insn->modRM;14521453// handle MOVcr, MOVdr, MOVrc, MOVrd by pretending they have MRM.mod = 31454if ((insn->firstByte == 0x0f && insn->opcodeType == TWOBYTE) &&1455(insn->opcode >= 0x20 && insn->opcode <= 0x23 ))1456insn->modRM |= 0xC0;14571458mod = modFromModRM(insn->modRM);1459rm = rmFromModRM(insn->modRM);1460reg = regFromModRM(insn->modRM);14611462/*1463* This goes by insn->registerSize to pick the correct register, which messes1464* up if we're using (say) XMM or 8-bit register operands. That gets fixed in1465* fixupReg().1466*/1467switch (insn->registerSize) {1468case 2:1469insn->regBase = MODRM_REG_AX;1470insn->eaRegBase = EA_REG_AX;1471break;1472case 4:1473insn->regBase = MODRM_REG_EAX;1474insn->eaRegBase = EA_REG_EAX;1475break;1476case 8:1477insn->regBase = MODRM_REG_RAX;1478insn->eaRegBase = EA_REG_RAX;1479break;1480}14811482reg |= rFromREX(insn->rexPrefix) << 3;1483rm |= bFromREX(insn->rexPrefix) << 3;14841485evexrm = 0;1486if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {1487reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;1488evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;1489}14901491insn->reg = (Reg)(insn->regBase + reg);14921493switch (insn->addressSize) {1494case 2: {1495EABase eaBaseBase = EA_BASE_BX_SI;14961497switch (mod) {1498case 0x0:1499if (rm == 0x6) {1500insn->eaBase = EA_BASE_NONE;1501insn->eaDisplacement = EA_DISP_16;1502if (readDisplacement(insn))1503return -1;1504} else {1505insn->eaBase = (EABase)(eaBaseBase + rm);1506insn->eaDisplacement = EA_DISP_NONE;1507}1508break;1509case 0x1:1510insn->eaBase = (EABase)(eaBaseBase + rm);1511insn->eaDisplacement = EA_DISP_8;1512insn->displacementSize = 1;1513if (readDisplacement(insn))1514return -1;1515break;1516case 0x2:1517insn->eaBase = (EABase)(eaBaseBase + rm);1518insn->eaDisplacement = EA_DISP_16;1519if (readDisplacement(insn))1520return -1;1521break;1522case 0x3:1523insn->eaBase = (EABase)(insn->eaRegBase + rm);1524if (readDisplacement(insn))1525return -1;1526break;1527}1528break;1529}15301531case 4:1532case 8: {1533EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);15341535switch (mod) {1536default: break;1537case 0x0:1538insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */1539// In determining whether RIP-relative mode is used (rm=5),1540// or whether a SIB byte is present (rm=4),1541// the extension bits (REX.b and EVEX.x) are ignored.1542switch (rm & 7) {1543case 0x4: // SIB byte is present1544insn->eaBase = (insn->addressSize == 4 ?1545EA_BASE_sib : EA_BASE_sib64);1546if (readSIB(insn) || readDisplacement(insn))1547return -1;1548break;1549case 0x5: // RIP-relative1550insn->eaBase = EA_BASE_NONE;1551insn->eaDisplacement = EA_DISP_32;1552if (readDisplacement(insn))1553return -1;1554break;1555default:1556insn->eaBase = (EABase)(eaBaseBase + rm);1557break;1558}1559break;1560case 0x1:1561insn->displacementSize = 1;1562/* FALLTHROUGH */1563case 0x2:1564insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);1565switch (rm & 7) {1566case 0x4: // SIB byte is present1567insn->eaBase = EA_BASE_sib;1568if (readSIB(insn) || readDisplacement(insn))1569return -1;1570break;1571default:1572insn->eaBase = (EABase)(eaBaseBase + rm);1573if (readDisplacement(insn))1574return -1;1575break;1576}1577break;1578case 0x3:1579insn->eaDisplacement = EA_DISP_NONE;1580insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);1581break;1582}15831584break;1585}1586} /* switch (insn->addressSize) */15871588return 0;1589}15901591#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \1592static uint16_t name(struct InternalInstruction *insn, \1593OperandType type, \1594uint8_t index, \1595uint8_t *valid) { \1596*valid = 1; \1597switch (type) { \1598default: \1599*valid = 0; \1600return 0; \1601case TYPE_Rv: \1602return base + index; \1603case TYPE_R8: \1604index &= mask; \1605if (index > 0xf) \1606*valid = 0; \1607if (insn->rexPrefix && \1608index >= 4 && index <= 7) { \1609return prefix##_SPL + (index - 4); \1610} else { \1611return prefix##_AL + index; \1612} \1613case TYPE_R16: \1614index &= mask; \1615if (index > 0xf) \1616*valid = 0; \1617return prefix##_AX + index; \1618case TYPE_R32: \1619index &= mask; \1620if (index > 0xf) \1621*valid = 0; \1622return prefix##_EAX + index; \1623case TYPE_R64: \1624index &= mask; \1625if (index > 0xf) \1626*valid = 0; \1627return prefix##_RAX + index; \1628case TYPE_ZMM: \1629return prefix##_ZMM0 + index; \1630case TYPE_YMM: \1631return prefix##_YMM0 + index; \1632case TYPE_XMM: \1633return prefix##_XMM0 + index; \1634case TYPE_VK: \1635index &= 0xf; \1636if (index > 7) \1637*valid = 0; \1638return prefix##_K0 + index; \1639case TYPE_MM64: \1640return prefix##_MM0 + (index & 0x7); \1641case TYPE_SEGMENTREG: \1642if ((index & 7) > 5) \1643*valid = 0; \1644return prefix##_ES + (index & 7); \1645case TYPE_DEBUGREG: \1646return prefix##_DR0 + index; \1647case TYPE_CONTROLREG: \1648return prefix##_CR0 + index; \1649case TYPE_BNDR: \1650if (index > 3) \1651*valid = 0; \1652return prefix##_BND0 + index; \1653case TYPE_MVSIBX: \1654return prefix##_XMM0 + index; \1655case TYPE_MVSIBY: \1656return prefix##_YMM0 + index; \1657case TYPE_MVSIBZ: \1658return prefix##_ZMM0 + index; \1659} \1660}16611662/*1663* fixup*Value - Consults an operand type to determine the meaning of the1664* reg or R/M field. If the operand is an XMM operand, for example, an1665* operand would be XMM0 instead of AX, which readModRM() would otherwise1666* misinterpret it as.1667*1668* @param insn - The instruction containing the operand.1669* @param type - The operand type.1670* @param index - The existing value of the field as reported by readModRM().1671* @param valid - The address of a uint8_t. The target is set to 1 if the1672* field is valid for the register class; 0 if not.1673* @return - The proper value.1674*/1675GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)1676GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)16771678/*1679* fixupReg - Consults an operand specifier to determine which of the1680* fixup*Value functions to use in correcting readModRM()'ss interpretation.1681*1682* @param insn - See fixup*Value().1683* @param op - The operand specifier.1684* @return - 0 if fixup was successful; -1 if the register returned was1685* invalid for its class.1686*/1687static int fixupReg(struct InternalInstruction *insn,1688const struct OperandSpecifier *op)1689{1690uint8_t valid;16911692switch ((OperandEncoding)op->encoding) {1693default:1694// debug("Expected a REG or R/M encoding in fixupReg");1695return -1;1696case ENCODING_VVVV:1697insn->vvvv = (Reg)fixupRegValue(insn,1698(OperandType)op->type,1699insn->vvvv,1700&valid);1701if (!valid)1702return -1;1703break;1704case ENCODING_REG:1705insn->reg = (Reg)fixupRegValue(insn,1706(OperandType)op->type,1707insn->reg - insn->regBase,1708&valid);1709if (!valid)1710return -1;1711break;1712CASE_ENCODING_RM:1713if (insn->eaBase >= insn->eaRegBase) {1714insn->eaBase = (EABase)fixupRMValue(insn,1715(OperandType)op->type,1716insn->eaBase - insn->eaRegBase,1717&valid);1718if (!valid)1719return -1;1720}1721break;1722}17231724return 0;1725}17261727/*1728* readOpcodeRegister - Reads an operand from the opcode field of an1729* instruction and interprets it appropriately given the operand width.1730* Handles AddRegFrm instructions.1731*1732* @param insn - the instruction whose opcode field is to be read.1733* @param size - The width (in bytes) of the register being specified.1734* 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means1735* RAX.1736* @return - 0 on success; nonzero otherwise.1737*/1738static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size)1739{1740if (size == 0)1741size = insn->registerSize;17421743switch (size) {1744case 1:1745insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)1746| (insn->opcode & 7)));1747if (insn->rexPrefix &&1748insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&1749insn->opcodeRegister < MODRM_REG_AL + 0x8) {1750insn->opcodeRegister = (Reg)(MODRM_REG_SPL1751+ (insn->opcodeRegister - MODRM_REG_AL - 4));1752}17531754break;1755case 2:1756insn->opcodeRegister = (Reg)(MODRM_REG_AX1757+ ((bFromREX(insn->rexPrefix) << 3)1758| (insn->opcode & 7)));1759break;1760case 4:1761insn->opcodeRegister = (Reg)(MODRM_REG_EAX1762+ ((bFromREX(insn->rexPrefix) << 3)1763| (insn->opcode & 7)));1764break;1765case 8:1766insn->opcodeRegister = (Reg)(MODRM_REG_RAX1767+ ((bFromREX(insn->rexPrefix) << 3)1768| (insn->opcode & 7)));1769break;1770}17711772return 0;1773}17741775/*1776* readImmediate - Consumes an immediate operand from an instruction, given the1777* desired operand size.1778*1779* @param insn - The instruction whose operand is to be read.1780* @param size - The width (in bytes) of the operand.1781* @return - 0 if the immediate was successfully consumed; nonzero1782* otherwise.1783*/1784static int readImmediate(struct InternalInstruction* insn, uint8_t size)1785{1786uint8_t imm8;1787uint16_t imm16;1788uint32_t imm32;1789uint64_t imm64;17901791if (insn->numImmediatesConsumed == 2) {1792// debug("Already consumed two immediates");1793return -1;1794}17951796if (size == 0)1797size = insn->immediateSize;1798else1799insn->immediateSize = size;18001801insn->immediateOffset = insn->readerCursor - insn->startLocation;18021803switch (size) {1804case 1:1805if (consumeByte(insn, &imm8))1806return -1;18071808insn->immediates[insn->numImmediatesConsumed] = imm8;1809break;1810case 2:1811if (consumeUInt16(insn, &imm16))1812return -1;18131814insn->immediates[insn->numImmediatesConsumed] = imm16;1815break;1816case 4:1817if (consumeUInt32(insn, &imm32))1818return -1;18191820insn->immediates[insn->numImmediatesConsumed] = imm32;1821break;1822case 8:1823if (consumeUInt64(insn, &imm64))1824return -1;1825insn->immediates[insn->numImmediatesConsumed] = imm64;1826break;1827}18281829insn->numImmediatesConsumed++;18301831return 0;1832}18331834/*1835* readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.1836*1837* @param insn - The instruction whose operand is to be read.1838* @return - 0 if the vvvv was successfully consumed; nonzero1839* otherwise.1840*/1841static int readVVVV(struct InternalInstruction* insn)1842{1843int vvvv;18441845if (insn->vectorExtensionType == TYPE_EVEX)1846vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |1847vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));1848else if (insn->vectorExtensionType == TYPE_VEX_3B)1849vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);1850else if (insn->vectorExtensionType == TYPE_VEX_2B)1851vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);1852else if (insn->vectorExtensionType == TYPE_XOP)1853vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);1854else1855return -1;18561857if (insn->mode != MODE_64BIT)1858vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.18591860insn->vvvv = (Reg)vvvv;18611862return 0;1863}18641865/*1866* readMaskRegister - Reads an mask register from the opcode field of an1867* instruction.1868*1869* @param insn - The instruction whose opcode field is to be read.1870* @return - 0 on success; nonzero otherwise.1871*/1872static int readMaskRegister(struct InternalInstruction* insn)1873{1874if (insn->vectorExtensionType != TYPE_EVEX)1875return -1;18761877insn->writemask = (Reg)(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));18781879return 0;1880}18811882/*1883* readOperands - Consults the specifier for an instruction and consumes all1884* operands for that instruction, interpreting them as it goes.1885*1886* @param insn - The instruction whose operands are to be read and interpreted.1887* @return - 0 if all operands could be read; nonzero otherwise.1888*/1889static int readOperands(struct InternalInstruction* insn)1890{1891int hasVVVV, needVVVV;1892int sawRegImm = 0;1893int i;18941895/* If non-zero vvvv specified, need to make sure one of the operands1896uses it. */1897hasVVVV = !readVVVV(insn);1898needVVVV = hasVVVV && (insn->vvvv != 0);18991900for (i = 0; i < X86_MAX_OPERANDS; ++i) {1901const OperandSpecifier *op = &x86OperandSets[insn->spec->operands][i];1902switch (op->encoding) {1903case ENCODING_NONE:1904case ENCODING_SI:1905case ENCODING_DI:1906break;19071908CASE_ENCODING_VSIB:1909// VSIB can use the V2 bit so check only the other bits.1910if (needVVVV)1911needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);19121913if (readModRM(insn))1914return -1;19151916// Reject if SIB wasn't used.1917if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)1918return -1;19191920// If sibIndex was set to SIB_INDEX_NONE, index offset is 4.1921if (insn->sibIndex == SIB_INDEX_NONE)1922insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);19231924// If EVEX.v2 is set this is one of the 16-31 registers.1925if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&1926v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))1927insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);19281929// Adjust the index register to the correct size.1930switch (op->type) {1931default:1932// debug("Unhandled VSIB index type");1933return -1;1934case TYPE_MVSIBX:1935insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +1936(insn->sibIndex - insn->sibIndexBase));1937break;1938case TYPE_MVSIBY:1939insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +1940(insn->sibIndex - insn->sibIndexBase));1941break;1942case TYPE_MVSIBZ:1943insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +1944(insn->sibIndex - insn->sibIndexBase));1945break;1946}19471948// Apply the AVX512 compressed displacement scaling factor.1949if (op->encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)1950insn->displacement *= 1 << (op->encoding - ENCODING_VSIB);1951break;19521953case ENCODING_REG:1954CASE_ENCODING_RM:1955if (readModRM(insn))1956return -1;19571958if (fixupReg(insn, op))1959return -1;19601961// Apply the AVX512 compressed displacement scaling factor.1962if (op->encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)1963insn->displacement *= 1 << (op->encoding - ENCODING_RM);1964break;19651966case ENCODING_IB:1967if (sawRegImm) {1968/* Saw a register immediate so don't read again and instead split the1969previous immediate. FIXME: This is a hack. */1970insn->immediates[insn->numImmediatesConsumed] =1971insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;1972++insn->numImmediatesConsumed;1973break;1974}1975if (readImmediate(insn, 1))1976return -1;1977if (op->type == TYPE_XMM || op->type == TYPE_YMM)1978sawRegImm = 1;1979break;19801981case ENCODING_IW:1982if (readImmediate(insn, 2))1983return -1;1984break;19851986case ENCODING_ID:1987if (readImmediate(insn, 4))1988return -1;1989break;19901991case ENCODING_IO:1992if (readImmediate(insn, 8))1993return -1;1994break;19951996case ENCODING_Iv:1997if (readImmediate(insn, insn->immediateSize))1998return -1;1999break;20002001case ENCODING_Ia:2002if (readImmediate(insn, insn->addressSize))2003return -1;2004/* Direct memory-offset (moffset) immediate will get mapped2005to memory operand later. We want the encoding info to2006reflect that as well. */2007insn->displacementOffset = insn->immediateOffset;2008insn->consumedDisplacement = true;2009insn->displacementSize = insn->immediateSize;2010insn->displacement = insn->immediates[insn->numImmediatesConsumed - 1];2011insn->immediateOffset = 0;2012insn->immediateSize = 0;2013break;20142015case ENCODING_IRC:2016insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |2017lFromEVEX4of4(insn->vectorExtensionPrefix[3]);2018break;20192020case ENCODING_RB:2021if (readOpcodeRegister(insn, 1))2022return -1;2023break;20242025case ENCODING_RW:2026if (readOpcodeRegister(insn, 2))2027return -1;2028break;20292030case ENCODING_RD:2031if (readOpcodeRegister(insn, 4))2032return -1;2033break;20342035case ENCODING_RO:2036if (readOpcodeRegister(insn, 8))2037return -1;2038break;20392040case ENCODING_Rv:2041if (readOpcodeRegister(insn, 0))2042return -1;2043break;20442045case ENCODING_FP:2046break;20472048case ENCODING_VVVV:2049if (!hasVVVV)2050return -1;20512052needVVVV = 0; /* Mark that we have found a VVVV operand. */20532054if (insn->mode != MODE_64BIT)2055insn->vvvv = (Reg)(insn->vvvv & 0x7);20562057if (fixupReg(insn, op))2058return -1;2059break;20602061case ENCODING_WRITEMASK:2062if (readMaskRegister(insn))2063return -1;2064break;20652066case ENCODING_DUP:2067break;20682069default:2070// dbgprintf(insn, "Encountered an operand with an unknown encoding.");2071return -1;2072}2073}20742075/* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */2076if (needVVVV)2077return -1;20782079return 0;2080}20812082// return True if instruction is illegal to use with prefixes2083// This also check & fix the isPrefixNN when a prefix is irrelevant.2084static bool checkPrefix(struct InternalInstruction *insn)2085{2086// LOCK prefix2087if (insn->hasLockPrefix) {2088switch(insn->instructionID) {2089default:2090// invalid LOCK2091return true;20922093// nop dword [rax]2094case X86_NOOPL:20952096// DEC2097case X86_DEC16m:2098case X86_DEC32m:2099case X86_DEC64m:2100case X86_DEC8m:21012102// ADC2103case X86_ADC16mi:2104case X86_ADC16mi8:2105case X86_ADC16mr:2106case X86_ADC32mi:2107case X86_ADC32mi8:2108case X86_ADC32mr:2109case X86_ADC64mi32:2110case X86_ADC64mi8:2111case X86_ADC64mr:2112case X86_ADC8mi:2113case X86_ADC8mi8:2114case X86_ADC8mr:2115case X86_ADC8rm:2116case X86_ADC16rm:2117case X86_ADC32rm:2118case X86_ADC64rm:21192120// ADD2121case X86_ADD16mi:2122case X86_ADD16mi8:2123case X86_ADD16mr:2124case X86_ADD32mi:2125case X86_ADD32mi8:2126case X86_ADD32mr:2127case X86_ADD64mi32:2128case X86_ADD64mi8:2129case X86_ADD64mr:2130case X86_ADD8mi:2131case X86_ADD8mi8:2132case X86_ADD8mr:2133case X86_ADD8rm:2134case X86_ADD16rm:2135case X86_ADD32rm:2136case X86_ADD64rm:21372138// AND2139case X86_AND16mi:2140case X86_AND16mi8:2141case X86_AND16mr:2142case X86_AND32mi:2143case X86_AND32mi8:2144case X86_AND32mr:2145case X86_AND64mi32:2146case X86_AND64mi8:2147case X86_AND64mr:2148case X86_AND8mi:2149case X86_AND8mi8:2150case X86_AND8mr:2151case X86_AND8rm:2152case X86_AND16rm:2153case X86_AND32rm:2154case X86_AND64rm:21552156// BTC2157case X86_BTC16mi8:2158case X86_BTC16mr:2159case X86_BTC32mi8:2160case X86_BTC32mr:2161case X86_BTC64mi8:2162case X86_BTC64mr:21632164// BTR2165case X86_BTR16mi8:2166case X86_BTR16mr:2167case X86_BTR32mi8:2168case X86_BTR32mr:2169case X86_BTR64mi8:2170case X86_BTR64mr:21712172// BTS2173case X86_BTS16mi8:2174case X86_BTS16mr:2175case X86_BTS32mi8:2176case X86_BTS32mr:2177case X86_BTS64mi8:2178case X86_BTS64mr:21792180// CMPXCHG2181case X86_CMPXCHG16B:2182case X86_CMPXCHG16rm:2183case X86_CMPXCHG32rm:2184case X86_CMPXCHG64rm:2185case X86_CMPXCHG8rm:2186case X86_CMPXCHG8B:21872188// INC2189case X86_INC16m:2190case X86_INC32m:2191case X86_INC64m:2192case X86_INC8m:21932194// NEG2195case X86_NEG16m:2196case X86_NEG32m:2197case X86_NEG64m:2198case X86_NEG8m:21992200// NOT2201case X86_NOT16m:2202case X86_NOT32m:2203case X86_NOT64m:2204case X86_NOT8m:22052206// OR2207case X86_OR16mi:2208case X86_OR16mi8:2209case X86_OR16mr:2210case X86_OR32mi:2211case X86_OR32mi8:2212case X86_OR32mr:2213case X86_OR64mi32:2214case X86_OR64mi8:2215case X86_OR64mr:2216case X86_OR8mi8:2217case X86_OR8mi:2218case X86_OR8mr:2219case X86_OR8rm:2220case X86_OR16rm:2221case X86_OR32rm:2222case X86_OR64rm:22232224// SBB2225case X86_SBB16mi:2226case X86_SBB16mi8:2227case X86_SBB16mr:2228case X86_SBB32mi:2229case X86_SBB32mi8:2230case X86_SBB32mr:2231case X86_SBB64mi32:2232case X86_SBB64mi8:2233case X86_SBB64mr:2234case X86_SBB8mi:2235case X86_SBB8mi8:2236case X86_SBB8mr:22372238// SUB2239case X86_SUB16mi:2240case X86_SUB16mi8:2241case X86_SUB16mr:2242case X86_SUB32mi:2243case X86_SUB32mi8:2244case X86_SUB32mr:2245case X86_SUB64mi32:2246case X86_SUB64mi8:2247case X86_SUB64mr:2248case X86_SUB8mi8:2249case X86_SUB8mi:2250case X86_SUB8mr:2251case X86_SUB8rm:2252case X86_SUB16rm:2253case X86_SUB32rm:2254case X86_SUB64rm:22552256// XADD2257case X86_XADD16rm:2258case X86_XADD32rm:2259case X86_XADD64rm:2260case X86_XADD8rm:22612262// XCHG2263case X86_XCHG16rm:2264case X86_XCHG32rm:2265case X86_XCHG64rm:2266case X86_XCHG8rm:22672268// XOR2269case X86_XOR16mi:2270case X86_XOR16mi8:2271case X86_XOR16mr:2272case X86_XOR32mi:2273case X86_XOR32mi8:2274case X86_XOR32mr:2275case X86_XOR64mi32:2276case X86_XOR64mi8:2277case X86_XOR64mr:2278case X86_XOR8mi8:2279case X86_XOR8mi:2280case X86_XOR8mr:2281case X86_XOR8rm:2282case X86_XOR16rm:2283case X86_XOR32rm:2284case X86_XOR64rm:22852286// this instruction can be used with LOCK prefix2287return false;2288}2289}22902291#if 02292// REPNE prefix2293if (insn->repeatPrefix) {2294// 0xf2 can be a part of instruction encoding, but not really a prefix.2295// In such a case, clear it.2296if (insn->twoByteEscape == 0x0f) {2297insn->prefix0 = 0;2298}2299}2300#endif23012302// no invalid prefixes2303return false;2304}23052306/*2307* decodeInstruction - Reads and interprets a full instruction provided by the2308* user.2309*2310* @param insn - A pointer to the instruction to be populated. Must be2311* pre-allocated.2312* @param reader - The function to be used to read the instruction's bytes.2313* @param readerArg - A generic argument to be passed to the reader to store2314* any internal state.2315* @param startLoc - The address (in the reader's address space) of the first2316* byte in the instruction.2317* @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to2318* decode the instruction in.2319* @return - 0 if instruction is valid; nonzero if not.2320*/2321int decodeInstruction(struct InternalInstruction *insn,2322byteReader_t reader,2323const void *readerArg,2324uint64_t startLoc,2325DisassemblerMode mode)2326{2327insn->reader = reader;2328insn->readerArg = readerArg;2329insn->startLocation = startLoc;2330insn->readerCursor = startLoc;2331insn->mode = mode;2332insn->numImmediatesConsumed = 0;23332334if (readPrefixes(insn) ||2335readOpcode(insn) ||2336getID(insn) ||2337insn->instructionID == 0 ||2338checkPrefix(insn) ||2339readOperands(insn))2340return -1;23412342insn->length = (size_t)(insn->readerCursor - insn->startLocation);23432344// instruction length must be <= 15 to be valid2345if (insn->length > 15)2346return -1;23472348if (insn->operandSize == 0)2349insn->operandSize = insn->registerSize;23502351insn->operands = &x86OperandSets[insn->spec->operands][0];23522353return 0;2354}23552356#endif235723582359