Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
35293 views
//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
// MCInsts.
//
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets. The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
// These attributes, recorded in enum attributeBits
// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
// provides a mapping from bitmasks to contexts, which are represented by
// enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is. The
// disassembler distinguishes four kinds of opcodes, which are enumerated in
// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode, to get
// a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
// ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
// meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
// (X86DisassemblerDecoderCommon.h) and its type is an entry from
// OperandType (ibid.). The encoding indicates how to read it from the
// instruction; the type indicates how to interpret the value once it has
// been read. For example, a register operand could be stored in the R/M
// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
// register, for instance). Given this information, the operands can be
// extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
// and operands into a format understandable by the client - in this case, an
// MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself.
// The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.cpp contains the code responsible for step 7, and for
// invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
// table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
// factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
// responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::X86Disassembler;

#define DEBUG_TYPE "x86-disassembler"

// Debug-only trace helper: streams the current source line number, a colon
// and the message `s` to dbgs() under LLVM_DEBUG. Note that the expansion
// already ends in a semicolon.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // Selects how the ModR/M byte is used to pick an entry; decode() switches on
  // this value (MODRM_ONEENTRY, MODRM_SPLITRM, MODRM_SPLITREG, ...).
  uint8_t modrm_type;
  // Index into modRMTable of the first entry belonging to this decision;
  // decode() adds a ModR/M-derived offset to it.
  uint16_t instructionIDs;
};

// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {
  // One decision per possible opcode byte value (indexed by the opcode).
  ModRMDecision modRMDecisions[256];
};

// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes).
Since there are many possible113// contexts, the decoder first uses CONTEXTS_SYM to determine which context114// applies given a specific set of attributes. Hence there are only IC_max115// entries in this table, rather than 2^(ATTR_max).116struct ContextDecision {117OpcodeDecision opcodeDecisions[IC_max];118};119120#include "X86GenDisassemblerTables.inc"121122static InstrUID decode(OpcodeType type, InstructionContext insnContext,123uint8_t opcode, uint8_t modRM) {124const struct ModRMDecision *dec;125126switch (type) {127case ONEBYTE:128dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];129break;130case TWOBYTE:131dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];132break;133case THREEBYTE_38:134dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];135break;136case THREEBYTE_3A:137dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];138break;139case XOP8_MAP:140dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];141break;142case XOP9_MAP:143dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];144break;145case XOPA_MAP:146dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];147break;148case THREEDNOW_MAP:149dec =150&THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];151break;152case MAP4:153dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];154break;155case MAP5:156dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];157break;158case MAP6:159dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];160break;161case MAP7:162dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];163break;164}165166switch (dec->modrm_type) {167default:168llvm_unreachable("Corrupt table! 
Unknown modrm_type");169return 0;170case MODRM_ONEENTRY:171return modRMTable[dec->instructionIDs];172case MODRM_SPLITRM:173if (modFromModRM(modRM) == 0x3)174return modRMTable[dec->instructionIDs + 1];175return modRMTable[dec->instructionIDs];176case MODRM_SPLITREG:177if (modFromModRM(modRM) == 0x3)178return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];179return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];180case MODRM_SPLITMISC:181if (modFromModRM(modRM) == 0x3)182return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];183return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];184case MODRM_FULL:185return modRMTable[dec->instructionIDs + modRM];186}187}188189static bool peek(struct InternalInstruction *insn, uint8_t &byte) {190uint64_t offset = insn->readerCursor - insn->startLocation;191if (offset >= insn->bytes.size())192return true;193byte = insn->bytes[offset];194return false;195}196197template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {198auto r = insn->bytes;199uint64_t offset = insn->readerCursor - insn->startLocation;200if (offset + sizeof(T) > r.size())201return true;202ptr = support::endian::read<T>(&r[offset], llvm::endianness::little);203insn->readerCursor += sizeof(T);204return false;205}206207static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {208return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;209}210211static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {212return insn->mode == MODE_64BIT && prefix == 0xd5;213}214215// Consumes all of an instruction's prefix bytes, and marks the216// instruction as having them. 
Also sets the instruction's default operand,217// address, and other relevant data sizes to report operands correctly.218//219// insn must not be empty.220static int readPrefixes(struct InternalInstruction *insn) {221bool isPrefix = true;222uint8_t byte = 0;223uint8_t nextByte;224225LLVM_DEBUG(dbgs() << "readPrefixes()");226227while (isPrefix) {228// If we fail reading prefixes, just stop here and let the opcode reader229// deal with it.230if (consume(insn, byte))231break;232233// If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then234// break and let it be disassembled as a normal "instruction".235if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK236break;237238if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {239// If the byte is 0xf2 or 0xf3, and any of the following conditions are240// met:241// - it is followed by a LOCK (0xf0) prefix242// - it is followed by an xchg instruction243// then it should be disassembled as a xacquire/xrelease not repne/rep.244if (((nextByte == 0xf0) ||245((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {246insn->xAcquireRelease = true;247if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support248break;249}250// Also if the byte is 0xf3, and the following condition is met:251// - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or252// "mov mem, imm" (opcode 0xc6/0xc7) instructions.253// then it should be disassembled as an xrelease not rep.254if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||255nextByte == 0xc6 || nextByte == 0xc7)) {256insn->xAcquireRelease = true;257break;258}259if (isREX(insn, nextByte)) {260uint8_t nnextByte;261// Go to REX prefix after the current one262if (consume(insn, nnextByte))263return -1;264// We should be able to read next byte after REX prefix265if (peek(insn, nnextByte))266return -1;267--insn->readerCursor;268}269}270271switch (byte) {272case 0xf0: // LOCK273insn->hasLockPrefix = true;274break;275case 0xf2: 
// REPNE/REPNZ276case 0xf3: { // REP or REPE/REPZ277uint8_t nextByte;278if (peek(insn, nextByte))279break;280// TODO:281// 1. There could be several 0x66282// 2. if (nextByte == 0x66) and nextNextByte != 0x0f then283// it's not mandatory prefix284// 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need285// 0x0f exactly after it to be mandatory prefix286// 4. if (nextByte == 0xd5) it's REX2 and we need287// 0x0f exactly after it to be mandatory prefix288if (isREX(insn, nextByte) || isREX2(insn, nextByte) || nextByte == 0x0f ||289nextByte == 0x66)290// The last of 0xf2 /0xf3 is mandatory prefix291insn->mandatoryPrefix = byte;292insn->repeatPrefix = byte;293break;294}295case 0x2e: // CS segment override -OR- Branch not taken296insn->segmentOverride = SEG_OVERRIDE_CS;297break;298case 0x36: // SS segment override -OR- Branch taken299insn->segmentOverride = SEG_OVERRIDE_SS;300break;301case 0x3e: // DS segment override302insn->segmentOverride = SEG_OVERRIDE_DS;303break;304case 0x26: // ES segment override305insn->segmentOverride = SEG_OVERRIDE_ES;306break;307case 0x64: // FS segment override308insn->segmentOverride = SEG_OVERRIDE_FS;309break;310case 0x65: // GS segment override311insn->segmentOverride = SEG_OVERRIDE_GS;312break;313case 0x66: { // Operand-size override {314uint8_t nextByte;315insn->hasOpSize = true;316if (peek(insn, nextByte))317break;318// 0x66 can't overwrite existing mandatory prefix and should be ignored319if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))320insn->mandatoryPrefix = byte;321break;322}323case 0x67: // Address-size override324insn->hasAdSize = true;325break;326default: // Not a prefix byte327isPrefix = false;328break;329}330331if (isPrefix)332LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));333}334335insn->vectorExtensionType = TYPE_NO_VEX_XOP;336337if (byte == 0x62) {338uint8_t byte1, byte2;339if (consume(insn, byte1)) {340LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX 
prefix");341return -1;342}343344if (peek(insn, byte2)) {345LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");346return -1;347}348349if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {350insn->vectorExtensionType = TYPE_EVEX;351} else {352--insn->readerCursor; // unconsume byte1353--insn->readerCursor; // unconsume byte354}355356if (insn->vectorExtensionType == TYPE_EVEX) {357insn->vectorExtensionPrefix[0] = byte;358insn->vectorExtensionPrefix[1] = byte1;359if (consume(insn, insn->vectorExtensionPrefix[2])) {360LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");361return -1;362}363if (consume(insn, insn->vectorExtensionPrefix[3])) {364LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");365return -1;366}367368if (insn->mode == MODE_64BIT) {369// We simulate the REX prefix for simplicity's sake370insn->rexPrefix = 0x40 |371(wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |372(rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |373(xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |374(bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);375376// We simulate the REX2 prefix for simplicity's sake377insn->rex2ExtensionPrefix[1] =378(r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |379(x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |380(b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);381}382383LLVM_DEBUG(384dbgs() << format(385"Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",386insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],387insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));388}389} else if (byte == 0xc4) {390uint8_t byte1;391if (peek(insn, byte1)) {392LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");393return -1;394}395396if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)397insn->vectorExtensionType = TYPE_VEX_3B;398else399--insn->readerCursor;400401if (insn->vectorExtensionType == TYPE_VEX_3B) {402insn->vectorExtensionPrefix[0] = byte;403consume(insn, 
insn->vectorExtensionPrefix[1]);404consume(insn, insn->vectorExtensionPrefix[2]);405406// We simulate the REX prefix for simplicity's sake407408if (insn->mode == MODE_64BIT)409insn->rexPrefix = 0x40 |410(wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |411(rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |412(xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |413(bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);414415LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",416insn->vectorExtensionPrefix[0],417insn->vectorExtensionPrefix[1],418insn->vectorExtensionPrefix[2]));419}420} else if (byte == 0xc5) {421uint8_t byte1;422if (peek(insn, byte1)) {423LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");424return -1;425}426427if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)428insn->vectorExtensionType = TYPE_VEX_2B;429else430--insn->readerCursor;431432if (insn->vectorExtensionType == TYPE_VEX_2B) {433insn->vectorExtensionPrefix[0] = byte;434consume(insn, insn->vectorExtensionPrefix[1]);435436if (insn->mode == MODE_64BIT)437insn->rexPrefix =4380x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);439440switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {441default:442break;443case VEX_PREFIX_66:444insn->hasOpSize = true;445break;446}447448LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",449insn->vectorExtensionPrefix[0],450insn->vectorExtensionPrefix[1]));451}452} else if (byte == 0x8f) {453uint8_t byte1;454if (peek(insn, byte1)) {455LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");456return -1;457}458459if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.460insn->vectorExtensionType = TYPE_XOP;461else462--insn->readerCursor;463464if (insn->vectorExtensionType == TYPE_XOP) {465insn->vectorExtensionPrefix[0] = byte;466consume(insn, insn->vectorExtensionPrefix[1]);467consume(insn, insn->vectorExtensionPrefix[2]);468469// We simulate the REX prefix for simplicity's sake470471if (insn->mode == 
MODE_64BIT)472insn->rexPrefix = 0x40 |473(wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |474(rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |475(xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |476(bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);477478switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {479default:480break;481case VEX_PREFIX_66:482insn->hasOpSize = true;483break;484}485486LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",487insn->vectorExtensionPrefix[0],488insn->vectorExtensionPrefix[1],489insn->vectorExtensionPrefix[2]));490}491} else if (isREX2(insn, byte)) {492uint8_t byte1;493if (peek(insn, byte1)) {494LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");495return -1;496}497insn->rex2ExtensionPrefix[0] = byte;498consume(insn, insn->rex2ExtensionPrefix[1]);499500// We simulate the REX prefix for simplicity's sake501insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |502(rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |503(xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |504(bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);505LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",506insn->rex2ExtensionPrefix[0],507insn->rex2ExtensionPrefix[1]));508} else if (isREX(insn, byte)) {509if (peek(insn, nextByte))510return -1;511insn->rexPrefix = byte;512LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));513} else514--insn->readerCursor;515516if (insn->mode == MODE_16BIT) {517insn->registerSize = (insn->hasOpSize ? 4 : 2);518insn->addressSize = (insn->hasAdSize ? 4 : 2);519insn->displacementSize = (insn->hasAdSize ? 4 : 2);520insn->immediateSize = (insn->hasOpSize ? 4 : 2);521} else if (insn->mode == MODE_32BIT) {522insn->registerSize = (insn->hasOpSize ? 2 : 4);523insn->addressSize = (insn->hasAdSize ? 2 : 4);524insn->displacementSize = (insn->hasAdSize ? 2 : 4);525insn->immediateSize = (insn->hasOpSize ? 
2 : 4);526} else if (insn->mode == MODE_64BIT) {527insn->displacementSize = 4;528if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {529insn->registerSize = 8;530insn->addressSize = (insn->hasAdSize ? 4 : 8);531insn->immediateSize = 4;532insn->hasOpSize = false;533} else {534insn->registerSize = (insn->hasOpSize ? 2 : 4);535insn->addressSize = (insn->hasAdSize ? 4 : 8);536insn->immediateSize = (insn->hasOpSize ? 2 : 4);537}538}539540return 0;541}542543// Consumes the SIB byte to determine addressing information.544static int readSIB(struct InternalInstruction *insn) {545SIBBase sibBaseBase = SIB_BASE_NONE;546uint8_t index, base;547548LLVM_DEBUG(dbgs() << "readSIB()");549switch (insn->addressSize) {550case 2:551default:552llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");553case 4:554insn->sibIndexBase = SIB_INDEX_EAX;555sibBaseBase = SIB_BASE_EAX;556break;557case 8:558insn->sibIndexBase = SIB_INDEX_RAX;559sibBaseBase = SIB_BASE_RAX;560break;561}562563if (consume(insn, insn->sib))564return -1;565566index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |567(x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);568569if (index == 0x4) {570insn->sibIndex = SIB_INDEX_NONE;571} else {572insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);573}574575insn->sibScale = 1 << scaleFromSIB(insn->sib);576577base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |578(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);579580switch (base) {581case 0x5:582case 0xd:583switch (modFromModRM(insn->modRM)) {584case 0x0:585insn->eaDisplacement = EA_DISP_32;586insn->sibBase = SIB_BASE_NONE;587break;588case 0x1:589insn->eaDisplacement = EA_DISP_8;590insn->sibBase = (SIBBase)(sibBaseBase + base);591break;592case 0x2:593insn->eaDisplacement = EA_DISP_32;594insn->sibBase = (SIBBase)(sibBaseBase + base);595break;596default:597llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");598}599break;600default:601insn->sibBase = (SIBBase)(sibBaseBase + 
base);602break;603}604605return 0;606}607608static int readDisplacement(struct InternalInstruction *insn) {609int8_t d8;610int16_t d16;611int32_t d32;612LLVM_DEBUG(dbgs() << "readDisplacement()");613614insn->displacementOffset = insn->readerCursor - insn->startLocation;615switch (insn->eaDisplacement) {616case EA_DISP_NONE:617break;618case EA_DISP_8:619if (consume(insn, d8))620return -1;621insn->displacement = d8;622break;623case EA_DISP_16:624if (consume(insn, d16))625return -1;626insn->displacement = d16;627break;628case EA_DISP_32:629if (consume(insn, d32))630return -1;631insn->displacement = d32;632break;633}634635return 0;636}637638// Consumes all addressing information (ModR/M byte, SIB byte, and displacement.639static int readModRM(struct InternalInstruction *insn) {640uint8_t mod, rm, reg;641LLVM_DEBUG(dbgs() << "readModRM()");642643if (insn->consumedModRM)644return 0;645646if (consume(insn, insn->modRM))647return -1;648insn->consumedModRM = true;649650mod = modFromModRM(insn->modRM);651rm = rmFromModRM(insn->modRM);652reg = regFromModRM(insn->modRM);653654// This goes by insn->registerSize to pick the correct register, which messes655// up if we're using (say) XMM or 8-bit register operands. 
That gets fixed in656// fixupReg().657switch (insn->registerSize) {658case 2:659insn->regBase = MODRM_REG_AX;660insn->eaRegBase = EA_REG_AX;661break;662case 4:663insn->regBase = MODRM_REG_EAX;664insn->eaRegBase = EA_REG_EAX;665break;666case 8:667insn->regBase = MODRM_REG_RAX;668insn->eaRegBase = EA_REG_RAX;669break;670}671672reg |= (rFromREX(insn->rexPrefix) << 3) |673(r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);674rm |= (bFromREX(insn->rexPrefix) << 3) |675(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);676677if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)678reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;679680insn->reg = (Reg)(insn->regBase + reg);681682switch (insn->addressSize) {683case 2: {684EABase eaBaseBase = EA_BASE_BX_SI;685686switch (mod) {687case 0x0:688if (rm == 0x6) {689insn->eaBase = EA_BASE_NONE;690insn->eaDisplacement = EA_DISP_16;691if (readDisplacement(insn))692return -1;693} else {694insn->eaBase = (EABase)(eaBaseBase + rm);695insn->eaDisplacement = EA_DISP_NONE;696}697break;698case 0x1:699insn->eaBase = (EABase)(eaBaseBase + rm);700insn->eaDisplacement = EA_DISP_8;701insn->displacementSize = 1;702if (readDisplacement(insn))703return -1;704break;705case 0x2:706insn->eaBase = (EABase)(eaBaseBase + rm);707insn->eaDisplacement = EA_DISP_16;708if (readDisplacement(insn))709return -1;710break;711case 0x3:712insn->eaBase = (EABase)(insn->eaRegBase + rm);713if (readDisplacement(insn))714return -1;715break;716}717break;718}719case 4:720case 8: {721EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);722723switch (mod) {724case 0x0:725insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this726// In determining whether RIP-relative mode is used (rm=5),727// or whether a SIB byte is present (rm=4),728// the extension bits (REX.b and EVEX.x) are ignored.729switch (rm & 7) {730case 0x4: // SIB byte is present731insn->eaBase = (insn->addressSize == 4 ? 
EA_BASE_sib : EA_BASE_sib64);732if (readSIB(insn) || readDisplacement(insn))733return -1;734break;735case 0x5: // RIP-relative736insn->eaBase = EA_BASE_NONE;737insn->eaDisplacement = EA_DISP_32;738if (readDisplacement(insn))739return -1;740break;741default:742insn->eaBase = (EABase)(eaBaseBase + rm);743break;744}745break;746case 0x1:747insn->displacementSize = 1;748[[fallthrough]];749case 0x2:750insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);751switch (rm & 7) {752case 0x4: // SIB byte is present753insn->eaBase = EA_BASE_sib;754if (readSIB(insn) || readDisplacement(insn))755return -1;756break;757default:758insn->eaBase = (EABase)(eaBaseBase + rm);759if (readDisplacement(insn))760return -1;761break;762}763break;764case 0x3:765insn->eaDisplacement = EA_DISP_NONE;766insn->eaBase = (EABase)(insn->eaRegBase + rm);767break;768}769break;770}771} // switch (insn->addressSize)772773return 0;774}775776#define GENERIC_FIXUP_FUNC(name, base, prefix) \777static uint16_t name(struct InternalInstruction *insn, OperandType type, \778uint8_t index, uint8_t *valid) { \779*valid = 1; \780switch (type) { \781default: \782debug("Unhandled register type"); \783*valid = 0; \784return 0; \785case TYPE_Rv: \786return base + index; \787case TYPE_R8: \788if (insn->rexPrefix && index >= 4 && index <= 7) \789return prefix##_SPL + (index - 4); \790else \791return prefix##_AL + index; \792case TYPE_R16: \793return prefix##_AX + index; \794case TYPE_R32: \795return prefix##_EAX + index; \796case TYPE_R64: \797return prefix##_RAX + index; \798case TYPE_ZMM: \799return prefix##_ZMM0 + index; \800case TYPE_YMM: \801return prefix##_YMM0 + index; \802case TYPE_XMM: \803return prefix##_XMM0 + index; \804case TYPE_TMM: \805if (index > 7) \806*valid = 0; \807return prefix##_TMM0 + index; \808case TYPE_VK: \809index &= 0xf; \810if (index > 7) \811*valid = 0; \812return prefix##_K0 + index; \813case TYPE_VK_PAIR: \814if (index > 7) \815*valid = 0; \816return prefix##_K0_K1 + (index / 2); 
\817case TYPE_MM64: \818return prefix##_MM0 + (index & 0x7); \819case TYPE_SEGMENTREG: \820if ((index & 7) > 5) \821*valid = 0; \822return prefix##_ES + (index & 7); \823case TYPE_DEBUGREG: \824if (index > 15) \825*valid = 0; \826return prefix##_DR0 + index; \827case TYPE_CONTROLREG: \828if (index > 15) \829*valid = 0; \830return prefix##_CR0 + index; \831case TYPE_MVSIBX: \832return prefix##_XMM0 + index; \833case TYPE_MVSIBY: \834return prefix##_YMM0 + index; \835case TYPE_MVSIBZ: \836return prefix##_ZMM0 + index; \837} \838}839840// Consult an operand type to determine the meaning of the reg or R/M field. If841// the operand is an XMM operand, for example, an operand would be XMM0 instead842// of AX, which readModRM() would otherwise misinterpret it as.843//844// @param insn - The instruction containing the operand.845// @param type - The operand type.846// @param index - The existing value of the field as reported by readModRM().847// @param valid - The address of a uint8_t. The target is set to 1 if the848// field is valid for the register class; 0 if not.849// @return - The proper value.850GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)851GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)852853// Consult an operand specifier to determine which of the fixup*Value functions854// to use in correcting readModRM()'ss interpretation.855//856// @param insn - See fixup*Value().857// @param op - The operand specifier.858// @return - 0 if fixup was successful; -1 if the register returned was859// invalid for its class.860static int fixupReg(struct InternalInstruction *insn,861const struct OperandSpecifier *op) {862uint8_t valid;863LLVM_DEBUG(dbgs() << "fixupReg()");864865switch ((OperandEncoding)op->encoding) {866default:867debug("Expected a REG or R/M encoding in fixupReg");868return -1;869case ENCODING_VVVV:870insn->vvvv =871(Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);872if (!valid)873return -1;874break;875case 
ENCODING_REG:876insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,877insn->reg - insn->regBase, &valid);878if (!valid)879return -1;880break;881CASE_ENCODING_RM:882if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&883modFromModRM(insn->modRM) == 3) {884// EVEX_X can extend the register id to 32 for a non-GPR register that is885// encoded in RM.886// mode : MODE_64_BIT887// Only 8 vector registers are available in 32 bit mode888// mod : 3889// RM encodes a register890switch (op->type) {891case TYPE_Rv:892case TYPE_R8:893case TYPE_R16:894case TYPE_R32:895case TYPE_R64:896break;897default:898insn->eaBase =899(EABase)(insn->eaBase +900(xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));901break;902}903}904[[fallthrough]];905case ENCODING_SIB:906if (insn->eaBase >= insn->eaRegBase) {907insn->eaBase = (EABase)fixupRMValue(908insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);909if (!valid)910return -1;911}912break;913}914915return 0;916}917918// Read the opcode (except the ModR/M byte in the case of extended or escape919// opcodes).920static bool readOpcode(struct InternalInstruction *insn) {921uint8_t current;922LLVM_DEBUG(dbgs() << "readOpcode()");923924insn->opcodeType = ONEBYTE;925if (insn->vectorExtensionType == TYPE_EVEX) {926switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {927default:928LLVM_DEBUG(929dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",930mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));931return true;932case VEX_LOB_0F:933insn->opcodeType = TWOBYTE;934return consume(insn, insn->opcode);935case VEX_LOB_0F38:936insn->opcodeType = THREEBYTE_38;937return consume(insn, insn->opcode);938case VEX_LOB_0F3A:939insn->opcodeType = THREEBYTE_3A;940return consume(insn, insn->opcode);941case VEX_LOB_MAP4:942insn->opcodeType = MAP4;943return consume(insn, insn->opcode);944case VEX_LOB_MAP5:945insn->opcodeType = MAP5;946return consume(insn, insn->opcode);947case 
VEX_LOB_MAP6:948insn->opcodeType = MAP6;949return consume(insn, insn->opcode);950case VEX_LOB_MAP7:951insn->opcodeType = MAP7;952return consume(insn, insn->opcode);953}954} else if (insn->vectorExtensionType == TYPE_VEX_3B) {955switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {956default:957LLVM_DEBUG(958dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",959mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));960return true;961case VEX_LOB_0F:962insn->opcodeType = TWOBYTE;963return consume(insn, insn->opcode);964case VEX_LOB_0F38:965insn->opcodeType = THREEBYTE_38;966return consume(insn, insn->opcode);967case VEX_LOB_0F3A:968insn->opcodeType = THREEBYTE_3A;969return consume(insn, insn->opcode);970case VEX_LOB_MAP5:971insn->opcodeType = MAP5;972return consume(insn, insn->opcode);973case VEX_LOB_MAP6:974insn->opcodeType = MAP6;975return consume(insn, insn->opcode);976case VEX_LOB_MAP7:977insn->opcodeType = MAP7;978return consume(insn, insn->opcode);979}980} else if (insn->vectorExtensionType == TYPE_VEX_2B) {981insn->opcodeType = TWOBYTE;982return consume(insn, insn->opcode);983} else if (insn->vectorExtensionType == TYPE_XOP) {984switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {985default:986LLVM_DEBUG(987dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",988mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));989return true;990case XOP_MAP_SELECT_8:991insn->opcodeType = XOP8_MAP;992return consume(insn, insn->opcode);993case XOP_MAP_SELECT_9:994insn->opcodeType = XOP9_MAP;995return consume(insn, insn->opcode);996case XOP_MAP_SELECT_A:997insn->opcodeType = XOPA_MAP;998return consume(insn, insn->opcode);999}1000} else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {1001// m bit indicates opcode map 11002insn->opcodeType = TWOBYTE;1003return consume(insn, insn->opcode);1004}10051006if (consume(insn, current))1007return true;10081009if (current == 0x0f) {1010LLVM_DEBUG(1011dbgs() << format("Found a two-byte escape prefix 
(0x%hhx)", current));1012if (consume(insn, current))1013return true;10141015if (current == 0x38) {1016LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",1017current));1018if (consume(insn, current))1019return true;10201021insn->opcodeType = THREEBYTE_38;1022} else if (current == 0x3a) {1023LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",1024current));1025if (consume(insn, current))1026return true;10271028insn->opcodeType = THREEBYTE_3A;1029} else if (current == 0x0f) {1030LLVM_DEBUG(1031dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));10321033// Consume operands before the opcode to comply with the 3DNow encoding1034if (readModRM(insn))1035return true;10361037if (consume(insn, current))1038return true;10391040insn->opcodeType = THREEDNOW_MAP;1041} else {1042LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");1043insn->opcodeType = TWOBYTE;1044}1045} else if (insn->mandatoryPrefix)1046// The opcode with mandatory prefix must start with opcode escape.1047// If not it's legacy repeat prefix1048insn->mandatoryPrefix = 0;10491050// At this point we have consumed the full opcode.1051// Anything we consume from here on must be unconsumed.1052insn->opcode = current;10531054return false;1055}10561057// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).1058static bool is16BitEquivalent(const char *orig, const char *equiv) {1059for (int i = 0;; i++) {1060if (orig[i] == '\0' && equiv[i] == '\0')1061return true;1062if (orig[i] == '\0' || equiv[i] == '\0')1063return false;1064if (orig[i] != equiv[i]) {1065if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')1066continue;1067if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')1068continue;1069if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')1070continue;1071return false;1072}1073}1074}10751076// Determine whether this instruction is a 64-bit instruction.1077static bool is64Bit(const char *name) {1078for (int i = 
0;; ++i) {1079if (name[i] == '\0')1080return false;1081if (name[i] == '6' && name[i + 1] == '4')1082return true;1083}1084}10851086// Determine the ID of an instruction, consuming the ModR/M byte as appropriate1087// for extended and escape opcodes, and using a supplied attribute mask.1088static int getInstructionIDWithAttrMask(uint16_t *instructionID,1089struct InternalInstruction *insn,1090uint16_t attrMask) {1091auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);1092const ContextDecision *decision;1093switch (insn->opcodeType) {1094case ONEBYTE:1095decision = &ONEBYTE_SYM;1096break;1097case TWOBYTE:1098decision = &TWOBYTE_SYM;1099break;1100case THREEBYTE_38:1101decision = &THREEBYTE38_SYM;1102break;1103case THREEBYTE_3A:1104decision = &THREEBYTE3A_SYM;1105break;1106case XOP8_MAP:1107decision = &XOP8_MAP_SYM;1108break;1109case XOP9_MAP:1110decision = &XOP9_MAP_SYM;1111break;1112case XOPA_MAP:1113decision = &XOPA_MAP_SYM;1114break;1115case THREEDNOW_MAP:1116decision = &THREEDNOW_MAP_SYM;1117break;1118case MAP4:1119decision = &MAP4_SYM;1120break;1121case MAP5:1122decision = &MAP5_SYM;1123break;1124case MAP6:1125decision = &MAP6_SYM;1126break;1127case MAP7:1128decision = &MAP7_SYM;1129break;1130}11311132if (decision->opcodeDecisions[insnCtx]1133.modRMDecisions[insn->opcode]1134.modrm_type != MODRM_ONEENTRY) {1135if (readModRM(insn))1136return -1;1137*instructionID =1138decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);1139} else {1140*instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);1141}11421143return 0;1144}11451146static bool isCCMPOrCTEST(InternalInstruction *insn) {1147if (insn->opcodeType != MAP4)1148return false;1149if (insn->opcode == 0x83 && regFromModRM(insn->modRM) == 7)1150return true;1151switch (insn->opcode & 0xfe) {1152default:1153return false;1154case 0x38:1155case 0x3a:1156case 0x84:1157return true;1158case 0x80:1159return regFromModRM(insn->modRM) == 7;1160case 0xf6:1161return regFromModRM(insn->modRM) 
== 0;1162}1163}11641165static bool isNF(InternalInstruction *insn) {1166if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3]))1167return false;1168if (insn->opcodeType == MAP4)1169return true;1170// Below NF instructions are not in map4.1171if (insn->opcodeType == THREEBYTE_38 &&1172ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE) {1173switch (insn->opcode) {1174case 0xf2: // ANDN1175case 0xf3: // BLSI, BLSR, BLSMSK1176case 0xf5: // BZHI1177case 0xf7: // BEXTR1178return true;1179default:1180break;1181}1182}1183return false;1184}11851186// Determine the ID of an instruction, consuming the ModR/M byte as appropriate1187// for extended and escape opcodes. Determines the attributes and context for1188// the instruction before doing so.1189static int getInstructionID(struct InternalInstruction *insn,1190const MCInstrInfo *mii) {1191uint16_t attrMask;1192uint16_t instructionID;11931194LLVM_DEBUG(dbgs() << "getID()");11951196attrMask = ATTR_NONE;11971198if (insn->mode == MODE_64BIT)1199attrMask |= ATTR_64BIT;12001201if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {1202attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? 
ATTR_EVEX : ATTR_VEX;12031204if (insn->vectorExtensionType == TYPE_EVEX) {1205switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {1206case VEX_PREFIX_66:1207attrMask |= ATTR_OPSIZE;1208break;1209case VEX_PREFIX_F3:1210attrMask |= ATTR_XS;1211break;1212case VEX_PREFIX_F2:1213attrMask |= ATTR_XD;1214break;1215}12161217if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))1218attrMask |= ATTR_EVEXKZ;1219if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))1220attrMask |= ATTR_EVEXB;1221if (isNF(insn) && !readModRM(insn) &&1222!isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.1223attrMask |= ATTR_EVEXNF;1224// aaa is not used a opmask in MAP41225else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&1226(insn->opcodeType != MAP4))1227attrMask |= ATTR_EVEXK;1228if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))1229attrMask |= ATTR_VEXL;1230if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))1231attrMask |= ATTR_EVEXL2;1232} else if (insn->vectorExtensionType == TYPE_VEX_3B) {1233switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {1234case VEX_PREFIX_66:1235attrMask |= ATTR_OPSIZE;1236break;1237case VEX_PREFIX_F3:1238attrMask |= ATTR_XS;1239break;1240case VEX_PREFIX_F2:1241attrMask |= ATTR_XD;1242break;1243}12441245if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))1246attrMask |= ATTR_VEXL;1247} else if (insn->vectorExtensionType == TYPE_VEX_2B) {1248switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {1249case VEX_PREFIX_66:1250attrMask |= ATTR_OPSIZE;1251if (insn->hasAdSize)1252attrMask |= ATTR_ADSIZE;1253break;1254case VEX_PREFIX_F3:1255attrMask |= ATTR_XS;1256break;1257case VEX_PREFIX_F2:1258attrMask |= ATTR_XD;1259break;1260}12611262if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))1263attrMask |= ATTR_VEXL;1264} else if (insn->vectorExtensionType == TYPE_XOP) {1265switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {1266case VEX_PREFIX_66:1267attrMask |= ATTR_OPSIZE;1268break;1269case VEX_PREFIX_F3:1270attrMask |= ATTR_XS;1271break;1272case 
VEX_PREFIX_F2:1273attrMask |= ATTR_XD;1274break;1275}12761277if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))1278attrMask |= ATTR_VEXL;1279} else {1280return -1;1281}1282} else if (!insn->mandatoryPrefix) {1283// If we don't have mandatory prefix we should use legacy prefixes here1284if (insn->hasOpSize && (insn->mode != MODE_16BIT))1285attrMask |= ATTR_OPSIZE;1286if (insn->hasAdSize)1287attrMask |= ATTR_ADSIZE;1288if (insn->opcodeType == ONEBYTE) {1289if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))1290// Special support for PAUSE1291attrMask |= ATTR_XS;1292} else {1293if (insn->repeatPrefix == 0xf2)1294attrMask |= ATTR_XD;1295else if (insn->repeatPrefix == 0xf3)1296attrMask |= ATTR_XS;1297}1298} else {1299switch (insn->mandatoryPrefix) {1300case 0xf2:1301attrMask |= ATTR_XD;1302break;1303case 0xf3:1304attrMask |= ATTR_XS;1305break;1306case 0x66:1307if (insn->mode != MODE_16BIT)1308attrMask |= ATTR_OPSIZE;1309if (insn->hasAdSize)1310attrMask |= ATTR_ADSIZE;1311break;1312case 0x67:1313attrMask |= ATTR_ADSIZE;1314break;1315}1316}13171318if (insn->rexPrefix & 0x08) {1319attrMask |= ATTR_REXW;1320attrMask &= ~ATTR_ADSIZE;1321}13221323// Absolute jump and pushp/popp need special handling1324if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&1325(insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))1326attrMask |= ATTR_REX2;13271328if (insn->mode == MODE_16BIT) {1329// JCXZ/JECXZ need special handling for 16-bit mode because the meaning1330// of the AdSize prefix is inverted w.r.t. 
32-bit mode.1331if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)1332attrMask ^= ATTR_ADSIZE;1333// If we're in 16-bit mode and this is one of the relative jumps and opsize1334// prefix isn't present, we need to force the opsize attribute since the1335// prefix is inverted relative to 32-bit mode.1336if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&1337(insn->opcode == 0xE8 || insn->opcode == 0xE9))1338attrMask |= ATTR_OPSIZE;13391340if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&1341insn->opcode >= 0x80 && insn->opcode <= 0x8F)1342attrMask |= ATTR_OPSIZE;1343}134413451346if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))1347return -1;13481349// The following clauses compensate for limitations of the tables.13501351if (insn->mode != MODE_64BIT &&1352insn->vectorExtensionType != TYPE_NO_VEX_XOP) {1353// The tables can't distinquish between cases where the W-bit is used to1354// select register size and cases where its a required part of the opcode.1355if ((insn->vectorExtensionType == TYPE_EVEX &&1356wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||1357(insn->vectorExtensionType == TYPE_VEX_3B &&1358wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||1359(insn->vectorExtensionType == TYPE_XOP &&1360wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {13611362uint16_t instructionIDWithREXW;1363if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,1364attrMask | ATTR_REXW)) {1365insn->instructionID = instructionID;1366insn->spec = &INSTRUCTIONS_SYM[instructionID];1367return 0;1368}13691370auto SpecName = mii->getName(instructionIDWithREXW);1371// If not a 64-bit instruction. 
Switch the opcode.1372if (!is64Bit(SpecName.data())) {1373insn->instructionID = instructionIDWithREXW;1374insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];1375return 0;1376}1377}1378}13791380// Absolute moves, umonitor, and movdir64b need special handling.1381// -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are1382// inverted w.r.t.1383// -For 32-bit mode we need to ensure the ADSIZE prefix is observed in1384// any position.1385if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||1386(insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||1387(insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||1388(insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {1389// Make sure we observed the prefixes in any position.1390if (insn->hasAdSize)1391attrMask |= ATTR_ADSIZE;1392if (insn->hasOpSize)1393attrMask |= ATTR_OPSIZE;13941395// In 16-bit, invert the attributes.1396if (insn->mode == MODE_16BIT) {1397attrMask ^= ATTR_ADSIZE;13981399// The OpSize attribute is only valid with the absolute moves.1400if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))1401attrMask ^= ATTR_OPSIZE;1402}14031404if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))1405return -1;14061407insn->instructionID = instructionID;1408insn->spec = &INSTRUCTIONS_SYM[instructionID];1409return 0;1410}14111412if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&1413!(attrMask & ATTR_OPSIZE)) {1414// The instruction tables make no distinction between instructions that1415// allow OpSize anywhere (i.e., 16-bit operations) and that need it in a1416// particular spot (i.e., many MMX operations). 
In general we're1417// conservative, but in the specific case where OpSize is present but not in1418// the right place we check if there's a 16-bit operation.1419const struct InstructionSpecifier *spec;1420uint16_t instructionIDWithOpsize;1421llvm::StringRef specName, specWithOpSizeName;14221423spec = &INSTRUCTIONS_SYM[instructionID];14241425if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,1426attrMask | ATTR_OPSIZE)) {1427// ModRM required with OpSize but not present. Give up and return the1428// version without OpSize set.1429insn->instructionID = instructionID;1430insn->spec = spec;1431return 0;1432}14331434specName = mii->getName(instructionID);1435specWithOpSizeName = mii->getName(instructionIDWithOpsize);14361437if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&1438(insn->mode == MODE_16BIT) ^ insn->hasOpSize) {1439insn->instructionID = instructionIDWithOpsize;1440insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];1441} else {1442insn->instructionID = instructionID;1443insn->spec = spec;1444}1445return 0;1446}14471448if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&1449insn->rexPrefix & 0x01) {1450// NOOP shouldn't decode as NOOP if REX.b is set. 
Instead it should decode1451// as XCHG %r8, %eax.1452const struct InstructionSpecifier *spec;1453uint16_t instructionIDWithNewOpcode;1454const struct InstructionSpecifier *specWithNewOpcode;14551456spec = &INSTRUCTIONS_SYM[instructionID];14571458// Borrow opcode from one of the other XCHGar opcodes1459insn->opcode = 0x91;14601461if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,1462attrMask)) {1463insn->opcode = 0x90;14641465insn->instructionID = instructionID;1466insn->spec = spec;1467return 0;1468}14691470specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];14711472// Change back1473insn->opcode = 0x90;14741475insn->instructionID = instructionIDWithNewOpcode;1476insn->spec = specWithNewOpcode;14771478return 0;1479}14801481insn->instructionID = instructionID;1482insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];14831484return 0;1485}14861487// Read an operand from the opcode field of an instruction and interprets it1488// appropriately given the operand width. 
Handles AddRegFrm instructions.1489//1490// @param insn - the instruction whose opcode field is to be read.1491// @param size - The width (in bytes) of the register being specified.1492// 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means1493// RAX.1494// @return - 0 on success; nonzero otherwise.1495static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {1496LLVM_DEBUG(dbgs() << "readOpcodeRegister()");14971498if (size == 0)1499size = insn->registerSize;15001501auto setOpcodeRegister = [&](unsigned base) {1502insn->opcodeRegister =1503(Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |1504(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |1505(insn->opcode & 7)));1506};15071508switch (size) {1509case 1:1510setOpcodeRegister(MODRM_REG_AL);1511if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&1512insn->opcodeRegister < MODRM_REG_AL + 0x8) {1513insn->opcodeRegister =1514(Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));1515}15161517break;1518case 2:1519setOpcodeRegister(MODRM_REG_AX);1520break;1521case 4:1522setOpcodeRegister(MODRM_REG_EAX);1523break;1524case 8:1525setOpcodeRegister(MODRM_REG_RAX);1526break;1527}15281529return 0;1530}15311532// Consume an immediate operand from an instruction, given the desired operand1533// size.1534//1535// @param insn - The instruction whose operand is to be read.1536// @param size - The width (in bytes) of the operand.1537// @return - 0 if the immediate was successfully consumed; nonzero1538// otherwise.1539static int readImmediate(struct InternalInstruction *insn, uint8_t size) {1540uint8_t imm8;1541uint16_t imm16;1542uint32_t imm32;1543uint64_t imm64;15441545LLVM_DEBUG(dbgs() << "readImmediate()");15461547assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");15481549insn->immediateSize = size;1550insn->immediateOffset = insn->readerCursor - insn->startLocation;15511552switch (size) {1553case 1:1554if (consume(insn, imm8))1555return 
-1;1556insn->immediates[insn->numImmediatesConsumed] = imm8;1557break;1558case 2:1559if (consume(insn, imm16))1560return -1;1561insn->immediates[insn->numImmediatesConsumed] = imm16;1562break;1563case 4:1564if (consume(insn, imm32))1565return -1;1566insn->immediates[insn->numImmediatesConsumed] = imm32;1567break;1568case 8:1569if (consume(insn, imm64))1570return -1;1571insn->immediates[insn->numImmediatesConsumed] = imm64;1572break;1573default:1574llvm_unreachable("invalid size");1575}15761577insn->numImmediatesConsumed++;15781579return 0;1580}15811582// Consume vvvv from an instruction if it has a VEX prefix.1583static int readVVVV(struct InternalInstruction *insn) {1584LLVM_DEBUG(dbgs() << "readVVVV()");15851586int vvvv;1587if (insn->vectorExtensionType == TYPE_EVEX)1588vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |1589vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));1590else if (insn->vectorExtensionType == TYPE_VEX_3B)1591vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);1592else if (insn->vectorExtensionType == TYPE_VEX_2B)1593vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);1594else if (insn->vectorExtensionType == TYPE_XOP)1595vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);1596else1597return -1;15981599if (insn->mode != MODE_64BIT)1600vvvv &= 0xf; // Can only clear bit 4. 
Bit 3 must be cleared later.16011602insn->vvvv = static_cast<Reg>(vvvv);1603return 0;1604}16051606// Read an mask register from the opcode field of an instruction.1607//1608// @param insn - The instruction whose opcode field is to be read.1609// @return - 0 on success; nonzero otherwise.1610static int readMaskRegister(struct InternalInstruction *insn) {1611LLVM_DEBUG(dbgs() << "readMaskRegister()");16121613if (insn->vectorExtensionType != TYPE_EVEX)1614return -1;16151616insn->writemask =1617static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));1618return 0;1619}16201621// Consults the specifier for an instruction and consumes all1622// operands for that instruction, interpreting them as it goes.1623static int readOperands(struct InternalInstruction *insn) {1624int hasVVVV, needVVVV;1625int sawRegImm = 0;16261627LLVM_DEBUG(dbgs() << "readOperands()");16281629// If non-zero vvvv specified, make sure one of the operands uses it.1630hasVVVV = !readVVVV(insn);1631needVVVV = hasVVVV && (insn->vvvv != 0);16321633for (const auto &Op : x86OperandSets[insn->spec->operands]) {1634switch (Op.encoding) {1635case ENCODING_NONE:1636case ENCODING_SI:1637case ENCODING_DI:1638break;1639CASE_ENCODING_VSIB:1640// VSIB can use the V2 bit so check only the other bits.1641if (needVVVV)1642needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);1643if (readModRM(insn))1644return -1;16451646// Reject if SIB wasn't used.1647if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)1648return -1;16491650// If sibIndex was set to SIB_INDEX_NONE, index offset is 4.1651if (insn->sibIndex == SIB_INDEX_NONE)1652insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);16531654// If EVEX.v2 is set this is one of the 16-31 registers.1655if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&1656v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))1657insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);16581659// Adjust the index register to the correct size.1660switch 
((OperandType)Op.type) {1661default:1662debug("Unhandled VSIB index type");1663return -1;1664case TYPE_MVSIBX:1665insn->sibIndex =1666(SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));1667break;1668case TYPE_MVSIBY:1669insn->sibIndex =1670(SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));1671break;1672case TYPE_MVSIBZ:1673insn->sibIndex =1674(SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));1675break;1676}16771678// Apply the AVX512 compressed displacement scaling factor.1679if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)1680insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);1681break;1682case ENCODING_SIB:1683// Reject if SIB wasn't used.1684if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)1685return -1;1686if (readModRM(insn))1687return -1;1688if (fixupReg(insn, &Op))1689return -1;1690break;1691case ENCODING_REG:1692CASE_ENCODING_RM:1693if (readModRM(insn))1694return -1;1695if (fixupReg(insn, &Op))1696return -1;1697// Apply the AVX512 compressed displacement scaling factor.1698if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)1699insn->displacement *= 1 << (Op.encoding - ENCODING_RM);1700break;1701case ENCODING_IB:1702if (sawRegImm) {1703// Saw a register immediate so don't read again and instead split the1704// previous immediate. 
FIXME: This is a hack.1705insn->immediates[insn->numImmediatesConsumed] =1706insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;1707++insn->numImmediatesConsumed;1708break;1709}1710if (readImmediate(insn, 1))1711return -1;1712if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)1713sawRegImm = 1;1714break;1715case ENCODING_IW:1716if (readImmediate(insn, 2))1717return -1;1718break;1719case ENCODING_ID:1720if (readImmediate(insn, 4))1721return -1;1722break;1723case ENCODING_IO:1724if (readImmediate(insn, 8))1725return -1;1726break;1727case ENCODING_Iv:1728if (readImmediate(insn, insn->immediateSize))1729return -1;1730break;1731case ENCODING_Ia:1732if (readImmediate(insn, insn->addressSize))1733return -1;1734break;1735case ENCODING_IRC:1736insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |1737lFromEVEX4of4(insn->vectorExtensionPrefix[3]);1738break;1739case ENCODING_RB:1740if (readOpcodeRegister(insn, 1))1741return -1;1742break;1743case ENCODING_RW:1744if (readOpcodeRegister(insn, 2))1745return -1;1746break;1747case ENCODING_RD:1748if (readOpcodeRegister(insn, 4))1749return -1;1750break;1751case ENCODING_RO:1752if (readOpcodeRegister(insn, 8))1753return -1;1754break;1755case ENCODING_Rv:1756if (readOpcodeRegister(insn, 0))1757return -1;1758break;1759case ENCODING_CF:1760insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]);1761needVVVV = false; // oszc shares the same bits with VVVV1762break;1763case ENCODING_CC:1764if (isCCMPOrCTEST(insn))1765insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]);1766else1767insn->immediates[1] = insn->opcode & 0xf;1768break;1769case ENCODING_FP:1770break;1771case ENCODING_VVVV:1772needVVVV = 0; // Mark that we have found a VVVV operand.1773if (!hasVVVV)1774return -1;1775if (insn->mode != MODE_64BIT)1776insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);1777if (fixupReg(insn, &Op))1778return -1;1779break;1780case ENCODING_WRITEMASK:1781if (readMaskRegister(insn))1782return 
-1;1783break;1784case ENCODING_DUP:1785break;1786default:1787LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");1788return -1;1789}1790}17911792// If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail1793if (needVVVV)1794return -1;17951796return 0;1797}17981799namespace llvm {18001801// Fill-ins to make the compiler happy. These constants are never actually1802// assigned; they are just filler to make an automatically-generated switch1803// statement work.1804namespace X86 {1805enum {1806BX_SI = 500,1807BX_DI = 501,1808BP_SI = 502,1809BP_DI = 503,1810sib = 504,1811sib64 = 5051812};1813} // namespace X8618141815} // namespace llvm18161817static bool translateInstruction(MCInst &target,1818InternalInstruction &source,1819const MCDisassembler *Dis);18201821namespace {18221823/// Generic disassembler for all X86 platforms. All each platform class should1824/// have to do is subclass the constructor, and provide a different1825/// disassemblerMode value.1826class X86GenericDisassembler : public MCDisassembler {1827std::unique_ptr<const MCInstrInfo> MII;1828public:1829X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,1830std::unique_ptr<const MCInstrInfo> MII);1831public:1832DecodeStatus getInstruction(MCInst &instr, uint64_t &size,1833ArrayRef<uint8_t> Bytes, uint64_t Address,1834raw_ostream &cStream) const override;18351836private:1837DisassemblerMode fMode;1838};18391840} // namespace18411842X86GenericDisassembler::X86GenericDisassembler(1843const MCSubtargetInfo &STI,1844MCContext &Ctx,1845std::unique_ptr<const MCInstrInfo> MII)1846: MCDisassembler(STI, Ctx), MII(std::move(MII)) {1847const FeatureBitset &FB = STI.getFeatureBits();1848if (FB[X86::Is16Bit]) {1849fMode = MODE_16BIT;1850return;1851} else if (FB[X86::Is32Bit]) {1852fMode = MODE_32BIT;1853return;1854} else if (FB[X86::Is64Bit]) {1855fMode = MODE_64BIT;1856return;1857}18581859llvm_unreachable("Invalid CPU 
mode");1860}18611862MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(1863MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,1864raw_ostream &CStream) const {1865CommentStream = &CStream;18661867InternalInstruction Insn;1868memset(&Insn, 0, sizeof(InternalInstruction));1869Insn.bytes = Bytes;1870Insn.startLocation = Address;1871Insn.readerCursor = Address;1872Insn.mode = fMode;18731874if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||1875getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||1876readOperands(&Insn)) {1877Size = Insn.readerCursor - Address;1878return Fail;1879}18801881Insn.operands = x86OperandSets[Insn.spec->operands];1882Insn.length = Insn.readerCursor - Insn.startLocation;1883Size = Insn.length;1884if (Size > 15)1885LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");18861887bool Ret = translateInstruction(Instr, Insn, this);1888if (!Ret) {1889unsigned Flags = X86::IP_NO_PREFIX;1890if (Insn.hasAdSize)1891Flags |= X86::IP_HAS_AD_SIZE;1892if (!Insn.mandatoryPrefix) {1893if (Insn.hasOpSize)1894Flags |= X86::IP_HAS_OP_SIZE;1895if (Insn.repeatPrefix == 0xf2)1896Flags |= X86::IP_HAS_REPEAT_NE;1897else if (Insn.repeatPrefix == 0xf3 &&1898// It should not be 'pause' f3 901899Insn.opcode != 0x90)1900Flags |= X86::IP_HAS_REPEAT;1901if (Insn.hasLockPrefix)1902Flags |= X86::IP_HAS_LOCK;1903}1904Instr.setFlags(Flags);1905}1906return (!Ret) ? 
Success : Fail;1907}19081909//1910// Private code that translates from struct InternalInstructions to MCInsts.1911//19121913/// translateRegister - Translates an internal register to the appropriate LLVM1914/// register, and appends it as an operand to an MCInst.1915///1916/// @param mcInst - The MCInst to append to.1917/// @param reg - The Reg to append.1918static void translateRegister(MCInst &mcInst, Reg reg) {1919#define ENTRY(x) X86::x,1920static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};1921#undef ENTRY19221923MCPhysReg llvmRegnum = llvmRegnums[reg];1924mcInst.addOperand(MCOperand::createReg(llvmRegnum));1925}19261927static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {19280, // SEG_OVERRIDE_NONE1929X86::CS,1930X86::SS,1931X86::DS,1932X86::ES,1933X86::FS,1934X86::GS1935};19361937/// translateSrcIndex - Appends a source index operand to an MCInst.1938///1939/// @param mcInst - The MCInst to append to.1940/// @param insn - The internal instruction.1941static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {1942unsigned baseRegNo;19431944if (insn.mode == MODE_64BIT)1945baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;1946else if (insn.mode == MODE_32BIT)1947baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;1948else {1949assert(insn.mode == MODE_16BIT);1950baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;1951}1952MCOperand baseReg = MCOperand::createReg(baseRegNo);1953mcInst.addOperand(baseReg);19541955MCOperand segmentReg;1956segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);1957mcInst.addOperand(segmentReg);1958return false;1959}19601961/// translateDstIndex - Appends a destination index operand to an MCInst.1962///1963/// @param mcInst - The MCInst to append to.1964/// @param insn - The internal instruction.19651966static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {1967unsigned baseRegNo;19681969if (insn.mode == MODE_64BIT)1970baseRegNo = insn.hasAdSize ? 
X86::EDI : X86::RDI;1971else if (insn.mode == MODE_32BIT)1972baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;1973else {1974assert(insn.mode == MODE_16BIT);1975baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;1976}1977MCOperand baseReg = MCOperand::createReg(baseRegNo);1978mcInst.addOperand(baseReg);1979return false;1980}19811982/// translateImmediate - Appends an immediate operand to an MCInst.1983///1984/// @param mcInst - The MCInst to append to.1985/// @param immediate - The immediate value to append.1986/// @param operand - The operand, as stored in the descriptor table.1987/// @param insn - The internal instruction.1988static void translateImmediate(MCInst &mcInst, uint64_t immediate,1989const OperandSpecifier &operand,1990InternalInstruction &insn,1991const MCDisassembler *Dis) {1992// Sign-extend the immediate if necessary.19931994OperandType type = (OperandType)operand.type;19951996bool isBranch = false;1997uint64_t pcrel = 0;1998if (type == TYPE_REL) {1999isBranch = true;2000pcrel = insn.startLocation + insn.length;2001switch (operand.encoding) {2002default:2003break;2004case ENCODING_Iv:2005switch (insn.displacementSize) {2006default:2007break;2008case 1:2009if(immediate & 0x80)2010immediate |= ~(0xffull);2011break;2012case 2:2013if(immediate & 0x8000)2014immediate |= ~(0xffffull);2015break;2016case 4:2017if(immediate & 0x80000000)2018immediate |= ~(0xffffffffull);2019break;2020case 8:2021break;2022}2023break;2024case ENCODING_IB:2025if(immediate & 0x80)2026immediate |= ~(0xffull);2027break;2028case ENCODING_IW:2029if(immediate & 0x8000)2030immediate |= ~(0xffffull);2031break;2032case ENCODING_ID:2033if(immediate & 0x80000000)2034immediate |= ~(0xffffffffull);2035break;2036}2037}2038// By default sign-extend all X86 immediates based on their encoding.2039else if (type == TYPE_IMM) {2040switch (operand.encoding) {2041default:2042break;2043case ENCODING_IB:2044if(immediate & 0x80)2045immediate |= ~(0xffull);2046break;2047case ENCODING_IW:2048if(immediate & 
0x8000)2049immediate |= ~(0xffffull);2050break;2051case ENCODING_ID:2052if(immediate & 0x80000000)2053immediate |= ~(0xffffffffull);2054break;2055case ENCODING_IO:2056break;2057}2058}20592060switch (type) {2061case TYPE_XMM:2062mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));2063return;2064case TYPE_YMM:2065mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));2066return;2067case TYPE_ZMM:2068mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));2069return;2070default:2071// operand is 64 bits wide. Do nothing.2072break;2073}20742075if (!Dis->tryAddingSymbolicOperand(2076mcInst, immediate + pcrel, insn.startLocation, isBranch,2077insn.immediateOffset, insn.immediateSize, insn.length))2078mcInst.addOperand(MCOperand::createImm(immediate));20792080if (type == TYPE_MOFFS) {2081MCOperand segmentReg;2082segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);2083mcInst.addOperand(segmentReg);2084}2085}20862087/// translateRMRegister - Translates a register stored in the R/M field of the2088/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.2089/// @param mcInst - The MCInst to append to.2090/// @param insn - The internal instruction to extract the R/M field2091/// from.2092/// @return - 0 on success; -1 otherwise2093static bool translateRMRegister(MCInst &mcInst,2094InternalInstruction &insn) {2095if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {2096debug("A R/M register operand may not have a SIB byte");2097return true;2098}20992100switch (insn.eaBase) {2101default:2102debug("Unexpected EA base register");2103return true;2104case EA_BASE_NONE:2105debug("EA_BASE_NONE for ModR/M base");2106return true;2107#define ENTRY(x) case EA_BASE_##x:2108ALL_EA_BASES2109#undef ENTRY2110debug("A R/M register operand may not have a base; "2111"the operand must be a register.");2112return true;2113#define ENTRY(x) \2114case EA_REG_##x: 
\2115mcInst.addOperand(MCOperand::createReg(X86::x)); break;2116ALL_REGS2117#undef ENTRY2118}21192120return false;2121}21222123/// translateRMMemory - Translates a memory operand stored in the Mod and R/M2124/// fields of an internal instruction (and possibly its SIB byte) to a memory2125/// operand in LLVM's format, and appends it to an MCInst.2126///2127/// @param mcInst - The MCInst to append to.2128/// @param insn - The instruction to extract Mod, R/M, and SIB fields2129/// from.2130/// @param ForceSIB - The instruction must use SIB.2131/// @return - 0 on success; nonzero otherwise2132static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,2133const MCDisassembler *Dis,2134bool ForceSIB = false) {2135// Addresses in an MCInst are represented as five operands:2136// 1. basereg (register) The R/M base, or (if there is a SIB) the2137// SIB base2138// 2. scaleamount (immediate) 1, or (if there is a SIB) the specified2139// scale amount2140// 3. indexreg (register) x86_registerNONE, or (if there is a SIB)2141// the index (which is multiplied by the2142// scale amount)2143// 4. displacement (immediate) 0, or the displacement if there is one2144// 5. 
segmentreg (register) x86_registerNONE for now, but could be set2145// if we have segment overrides21462147MCOperand baseReg;2148MCOperand scaleAmount;2149MCOperand indexReg;2150MCOperand displacement;2151MCOperand segmentReg;2152uint64_t pcrel = 0;21532154if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {2155if (insn.sibBase != SIB_BASE_NONE) {2156switch (insn.sibBase) {2157default:2158debug("Unexpected sibBase");2159return true;2160#define ENTRY(x) \2161case SIB_BASE_##x: \2162baseReg = MCOperand::createReg(X86::x); break;2163ALL_SIB_BASES2164#undef ENTRY2165}2166} else {2167baseReg = MCOperand::createReg(X86::NoRegister);2168}21692170if (insn.sibIndex != SIB_INDEX_NONE) {2171switch (insn.sibIndex) {2172default:2173debug("Unexpected sibIndex");2174return true;2175#define ENTRY(x) \2176case SIB_INDEX_##x: \2177indexReg = MCOperand::createReg(X86::x); break;2178EA_BASES_32BIT2179EA_BASES_64BIT2180REGS_XMM2181REGS_YMM2182REGS_ZMM2183#undef ENTRY2184}2185} else {2186// Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,2187// but no index is used and modrm alone should have been enough.2188// -No base register in 32-bit mode. In 64-bit mode this is used to2189// avoid rip-relative addressing.2190// -Any base register used other than ESP/RSP/R12D/R12. Using these as a2191// base always requires a SIB byte.2192// -A scale other than 1 is used.2193if (!ForceSIB &&2194(insn.sibScale != 1 ||2195(insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||2196(insn.sibBase != SIB_BASE_NONE &&2197insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&2198insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {2199indexReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIZ :2200X86::RIZ);2201} else2202indexReg = MCOperand::createReg(X86::NoRegister);2203}22042205scaleAmount = MCOperand::createImm(insn.sibScale);2206} else {2207switch (insn.eaBase) {2208case EA_BASE_NONE:2209if (insn.eaDisplacement == EA_DISP_NONE) {2210debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");2211return true;2212}2213if (insn.mode == MODE_64BIT){2214pcrel = insn.startLocation + insn.length;2215Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,2216insn.startLocation +2217insn.displacementOffset);2218// Section 2.2.1.62219baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :2220X86::RIP);2221}2222else2223baseReg = MCOperand::createReg(X86::NoRegister);22242225indexReg = MCOperand::createReg(X86::NoRegister);2226break;2227case EA_BASE_BX_SI:2228baseReg = MCOperand::createReg(X86::BX);2229indexReg = MCOperand::createReg(X86::SI);2230break;2231case EA_BASE_BX_DI:2232baseReg = MCOperand::createReg(X86::BX);2233indexReg = MCOperand::createReg(X86::DI);2234break;2235case EA_BASE_BP_SI:2236baseReg = MCOperand::createReg(X86::BP);2237indexReg = MCOperand::createReg(X86::SI);2238break;2239case EA_BASE_BP_DI:2240baseReg = MCOperand::createReg(X86::BP);2241indexReg = MCOperand::createReg(X86::DI);2242break;2243default:2244indexReg = MCOperand::createReg(X86::NoRegister);2245switch (insn.eaBase) {2246default:2247debug("Unexpected eaBase");2248return true;2249// Here, we will use the fill-ins defined above. 
However,2250// BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and2251// sib and sib64 were handled in the top-level if, so they're only2252// placeholders to keep the compiler happy.2253#define ENTRY(x) \2254case EA_BASE_##x: \2255baseReg = MCOperand::createReg(X86::x); break;2256ALL_EA_BASES2257#undef ENTRY2258#define ENTRY(x) case EA_REG_##x:2259ALL_REGS2260#undef ENTRY2261debug("A R/M memory operand may not be a register; "2262"the base field must be a base.");2263return true;2264}2265}22662267scaleAmount = MCOperand::createImm(1);2268}22692270displacement = MCOperand::createImm(insn.displacement);22712272segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);22732274mcInst.addOperand(baseReg);2275mcInst.addOperand(scaleAmount);2276mcInst.addOperand(indexReg);22772278const uint8_t dispSize =2279(insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;22802281if (!Dis->tryAddingSymbolicOperand(2282mcInst, insn.displacement + pcrel, insn.startLocation, false,2283insn.displacementOffset, dispSize, insn.length))2284mcInst.addOperand(displacement);2285mcInst.addOperand(segmentReg);2286return false;2287}22882289/// translateRM - Translates an operand stored in the R/M (and possibly SIB)2290/// byte of an instruction to LLVM form, and appends it to an MCInst.2291///2292/// @param mcInst - The MCInst to append to.2293/// @param operand - The operand, as stored in the descriptor table.2294/// @param insn - The instruction to extract Mod, R/M, and SIB fields2295/// from.2296/// @return - 0 on success; nonzero otherwise2297static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,2298InternalInstruction &insn, const MCDisassembler *Dis) {2299switch (operand.type) {2300default:2301debug("Unexpected type for a R/M operand");2302return true;2303case TYPE_R8:2304case TYPE_R16:2305case TYPE_R32:2306case TYPE_R64:2307case TYPE_Rv:2308case TYPE_MM64:2309case TYPE_XMM:2310case TYPE_YMM:2311case TYPE_ZMM:2312case TYPE_TMM:2313case 
TYPE_VK_PAIR:2314case TYPE_VK:2315case TYPE_DEBUGREG:2316case TYPE_CONTROLREG:2317case TYPE_BNDR:2318return translateRMRegister(mcInst, insn);2319case TYPE_M:2320case TYPE_MVSIBX:2321case TYPE_MVSIBY:2322case TYPE_MVSIBZ:2323return translateRMMemory(mcInst, insn, Dis);2324case TYPE_MSIB:2325return translateRMMemory(mcInst, insn, Dis, true);2326}2327}23282329/// translateFPRegister - Translates a stack position on the FPU stack to its2330/// LLVM form, and appends it to an MCInst.2331///2332/// @param mcInst - The MCInst to append to.2333/// @param stackPos - The stack position to translate.2334static void translateFPRegister(MCInst &mcInst,2335uint8_t stackPos) {2336mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));2337}23382339/// translateMaskRegister - Translates a 3-bit mask register number to2340/// LLVM form, and appends it to an MCInst.2341///2342/// @param mcInst - The MCInst to append to.2343/// @param maskRegNum - Number of mask register from 0 to 7.2344/// @return - false on success; true otherwise.2345static bool translateMaskRegister(MCInst &mcInst,2346uint8_t maskRegNum) {2347if (maskRegNum >= 8) {2348debug("Invalid mask register number");2349return true;2350}23512352mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));2353return false;2354}23552356/// translateOperand - Translates an operand stored in an internal instruction2357/// to LLVM's format and appends it to an MCInst.2358///2359/// @param mcInst - The MCInst to append to.2360/// @param operand - The operand, as stored in the descriptor table.2361/// @param insn - The internal instruction.2362/// @return - false on success; true otherwise.2363static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,2364InternalInstruction &insn,2365const MCDisassembler *Dis) {2366switch (operand.encoding) {2367default:2368debug("Unhandled operand encoding during translation");2369return true;2370case ENCODING_REG:2371translateRegister(mcInst, insn.reg);2372return 
false;2373case ENCODING_WRITEMASK:2374return translateMaskRegister(mcInst, insn.writemask);2375case ENCODING_SIB:2376CASE_ENCODING_RM:2377CASE_ENCODING_VSIB:2378return translateRM(mcInst, operand, insn, Dis);2379case ENCODING_IB:2380case ENCODING_IW:2381case ENCODING_ID:2382case ENCODING_IO:2383case ENCODING_Iv:2384case ENCODING_Ia:2385translateImmediate(mcInst,2386insn.immediates[insn.numImmediatesTranslated++],2387operand,2388insn,2389Dis);2390return false;2391case ENCODING_IRC:2392mcInst.addOperand(MCOperand::createImm(insn.RC));2393return false;2394case ENCODING_SI:2395return translateSrcIndex(mcInst, insn);2396case ENCODING_DI:2397return translateDstIndex(mcInst, insn);2398case ENCODING_RB:2399case ENCODING_RW:2400case ENCODING_RD:2401case ENCODING_RO:2402case ENCODING_Rv:2403translateRegister(mcInst, insn.opcodeRegister);2404return false;2405case ENCODING_CF:2406mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));2407return false;2408case ENCODING_CC:2409if (isCCMPOrCTEST(&insn))2410mcInst.addOperand(MCOperand::createImm(insn.immediates[2]));2411else2412mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));2413return false;2414case ENCODING_FP:2415translateFPRegister(mcInst, insn.modRM & 7);2416return false;2417case ENCODING_VVVV:2418translateRegister(mcInst, insn.vvvv);2419return false;2420case ENCODING_DUP:2421return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],2422insn, Dis);2423}2424}24252426/// translateInstruction - Translates an internal instruction and all its2427/// operands to an MCInst.2428///2429/// @param mcInst - The MCInst to populate with the instruction's data.2430/// @param insn - The internal instruction.2431/// @return - false on success; true otherwise.2432static bool translateInstruction(MCInst &mcInst,2433InternalInstruction &insn,2434const MCDisassembler *Dis) {2435if (!insn.spec) {2436debug("Instruction has no specification");2437return 
true;2438}24392440mcInst.clear();2441mcInst.setOpcode(insn.instructionID);2442// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf32443// prefix bytes should be disassembled as xrelease and xacquire then set the2444// opcode to those instead of the rep and repne opcodes.2445if (insn.xAcquireRelease) {2446if(mcInst.getOpcode() == X86::REP_PREFIX)2447mcInst.setOpcode(X86::XRELEASE_PREFIX);2448else if(mcInst.getOpcode() == X86::REPNE_PREFIX)2449mcInst.setOpcode(X86::XACQUIRE_PREFIX);2450}24512452insn.numImmediatesTranslated = 0;24532454for (const auto &Op : insn.operands) {2455if (Op.encoding != ENCODING_NONE) {2456if (translateOperand(mcInst, Op, insn, Dis)) {2457return true;2458}2459}2460}24612462return false;2463}24642465static MCDisassembler *createX86Disassembler(const Target &T,2466const MCSubtargetInfo &STI,2467MCContext &Ctx) {2468std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());2469return new X86GenericDisassembler(STI, Ctx, std::move(MII));2470}24712472extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {2473// Register the disassembler.2474TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),2475createX86Disassembler);2476TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(),2477createX86Disassembler);2478}247924802481