// Path: blob/master/libs/capstone/arch/X86/X86Disassembler.c
// 4389 views
//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
// MCInsts.
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets.  The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
//    These attributes, recorded in enum attributeBits
//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
//    provides a mapping from bitmasks to contexts, which are represented by
//    enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is.  The
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode up in.  Look up the opcode, to
//    get a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
//    ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
//    meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.).  The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read.  For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode.  This is orthogonal to its meaning (a GPR or an XMM
//    register, for instance).  Given this information, the operands can be
//    extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
//    and operands into a format understandable by the client - in this case, an
//    MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself.  The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.cpp contains the code responsible for step 7, and for
//   invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
//   table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
//   factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
//   responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

/* Capstone Disassembly Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */

#ifdef CAPSTONE_HAS_X86

#if defined (WIN32) || defined (WIN64) || defined (_WIN32) || defined (_WIN64)
#pragma warning(disable:4996)	// disable MSVC's warning on strncpy()
#pragma warning(disable:28719)	// disable MSVC's warning on strncpy()
#endif

#include <capstone/platform.h>

#if defined(CAPSTONE_HAS_OSXKERNEL)
#include <Availability.h>
#endif

#include <string.h>

#include "../../cs_priv.h"

#include "X86BaseInfo.h"
#include "X86Disassembler.h"
#include "X86DisassemblerDecoderCommon.h"
#include "X86DisassemblerDecoder.h"
#include "../../MCInst.h"
#include "../../utils.h"
#include "X86Mapping.h"

#define GET_REGINFO_ENUM
#define GET_REGINFO_MC_DESC
#include "X86GenRegisterInfo.inc"

#define GET_INSTRINFO_ENUM
#ifdef CAPSTONE_X86_REDUCE
#include "X86GenInstrInfo_reduce.inc"
#else
#include "X86GenInstrInfo.inc"
#endif

// Fill-ins to make the compiler happy.  These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
enum {
	X86_BX_SI = 500,
	X86_BX_DI = 501,
	X86_BP_SI = 502,
	X86_BP_DI = 503,
	X86_sib   = 504,
	X86_sib64 = 505
};

//
// Private code that translates from struct InternalInstructions to MCInsts.
//

/// translateRegister - Translates an internal register to the appropriate LLVM
///   register, and appends it as an operand to an MCInst.
///
/// @param mcInst     - The MCInst to append to.
/// @param reg        - The Reg to append.  It is used directly as an index
///                     into the ALL_REGS-generated table, so it must be one
///                     of the decoder's register enumerators.
static void translateRegister(MCInst *mcInst, Reg reg)
{
	// ALL_REGS expands ENTRY() once per decoder register, in enum order, so
	// the table maps decoder register numbers to X86_* register numbers.
#define ENTRY(x) X86_##x,
	static const uint16_t llvmRegnums[] = {
		ALL_REGS
		0
	};
#undef ENTRY

	uint16_t llvmRegnum = llvmRegnums[reg];

	MCOperand_CreateReg0(mcInst, llvmRegnum);
}

// Maps a segment override (indexed by insn->segmentOverride) to the matching
// X86 segment register; 0 means "no segment register".
static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
	0,	// SEG_OVERRIDE_NONE
	X86_CS,
	X86_SS,
	X86_DS,
	X86_ES,
	X86_FS,
	X86_GS
};

/// translateSrcIndex - Appends a source index operand to an MCInst.
///   Two operands are appended: the (E/R)SI base register selected by the
///   decode mode and address-size prefix, then the segment register.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction.
/// @return             - Always false (success).
static bool translateSrcIndex(MCInst *mcInst, InternalInstruction *insn)
{
	unsigned baseRegNo;

	// The address-size prefix toggles between the mode's native index width
	// and the alternate one.
	if (insn->mode == MODE_64BIT) {
		baseRegNo = insn->hasAdSize ? X86_ESI : X86_RSI;
	} else if (insn->mode == MODE_32BIT) {
		baseRegNo = insn->hasAdSize ? X86_SI : X86_ESI;
	} else {
		// assert(insn->mode == MODE_16BIT);
		baseRegNo = insn->hasAdSize ? X86_ESI : X86_SI;
	}

	MCOperand_CreateReg0(mcInst, baseRegNo);
	MCOperand_CreateReg0(mcInst, segmentRegnums[insn->segmentOverride]);

	return false;
}

/// translateDstIndex - Appends a destination index operand to an MCInst.
///   Only the (E/R)DI base register is appended; unlike the source index, no
///   segment register operand is emitted.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction.
/// @return             - Always false (success).
static bool translateDstIndex(MCInst *mcInst, InternalInstruction *insn)
{
	unsigned baseRegNo;

	if (insn->mode == MODE_64BIT) {
		baseRegNo = insn->hasAdSize ? X86_EDI : X86_RDI;
	} else if (insn->mode == MODE_32BIT) {
		baseRegNo = insn->hasAdSize ? X86_DI : X86_EDI;
	} else {
		// assert(insn->mode == MODE_16BIT);
		baseRegNo = insn->hasAdSize ? X86_EDI : X86_DI;
	}

	MCOperand_CreateReg0(mcInst, baseRegNo);

	return false;
}

/// signExtendImmediate - Sign-extends the low @p size bytes of @p value to
///   64 bits.  Sizes other than 1, 2 and 4 return the value unchanged.
///
/// @param value        - The raw immediate as read by the decoder.
/// @param size         - Width of the encoded immediate, in bytes.
/// @return             - The sign-extended value.
static uint64_t signExtendImmediate(uint64_t value, unsigned size)
{
	switch (size) {
		default:
			break;
		case 1:
			if (value & 0x80)
				value |= ~(0xffull);
			break;
		case 2:
			if (value & 0x8000)
				value |= ~(0xffffull);
			break;
		case 4:
			if (value & 0x80000000)
				value |= ~(0xffffffffull);
			break;
	}

	return value;
}

#ifndef CAPSTONE_X86_REDUCE
// Expands to a case that maps opcode X86_<op> to its X86_<op>_alt twin.
#define ALT_CASE(op) case X86_##op: return X86_##op##_alt;

/// sseccAltOpcode - Returns the "_alt" opcode used when an SSE/XOP compare
///   immediate is outside the range printSSECC can handle, or 0 if @p opcode
///   has no such alternative.
static unsigned sseccAltOpcode(unsigned opcode)
{
	switch (opcode) {
		default:
			return 0;	// never reach
		ALT_CASE(CMPPDrmi)
		ALT_CASE(CMPPDrri)
		ALT_CASE(CMPPSrmi)
		ALT_CASE(CMPPSrri)
		ALT_CASE(CMPSDrm)
		ALT_CASE(CMPSDrr)
		ALT_CASE(CMPSSrm)
		ALT_CASE(CMPSSrr)
		ALT_CASE(VPCOMBri)
		ALT_CASE(VPCOMBmi)
		ALT_CASE(VPCOMWri)
		ALT_CASE(VPCOMWmi)
		ALT_CASE(VPCOMDri)
		ALT_CASE(VPCOMDmi)
		ALT_CASE(VPCOMQri)
		ALT_CASE(VPCOMQmi)
		ALT_CASE(VPCOMUBri)
		ALT_CASE(VPCOMUBmi)
		ALT_CASE(VPCOMUWri)
		ALT_CASE(VPCOMUWmi)
		ALT_CASE(VPCOMUDri)
		ALT_CASE(VPCOMUDmi)
		ALT_CASE(VPCOMUQri)
		ALT_CASE(VPCOMUQmi)
	}
}

/// avxccAltOpcode - Returns the "_alt" opcode used when an AVX compare
///   immediate is outside the range printAVXCC can handle, or 0 if @p opcode
///   has no such alternative.
static unsigned avxccAltOpcode(unsigned opcode)
{
	switch (opcode) {
		default:
			return 0;	// unexpected opcode
		ALT_CASE(VCMPPDrmi)
		ALT_CASE(VCMPPDrri)
		ALT_CASE(VCMPPSrmi)
		ALT_CASE(VCMPPSrri)
		ALT_CASE(VCMPSDrm)
		ALT_CASE(VCMPSDrr)
		ALT_CASE(VCMPSSrm)
		ALT_CASE(VCMPSSrr)
		ALT_CASE(VCMPPDYrmi)
		ALT_CASE(VCMPPDYrri)
		ALT_CASE(VCMPPSYrmi)
		ALT_CASE(VCMPPSYrri)
		ALT_CASE(VCMPPDZrmi)
		ALT_CASE(VCMPPDZrri)
		ALT_CASE(VCMPPDZrrib)
		ALT_CASE(VCMPPSZrmi)
		ALT_CASE(VCMPPSZrri)
		ALT_CASE(VCMPPSZrrib)
		ALT_CASE(VCMPPDZ128rmi)
		ALT_CASE(VCMPPDZ128rri)
		ALT_CASE(VCMPPSZ128rmi)
		ALT_CASE(VCMPPSZ128rri)
		ALT_CASE(VCMPPDZ256rmi)
		ALT_CASE(VCMPPDZ256rri)
		ALT_CASE(VCMPPSZ256rmi)
		ALT_CASE(VCMPPSZ256rri)
		// The scalar AVX-512 forms do not follow the <op>_alt naming rule.
		case X86_VCMPSDZrm_Int:  return X86_VCMPSDZrmi_alt;
		case X86_VCMPSDZrr_Int:  return X86_VCMPSDZrri_alt;
		case X86_VCMPSDZrrb_Int: return X86_VCMPSDZrrb_alt;
		case X86_VCMPSSZrm_Int:  return X86_VCMPSSZrmi_alt;
		case X86_VCMPSSZrr_Int:  return X86_VCMPSSZrri_alt;
		case X86_VCMPSSZrrb_Int: return X86_VCMPSSZrrb_alt;
	}
}

/// avx512iccAltOpcode - Returns the "_alt" opcode for an AVX-512 integer
///   compare whose immediate has no condition-code mnemonic, or 0 if
///   @p opcode has no such alternative.
static unsigned avx512iccAltOpcode(unsigned opcode)
{
	switch (opcode) {
		default:
			// llvm_unreachable("unexpected opcode");
			// Leave unknown opcodes untouched instead of falling through
			// into the first case.
			return 0;
		ALT_CASE(VPCMPBZ128rmi)
		ALT_CASE(VPCMPBZ128rmik)
		ALT_CASE(VPCMPBZ128rri)
		ALT_CASE(VPCMPBZ128rrik)
		ALT_CASE(VPCMPBZ256rmi)
		ALT_CASE(VPCMPBZ256rmik)
		ALT_CASE(VPCMPBZ256rri)
		ALT_CASE(VPCMPBZ256rrik)
		ALT_CASE(VPCMPBZrmi)
		ALT_CASE(VPCMPBZrmik)
		ALT_CASE(VPCMPBZrri)
		ALT_CASE(VPCMPBZrrik)
		ALT_CASE(VPCMPDZ128rmi)
		ALT_CASE(VPCMPDZ128rmib)
		ALT_CASE(VPCMPDZ128rmibk)
		ALT_CASE(VPCMPDZ128rmik)
		ALT_CASE(VPCMPDZ128rri)
		ALT_CASE(VPCMPDZ128rrik)
		ALT_CASE(VPCMPDZ256rmi)
		ALT_CASE(VPCMPDZ256rmib)
		ALT_CASE(VPCMPDZ256rmibk)
		ALT_CASE(VPCMPDZ256rmik)
		ALT_CASE(VPCMPDZ256rri)
		ALT_CASE(VPCMPDZ256rrik)
		ALT_CASE(VPCMPDZrmi)
		ALT_CASE(VPCMPDZrmib)
		ALT_CASE(VPCMPDZrmibk)
		ALT_CASE(VPCMPDZrmik)
		ALT_CASE(VPCMPDZrri)
		ALT_CASE(VPCMPDZrrik)
		ALT_CASE(VPCMPQZ128rmi)
		ALT_CASE(VPCMPQZ128rmib)
		ALT_CASE(VPCMPQZ128rmibk)
		ALT_CASE(VPCMPQZ128rmik)
		ALT_CASE(VPCMPQZ128rri)
		ALT_CASE(VPCMPQZ128rrik)
		ALT_CASE(VPCMPQZ256rmi)
		ALT_CASE(VPCMPQZ256rmib)
		ALT_CASE(VPCMPQZ256rmibk)
		ALT_CASE(VPCMPQZ256rmik)
		ALT_CASE(VPCMPQZ256rri)
		ALT_CASE(VPCMPQZ256rrik)
		ALT_CASE(VPCMPQZrmi)
		ALT_CASE(VPCMPQZrmib)
		ALT_CASE(VPCMPQZrmibk)
		ALT_CASE(VPCMPQZrmik)
		ALT_CASE(VPCMPQZrri)
		ALT_CASE(VPCMPQZrrik)
		ALT_CASE(VPCMPUBZ128rmi)
		ALT_CASE(VPCMPUBZ128rmik)
		ALT_CASE(VPCMPUBZ128rri)
		ALT_CASE(VPCMPUBZ128rrik)
		ALT_CASE(VPCMPUBZ256rmi)
		ALT_CASE(VPCMPUBZ256rmik)
		ALT_CASE(VPCMPUBZ256rri)
		ALT_CASE(VPCMPUBZ256rrik)
		ALT_CASE(VPCMPUBZrmi)
		ALT_CASE(VPCMPUBZrmik)
		ALT_CASE(VPCMPUBZrri)
		ALT_CASE(VPCMPUBZrrik)
		ALT_CASE(VPCMPUDZ128rmi)
		ALT_CASE(VPCMPUDZ128rmib)
		ALT_CASE(VPCMPUDZ128rmibk)
		ALT_CASE(VPCMPUDZ128rmik)
		ALT_CASE(VPCMPUDZ128rri)
		ALT_CASE(VPCMPUDZ128rrik)
		ALT_CASE(VPCMPUDZ256rmi)
		ALT_CASE(VPCMPUDZ256rmib)
		ALT_CASE(VPCMPUDZ256rmibk)
		ALT_CASE(VPCMPUDZ256rmik)
		ALT_CASE(VPCMPUDZ256rri)
		ALT_CASE(VPCMPUDZ256rrik)
		ALT_CASE(VPCMPUDZrmi)
		ALT_CASE(VPCMPUDZrmib)
		ALT_CASE(VPCMPUDZrmibk)
		ALT_CASE(VPCMPUDZrmik)
		ALT_CASE(VPCMPUDZrri)
		ALT_CASE(VPCMPUDZrrik)
		ALT_CASE(VPCMPUQZ128rmi)
		ALT_CASE(VPCMPUQZ128rmib)
		ALT_CASE(VPCMPUQZ128rmibk)
		ALT_CASE(VPCMPUQZ128rmik)
		ALT_CASE(VPCMPUQZ128rri)
		ALT_CASE(VPCMPUQZ128rrik)
		ALT_CASE(VPCMPUQZ256rmi)
		ALT_CASE(VPCMPUQZ256rmib)
		ALT_CASE(VPCMPUQZ256rmibk)
		ALT_CASE(VPCMPUQZ256rmik)
		ALT_CASE(VPCMPUQZ256rri)
		ALT_CASE(VPCMPUQZ256rrik)
		ALT_CASE(VPCMPUQZrmi)
		ALT_CASE(VPCMPUQZrmib)
		ALT_CASE(VPCMPUQZrmibk)
		ALT_CASE(VPCMPUQZrmik)
		ALT_CASE(VPCMPUQZrri)
		ALT_CASE(VPCMPUQZrrik)
		ALT_CASE(VPCMPUWZ128rmi)
		ALT_CASE(VPCMPUWZ128rmik)
		ALT_CASE(VPCMPUWZ128rri)
		ALT_CASE(VPCMPUWZ128rrik)
		ALT_CASE(VPCMPUWZ256rmi)
		ALT_CASE(VPCMPUWZ256rmik)
		ALT_CASE(VPCMPUWZ256rri)
		ALT_CASE(VPCMPUWZ256rrik)
		ALT_CASE(VPCMPUWZrmi)
		ALT_CASE(VPCMPUWZrmik)
		ALT_CASE(VPCMPUWZrri)
		ALT_CASE(VPCMPUWZrrik)
		ALT_CASE(VPCMPWZ128rmi)
		ALT_CASE(VPCMPWZ128rmik)
		ALT_CASE(VPCMPWZ128rri)
		ALT_CASE(VPCMPWZ128rrik)
		ALT_CASE(VPCMPWZ256rmi)
		ALT_CASE(VPCMPWZ256rmik)
		ALT_CASE(VPCMPWZ256rri)
		ALT_CASE(VPCMPWZ256rrik)
		ALT_CASE(VPCMPWZrmi)
		ALT_CASE(VPCMPWZrmik)
		ALT_CASE(VPCMPWZrri)
		ALT_CASE(VPCMPWZrrik)
	}
}

#undef ALT_CASE

/// switchToAltOpcode - Switches the MCInst to the opcode that doesn't get
///   special printing; an @p altOpcode of 0 means "no alternative" and leaves
///   the instruction unchanged.
static void switchToAltOpcode(MCInst *mcInst, unsigned altOpcode)
{
	if (altOpcode != 0)
		MCInst_setOpcode(mcInst, altOpcode);
}
#endif

/// translateImmediate - Appends an immediate operand to an MCInst.
///   Relative and plain immediates are sign-extended according to their
///   encoding.  Condition-code immediates that the printers cannot render
///   switch the instruction to its "_alt" opcode.  Immediates of type
///   XMM/YMM/ZMM are emitted as a register selected by bits 4 and up of the
///   immediate.  TYPE_MOFFS immediates are followed by a segment register.
///
/// @param mcInst       - The MCInst to append to.
/// @param immediate    - The immediate value to append.
/// @param operand      - The operand, as stored in the descriptor table.
/// @param insn         - The internal instruction.
static void translateImmediate(MCInst *mcInst, uint64_t immediate,
		const OperandSpecifier *operand, InternalInstruction *insn)
{
	OperandType type = (OperandType)operand->type;

	if (type == TYPE_REL) {
		//isBranch = true;
		//pcrel = insn->startLocation + insn->immediateOffset + insn->immediateSize;
		switch (operand->encoding) {
			default:
				break;
			case ENCODING_Iv:
				// An 8-byte (or unknown) size needs no extension.
				immediate = signExtendImmediate(immediate, insn->displacementSize);
				break;
			case ENCODING_IB:
				immediate = signExtendImmediate(immediate, 1);
				break;
			case ENCODING_IW:
				immediate = signExtendImmediate(immediate, 2);
				break;
			case ENCODING_ID:
				immediate = signExtendImmediate(immediate, 4);
				break;
		}
	} // By default sign-extend all X86 immediates based on their encoding.
	else if (type == TYPE_IMM) {
		switch (operand->encoding) {
			default:
				break;
			case ENCODING_IB:
				immediate = signExtendImmediate(immediate, 1);
				break;
			case ENCODING_IW:
				immediate = signExtendImmediate(immediate, 2);
				break;
			case ENCODING_ID:
				immediate = signExtendImmediate(immediate, 4);
				break;
			case ENCODING_IO:
				// Already 64 bits wide.
				break;
		}
	} else if (type == TYPE_IMM3) {
#ifndef CAPSTONE_X86_REDUCE
		// Check for immediates that printSSECC can't handle.
		if (immediate >= 8)
			switchToAltOpcode(mcInst, sseccAltOpcode(MCInst_getOpcode(mcInst)));
#endif
	} else if (type == TYPE_IMM5) {
#ifndef CAPSTONE_X86_REDUCE
		// Check for immediates that printAVXCC can't handle.
		if (immediate >= 32)
			switchToAltOpcode(mcInst, avxccAltOpcode(MCInst_getOpcode(mcInst)));
#endif
	} else if (type == TYPE_AVX512ICC) {
#ifndef CAPSTONE_X86_REDUCE
		if (immediate >= 8 || ((immediate & 0x3) == 3))
			switchToAltOpcode(mcInst, avx512iccAltOpcode(MCInst_getOpcode(mcInst)));
#endif
	}

	switch (type) {
		case TYPE_XMM:
			MCOperand_CreateReg0(mcInst, X86_XMM0 + ((uint32_t)immediate >> 4));
			return;
		case TYPE_YMM:
			MCOperand_CreateReg0(mcInst, X86_YMM0 + ((uint32_t)immediate >> 4));
			return;
		case TYPE_ZMM:
			MCOperand_CreateReg0(mcInst, X86_ZMM0 + ((uint32_t)immediate >> 4));
			return;
		default:
			// operand is 64 bits wide.  Do nothing.
			break;
	}

	MCOperand_CreateImm0(mcInst, immediate);

	if (type == TYPE_MOFFS) {
		MCOperand_CreateReg0(mcInst, segmentRegnums[insn->segmentOverride]);
	}
}

/// translateRMRegister - Translates a register stored in the R/M field of the
///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction to extract the R/M field
///                       from.
/// @return             - false on success; true otherwise.
static bool translateRMRegister(MCInst *mcInst, InternalInstruction *insn)
{
	if (insn->eaBase == EA_BASE_sib || insn->eaBase == EA_BASE_sib64) {
		//debug("A R/M register operand may not have a SIB byte");
		return true;
	}

	switch (insn->eaBase) {
		case EA_BASE_NONE:
			//debug("EA_BASE_NONE for ModR/M base");
			return true;
#define ENTRY(x) case EA_BASE_##x:
		ALL_EA_BASES
#undef ENTRY
			//debug("A R/M register operand may not have a base; "
			//      "the operand must be a register.");
			return true;
#define ENTRY(x) \
		case EA_REG_##x: \
			MCOperand_CreateReg0(mcInst, X86_##x); break;
		ALL_REGS
#undef ENTRY
		default:
			//debug("Unexpected EA base register");
			return true;
	}

	return false;
}

/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
///   fields of an internal instruction (and possibly its SIB byte) to a memory
///   operand in LLVM's format, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.
/// @return             - false on success; true otherwise.
static bool translateRMMemory(MCInst *mcInst, InternalInstruction *insn)
{
	// Addresses in an MCInst are represented as five operands:
	//   1. basereg       (register)  The R/M base, or (if there is a SIB) the
	//                                SIB base
	//   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
	//                                scale amount
	//   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
	//                                the index (which is multiplied by the
	//                                scale amount)
	//   4. displacement  (immediate) 0, or the displacement if there is one
	//   5. segmentreg    (register)  x86_registerNONE for now, but could be set
	//                                if we have segment overrides
	int scaleAmount = 1;
	int indexReg = 0;

	if (insn->eaBase == EA_BASE_sib || insn->eaBase == EA_BASE_sib64) {
		if (insn->sibBase != SIB_BASE_NONE) {
			switch (insn->sibBase) {
#define ENTRY(x) \
				case SIB_BASE_##x: \
					MCOperand_CreateReg0(mcInst, X86_##x); break;
				ALL_SIB_BASES
#undef ENTRY
				default:
					//debug("Unexpected sibBase");
					return true;
			}
		} else {
			MCOperand_CreateReg0(mcInst, 0);
		}

		if (insn->sibIndex != SIB_INDEX_NONE) {
			switch (insn->sibIndex) {
				default:
					//debug("Unexpected sibIndex");
					return true;
#define ENTRY(x) \
				case SIB_INDEX_##x: \
					indexReg = X86_##x; break;
				EA_BASES_32BIT
				EA_BASES_64BIT
				REGS_XMM
				REGS_YMM
				REGS_ZMM
#undef ENTRY
			}
		} else {
			// Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
			// but no index is used and modrm alone should have been enough.
			// -No base register in 32-bit mode. In 64-bit mode this is used to
			//  avoid rip-relative addressing.
			// -Any base register used other than ESP/RSP/R12D/R12. Using these as a
			//  base always requires a SIB byte.
			// -A scale other than 1 is used.
			if (insn->sibScale != 1 ||
					(insn->sibBase == SIB_BASE_NONE && insn->mode != MODE_64BIT) ||
					(insn->sibBase != SIB_BASE_NONE &&
					 insn->sibBase != SIB_BASE_ESP && insn->sibBase != SIB_BASE_RSP &&
					 insn->sibBase != SIB_BASE_R12D && insn->sibBase != SIB_BASE_R12)) {
				indexReg = insn->addressSize == 4 ? X86_EIZ : X86_RIZ;
			} else {
				indexReg = 0;
			}
		}

		scaleAmount = insn->sibScale;
	} else {
		switch (insn->eaBase) {
			case EA_BASE_NONE:
				if (insn->eaDisplacement == EA_DISP_NONE) {
					//debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
					return true;
				}
				if (insn->mode == MODE_64BIT) {
					if (insn->prefix3 == 0x67)	// address-size prefix overrides RIP relative addressing
						MCOperand_CreateReg0(mcInst, X86_EIP);
					else
						// Section 2.2.1.6
						MCOperand_CreateReg0(mcInst, insn->addressSize == 4 ? X86_EIP : X86_RIP);
				} else {
					MCOperand_CreateReg0(mcInst, 0);
				}

				indexReg = 0;
				break;
			case EA_BASE_BX_SI:
				MCOperand_CreateReg0(mcInst, X86_BX);
				indexReg = X86_SI;
				break;
			case EA_BASE_BX_DI:
				MCOperand_CreateReg0(mcInst, X86_BX);
				indexReg = X86_DI;
				break;
			case EA_BASE_BP_SI:
				MCOperand_CreateReg0(mcInst, X86_BP);
				indexReg = X86_SI;
				break;
			case EA_BASE_BP_DI:
				MCOperand_CreateReg0(mcInst, X86_BP);
				indexReg = X86_DI;
				break;
			default:
				indexReg = 0;
				switch (insn->eaBase) {
					default:
						//debug("Unexpected eaBase");
						return true;
						// Here, we will use the fill-ins defined above.  However,
						//   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
						//   sib and sib64 were handled in the top-level if, so they're only
						//   placeholders to keep the compiler happy.
#define ENTRY(x) \
					case EA_BASE_##x: \
						MCOperand_CreateReg0(mcInst, X86_##x); break;
					ALL_EA_BASES
#undef ENTRY
#define ENTRY(x) case EA_REG_##x:
					ALL_REGS
#undef ENTRY
						//debug("A R/M memory operand may not be a register; "
						//      "the base field must be a base.");
						return true;
				}
		}

		scaleAmount = 1;
	}

	MCOperand_CreateImm0(mcInst, scaleAmount);
	MCOperand_CreateReg0(mcInst, indexReg);
	MCOperand_CreateImm0(mcInst, insn->displacement);

	MCOperand_CreateReg0(mcInst, segmentRegnums[insn->segmentOverride]);

	return false;
}

/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
///   byte of an instruction to LLVM form, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param operand      - The operand, as stored in the descriptor table.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.
/// @return             - false on success; true otherwise.
static bool translateRM(MCInst *mcInst, const OperandSpecifier *operand,
		InternalInstruction *insn)
{
	switch (operand->type) {
		default:
			//debug("Unexpected type for a R/M operand");
			return true;
		case TYPE_R8:
		case TYPE_R16:
		case TYPE_R32:
		case TYPE_R64:
		case TYPE_Rv:
		case TYPE_MM64:
		case TYPE_XMM:
		case TYPE_YMM:
		case TYPE_ZMM:
		case TYPE_VK:
		case TYPE_DEBUGREG:
		case TYPE_CONTROLREG:
		case TYPE_BNDR:
			return translateRMRegister(mcInst, insn);
		case TYPE_M:
		case TYPE_MVSIBX:
		case TYPE_MVSIBY:
		case TYPE_MVSIBZ:
			return translateRMMemory(mcInst, insn);
	}
}

/// translateFPRegister - Translates a stack position on the FPU stack to its
///   LLVM form, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param stackPos     - The stack position to translate.
static void translateFPRegister(MCInst *mcInst, uint8_t stackPos)
{
	MCOperand_CreateReg0(mcInst, X86_ST0 + stackPos);
}

/// translateMaskRegister - Translates a 3-bit mask register number to
///   LLVM form, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param maskRegNum   - Number of mask register from 0 to 7.
/// @return             - false on success; true otherwise.
static bool translateMaskRegister(MCInst *mcInst, uint8_t maskRegNum)
{
	if (maskRegNum >= 8) {
		// debug("Invalid mask register number");
		return true;
	}

	MCOperand_CreateReg0(mcInst, X86_K0 + maskRegNum);

	return false;
}

/// translateOperand - Translates an operand stored in an internal instruction
///   to LLVM's format and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param operand      - The operand, as stored in the descriptor table.
/// @param insn         - The internal instruction.
/// @return             - false on success; true otherwise.
static bool translateOperand(MCInst *mcInst, const OperandSpecifier *operand, InternalInstruction *insn)
{
	switch (operand->encoding) {
		case ENCODING_REG:
			translateRegister(mcInst, insn->reg);
			return false;
		case ENCODING_WRITEMASK:
			return translateMaskRegister(mcInst, insn->writemask);
		CASE_ENCODING_RM:
		CASE_ENCODING_VSIB:
			return translateRM(mcInst, operand, insn);
		case ENCODING_IB:
		case ENCODING_IW:
		case ENCODING_ID:
		case ENCODING_IO:
		case ENCODING_Iv:
		case ENCODING_Ia:
			// Immediates are consumed in the order the decoder stored them.
			translateImmediate(mcInst, insn->immediates[insn->numImmediatesTranslated++], operand, insn);
			return false;
		case ENCODING_IRC:
			MCOperand_CreateImm0(mcInst, insn->RC);
			return false;
		case ENCODING_SI:
			return translateSrcIndex(mcInst, insn);
		case ENCODING_DI:
			return translateDstIndex(mcInst, insn);
		case ENCODING_RB:
		case ENCODING_RW:
		case ENCODING_RD:
		case ENCODING_RO:
		case ENCODING_Rv:
			translateRegister(mcInst, insn->opcodeRegister);
			return false;
		case ENCODING_FP:
			translateFPRegister(mcInst, insn->modRM & 7);
			return false;
		case ENCODING_VVVV:
			translateRegister(mcInst, insn->vvvv);
			return false;
		case ENCODING_DUP:
			// A DUP operand repeats an earlier operand; its type encodes
			// which one (TYPE_DUP0 + index).
			return translateOperand(mcInst, &insn->operands[operand->type - TYPE_DUP0], insn);
		default:
			//debug("Unhandled operand encoding during translation");
			return true;
	}
}

/// translateInstruction - Translates an internal instruction and all its
///   operands to an MCInst.
///
/// @param mcInst       - The MCInst to populate with the instruction's data.
/// @param insn         - The internal instruction.
/// @return             - false on success; true otherwise.
static bool translateInstruction(MCInst *mcInst, InternalInstruction *insn)
{
	int index;

	if (!insn->spec) {
		//debug("Instruction has no specification");
		return true;
	}

	MCInst_clear(mcInst);
	MCInst_setOpcode(mcInst, insn->instructionID);

	// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
	// prefix bytes should be disassembled as xrelease and xacquire then set the
	// opcode to those instead of the rep and repne opcodes.
#ifndef CAPSTONE_X86_REDUCE
	if (insn->xAcquireRelease) {
		if (MCInst_getOpcode(mcInst) == X86_REP_PREFIX)
			MCInst_setOpcode(mcInst, X86_XRELEASE_PREFIX);
		else if (MCInst_getOpcode(mcInst) == X86_REPNE_PREFIX)
			MCInst_setOpcode(mcInst, X86_XACQUIRE_PREFIX);
	}
#endif

	insn->numImmediatesTranslated = 0;

	for (index = 0; index < X86_MAX_OPERANDS; ++index) {
		if (insn->operands[index].encoding == ENCODING_NONE)
			continue;

		if (translateOperand(mcInst, &insn->operands[index], insn))
			return true;
	}

	return false;
}

/// reader - Byte-fetch callback handed to the decoder.
///
/// @param info         - Describes the code buffer and its start address.
/// @param byte         - Receives the byte found at @p address.
/// @param address      - Address of the byte to read.
/// @return             - 0 on success; -1 if @p address lies outside the
///                       buffer.
static int reader(const struct reader_info *info, uint8_t *byte, uint64_t address)
{
	// Unsigned subtraction: an address below info->offset wraps to a huge
	// value and is rejected by the same comparison.
	if (address - info->offset >= info->size)
		// out of buffer range
		return -1;

	*byte = info->code[address - info->offset];

	return 0;
}

// copy x86 detail information from internal structure to public structure
// NOTE: pub->detail must be non-NULL; it is dereferenced unconditionally.
static void update_pub_insn(cs_insn *pub, InternalInstruction *inter)
{
	cs_x86 *x86 = &pub->detail->x86;

	if (inter->vectorExtensionType != 0) {
		// VEX/EVEX/XOP: expose the raw vector-extension prefix bytes.
		memcpy(x86->opcode, inter->vectorExtensionPrefix, sizeof(x86->opcode));
	} else if (inter->twoByteEscape) {
		x86->opcode[0] = inter->twoByteEscape;
		if (inter->threeByteEscape) {
			x86->opcode[1] = inter->threeByteEscape;
			x86->opcode[2] = inter->opcode;
		} else {
			x86->opcode[1] = inter->opcode;
		}
	} else {
		x86->opcode[0] = inter->opcode;
	}

	x86->rex = inter->rexPrefix;

	x86->addr_size = inter->addressSize;

	x86->modrm = inter->orgModRM;
	x86->encoding.modrm_offset = inter->modRMOffset;

	x86->sib = inter->sib;
	x86->sib_index = x86_map_sib_index(inter->sibIndex);
	x86->sib_scale = inter->sibScale;
	x86->sib_base = x86_map_sib_base(inter->sibBase);

	x86->disp = inter->displacement;
	if (inter->consumedDisplacement) {
		x86->encoding.disp_offset = inter->displacementOffset;
		x86->encoding.disp_size = inter->displacementSize;
	}

	x86->encoding.imm_offset = inter->immediateOffset;
	// Do not overwrite an imm_size that is already set.
	if (x86->encoding.imm_size == 0 && inter->immediateOffset != 0)
		x86->encoding.imm_size = inter->immediateSize;
}

// Initializes the X86 register info tables used by the MC layer.
void X86_init(MCRegisterInfo *MRI)
{
	// InitMCRegisterInfo(), X86GenRegisterInfo.inc
	//   RI->InitMCRegisterInfo(X86RegDesc, 277,
	//                          RA, PC,
	//                          X86MCRegisterClasses, 86,
	//                          X86RegUnitRoots, 162, X86RegDiffLists, X86LaneMaskLists, X86RegStrings,
	//                          X86RegClassStrings,
	//                          X86SubRegIdxLists, 9,
	//                          X86SubRegIdxRanges, X86RegEncodingTable);
	// Arguments not passed through by this call are given as 0.
	MCRegisterInfo_InitMCRegisterInfo(MRI, X86RegDesc, 277,
			0, 0,
			X86MCRegisterClasses, 86,
			0, 0, X86RegDiffLists, 0,
			X86SubRegIdxLists, 9,
			0);
}

// Public interface for the disassembler
//
// Decodes one instruction from code[0..code_len) located at `address` and
// translates it into `instr`.
//
// Returns true on success.  Returns false if decoding or translation fails;
// *size is written whenever decoding itself succeeded (even if translation
// then fails) and is left untouched on a decode failure.
bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len,
		MCInst *instr, uint16_t *size, uint64_t address, void *_info)
{
	cs_struct *handle = (cs_struct *)(uintptr_t)ud;
	InternalInstruction insn = { 0 };
	struct reader_info info;
	int ret;
	bool result;

	info.code = code;
	info.size = code_len;
	info.offset = address;

	if (instr->flat_insn->detail) {
		// Zero only the generic header plus the x86 part of cs_detail.
		// Other arch disassemblers clear the whole structure, but the
		// original note here calls that probably overkill given how large
		// cs_detail is.  (Clearing everything except x86.operands would be
		// marginally faster still, since x86.op_count is set to 0.)
		memset(instr->flat_insn->detail, 0, offsetof(cs_detail, x86) + sizeof(cs_x86));
	}

	if (handle->mode & CS_MODE_16)
		ret = decodeInstruction(&insn,
				reader, &info,
				address,
				MODE_16BIT);
	else if (handle->mode & CS_MODE_32)
		ret = decodeInstruction(&insn,
				reader, &info,
				address,
				MODE_32BIT);
	else
		ret = decodeInstruction(&insn,
				reader, &info,
				address,
				MODE_64BIT);

	if (ret) {
		// *size = (uint16_t)(insn.readerCursor - address);
		return false;
	}

	*size = (uint16_t)insn.length;

	result = !translateInstruction(instr, &insn);
	if (result) {
		unsigned Flags = X86_IP_NO_PREFIX;

		instr->imm_size = insn.immSize;

		// copy all prefixes
		instr->x86_prefix[0] = insn.prefix0;
		instr->x86_prefix[1] = insn.prefix1;
		instr->x86_prefix[2] = insn.prefix2;
		instr->x86_prefix[3] = insn.prefix3;
		instr->xAcquireRelease = insn.xAcquireRelease;

		// update_pub_insn() dereferences flat_insn->detail, so require the
		// buffer to exist in addition to the detail option being on.
		if (handle->detail && instr->flat_insn->detail) {
			update_pub_insn(instr->flat_insn, &insn);
		}

		if (insn.hasAdSize)
			Flags |= X86_IP_HAS_AD_SIZE;

		// When the prefix byte is a mandatory part of the opcode it must not
		// be reported as an operand-size/repeat/lock prefix.
		if (!insn.mandatoryPrefix) {
			if (insn.hasOpSize)
				Flags |= X86_IP_HAS_OP_SIZE;

			if (insn.repeatPrefix == 0xf2)
				Flags |= X86_IP_HAS_REPEAT_NE;
			else if (insn.repeatPrefix == 0xf3 &&
					// It should not be 'pause' f3 90
					insn.opcode != 0x90)
				Flags |= X86_IP_HAS_REPEAT;

			if (insn.hasLockPrefix)
				Flags |= X86_IP_HAS_LOCK;
		}

		instr->flags = Flags;
	}

	return result;
}

#endif