Path: blob/main/contrib/llvm-project/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
39644 views
//===-- DisassemblerLLVMC.cpp ---------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "DisassemblerLLVMC.h"910#include "llvm-c/Disassembler.h"11#include "llvm/ADT/SmallString.h"12#include "llvm/ADT/StringExtras.h"13#include "llvm/MC/MCAsmInfo.h"14#include "llvm/MC/MCContext.h"15#include "llvm/MC/MCDisassembler/MCDisassembler.h"16#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"17#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"18#include "llvm/MC/MCInst.h"19#include "llvm/MC/MCInstPrinter.h"20#include "llvm/MC/MCInstrAnalysis.h"21#include "llvm/MC/MCInstrInfo.h"22#include "llvm/MC/MCRegisterInfo.h"23#include "llvm/MC/MCSubtargetInfo.h"24#include "llvm/MC/MCTargetOptions.h"25#include "llvm/MC/TargetRegistry.h"26#include "llvm/Support/ErrorHandling.h"27#include "llvm/Support/ScopedPrinter.h"28#include "llvm/Support/TargetSelect.h"29#include "llvm/TargetParser/AArch64TargetParser.h"3031#include "lldb/Core/Address.h"32#include "lldb/Core/Module.h"33#include "lldb/Symbol/SymbolContext.h"34#include "lldb/Target/ExecutionContext.h"35#include "lldb/Target/Process.h"36#include "lldb/Target/RegisterContext.h"37#include "lldb/Target/SectionLoadList.h"38#include "lldb/Target/StackFrame.h"39#include "lldb/Target/Target.h"40#include "lldb/Utility/DataExtractor.h"41#include "lldb/Utility/LLDBLog.h"42#include "lldb/Utility/Log.h"43#include "lldb/Utility/RegularExpression.h"44#include "lldb/Utility/Stream.h"45#include <optional>4647using namespace lldb;48using namespace lldb_private;4950LLDB_PLUGIN_DEFINE(DisassemblerLLVMC)5152class DisassemblerLLVMC::MCDisasmInstance {53public:54static std::unique_ptr<MCDisasmInstance>55Create(const char *triple, const char *cpu, const char *features_str,56unsigned flavor, DisassemblerLLVMC &owner);5758~MCDisasmInstance() = default;5960uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,61lldb::addr_t pc, llvm::MCInst &mc_inst) const;62void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc,63std::string &inst_string, std::string &comments_string);64void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);65void SetUseColor(bool use_color);66bool GetUseColor() const;67bool CanBranch(llvm::MCInst &mc_inst) const;68bool HasDelaySlot(llvm::MCInst &mc_inst) const;69bool IsCall(llvm::MCInst &mc_inst) const;70bool IsLoad(llvm::MCInst &mc_inst) const;71bool IsAuthenticated(llvm::MCInst &mc_inst) const;7273private:74MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,75std::unique_ptr<llvm::MCRegisterInfo> &®_info_up,76std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,77std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,78std::unique_ptr<llvm::MCContext> &&context_up,79std::unique_ptr<llvm::MCDisassembler> &&disasm_up,80std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up,81std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up);8283std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;84std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;85std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;86std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;87std::unique_ptr<llvm::MCContext> m_context_up;88std::unique_ptr<llvm::MCDisassembler> m_disasm_up;89std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;90std::unique_ptr<llvm::MCInstrAnalysis> m_instr_analysis_up;91};9293namespace x86 {9495/// These are the three values deciding instruction control flow kind.96/// InstructionLengthDecode function decodes an instruction and get this struct.97///98/// primary_opcode99/// Primary opcode of the instruction.100/// For one-byte opcode instruction, it's the first byte after prefix.101/// For two- and three-byte opcodes, it's the second byte.102///103/// opcode_len104/// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.105///106/// modrm107/// ModR/M byte of the instruction.108/// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]109/// may contain a register or specify an addressing mode, depending on MOD.110struct InstructionOpcodeAndModrm {111uint8_t primary_opcode;112uint8_t opcode_len;113uint8_t modrm;114};115116/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.117/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and118/// instruction set.119///120/// \param[in] opcode_and_modrm121/// Contains primary_opcode byte, its length, and ModR/M byte.122/// Refer to the struct InstructionOpcodeAndModrm for details.123///124/// \return125/// The control flow kind of the instruction or126/// eInstructionControlFlowKindOther if the instruction doesn't affect127/// the control flow of the program.128lldb::InstructionControlFlowKind129MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {130uint8_t opcode = opcode_and_modrm.primary_opcode;131uint8_t opcode_len = opcode_and_modrm.opcode_len;132uint8_t modrm = opcode_and_modrm.modrm;133134if (opcode_len > 2)135return lldb::eInstructionControlFlowKindOther;136137if (opcode >= 0x70 && opcode <= 0x7F) {138if (opcode_len == 1)139return lldb::eInstructionControlFlowKindCondJump;140else141return lldb::eInstructionControlFlowKindOther;142}143144if (opcode >= 0x80 && opcode <= 0x8F) {145if (opcode_len == 2)146return lldb::eInstructionControlFlowKindCondJump;147else148return lldb::eInstructionControlFlowKindOther;149}150151switch (opcode) {152case 0x9A:153if (opcode_len == 1)154return lldb::eInstructionControlFlowKindFarCall;155break;156case 0xFF:157if (opcode_len == 1) {158uint8_t modrm_reg = (modrm >> 3) & 7;159if (modrm_reg == 2)160return lldb::eInstructionControlFlowKindCall;161else if (modrm_reg == 3)162return lldb::eInstructionControlFlowKindFarCall;163else if (modrm_reg == 4)164return lldb::eInstructionControlFlowKindJump;165else if (modrm_reg == 5)166return lldb::eInstructionControlFlowKindFarJump;167}168break;169case 0xE8:170if (opcode_len == 1)171return lldb::eInstructionControlFlowKindCall;172break;173case 0xCD:174case 0xCC:175case 0xCE:176case 0xF1:177if (opcode_len == 1)178return lldb::eInstructionControlFlowKindFarCall;179break;180case 0xCF:181if (opcode_len == 1)182return lldb::eInstructionControlFlowKindFarReturn;183break;184case 0xE9:185case 0xEB:186if (opcode_len == 1)187return lldb::eInstructionControlFlowKindJump;188break;189case 0xEA:190if (opcode_len == 1)191return lldb::eInstructionControlFlowKindFarJump;192break;193case 0xE3:194case 0xE0:195case 0xE1:196case 0xE2:197if (opcode_len == 1)198return lldb::eInstructionControlFlowKindCondJump;199break;200case 0xC3:201case 0xC2:202if (opcode_len == 1)203return lldb::eInstructionControlFlowKindReturn;204break;205case 0xCB:206case 0xCA:207if (opcode_len == 1)208return lldb::eInstructionControlFlowKindFarReturn;209break;210case 0x05:211case 0x34:212if (opcode_len == 2)213return lldb::eInstructionControlFlowKindFarCall;214break;215case 0x35:216case 0x07:217if (opcode_len == 2)218return lldb::eInstructionControlFlowKindFarReturn;219break;220case 0x01:221if (opcode_len == 2) {222switch (modrm) {223case 0xc1:224return lldb::eInstructionControlFlowKindFarCall;225case 0xc2:226case 0xc3:227return lldb::eInstructionControlFlowKindFarReturn;228default:229break;230}231}232break;233default:234break;235}236237return lldb::eInstructionControlFlowKindOther;238}239240/// Decode an instruction into opcode, modrm and opcode_len.241/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.242/// Opcodes in x86 are generally the first byte of instruction, though two-byte243/// instructions and prefixes exist. ModR/M is the byte following the opcode244/// and adds additional information for how the instruction is executed.245///246/// \param[in] inst_bytes247/// Raw bytes of the instruction248///249///250/// \param[in] bytes_len251/// The length of the inst_bytes array.252///253/// \param[in] is_exec_mode_64b254/// If true, the execution mode is 64 bit.255///256/// \return257/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding258/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition259/// for more details.260/// Otherwise if the given instruction is invalid, returns std::nullopt.261std::optional<InstructionOpcodeAndModrm>262InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,263bool is_exec_mode_64b) {264int op_idx = 0;265bool prefix_done = false;266InstructionOpcodeAndModrm ret = {0, 0, 0};267268// In most cases, the primary_opcode is the first byte of the instruction269// but some instructions have a prefix to be skipped for these calculations.270// The following mapping is inspired from libipt's instruction decoding logic271// in `src/pt_ild.c`272while (!prefix_done) {273if (op_idx >= bytes_len)274return std::nullopt;275276ret.primary_opcode = inst_bytes[op_idx];277switch (ret.primary_opcode) {278// prefix_ignore279case 0x26:280case 0x2e:281case 0x36:282case 0x3e:283case 0x64:284case 0x65:285// prefix_osz, prefix_asz286case 0x66:287case 0x67:288// prefix_lock, prefix_f2, prefix_f3289case 0xf0:290case 0xf2:291case 0xf3:292op_idx++;293break;294295// prefix_rex296case 0x40:297case 0x41:298case 0x42:299case 0x43:300case 0x44:301case 0x45:302case 0x46:303case 0x47:304case 0x48:305case 0x49:306case 0x4a:307case 0x4b:308case 0x4c:309case 0x4d:310case 0x4e:311case 0x4f:312if (is_exec_mode_64b)313op_idx++;314else315prefix_done = true;316break;317318// prefix_vex_c4, c5319case 0xc5:320if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {321prefix_done = true;322break;323}324325ret.opcode_len = 2;326ret.primary_opcode = inst_bytes[op_idx + 2];327ret.modrm = inst_bytes[op_idx + 3];328return ret;329330case 0xc4:331if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {332prefix_done = true;333break;334}335ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;336ret.primary_opcode = inst_bytes[op_idx + 3];337ret.modrm = inst_bytes[op_idx + 4];338return ret;339340// prefix_evex341case 0x62:342if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {343prefix_done = true;344break;345}346ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;347ret.primary_opcode = inst_bytes[op_idx + 4];348ret.modrm = inst_bytes[op_idx + 5];349return ret;350351default:352prefix_done = true;353break;354}355} // prefix done356357ret.primary_opcode = inst_bytes[op_idx];358ret.modrm = inst_bytes[op_idx + 1];359ret.opcode_len = 1;360361// If the first opcode is 0F, it's two- or three- byte opcodes.362if (ret.primary_opcode == 0x0F) {363ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte364365if (ret.primary_opcode == 0x38) {366ret.opcode_len = 3;367ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte368ret.modrm = inst_bytes[op_idx + 1];369} else if (ret.primary_opcode == 0x3A) {370ret.opcode_len = 3;371ret.primary_opcode = inst_bytes[++op_idx];372ret.modrm = inst_bytes[op_idx + 1];373} else if ((ret.primary_opcode & 0xf8) == 0x38) {374ret.opcode_len = 0;375ret.primary_opcode = inst_bytes[++op_idx];376ret.modrm = inst_bytes[op_idx + 1];377} else if (ret.primary_opcode == 0x0F) {378ret.opcode_len = 3;379// opcode is 0x0F, no needs to update380ret.modrm = inst_bytes[op_idx + 1];381} else {382ret.opcode_len = 2;383ret.modrm = inst_bytes[op_idx + 1];384}385}386387return ret;388}389390lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,391Opcode m_opcode) {392std::optional<InstructionOpcodeAndModrm> ret;393394if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {395// x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes396return lldb::eInstructionControlFlowKindUnknown;397}398399// Opcode bytes will be decoded into primary_opcode, modrm and opcode length.400// These are the three values deciding instruction control flow kind.401ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),402m_opcode.GetByteSize(), is_exec_mode_64b);403if (!ret)404return lldb::eInstructionControlFlowKindUnknown;405else406return MapOpcodeIntoControlFlowKind(*ret);407}408409} // namespace x86410411class InstructionLLVMC : public lldb_private::Instruction {412public:413InstructionLLVMC(DisassemblerLLVMC &disasm,414const lldb_private::Address &address,415AddressClass addr_class)416: Instruction(address, addr_class),417m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(418disasm.shared_from_this())) {}419420~InstructionLLVMC() override = default;421422bool DoesBranch() override {423VisitInstruction();424return m_does_branch;425}426427bool HasDelaySlot() override {428VisitInstruction();429return m_has_delay_slot;430}431432bool IsLoad() override {433VisitInstruction();434return m_is_load;435}436437bool IsAuthenticated() override {438VisitInstruction();439return m_is_authenticated;440}441442DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {443DisassemblerScope disasm(*this);444return GetDisasmToUse(is_alternate_isa, disasm);445}446447size_t Decode(const lldb_private::Disassembler &disassembler,448const lldb_private::DataExtractor &data,449lldb::offset_t data_offset) override {450// All we have to do is read the opcode which can be easy for some451// architectures452bool got_op = false;453DisassemblerScope disasm(*this);454if (disasm) {455const ArchSpec &arch = disasm->GetArchitecture();456const lldb::ByteOrder byte_order = data.GetByteOrder();457458const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();459const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();460if (min_op_byte_size == max_op_byte_size) {461// Fixed size instructions, just read that amount of data.462if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))463return false;464465switch (min_op_byte_size) {466case 1:467m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order);468got_op = true;469break;470471case 2:472m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order);473got_op = true;474break;475476case 4:477m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);478got_op = true;479break;480481case 8:482m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order);483got_op = true;484break;485486default:487m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),488min_op_byte_size);489got_op = true;490break;491}492}493if (!got_op) {494bool is_alternate_isa = false;495DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =496GetDisasmToUse(is_alternate_isa, disasm);497498const llvm::Triple::ArchType machine = arch.GetMachine();499if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) {500if (machine == llvm::Triple::thumb || is_alternate_isa) {501uint32_t thumb_opcode = data.GetU16(&data_offset);502if ((thumb_opcode & 0xe000) != 0xe000 ||503((thumb_opcode & 0x1800u) == 0)) {504m_opcode.SetOpcode16(thumb_opcode, byte_order);505m_is_valid = true;506} else {507thumb_opcode <<= 16;508thumb_opcode |= data.GetU16(&data_offset);509m_opcode.SetOpcode16_2(thumb_opcode, byte_order);510m_is_valid = true;511}512} else {513m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);514m_is_valid = true;515}516} else {517// The opcode isn't evenly sized, so we need to actually use the llvm518// disassembler to parse it and get the size.519uint8_t *opcode_data =520const_cast<uint8_t *>(data.PeekData(data_offset, 1));521const size_t opcode_data_len = data.BytesLeft(data_offset);522const addr_t pc = m_address.GetFileAddress();523llvm::MCInst inst;524525const size_t inst_size =526mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);527if (inst_size == 0)528m_opcode.Clear();529else {530m_opcode.SetOpcodeBytes(opcode_data, inst_size);531m_is_valid = true;532}533}534}535return m_opcode.GetByteSize();536}537return 0;538}539540void AppendComment(std::string &description) {541if (m_comment.empty())542m_comment.swap(description);543else {544m_comment.append(", ");545m_comment.append(description);546}547}548549lldb::InstructionControlFlowKind550GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override {551DisassemblerScope disasm(*this, exe_ctx);552if (disasm){553if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86)554return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode);555else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64)556return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode);557}558559return eInstructionControlFlowKindUnknown;560}561562void CalculateMnemonicOperandsAndComment(563const lldb_private::ExecutionContext *exe_ctx) override {564DataExtractor data;565const AddressClass address_class = GetAddressClass();566567if (m_opcode.GetData(data)) {568std::string out_string;569std::string markup_out_string;570std::string comment_string;571std::string markup_comment_string;572573DisassemblerScope disasm(*this, exe_ctx);574if (disasm) {575DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr;576577if (address_class == AddressClass::eCodeAlternateISA)578mc_disasm_ptr = disasm->m_alternate_disasm_up.get();579else580mc_disasm_ptr = disasm->m_disasm_up.get();581582lldb::addr_t pc = m_address.GetFileAddress();583m_using_file_addr = true;584585const bool data_from_file = disasm->m_data_from_file;586bool use_hex_immediates = true;587Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;588589if (exe_ctx) {590Target *target = exe_ctx->GetTargetPtr();591if (target) {592use_hex_immediates = target->GetUseHexImmediates();593hex_style = target->GetHexImmediateStyle();594595if (!data_from_file) {596const lldb::addr_t load_addr = m_address.GetLoadAddress(target);597if (load_addr != LLDB_INVALID_ADDRESS) {598pc = load_addr;599m_using_file_addr = false;600}601}602}603}604605const uint8_t *opcode_data = data.GetDataStart();606const size_t opcode_data_len = data.GetByteSize();607llvm::MCInst inst;608size_t inst_size =609mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);610611if (inst_size > 0) {612mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);613614const bool saved_use_color = mc_disasm_ptr->GetUseColor();615mc_disasm_ptr->SetUseColor(false);616mc_disasm_ptr->PrintMCInst(inst, pc, out_string, comment_string);617mc_disasm_ptr->SetUseColor(true);618mc_disasm_ptr->PrintMCInst(inst, pc, markup_out_string,619markup_comment_string);620mc_disasm_ptr->SetUseColor(saved_use_color);621622if (!comment_string.empty()) {623AppendComment(comment_string);624}625}626627if (inst_size == 0) {628m_comment.assign("unknown opcode");629inst_size = m_opcode.GetByteSize();630StreamString mnemonic_strm;631lldb::offset_t offset = 0;632lldb::ByteOrder byte_order = data.GetByteOrder();633switch (inst_size) {634case 1: {635const uint8_t uval8 = data.GetU8(&offset);636m_opcode.SetOpcode8(uval8, byte_order);637m_opcode_name.assign(".byte");638mnemonic_strm.Printf("0x%2.2x", uval8);639} break;640case 2: {641const uint16_t uval16 = data.GetU16(&offset);642m_opcode.SetOpcode16(uval16, byte_order);643m_opcode_name.assign(".short");644mnemonic_strm.Printf("0x%4.4x", uval16);645} break;646case 4: {647const uint32_t uval32 = data.GetU32(&offset);648m_opcode.SetOpcode32(uval32, byte_order);649m_opcode_name.assign(".long");650mnemonic_strm.Printf("0x%8.8x", uval32);651} break;652case 8: {653const uint64_t uval64 = data.GetU64(&offset);654m_opcode.SetOpcode64(uval64, byte_order);655m_opcode_name.assign(".quad");656mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);657} break;658default:659if (inst_size == 0)660return;661else {662const uint8_t *bytes = data.PeekData(offset, inst_size);663if (bytes == nullptr)664return;665m_opcode_name.assign(".byte");666m_opcode.SetOpcodeBytes(bytes, inst_size);667mnemonic_strm.Printf("0x%2.2x", bytes[0]);668for (uint32_t i = 1; i < inst_size; ++i)669mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);670}671break;672}673m_mnemonics = std::string(mnemonic_strm.GetString());674return;675}676677static RegularExpression s_regex(678llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));679680llvm::SmallVector<llvm::StringRef, 4> matches;681if (s_regex.Execute(out_string, &matches)) {682m_opcode_name = matches[1].str();683m_mnemonics = matches[2].str();684}685matches.clear();686if (s_regex.Execute(markup_out_string, &matches)) {687m_markup_opcode_name = matches[1].str();688m_markup_mnemonics = matches[2].str();689}690}691}692}693694bool IsValid() const { return m_is_valid; }695696bool UsingFileAddress() const { return m_using_file_addr; }697size_t GetByteSize() const { return m_opcode.GetByteSize(); }698699/// Grants exclusive access to the disassembler and initializes it with the700/// given InstructionLLVMC and an optional ExecutionContext.701class DisassemblerScope {702std::shared_ptr<DisassemblerLLVMC> m_disasm;703704public:705explicit DisassemblerScope(706InstructionLLVMC &i,707const lldb_private::ExecutionContext *exe_ctx = nullptr)708: m_disasm(i.m_disasm_wp.lock()) {709m_disasm->m_mutex.lock();710m_disasm->m_inst = &i;711m_disasm->m_exe_ctx = exe_ctx;712}713~DisassemblerScope() { m_disasm->m_mutex.unlock(); }714715/// Evaluates to true if this scope contains a valid disassembler.716operator bool() const { return static_cast<bool>(m_disasm); }717718std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; }719};720721static llvm::StringRef::const_iterator722ConsumeWhitespace(llvm::StringRef::const_iterator osi,723llvm::StringRef::const_iterator ose) {724while (osi != ose) {725switch (*osi) {726default:727return osi;728case ' ':729case '\t':730break;731}732++osi;733}734735return osi;736}737738static std::pair<bool, llvm::StringRef::const_iterator>739ConsumeChar(llvm::StringRef::const_iterator osi, const char c,740llvm::StringRef::const_iterator ose) {741bool found = false;742743osi = ConsumeWhitespace(osi, ose);744if (osi != ose && *osi == c) {745found = true;746++osi;747}748749return std::make_pair(found, osi);750}751752static std::pair<Operand, llvm::StringRef::const_iterator>753ParseRegisterName(llvm::StringRef::const_iterator osi,754llvm::StringRef::const_iterator ose) {755Operand ret;756ret.m_type = Operand::Type::Register;757std::string str;758759osi = ConsumeWhitespace(osi, ose);760761while (osi != ose) {762if (*osi >= '0' && *osi <= '9') {763if (str.empty()) {764return std::make_pair(Operand(), osi);765} else {766str.push_back(*osi);767}768} else if (*osi >= 'a' && *osi <= 'z') {769str.push_back(*osi);770} else {771switch (*osi) {772default:773if (str.empty()) {774return std::make_pair(Operand(), osi);775} else {776ret.m_register = ConstString(str);777return std::make_pair(ret, osi);778}779case '%':780if (!str.empty()) {781return std::make_pair(Operand(), osi);782}783break;784}785}786++osi;787}788789ret.m_register = ConstString(str);790return std::make_pair(ret, osi);791}792793static std::pair<Operand, llvm::StringRef::const_iterator>794ParseImmediate(llvm::StringRef::const_iterator osi,795llvm::StringRef::const_iterator ose) {796Operand ret;797ret.m_type = Operand::Type::Immediate;798std::string str;799bool is_hex = false;800801osi = ConsumeWhitespace(osi, ose);802803while (osi != ose) {804if (*osi >= '0' && *osi <= '9') {805str.push_back(*osi);806} else if (*osi >= 'a' && *osi <= 'f') {807if (is_hex) {808str.push_back(*osi);809} else {810return std::make_pair(Operand(), osi);811}812} else {813switch (*osi) {814default:815if (str.empty()) {816return std::make_pair(Operand(), osi);817} else {818ret.m_immediate = strtoull(str.c_str(), nullptr, 0);819return std::make_pair(ret, osi);820}821case 'x':822if (!str.compare("0")) {823is_hex = true;824str.push_back(*osi);825} else {826return std::make_pair(Operand(), osi);827}828break;829case '#':830case '$':831if (!str.empty()) {832return std::make_pair(Operand(), osi);833}834break;835case '-':836if (str.empty()) {837ret.m_negative = true;838} else {839return std::make_pair(Operand(), osi);840}841}842}843++osi;844}845846ret.m_immediate = strtoull(str.c_str(), nullptr, 0);847return std::make_pair(ret, osi);848}849850// -0x5(%rax,%rax,2)851static std::pair<Operand, llvm::StringRef::const_iterator>852ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,853llvm::StringRef::const_iterator ose) {854std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =855ParseImmediate(osi, ose);856if (offset_and_iterator.first.IsValid()) {857osi = offset_and_iterator.second;858}859860bool found = false;861std::tie(found, osi) = ConsumeChar(osi, '(', ose);862if (!found) {863return std::make_pair(Operand(), osi);864}865866std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =867ParseRegisterName(osi, ose);868if (base_and_iterator.first.IsValid()) {869osi = base_and_iterator.second;870} else {871return std::make_pair(Operand(), osi);872}873874std::tie(found, osi) = ConsumeChar(osi, ',', ose);875if (!found) {876return std::make_pair(Operand(), osi);877}878879std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =880ParseRegisterName(osi, ose);881if (index_and_iterator.first.IsValid()) {882osi = index_and_iterator.second;883} else {884return std::make_pair(Operand(), osi);885}886887std::tie(found, osi) = ConsumeChar(osi, ',', ose);888if (!found) {889return std::make_pair(Operand(), osi);890}891892std::pair<Operand, llvm::StringRef::const_iterator>893multiplier_and_iterator = ParseImmediate(osi, ose);894if (index_and_iterator.first.IsValid()) {895osi = index_and_iterator.second;896} else {897return std::make_pair(Operand(), osi);898}899900std::tie(found, osi) = ConsumeChar(osi, ')', ose);901if (!found) {902return std::make_pair(Operand(), osi);903}904905Operand product;906product.m_type = Operand::Type::Product;907product.m_children.push_back(index_and_iterator.first);908product.m_children.push_back(multiplier_and_iterator.first);909910Operand index;911index.m_type = Operand::Type::Sum;912index.m_children.push_back(base_and_iterator.first);913index.m_children.push_back(product);914915if (offset_and_iterator.first.IsValid()) {916Operand offset;917offset.m_type = Operand::Type::Sum;918offset.m_children.push_back(offset_and_iterator.first);919offset.m_children.push_back(index);920921Operand deref;922deref.m_type = Operand::Type::Dereference;923deref.m_children.push_back(offset);924return std::make_pair(deref, osi);925} else {926Operand deref;927deref.m_type = Operand::Type::Dereference;928deref.m_children.push_back(index);929return std::make_pair(deref, osi);930}931}932933// -0x10(%rbp)934static std::pair<Operand, llvm::StringRef::const_iterator>935ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,936llvm::StringRef::const_iterator ose) {937std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =938ParseImmediate(osi, ose);939if (offset_and_iterator.first.IsValid()) {940osi = offset_and_iterator.second;941}942943bool found = false;944std::tie(found, osi) = ConsumeChar(osi, '(', ose);945if (!found) {946return std::make_pair(Operand(), osi);947}948949std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =950ParseRegisterName(osi, ose);951if (base_and_iterator.first.IsValid()) {952osi = base_and_iterator.second;953} else {954return std::make_pair(Operand(), osi);955}956957std::tie(found, osi) = ConsumeChar(osi, ')', ose);958if (!found) {959return std::make_pair(Operand(), osi);960}961962if (offset_and_iterator.first.IsValid()) {963Operand offset;964offset.m_type = Operand::Type::Sum;965offset.m_children.push_back(offset_and_iterator.first);966offset.m_children.push_back(base_and_iterator.first);967968Operand deref;969deref.m_type = Operand::Type::Dereference;970deref.m_children.push_back(offset);971return std::make_pair(deref, osi);972} else {973Operand deref;974deref.m_type = Operand::Type::Dereference;975deref.m_children.push_back(base_and_iterator.first);976return std::make_pair(deref, osi);977}978}979980// [sp, #8]!981static std::pair<Operand, llvm::StringRef::const_iterator>982ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,983llvm::StringRef::const_iterator ose) {984bool found = false;985std::tie(found, osi) = ConsumeChar(osi, '[', ose);986if (!found) {987return std::make_pair(Operand(), osi);988}989990std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =991ParseRegisterName(osi, ose);992if (base_and_iterator.first.IsValid()) {993osi = base_and_iterator.second;994} else {995return std::make_pair(Operand(), osi);996}997998std::tie(found, osi) = ConsumeChar(osi, ',', ose);999if (!found) {1000return std::make_pair(Operand(), osi);1001}10021003std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =1004ParseImmediate(osi, ose);1005if (offset_and_iterator.first.IsValid()) {1006osi = offset_and_iterator.second;1007}10081009std::tie(found, osi) = ConsumeChar(osi, ']', ose);1010if (!found) {1011return std::make_pair(Operand(), osi);1012}10131014Operand offset;1015offset.m_type = Operand::Type::Sum;1016offset.m_children.push_back(offset_and_iterator.first);1017offset.m_children.push_back(base_and_iterator.first);10181019Operand deref;1020deref.m_type = Operand::Type::Dereference;1021deref.m_children.push_back(offset);1022return std::make_pair(deref, osi);1023}10241025// [sp]1026static std::pair<Operand, llvm::StringRef::const_iterator>1027ParseARMDerefAccess(llvm::StringRef::const_iterator osi,1028llvm::StringRef::const_iterator ose) {1029bool found = false;1030std::tie(found, osi) = ConsumeChar(osi, '[', ose);1031if (!found) {1032return std::make_pair(Operand(), osi);1033}10341035std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =1036ParseRegisterName(osi, ose);1037if (base_and_iterator.first.IsValid()) {1038osi = base_and_iterator.second;1039} else {1040return std::make_pair(Operand(), osi);1041}10421043std::tie(found, osi) = ConsumeChar(osi, ']', ose);1044if (!found) {1045return std::make_pair(Operand(), osi);1046}10471048Operand deref;1049deref.m_type = Operand::Type::Dereference;1050deref.m_children.push_back(base_and_iterator.first);1051return std::make_pair(deref, osi);1052}10531054static void DumpOperand(const Operand &op, Stream &s) {1055switch (op.m_type) {1056case Operand::Type::Dereference:1057s.PutCString("*");1058DumpOperand(op.m_children[0], s);1059break;1060case Operand::Type::Immediate:1061if (op.m_negative) {1062s.PutCString("-");1063}1064s.PutCString(llvm::to_string(op.m_immediate));1065break;1066case Operand::Type::Invalid:1067s.PutCString("Invalid");1068break;1069case Operand::Type::Product:1070s.PutCString("(");1071DumpOperand(op.m_children[0], s);1072s.PutCString("*");1073DumpOperand(op.m_children[1], s);1074s.PutCString(")");1075break;1076case Operand::Type::Register:1077s.PutCString(op.m_register.GetStringRef());1078break;1079case Operand::Type::Sum:1080s.PutCString("(");1081DumpOperand(op.m_children[0], s);1082s.PutCString("+");1083DumpOperand(op.m_children[1], s);1084s.PutCString(")");1085break;1086}1087}10881089bool ParseOperands(1090llvm::SmallVectorImpl<Instruction::Operand> &operands) override {1091const char *operands_string = GetOperands(nullptr);10921093if (!operands_string) {1094return false;1095}10961097llvm::StringRef operands_ref(operands_string);10981099llvm::StringRef::const_iterator osi = operands_ref.begin();1100llvm::StringRef::const_iterator ose = operands_ref.end();11011102while (osi != ose) {1103Operand operand;1104llvm::StringRef::const_iterator iter;11051106if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose),1107operand.IsValid()) ||1108(std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose),1109operand.IsValid()) ||1110(std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose),1111operand.IsValid()) ||1112(std::tie(operand, iter) = ParseARMDerefAccess(osi, ose),1113operand.IsValid()) ||1114(std::tie(operand, iter) = ParseRegisterName(osi, ose),1115operand.IsValid()) ||1116(std::tie(operand, iter) = ParseImmediate(osi, ose),1117operand.IsValid())) {1118osi = iter;1119operands.push_back(operand);1120} else {1121return false;1122}11231124std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =1125ConsumeChar(osi, ',', ose);1126if (found_and_iter.first) {1127osi = found_and_iter.second;1128}11291130osi = ConsumeWhitespace(osi, ose);1131}11321133DisassemblerSP disasm_sp = m_disasm_wp.lock();11341135if (disasm_sp && operands.size() > 1) {1136// TODO tie this into the MC Disassembler's notion of clobbers.1137switch (disasm_sp->GetArchitecture().GetMachine()) {1138default:1139break;1140case llvm::Triple::x86:1141case llvm::Triple::x86_64:1142operands[operands.size() - 1].m_clobbered = true;1143break;1144case llvm::Triple::arm:1145operands[0].m_clobbered = true;1146break;1147}1148}11491150if (Log *log = GetLog(LLDBLog::Process)) {1151StreamString ss;11521153ss.Printf("[%s] expands to %zu operands:\n", operands_string,1154operands.size());1155for (const Operand &operand : operands) {1156ss.PutCString(" ");1157DumpOperand(operand, ss);1158ss.PutCString("\n");1159}11601161log->PutString(ss.GetString());1162}11631164return true;1165}11661167bool IsCall() override {1168VisitInstruction();1169return m_is_call;1170}11711172protected:1173std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;11741175bool m_is_valid = false;1176bool m_using_file_addr = false;1177bool m_has_visited_instruction = false;11781179// Be conservative. If we didn't understand the instruction, say it:1180// - Might branch1181// - Does not have a delay slot1182// - Is not a call1183// - Is not a load1184// - Is not an authenticated instruction1185bool m_does_branch = true;1186bool m_has_delay_slot = false;1187bool m_is_call = false;1188bool m_is_load = false;1189bool m_is_authenticated = false;11901191void VisitInstruction() {1192if (m_has_visited_instruction)1193return;11941195DisassemblerScope disasm(*this);1196if (!disasm)1197return;11981199DataExtractor data;1200if (!m_opcode.GetData(data))1201return;12021203bool is_alternate_isa;1204lldb::addr_t pc = m_address.GetFileAddress();1205DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =1206GetDisasmToUse(is_alternate_isa, disasm);1207const uint8_t *opcode_data = data.GetDataStart();1208const size_t opcode_data_len = data.GetByteSize();1209llvm::MCInst inst;1210const size_t inst_size =1211mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);1212if (inst_size == 0)1213return;12141215m_has_visited_instruction = true;1216m_does_branch = mc_disasm_ptr->CanBranch(inst);1217m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);1218m_is_call = mc_disasm_ptr->IsCall(inst);1219m_is_load = mc_disasm_ptr->IsLoad(inst);1220m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst);1221}12221223private:1224DisassemblerLLVMC::MCDisasmInstance *1225GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) {1226is_alternate_isa = false;1227if (disasm) {1228if (disasm->m_alternate_disasm_up) {1229const AddressClass address_class = GetAddressClass();12301231if (address_class == AddressClass::eCodeAlternateISA) {1232is_alternate_isa = true;1233return disasm->m_alternate_disasm_up.get();1234}1235}1236return disasm->m_disasm_up.get();1237}1238return nullptr;1239}1240};12411242std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>1243DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,1244const char *features_str,1245unsigned flavor,1246DisassemblerLLVMC &owner) {1247using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;12481249std::string Status;1250const llvm::Target *curr_target =1251llvm::TargetRegistry::lookupTarget(triple, Status);1252if (!curr_target)1253return Instance();12541255std::unique_ptr<llvm::MCInstrInfo> instr_info_up(1256curr_target->createMCInstrInfo());1257if (!instr_info_up)1258return Instance();12591260std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(1261curr_target->createMCRegInfo(triple));1262if (!reg_info_up)1263return Instance();12641265std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(1266curr_target->createMCSubtargetInfo(triple, cpu, features_str));1267if (!subtarget_info_up)1268return Instance();12691270llvm::MCTargetOptions MCOptions;1271std::unique_ptr<llvm::MCAsmInfo> asm_info_up(1272curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions));1273if (!asm_info_up)1274return Instance();12751276std::unique_ptr<llvm::MCContext> context_up(1277new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(),1278reg_info_up.get(), subtarget_info_up.get()));1279if (!context_up)1280return Instance();12811282std::unique_ptr<llvm::MCDisassembler> disasm_up(1283curr_target->createMCDisassembler(*subtarget_info_up, *context_up));1284if (!disasm_up)1285return Instance();12861287std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(1288curr_target->createMCRelocationInfo(triple, *context_up));1289if (!rel_info_up)1290return Instance();12911292std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(1293curr_target->createMCSymbolizer(1294triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner,1295context_up.get(), std::move(rel_info_up)));1296disasm_up->setSymbolizer(std::move(symbolizer_up));12971298unsigned asm_printer_variant =1299flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;13001301std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(1302curr_target->createMCInstPrinter(llvm::Triple{triple},1303asm_printer_variant, *asm_info_up,1304*instr_info_up, *reg_info_up));1305if (!instr_printer_up)1306return Instance();13071308instr_printer_up->setPrintBranchImmAsAddress(true);13091310// Not all targets may have registered createMCInstrAnalysis().1311std::unique_ptr<llvm::MCInstrAnalysis> instr_analysis_up(1312curr_target->createMCInstrAnalysis(instr_info_up.get()));13131314return Instance(new MCDisasmInstance(1315std::move(instr_info_up), std::move(reg_info_up),1316std::move(subtarget_info_up), std::move(asm_info_up),1317std::move(context_up), std::move(disasm_up), std::move(instr_printer_up),1318std::move(instr_analysis_up)));1319}13201321DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(1322std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,1323std::unique_ptr<llvm::MCRegisterInfo> &®_info_up,1324std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,1325std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,1326std::unique_ptr<llvm::MCContext> &&context_up,1327std::unique_ptr<llvm::MCDisassembler> &&disasm_up,1328std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up,1329std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up)1330: m_instr_info_up(std::move(instr_info_up)),1331m_reg_info_up(std::move(reg_info_up)),1332m_subtarget_info_up(std::move(subtarget_info_up)),1333m_asm_info_up(std::move(asm_info_up)),1334m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)),1335m_instr_printer_up(std::move(instr_printer_up)),1336m_instr_analysis_up(std::move(instr_analysis_up)) {1337assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&1338m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);1339}13401341uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(1342const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,1343llvm::MCInst &mc_inst) const {1344llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);1345llvm::MCDisassembler::DecodeStatus status;13461347uint64_t new_inst_size;1348status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,1349llvm::nulls());1350if (status == llvm::MCDisassembler::Success)1351return new_inst_size;1352else1353return 0;1354}13551356void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(1357llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string,1358std::string &comments_string) {1359llvm::raw_string_ostream inst_stream(inst_string);1360llvm::raw_string_ostream comments_stream(comments_string);13611362inst_stream.enable_colors(m_instr_printer_up->getUseColor());1363m_instr_printer_up->setCommentStream(comments_stream);1364m_instr_printer_up->printInst(&mc_inst, pc, llvm::StringRef(),1365*m_subtarget_info_up, inst_stream);1366m_instr_printer_up->setCommentStream(llvm::nulls());13671368comments_stream.flush();13691370static std::string g_newlines("\r\n");13711372for (size_t newline_pos = 0;1373(newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) !=1374comments_string.npos;1375/**/) {1376comments_string.replace(comments_string.begin() + newline_pos,1377comments_string.begin() + newline_pos + 1, 1, ' ');1378}1379}13801381void DisassemblerLLVMC::MCDisasmInstance::SetStyle(1382bool use_hex_immed, HexImmediateStyle hex_style) {1383m_instr_printer_up->setPrintImmHex(use_hex_immed);1384switch (hex_style) {1385case eHexStyleC:1386m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);1387break;1388case eHexStyleAsm:1389m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);1390break;1391}1392}13931394void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color) {1395m_instr_printer_up->setUseColor(use_color);1396}13971398bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const {1399return m_instr_printer_up->getUseColor();1400}14011402bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(1403llvm::MCInst &mc_inst) const {1404if (m_instr_analysis_up)1405return m_instr_analysis_up->mayAffectControlFlow(mc_inst, *m_reg_info_up);1406return m_instr_info_up->get(mc_inst.getOpcode())1407.mayAffectControlFlow(mc_inst, *m_reg_info_up);1408}14091410bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(1411llvm::MCInst &mc_inst) const {1412return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot();1413}14141415bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {1416if (m_instr_analysis_up)1417return m_instr_analysis_up->isCall(mc_inst);1418return m_instr_info_up->get(mc_inst.getOpcode()).isCall();1419}14201421bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const {1422return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad();1423}14241425bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated(1426llvm::MCInst &mc_inst) const {1427const auto &InstrDesc = m_instr_info_up->get(mc_inst.getOpcode());14281429// Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc41430// == 'a' + 'c') as authenticated instructions for reporting purposes, in1431// addition to the standard authenticated instructions specified in ARMv8.3.1432bool IsBrkC47x = false;1433if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) {1434const llvm::MCOperand &Op0 = mc_inst.getOperand(0);1435if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474)1436IsBrkC47x = true;1437}14381439return InstrDesc.isAuthenticated() || IsBrkC47x;1440}14411442DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,1443const char *flavor_string)1444: Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),1445m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),1446m_adrp_insn() {1447if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {1448m_flavor.assign("default");1449}14501451unsigned flavor = ~0U;1452llvm::Triple triple = arch.GetTriple();14531454// So far the only supported flavor is "intel" on x86. The base class will1455// set this correctly coming in.1456if (triple.getArch() == llvm::Triple::x86 ||1457triple.getArch() == llvm::Triple::x86_64) {1458if (m_flavor == "intel") {1459flavor = 1;1460} else if (m_flavor == "att") {1461flavor = 0;1462}1463}14641465ArchSpec thumb_arch(arch);1466if (triple.getArch() == llvm::Triple::arm) {1467std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());1468// Replace "arm" with "thumb" so we get all thumb variants correct1469if (thumb_arch_name.size() > 3) {1470thumb_arch_name.erase(0, 3);1471thumb_arch_name.insert(0, "thumb");1472} else {1473thumb_arch_name = "thumbv9.3a";1474}1475thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));1476}14771478// If no sub architecture specified then use the most recent arm architecture1479// so the disassembler will return all instructions. Without it we will see a1480// lot of unknown opcodes if the code uses instructions which are not1481// available in the oldest arm version (which is used when no sub architecture1482// is specified).1483if (triple.getArch() == llvm::Triple::arm &&1484triple.getSubArch() == llvm::Triple::NoSubArch)1485triple.setArchName("armv9.3a");14861487std::string features_str;1488const char *triple_str = triple.getTriple().c_str();14891490// ARM Cortex M0-M7 devices only execute thumb instructions1491if (arch.IsAlwaysThumbInstructions()) {1492triple_str = thumb_arch.GetTriple().getTriple().c_str();1493features_str += "+fp-armv8,";1494}14951496const char *cpu = "";14971498switch (arch.GetCore()) {1499case ArchSpec::eCore_mips32:1500case ArchSpec::eCore_mips32el:1501cpu = "mips32";1502break;1503case ArchSpec::eCore_mips32r2:1504case ArchSpec::eCore_mips32r2el:1505cpu = "mips32r2";1506break;1507case ArchSpec::eCore_mips32r3:1508case ArchSpec::eCore_mips32r3el:1509cpu = "mips32r3";1510break;1511case ArchSpec::eCore_mips32r5:1512case ArchSpec::eCore_mips32r5el:1513cpu = "mips32r5";1514break;1515case ArchSpec::eCore_mips32r6:1516case ArchSpec::eCore_mips32r6el:1517cpu = "mips32r6";1518break;1519case ArchSpec::eCore_mips64:1520case ArchSpec::eCore_mips64el:1521cpu = "mips64";1522break;1523case ArchSpec::eCore_mips64r2:1524case ArchSpec::eCore_mips64r2el:1525cpu = "mips64r2";1526break;1527case ArchSpec::eCore_mips64r3:1528case ArchSpec::eCore_mips64r3el:1529cpu = "mips64r3";1530break;1531case ArchSpec::eCore_mips64r5:1532case ArchSpec::eCore_mips64r5el:1533cpu = "mips64r5";1534break;1535case ArchSpec::eCore_mips64r6:1536case ArchSpec::eCore_mips64r6el:1537cpu = "mips64r6";1538break;1539default:1540cpu = "";1541break;1542}15431544if (arch.IsMIPS()) {1545uint32_t arch_flags = arch.GetFlags();1546if (arch_flags & ArchSpec::eMIPSAse_msa)1547features_str += "+msa,";1548if (arch_flags & ArchSpec::eMIPSAse_dsp)1549features_str += "+dsp,";1550if (arch_flags & ArchSpec::eMIPSAse_dspr2)1551features_str += "+dspr2,";1552}15531554// If any AArch64 variant, enable latest ISA with all extensions.1555if (triple.isAArch64()) {1556features_str += "+all,";15571558if (triple.getVendor() == llvm::Triple::Apple)1559cpu = "apple-latest";1560}15611562if (triple.isRISCV()) {1563uint32_t arch_flags = arch.GetFlags();1564if (arch_flags & ArchSpec::eRISCV_rvc)1565features_str += "+c,";1566if (arch_flags & ArchSpec::eRISCV_rve)1567features_str += "+e,";1568if ((arch_flags & ArchSpec::eRISCV_float_abi_single) ==1569ArchSpec::eRISCV_float_abi_single)1570features_str += "+f,";1571if ((arch_flags & ArchSpec::eRISCV_float_abi_double) ==1572ArchSpec::eRISCV_float_abi_double)1573features_str += "+f,+d,";1574if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) ==1575ArchSpec::eRISCV_float_abi_quad)1576features_str += "+f,+d,+q,";1577// FIXME: how do we detect features such as `+a`, `+m`?1578// Turn them on by default now, since everyone seems to use them1579features_str += "+a,+m,";1580}15811582// We use m_disasm_up.get() to tell whether we are valid or not, so if this1583// isn't good for some reason, we won't be valid and FindPlugin will fail and1584// we won't get used.1585m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(),1586flavor, *this);15871588llvm::Triple::ArchType llvm_arch = triple.getArch();15891590// For arm CPUs that can execute arm or thumb instructions, also create a1591// thumb instruction disassembler.1592if (llvm_arch == llvm::Triple::arm) {1593std::string thumb_triple(thumb_arch.GetTriple().getTriple());1594m_alternate_disasm_up =1595MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(),1596flavor, *this);1597if (!m_alternate_disasm_up)1598m_disasm_up.reset();15991600} else if (arch.IsMIPS()) {1601/* Create alternate disassembler for MIPS16 and microMIPS */1602uint32_t arch_flags = arch.GetFlags();1603if (arch_flags & ArchSpec::eMIPSAse_mips16)1604features_str += "+mips16,";1605else if (arch_flags & ArchSpec::eMIPSAse_micromips)1606features_str += "+micromips,";16071608m_alternate_disasm_up = MCDisasmInstance::Create(1609triple_str, cpu, features_str.c_str(), flavor, *this);1610if (!m_alternate_disasm_up)1611m_disasm_up.reset();1612}1613}16141615DisassemblerLLVMC::~DisassemblerLLVMC() = default;16161617lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch,1618const char *flavor) {1619if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) {1620auto disasm_sp = std::make_shared<DisassemblerLLVMC>(arch, flavor);1621if (disasm_sp && disasm_sp->IsValid())1622return disasm_sp;1623}1624return lldb::DisassemblerSP();1625}16261627size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr,1628const DataExtractor &data,1629lldb::offset_t data_offset,1630size_t num_instructions,1631bool append, bool data_from_file) {1632if (!append)1633m_instruction_list.Clear();16341635if (!IsValid())1636return 0;16371638m_data_from_file = data_from_file;1639uint32_t data_cursor = data_offset;1640const size_t data_byte_size = data.GetByteSize();1641uint32_t instructions_parsed = 0;1642Address inst_addr(base_addr);16431644while (data_cursor < data_byte_size &&1645instructions_parsed < num_instructions) {16461647AddressClass address_class = AddressClass::eCode;16481649if (m_alternate_disasm_up)1650address_class = inst_addr.GetAddressClass();16511652InstructionSP inst_sp(1653new InstructionLLVMC(*this, inst_addr, address_class));16541655if (!inst_sp)1656break;16571658uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);16591660if (inst_size == 0)1661break;16621663m_instruction_list.Append(inst_sp);1664data_cursor += inst_size;1665inst_addr.Slide(inst_size);1666instructions_parsed++;1667}16681669return data_cursor - data_offset;1670}16711672void DisassemblerLLVMC::Initialize() {1673PluginManager::RegisterPlugin(GetPluginNameStatic(),1674"Disassembler that uses LLVM MC to disassemble "1675"i386, x86_64, ARM, and ARM64.",1676CreateInstance);16771678llvm::InitializeAllTargetInfos();1679llvm::InitializeAllTargetMCs();1680llvm::InitializeAllAsmParsers();1681llvm::InitializeAllDisassemblers();1682}16831684void DisassemblerLLVMC::Terminate() {1685PluginManager::UnregisterPlugin(CreateInstance);1686}16871688int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,1689uint64_t offset, uint64_t size,1690int tag_type, void *tag_bug) {1691return static_cast<DisassemblerLLVMC *>(disassembler)1692->OpInfo(pc, offset, size, tag_type, tag_bug);1693}16941695const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,1696uint64_t value,1697uint64_t *type, uint64_t pc,1698const char **name) {1699return static_cast<DisassemblerLLVMC *>(disassembler)1700->SymbolLookup(value, type, pc, name);1701}17021703bool DisassemblerLLVMC::FlavorValidForArchSpec(1704const lldb_private::ArchSpec &arch, const char *flavor) {1705llvm::Triple triple = arch.GetTriple();1706if (flavor == nullptr || strcmp(flavor, "default") == 0)1707return true;17081709if (triple.getArch() == llvm::Triple::x86 ||1710triple.getArch() == llvm::Triple::x86_64) {1711return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0;1712} else1713return false;1714}17151716bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); }17171718int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,1719int tag_type, void *tag_bug) {1720switch (tag_type) {1721default:1722break;1723case 1:1724memset(tag_bug, 0, sizeof(::LLVMOpInfo1));1725break;1726}1727return 0;1728}17291730const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,1731uint64_t pc, const char **name) {1732if (*type_ptr) {1733if (m_exe_ctx && m_inst) {1734// std::string remove_this_prior_to_checkin;1735Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;1736Address value_so_addr;1737Address pc_so_addr;1738if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||1739target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||1740target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {1741if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {1742m_adrp_address = pc;1743m_adrp_insn = value;1744*name = nullptr;1745*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;1746return nullptr;1747}1748// If this instruction is an ADD and1749// the previous instruction was an ADRP and1750// the ADRP's register and this ADD's register are the same,1751// then this is a pc-relative address calculation.1752if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&1753m_adrp_insn && m_adrp_address == pc - 4 &&1754(*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) {1755uint32_t addxri_inst;1756uint64_t adrp_imm, addxri_imm;1757// Get immlo and immhi bits, OR them together to get the ADRP imm1758// value.1759adrp_imm =1760((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3);1761// if high bit of immhi after right-shifting set, sign extend1762if (adrp_imm & (1ULL << 20))1763adrp_imm |= ~((1ULL << 21) - 1);17641765addxri_inst = value;1766addxri_imm = (addxri_inst >> 10) & 0xfff;1767// check if 'sh' bit is set, shift imm value up if so1768// (this would make no sense, ADRP already gave us this part)1769if ((addxri_inst >> (12 + 5 + 5)) & 1)1770addxri_imm <<= 12;1771value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +1772addxri_imm;1773}1774m_adrp_address = LLDB_INVALID_ADDRESS;1775m_adrp_insn.reset();1776}17771778if (m_inst->UsingFileAddress()) {1779ModuleSP module_sp(m_inst->GetAddress().GetModule());1780if (module_sp) {1781module_sp->ResolveFileAddress(value, value_so_addr);1782module_sp->ResolveFileAddress(pc, pc_so_addr);1783}1784} else if (target && !target->GetSectionLoadList().IsEmpty()) {1785target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);1786target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr);1787}17881789SymbolContext sym_ctx;1790const SymbolContextItem resolve_scope =1791eSymbolContextFunction | eSymbolContextSymbol;1792if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {1793pc_so_addr.GetModule()->ResolveSymbolContextForAddress(1794pc_so_addr, resolve_scope, sym_ctx);1795}17961797if (value_so_addr.IsValid() && value_so_addr.GetSection()) {1798StreamString ss;17991800bool format_omitting_current_func_name = false;1801if (sym_ctx.symbol || sym_ctx.function) {1802AddressRange range;1803if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&1804range.GetBaseAddress().IsValid() &&1805range.ContainsLoadAddress(value_so_addr, target)) {1806format_omitting_current_func_name = true;1807}1808}18091810// If the "value" address (the target address we're symbolicating) is1811// inside the same SymbolContext as the current instruction pc1812// (pc_so_addr), don't print the full function name - just print it1813// with DumpStyleNoFunctionName style, e.g. "<+36>".1814if (format_omitting_current_func_name) {1815value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName,1816Address::DumpStyleSectionNameOffset);1817} else {1818value_so_addr.Dump(1819&ss, target,1820Address::DumpStyleResolvedDescriptionNoFunctionArguments,1821Address::DumpStyleSectionNameOffset);1822}18231824if (!ss.GetString().empty()) {1825// If Address::Dump returned a multi-line description, most commonly1826// seen when we have multiple levels of inlined functions at an1827// address, only show the first line.1828std::string str = std::string(ss.GetString());1829size_t first_eol_char = str.find_first_of("\r\n");1830if (first_eol_char != std::string::npos) {1831str.erase(first_eol_char);1832}1833m_inst->AppendComment(str);1834}1835}1836}1837}18381839// TODO: llvm-objdump sets the type_ptr to the1840// LLVMDisassembler_ReferenceType_Out_* values1841// based on where value_so_addr is pointing, with1842// Mach-O specific augmentations in MachODump.cpp. e.g.1843// see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand1844// handles.1845*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;1846*name = nullptr;1847return nullptr;1848}184918501851