Path: blob/main/contrib/llvm-project/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
35266 views
//===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"9#include "llvm/MC/MCContext.h"10#include "llvm/MC/MCExpr.h"11#include "llvm/MC/MCInst.h"12#include "llvm/Support/raw_ostream.h"13#include <cstring>1415using namespace llvm;1617namespace llvm {18class Triple;19}2021// This function tries to add a symbolic operand in place of the immediate22// Value in the MCInst. The immediate Value has had any PC adjustment made by23// the caller. If the instruction is a branch instruction then IsBranch is true,24// else false. If the getOpInfo() function was set as part of the25// setupForSymbolicDisassembly() call then that function is called to get any26// symbolic information at the Address for this instruction. If that returns27// non-zero then the symbolic information it returns is used to create an MCExpr28// and that is added as an operand to the MCInst. If getOpInfo() returns zero29// and IsBranch is true then a symbol look up for Value is done and if a symbol30// is found an MCExpr is created with that, else an MCExpr with Value is31// created. This function returns true if it adds an operand to the MCInst and32// false otherwise.33bool MCExternalSymbolizer::tryAddingSymbolicOperand(34MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address,35bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {36struct LLVMOpInfo1 SymbolicOp;37std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));38SymbolicOp.Value = Value;3940if (!GetOpInfo ||41!GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) {42// Clear SymbolicOp.Value from above and also all other fields.43std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));4445// At this point, GetOpInfo() did not find any relocation information about46// this operand and we are left to use the SymbolLookUp() call back to guess47// if the Value is the address of a symbol. In the case this is a branch48// that always makes sense to guess. But in the case of an immediate it is49// a bit more questionable if it is an address of a symbol or some other50// reference. So if the immediate Value comes from a width of 1 byte,51// OpSize, we will not guess it is an address of a symbol. Because in52// object files assembled starting at address 0 this usually leads to53// incorrect symbolication.54if (!SymbolLookUp || (OpSize == 1 && !IsBranch))55return false;5657uint64_t ReferenceType;58if (IsBranch)59ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;60else61ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;62const char *ReferenceName;63const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,64&ReferenceName);65if (Name) {66SymbolicOp.AddSymbol.Name = Name;67SymbolicOp.AddSymbol.Present = true;68// If Name is a C++ symbol name put the human readable name in a comment.69if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name)70cStream << ReferenceName;71}72// For branches always create an MCExpr so it gets printed as hex address.73else if (IsBranch) {74SymbolicOp.Value = Value;75}76if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)77cStream << "symbol stub for: " << ReferenceName;78else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message)79cStream << "Objc message: " << ReferenceName;80if (!Name && !IsBranch)81return false;82}8384const MCExpr *Add = nullptr;85if (SymbolicOp.AddSymbol.Present) {86if (SymbolicOp.AddSymbol.Name) {87StringRef Name(SymbolicOp.AddSymbol.Name);88MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);89Add = MCSymbolRefExpr::create(Sym, Ctx);90} else {91Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx);92}93}9495const MCExpr *Sub = nullptr;96if (SymbolicOp.SubtractSymbol.Present) {97if (SymbolicOp.SubtractSymbol.Name) {98StringRef Name(SymbolicOp.SubtractSymbol.Name);99MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);100Sub = MCSymbolRefExpr::create(Sym, Ctx);101} else {102Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx);103}104}105106const MCExpr *Off = nullptr;107if (SymbolicOp.Value != 0)108Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);109110const MCExpr *Expr;111if (Sub) {112const MCExpr *LHS;113if (Add)114LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);115else116LHS = MCUnaryExpr::createMinus(Sub, Ctx);117if (Off)118Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);119else120Expr = LHS;121} else if (Add) {122if (Off)123Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);124else125Expr = Add;126} else {127if (Off)128Expr = Off;129else130Expr = MCConstantExpr::create(0, Ctx);131}132133Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind);134if (!Expr)135return false;136137MI.addOperand(MCOperand::createExpr(Expr));138return true;139}140141// This function tries to add a comment as to what is being referenced by a load142// instruction with the base register that is the Pc. These can often be values143// in a literal pool near the Address of the instruction. The Address of the144// instruction and its immediate Value are used as a possible literal pool entry.145// The SymbolLookUp call back will return the name of a symbol referenced by the146// literal pool's entry if the referenced address is that of a symbol. Or it147// will return a pointer to a literal 'C' string if the referenced address of148// the literal pool's entry is an address into a section with C string literals.149// Or if the reference is to an Objective-C data structure it will return a150// specific reference type for it and a string.151void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,152int64_t Value,153uint64_t Address) {154if (SymbolLookUp) {155uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;156const char *ReferenceName;157(void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);158if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)159cStream << "literal pool symbol address: " << ReferenceName;160else if(ReferenceType ==161LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {162cStream << "literal pool for: \"";163cStream.write_escaped(ReferenceName);164cStream << "\"";165}166else if(ReferenceType ==167LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)168cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";169else if(ReferenceType ==170LLVMDisassembler_ReferenceType_Out_Objc_Message)171cStream << "Objc message: " << ReferenceName;172else if(ReferenceType ==173LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)174cStream << "Objc message ref: " << ReferenceName;175else if(ReferenceType ==176LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)177cStream << "Objc selector ref: " << ReferenceName;178else if(ReferenceType ==179LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)180cStream << "Objc class ref: " << ReferenceName;181}182}183184namespace llvm {185MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,186LLVMSymbolLookupCallback SymbolLookUp,187void *DisInfo, MCContext *Ctx,188std::unique_ptr<MCRelocationInfo> &&RelInfo) {189assert(Ctx && "No MCContext given for symbolic disassembly");190191return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,192SymbolLookUp, DisInfo);193}194}195196197