Path: blob/main/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.cpp
35231 views
//===- Disassembler.cpp - Disassembler for hex strings --------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This class implements the disassembler of strings of bytes written in9// hexadecimal, from standard input or from a file.10//11//===----------------------------------------------------------------------===//1213#include "Disassembler.h"14#include "llvm/MC/MCAsmInfo.h"15#include "llvm/MC/MCContext.h"16#include "llvm/MC/MCDisassembler/MCDisassembler.h"17#include "llvm/MC/MCInst.h"18#include "llvm/MC/MCObjectFileInfo.h"19#include "llvm/MC/MCRegisterInfo.h"20#include "llvm/MC/MCStreamer.h"21#include "llvm/MC/MCSubtargetInfo.h"22#include "llvm/MC/TargetRegistry.h"23#include "llvm/Support/MemoryBuffer.h"24#include "llvm/Support/SourceMgr.h"25#include "llvm/Support/raw_ostream.h"26#include "llvm/TargetParser/Triple.h"2728using namespace llvm;2930typedef std::pair<std::vector<unsigned char>, std::vector<const char *>>31ByteArrayTy;3233static bool PrintInsts(const MCDisassembler &DisAsm, const ByteArrayTy &Bytes,34SourceMgr &SM, MCStreamer &Streamer, bool InAtomicBlock,35const MCSubtargetInfo &STI) {36ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size());3738// Disassemble it to strings.39uint64_t Size;40uint64_t Index;4142for (Index = 0; Index < Bytes.first.size(); Index += Size) {43MCInst Inst;4445MCDisassembler::DecodeStatus S;46S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());47switch (S) {48case MCDisassembler::Fail:49SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),50SourceMgr::DK_Warning,51"invalid instruction encoding");52// Don't try to resynchronise the stream in a block53if (InAtomicBlock)54return true;5556if (Size == 0)57Size = 1; // skip illegible bytes5859break;6061case MCDisassembler::SoftFail:62SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),63SourceMgr::DK_Warning,64"potentially undefined instruction encoding");65[[fallthrough]];6667case MCDisassembler::Success:68Streamer.emitInstruction(Inst, STI);69break;70}71}7273return false;74}7576static bool SkipToToken(StringRef &Str) {77for (;;) {78if (Str.empty())79return false;8081// Strip horizontal whitespace and commas.82if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) {83Str = Str.substr(Pos);84continue;85}8687// If this is the start of a comment, remove the rest of the line.88if (Str[0] == '#') {89Str = Str.substr(Str.find_first_of('\n'));90continue;91}92return true;93}94}959697static bool ByteArrayFromString(ByteArrayTy &ByteArray,98StringRef &Str,99SourceMgr &SM) {100while (SkipToToken(Str)) {101// Handled by higher level102if (Str[0] == '[' || Str[0] == ']')103return false;104105// Get the current token.106size_t Next = Str.find_first_of(" \t\n\r,#[]");107StringRef Value = Str.substr(0, Next);108109// Convert to a byte and add to the byte vector.110unsigned ByteVal;111if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {112// If we have an error, print it and skip to the end of line.113SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,114"invalid input token");115Str = Str.substr(Str.find('\n'));116ByteArray.first.clear();117ByteArray.second.clear();118continue;119}120121ByteArray.first.push_back(ByteVal);122ByteArray.second.push_back(Value.data());123Str = Str.substr(Next);124}125126return false;127}128129int Disassembler::disassemble(const Target &T, const std::string &Triple,130MCSubtargetInfo &STI, MCStreamer &Streamer,131MemoryBuffer &Buffer, SourceMgr &SM,132MCContext &Ctx,133const MCTargetOptions &MCOptions) {134135std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));136if (!MRI) {137errs() << "error: no register info for target " << Triple << "\n";138return -1;139}140141std::unique_ptr<const MCAsmInfo> MAI(142T.createMCAsmInfo(*MRI, Triple, MCOptions));143if (!MAI) {144errs() << "error: no assembly info for target " << Triple << "\n";145return -1;146}147148std::unique_ptr<const MCDisassembler> DisAsm(149T.createMCDisassembler(STI, Ctx));150if (!DisAsm) {151errs() << "error: no disassembler for target " << Triple << "\n";152return -1;153}154155// Set up initial section manually here156Streamer.initSections(false, STI);157158bool ErrorOccurred = false;159160// Convert the input to a vector for disassembly.161ByteArrayTy ByteArray;162StringRef Str = Buffer.getBuffer();163bool InAtomicBlock = false;164165while (SkipToToken(Str)) {166ByteArray.first.clear();167ByteArray.second.clear();168169if (Str[0] == '[') {170if (InAtomicBlock) {171SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,172"nested atomic blocks make no sense");173ErrorOccurred = true;174}175InAtomicBlock = true;176Str = Str.drop_front();177continue;178} else if (Str[0] == ']') {179if (!InAtomicBlock) {180SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,181"attempt to close atomic block without opening");182ErrorOccurred = true;183}184InAtomicBlock = false;185Str = Str.drop_front();186continue;187}188189// It's a real token, get the bytes and emit them190ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);191192if (!ByteArray.first.empty())193ErrorOccurred |=194PrintInsts(*DisAsm, ByteArray, SM, Streamer, InAtomicBlock, STI);195}196197if (InAtomicBlock) {198SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,199"unclosed atomic block");200ErrorOccurred = true;201}202203return ErrorOccurred;204}205206207