Path: blob/main/contrib/llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
39642 views
//===-- ObjectFileWasm.cpp ------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "ObjectFileWasm.h"9#include "lldb/Core/Module.h"10#include "lldb/Core/ModuleSpec.h"11#include "lldb/Core/PluginManager.h"12#include "lldb/Core/Section.h"13#include "lldb/Target/Process.h"14#include "lldb/Target/SectionLoadList.h"15#include "lldb/Target/Target.h"16#include "lldb/Utility/DataBufferHeap.h"17#include "lldb/Utility/LLDBLog.h"18#include "lldb/Utility/Log.h"19#include "llvm/ADT/ArrayRef.h"20#include "llvm/ADT/SmallVector.h"21#include "llvm/ADT/StringRef.h"22#include "llvm/BinaryFormat/Magic.h"23#include "llvm/BinaryFormat/Wasm.h"24#include "llvm/Support/Endian.h"25#include "llvm/Support/Format.h"26#include <optional>2728using namespace lldb;29using namespace lldb_private;30using namespace lldb_private::wasm;3132LLDB_PLUGIN_DEFINE(ObjectFileWasm)3334static const uint32_t kWasmHeaderSize =35sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);3637/// Checks whether the data buffer starts with a valid Wasm module header.38static bool ValidateModuleHeader(const DataBufferSP &data_sp) {39if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)40return false;4142if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=43llvm::file_magic::wasm_object)44return false;4546const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);4748uint32_t version = llvm::support::endian::read32le(Ptr);49return version == llvm::wasm::WasmVersion;50}5152static std::optional<ConstString>53GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {54// A Wasm string is encoded as a vector of UTF-8 codes.55// Vectors are encoded with their u32 length followed by the element56// sequence.57uint64_t len = data.getULEB128(c);58if (!c) {59consumeError(c.takeError());60return std::nullopt;61}6263if (len >= (uint64_t(1) << 32)) {64return std::nullopt;65}6667llvm::SmallVector<uint8_t, 32> str_storage;68data.getU8(c, str_storage, len);69if (!c) {70consumeError(c.takeError());71return std::nullopt;72}7374llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));75return ConstString(str);76}7778char ObjectFileWasm::ID;7980void ObjectFileWasm::Initialize() {81PluginManager::RegisterPlugin(GetPluginNameStatic(),82GetPluginDescriptionStatic(), CreateInstance,83CreateMemoryInstance, GetModuleSpecifications);84}8586void ObjectFileWasm::Terminate() {87PluginManager::UnregisterPlugin(CreateInstance);88}8990ObjectFile *91ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp,92offset_t data_offset, const FileSpec *file,93offset_t file_offset, offset_t length) {94Log *log = GetLog(LLDBLog::Object);9596if (!data_sp) {97data_sp = MapFileData(*file, length, file_offset);98if (!data_sp) {99LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",100file->GetPath().c_str());101return nullptr;102}103data_offset = 0;104}105106assert(data_sp);107if (!ValidateModuleHeader(data_sp)) {108LLDB_LOGF(log,109"Failed to create ObjectFileWasm instance: invalid Wasm header");110return nullptr;111}112113// Update the data to contain the entire file if it doesn't contain it114// already.115if (data_sp->GetByteSize() < length) {116data_sp = MapFileData(*file, length, file_offset);117if (!data_sp) {118LLDB_LOGF(log,119"Failed to create ObjectFileWasm instance: cannot read file %s",120file->GetPath().c_str());121return nullptr;122}123data_offset = 0;124}125126std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(127module_sp, data_sp, data_offset, file, file_offset, length));128ArchSpec spec = objfile_up->GetArchitecture();129if (spec && objfile_up->SetModulesArchitecture(spec)) {130LLDB_LOGF(log,131"%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",132static_cast<void *>(objfile_up.get()),133static_cast<void *>(objfile_up->GetModule().get()),134objfile_up->GetModule()->GetSpecificationDescription().c_str(),135file ? file->GetPath().c_str() : "<NULL>");136return objfile_up.release();137}138139LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");140return nullptr;141}142143ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,144WritableDataBufferSP data_sp,145const ProcessSP &process_sp,146addr_t header_addr) {147if (!ValidateModuleHeader(data_sp))148return nullptr;149150std::unique_ptr<ObjectFileWasm> objfile_up(151new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));152ArchSpec spec = objfile_up->GetArchitecture();153if (spec && objfile_up->SetModulesArchitecture(spec))154return objfile_up.release();155return nullptr;156}157158bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {159// Buffer sufficient to read a section header and find the pointer to the next160// section.161const uint32_t kBufferSize = 1024;162DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);163164llvm::DataExtractor data = section_header_data.GetAsLLVM();165llvm::DataExtractor::Cursor c(0);166167// Each section consists of:168// - a one-byte section id,169// - the u32 size of the contents, in bytes,170// - the actual contents.171uint8_t section_id = data.getU8(c);172uint64_t payload_len = data.getULEB128(c);173if (!c)174return !llvm::errorToBool(c.takeError());175176if (payload_len >= (uint64_t(1) << 32))177return false;178179if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {180// Custom sections have the id 0. Their contents consist of a name181// identifying the custom section, followed by an uninterpreted sequence182// of bytes.183lldb::offset_t prev_offset = c.tell();184std::optional<ConstString> sect_name = GetWasmString(data, c);185if (!sect_name)186return false;187188if (payload_len < c.tell() - prev_offset)189return false;190191uint32_t section_length = payload_len - (c.tell() - prev_offset);192m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,193section_id, *sect_name});194*offset_ptr += (c.tell() + section_length);195} else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {196m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),197static_cast<uint32_t>(payload_len),198section_id, ConstString()});199*offset_ptr += (c.tell() + payload_len);200} else {201// Invalid section id.202return false;203}204return true;205}206207bool ObjectFileWasm::DecodeSections() {208lldb::offset_t offset = kWasmHeaderSize;209if (IsInMemory()) {210offset += m_memory_addr;211}212213while (DecodeNextSection(&offset))214;215return true;216}217218size_t ObjectFileWasm::GetModuleSpecifications(219const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,220offset_t file_offset, offset_t length, ModuleSpecList &specs) {221if (!ValidateModuleHeader(data_sp)) {222return 0;223}224225ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));226specs.Append(spec);227return 1;228}229230ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp,231offset_t data_offset, const FileSpec *file,232offset_t offset, offset_t length)233: ObjectFile(module_sp, file, offset, length, data_sp, data_offset),234m_arch("wasm32-unknown-unknown-wasm") {235m_data.SetAddressByteSize(4);236}237238ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,239lldb::WritableDataBufferSP header_data_sp,240const lldb::ProcessSP &process_sp,241lldb::addr_t header_addr)242: ObjectFile(module_sp, process_sp, header_addr, header_data_sp),243m_arch("wasm32-unknown-unknown-wasm") {}244245bool ObjectFileWasm::ParseHeader() {246// We already parsed the header during initialization.247return true;248}249250void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}251252static SectionType GetSectionTypeFromName(llvm::StringRef Name) {253if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {254return llvm::StringSwitch<SectionType>(Name)255.Case("abbrev", eSectionTypeDWARFDebugAbbrev)256.Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo)257.Case("addr", eSectionTypeDWARFDebugAddr)258.Case("aranges", eSectionTypeDWARFDebugAranges)259.Case("cu_index", eSectionTypeDWARFDebugCuIndex)260.Case("frame", eSectionTypeDWARFDebugFrame)261.Case("info", eSectionTypeDWARFDebugInfo)262.Case("info.dwo", eSectionTypeDWARFDebugInfoDwo)263.Cases("line", "line.dwo", eSectionTypeDWARFDebugLine)264.Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr)265.Case("loc", eSectionTypeDWARFDebugLoc)266.Case("loc.dwo", eSectionTypeDWARFDebugLocDwo)267.Case("loclists", eSectionTypeDWARFDebugLocLists)268.Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo)269.Case("macinfo", eSectionTypeDWARFDebugMacInfo)270.Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro)271.Case("names", eSectionTypeDWARFDebugNames)272.Case("pubnames", eSectionTypeDWARFDebugPubNames)273.Case("pubtypes", eSectionTypeDWARFDebugPubTypes)274.Case("ranges", eSectionTypeDWARFDebugRanges)275.Case("rnglists", eSectionTypeDWARFDebugRngLists)276.Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo)277.Case("str", eSectionTypeDWARFDebugStr)278.Case("str.dwo", eSectionTypeDWARFDebugStrDwo)279.Case("str_offsets", eSectionTypeDWARFDebugStrOffsets)280.Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo)281.Case("tu_index", eSectionTypeDWARFDebugTuIndex)282.Case("types", eSectionTypeDWARFDebugTypes)283.Case("types.dwo", eSectionTypeDWARFDebugTypesDwo)284.Default(eSectionTypeOther);285}286return eSectionTypeOther;287}288289void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {290if (m_sections_up)291return;292293m_sections_up = std::make_unique<SectionList>();294295if (m_sect_infos.empty()) {296DecodeSections();297}298299for (const section_info §_info : m_sect_infos) {300SectionType section_type = eSectionTypeOther;301ConstString section_name;302offset_t file_offset = sect_info.offset & 0xffffffff;303addr_t vm_addr = file_offset;304size_t vm_size = sect_info.size;305306if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {307section_type = eSectionTypeCode;308section_name = ConstString("code");309310// A code address in DWARF for WebAssembly is the offset of an311// instruction relative within the Code section of the WebAssembly file.312// For this reason Section::GetFileAddress() must return zero for the313// Code section.314vm_addr = 0;315} else {316section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());317if (section_type == eSectionTypeOther)318continue;319section_name = sect_info.name;320if (!IsInMemory()) {321vm_size = 0;322vm_addr = 0;323}324}325326SectionSP section_sp(327new Section(GetModule(), // Module to which this section belongs.328this, // ObjectFile to which this section belongs and329// should read section data from.330section_type, // Section ID.331section_name, // Section name.332section_type, // Section type.333vm_addr, // VM address.334vm_size, // VM size in bytes of this section.335file_offset, // Offset of this section in the file.336sect_info.size, // Size of the section as found in the file.3370, // Alignment of the section3380, // Flags for this section.3391)); // Number of host bytes per target byte340m_sections_up->AddSection(section_sp);341unified_section_list.AddSection(section_sp);342}343}344345bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,346bool value_is_offset) {347/// In WebAssembly, linear memory is disjointed from code space. The VM can348/// load multiple instances of a module, which logically share the same code.349/// We represent a wasm32 code address with 64-bits, like:350/// 63 32 31 0351/// +---------------+---------------+352/// + module_id | offset |353/// +---------------+---------------+354/// where the lower 32 bits represent a module offset (relative to the module355/// start not to the beginning of the code section) and the higher 32 bits356/// uniquely identify the module in the WebAssembly VM.357/// In other words, we assume that each WebAssembly module is loaded by the358/// engine at a 64-bit address that starts at the boundary of 4GB pages, like359/// 0x0000000400000000 for module_id == 4.360/// These 64-bit addresses will be used to request code ranges for a specific361/// module from the WebAssembly engine.362363assert(m_memory_addr == LLDB_INVALID_ADDRESS ||364m_memory_addr == load_address);365366ModuleSP module_sp = GetModule();367if (!module_sp)368return false;369370DecodeSections();371372size_t num_loaded_sections = 0;373SectionList *section_list = GetSectionList();374if (!section_list)375return false;376377const size_t num_sections = section_list->GetSize();378for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {379SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));380if (target.SetSectionLoadAddress(381section_sp, load_address | section_sp->GetFileOffset())) {382++num_loaded_sections;383}384}385386return num_loaded_sections > 0;387}388389DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {390DataExtractor data;391if (m_file) {392if (offset < GetByteSize()) {393size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);394auto buffer_sp = MapFileData(m_file, size, offset);395return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());396}397} else {398ProcessSP process_sp(m_process_wp.lock());399if (process_sp) {400auto data_up = std::make_unique<DataBufferHeap>(size, 0);401Status readmem_error;402size_t bytes_read = process_sp->ReadMemory(403offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);404if (bytes_read > 0) {405DataBufferSP buffer_sp(data_up.release());406data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());407}408}409}410411data.SetByteOrder(GetByteOrder());412return data;413}414415std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {416static ConstString g_sect_name_external_debug_info("external_debug_info");417418for (const section_info §_info : m_sect_infos) {419if (g_sect_name_external_debug_info == sect_info.name) {420const uint32_t kBufferSize = 1024;421DataExtractor section_header_data =422ReadImageData(sect_info.offset, kBufferSize);423llvm::DataExtractor data = section_header_data.GetAsLLVM();424llvm::DataExtractor::Cursor c(0);425std::optional<ConstString> symbols_url = GetWasmString(data, c);426if (symbols_url)427return FileSpec(symbols_url->GetStringRef());428}429}430return std::nullopt;431}432433void ObjectFileWasm::Dump(Stream *s) {434ModuleSP module_sp(GetModule());435if (!module_sp)436return;437438std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());439440llvm::raw_ostream &ostream = s->AsRawOstream();441ostream << static_cast<void *>(this) << ": ";442s->Indent();443ostream << "ObjectFileWasm, file = '";444m_file.Dump(ostream);445ostream << "', arch = ";446ostream << GetArchitecture().GetArchitectureName() << "\n";447448SectionList *sections = GetSectionList();449if (sections) {450sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,451UINT32_MAX);452}453ostream << "\n";454DumpSectionHeaders(ostream);455ostream << "\n";456}457458void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,459const section_info_t &sh) {460ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "461<< llvm::format_hex(sh.offset, 10) << " "462<< llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)463<< "\n";464}465466void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {467ostream << "Section Headers\n";468ostream << "IDX name addr size id\n";469ostream << "==== ---------------- ---------- ---------- ------\n";470471uint32_t idx = 0;472for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();473++pos, ++idx) {474ostream << "[" << llvm::format_decimal(idx, 2) << "] ";475ObjectFileWasm::DumpSectionHeader(ostream, *pos);476}477}478479480