// Path: blob/main/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
// 35323 views
//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// Register objects for access by profilers via the perf JIT interface.9//10//===----------------------------------------------------------------------===//1112#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"1314#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"1516#include "llvm/Support/FileSystem.h"17#include "llvm/Support/MemoryBuffer.h"18#include "llvm/Support/Path.h"19#include "llvm/Support/Process.h"20#include "llvm/Support/Threading.h"2122#include <mutex>23#include <optional>2425#ifdef __linux__2627#include <sys/mman.h> // mmap()28#include <time.h> // clock_gettime(), time(), localtime_r() */29#include <unistd.h> // for read(), close()3031#define DEBUG_TYPE "orc"3233// language identifier (XXX: should we generate something better from debug34// info?)35#define JIT_LANG "llvm-IR"36#define LLVM_PERF_JIT_MAGIC \37((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \38(uint32_t)'D')39#define LLVM_PERF_JIT_VERSION 14041using namespace llvm;42using namespace llvm::orc;4344struct PerfState {45// cache lookups46uint32_t Pid;4748// base directory for output data49std::string JitPath;5051// output data stream, closed via Dumpstream52int DumpFd = -1;5354// output data stream55std::unique_ptr<raw_fd_ostream> Dumpstream;5657// perf mmap marker58void *MarkerAddr = NULL;59};6061// prevent concurrent dumps from messing up the output file62static std::mutex Mutex;63static std::optional<PerfState> State;6465struct RecHeader {66uint32_t Id;67uint32_t TotalSize;68uint64_t Timestamp;69};7071struct DIR {72RecHeader Prefix;73uint64_t CodeAddr;74uint64_t 
NrEntry;75};7677struct DIE {78uint64_t CodeAddr;79uint32_t Line;80uint32_t Discrim;81};8283struct CLR {84RecHeader Prefix;85uint32_t Pid;86uint32_t Tid;87uint64_t Vma;88uint64_t CodeAddr;89uint64_t CodeSize;90uint64_t CodeIndex;91};9293struct UWR {94RecHeader Prefix;95uint64_t UnwindDataSize;96uint64_t EhFrameHeaderSize;97uint64_t MappedSize;98};99100static inline uint64_t timespec_to_ns(const struct timespec *TS) {101const uint64_t NanoSecPerSec = 1000000000;102return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec;103}104105static inline uint64_t perf_get_timestamp() {106timespec TS;107if (clock_gettime(CLOCK_MONOTONIC, &TS))108return 0;109110return timespec_to_ns(&TS);111}112113static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {114assert(State && "PerfState not initialized");115LLVM_DEBUG(dbgs() << "Writing debug record with "116<< DebugRecord.Entries.size() << " entries\n");117[[maybe_unused]] size_t Written = 0;118DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),119DebugRecord.Prefix.TotalSize, perf_get_timestamp()},120DebugRecord.CodeAddr, DebugRecord.Entries.size()};121State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir));122Written += sizeof(Dir);123for (auto &Die : DebugRecord.Entries) {124DIE d{Die.Addr, Die.Lineno, Die.Discrim};125State->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d));126State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1);127Written += sizeof(d) + Die.Name.size() + 1;128}129LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");130}131132static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {133assert(State && "PerfState not initialized");134uint32_t Tid = get_threadid();135LLVM_DEBUG(dbgs() << "Writing code record with code size "136<< CodeRecord.CodeSize << " and code index "137<< CodeRecord.CodeIndex << "\n");138CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),139CodeRecord.Prefix.TotalSize, 
perf_get_timestamp()},140State->Pid,141Tid,142CodeRecord.Vma,143CodeRecord.CodeAddr,144CodeRecord.CodeSize,145CodeRecord.CodeIndex};146LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, "147<< CodeRecord.Name.size() + 1 << " bytes of name, "148<< CodeRecord.CodeSize << " bytes of code\n");149State->Dumpstream->write(reinterpret_cast<const char *>(&Clr), sizeof(Clr));150State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);151State->Dumpstream->write((const char *)CodeRecord.CodeAddr,152CodeRecord.CodeSize);153}154155static void156writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {157assert(State && "PerfState not initialized");158dbgs() << "Writing unwind record with unwind data size "159<< UnwindRecord.UnwindDataSize << " and EH frame header size "160<< UnwindRecord.EHFrameHdrSize << " and mapped size "161<< UnwindRecord.MappedSize << "\n";162UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),163UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},164UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,165UnwindRecord.MappedSize};166LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "167<< UnwindRecord.EHFrameHdrSize168<< " bytes of EH frame header, "169<< UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize170<< " bytes of EH frame\n");171State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr));172if (UnwindRecord.EHFrameHdrAddr)173State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr,174UnwindRecord.EHFrameHdrSize);175else176State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(),177UnwindRecord.EHFrameHdrSize);178State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr,179UnwindRecord.UnwindDataSize -180UnwindRecord.EHFrameHdrSize);181}182183static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {184if (!State)185return make_error<StringError>("PerfState not initialized",186inconvertibleErrorCode());187188// 
Serialize the batch189std::lock_guard<std::mutex> Lock(Mutex);190if (Batch.UnwindingRecord.Prefix.TotalSize > 0)191writeUnwindRecord(Batch.UnwindingRecord);192193for (const auto &DebugInfo : Batch.DebugInfoRecords)194writeDebugRecord(DebugInfo);195196for (const auto &CodeLoad : Batch.CodeLoadRecords)197writeCodeRecord(CodeLoad);198199State->Dumpstream->flush();200201return Error::success();202}203204struct Header {205uint32_t Magic; // characters "JiTD"206uint32_t Version; // header version207uint32_t TotalSize; // total size of header208uint32_t ElfMach; // elf mach target209uint32_t Pad1; // reserved210uint32_t Pid;211uint64_t Timestamp; // timestamp212uint64_t Flags; // flags213};214215static Error OpenMarker(PerfState &State) {216// We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap217// is captured either live (perf record running when we mmap) or in deferred218// mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump219// file for more meta data info about the jitted code. 
Perf report/annotate220// detect this special filename and process the jitdump file.221//222// Mapping must be PROT_EXEC to ensure it is captured by perf record223// even when not using -d option.224State.MarkerAddr =225::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC,226MAP_PRIVATE, State.DumpFd, 0);227228if (State.MarkerAddr == MAP_FAILED)229return make_error<llvm::StringError>("could not mmap JIT marker",230inconvertibleErrorCode());231232return Error::success();233}234235void CloseMarker(PerfState &State) {236if (!State.MarkerAddr)237return;238239munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate());240State.MarkerAddr = nullptr;241}242243static Expected<Header> FillMachine(PerfState &State) {244Header Hdr;245Hdr.Magic = LLVM_PERF_JIT_MAGIC;246Hdr.Version = LLVM_PERF_JIT_VERSION;247Hdr.TotalSize = sizeof(Hdr);248Hdr.Pid = State.Pid;249Hdr.Timestamp = perf_get_timestamp();250251char Id[16];252struct {253uint16_t e_type;254uint16_t e_machine;255} Info;256257size_t RequiredMemory = sizeof(Id) + sizeof(Info);258259ErrorOr<std::unique_ptr<MemoryBuffer>> MB =260MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);261262// This'll not guarantee that enough data was actually read from the263// underlying file. Instead the trailing part of the buffer would be264// zeroed. 
Given the ELF signature check below that seems ok though,265// it's unlikely that the file ends just after that, and the266// consequence would just be that perf wouldn't recognize the267// signature.268if (!MB)269return make_error<llvm::StringError>("could not open /proc/self/exe",270MB.getError());271272memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id));273memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info));274275// check ELF signature276if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F')277return make_error<llvm::StringError>("invalid ELF signature",278inconvertibleErrorCode());279280Hdr.ElfMach = Info.e_machine;281282return Hdr;283}284285static Error InitDebuggingDir(PerfState &State) {286time_t Time;287struct tm LocalTime;288char TimeBuffer[sizeof("YYYYMMDD")];289SmallString<64> Path;290291// search for location to dump data to292if (const char *BaseDir = getenv("JITDUMPDIR"))293Path.append(BaseDir);294else if (!sys::path::home_directory(Path))295Path = ".";296297// create debug directory298Path += "/.debug/jit/";299if (auto EC = sys::fs::create_directories(Path)) {300std::string ErrStr;301raw_string_ostream ErrStream(ErrStr);302ErrStream << "could not create jit cache directory " << Path << ": "303<< EC.message() << "\n";304return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());305}306307// create unique directory for dump data related to this process308time(&Time);309localtime_r(&Time, &LocalTime);310strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);311Path += JIT_LANG "-jit-";312Path += TimeBuffer;313314SmallString<128> UniqueDebugDir;315316using sys::fs::createUniqueDirectory;317if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {318std::string ErrStr;319raw_string_ostream ErrStream(ErrStr);320ErrStream << "could not create unique jit cache directory "321<< UniqueDebugDir << ": " << EC.message() << "\n";322return make_error<StringError>(std::move(ErrStr), 
inconvertibleErrorCode());323}324325State.JitPath = std::string(UniqueDebugDir);326327return Error::success();328}329330static Error registerJITLoaderPerfStartImpl() {331PerfState Tentative;332Tentative.Pid = sys::Process::getProcessId();333// check if clock-source is supported334if (!perf_get_timestamp())335return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",336inconvertibleErrorCode());337338if (auto Err = InitDebuggingDir(Tentative))339return Err;340341std::string Filename;342raw_string_ostream FilenameBuf(Filename);343FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump";344345// Need to open ourselves, because we need to hand the FD to OpenMarker() and346// raw_fd_ostream doesn't expose the FD.347using sys::fs::openFileForWrite;348if (auto EC = openFileForReadWrite(FilenameBuf.str(), Tentative.DumpFd,349sys::fs::CD_CreateNew, sys::fs::OF_None)) {350std::string ErrStr;351raw_string_ostream ErrStream(ErrStr);352ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": "353<< EC.message() << "\n";354return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());355}356357Tentative.Dumpstream =358std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true);359360auto Header = FillMachine(Tentative);361if (!Header)362return Header.takeError();363364// signal this process emits JIT information365if (auto Err = OpenMarker(Tentative))366return Err;367368Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Header.get()),369sizeof(*Header));370371// Everything initialized, can do profiling now.372if (Tentative.Dumpstream->has_error())373return make_error<StringError>("could not write JIT dump header",374inconvertibleErrorCode());375376State = std::move(Tentative);377return Error::success();378}379380static Error registerJITLoaderPerfEndImpl() {381if (!State)382return make_error<StringError>("PerfState not initialized",383inconvertibleErrorCode());384385RecHeader Close;386Close.Id = 
static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);387Close.TotalSize = sizeof(Close);388Close.Timestamp = perf_get_timestamp();389State->Dumpstream->write(reinterpret_cast<const char *>(&Close),390sizeof(Close));391if (State->MarkerAddr)392CloseMarker(*State);393394State.reset();395return Error::success();396}397398extern "C" llvm::orc::shared::CWrapperFunctionResult399llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {400using namespace orc::shared;401return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(402Data, Size, registerJITLoaderPerfImpl)403.release();404}405406extern "C" llvm::orc::shared::CWrapperFunctionResult407llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {408using namespace orc::shared;409return WrapperFunction<SPSError()>::handle(Data, Size,410registerJITLoaderPerfStartImpl)411.release();412}413414extern "C" llvm::orc::shared::CWrapperFunctionResult415llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {416using namespace orc::shared;417return WrapperFunction<SPSError()>::handle(Data, Size,418registerJITLoaderPerfEndImpl)419.release();420}421422#else423424using namespace llvm;425using namespace llvm::orc;426427static Error badOS() {428using namespace llvm;429return llvm::make_error<StringError>(430"unsupported OS (perf support is only available on linux!)",431inconvertibleErrorCode());432}433434static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); }435436extern "C" llvm::orc::shared::CWrapperFunctionResult437llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {438using namespace shared;439return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size,440badOSBatch)441.release();442}443444extern "C" llvm::orc::shared::CWrapperFunctionResult445llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {446using namespace shared;447return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();448}449450extern "C" 
llvm::orc::shared::CWrapperFunctionResult451llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {452using namespace shared;453return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();454}455456#endif457458459