// Path: blob/main/crates/jit-debug/src/perf_jitdump.rs
// 3064 views
//! Support for jitdump files which can be used by perf for profiling jitted code.1//! Spec definitions for the output format is as described here:2//! <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt>3//!4//! Usage Example:5//! Record6//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm7//! Combine8//! sudo perf inject -v -j -i perf.data -o perf.jit.data9//! Report10//! sudo perf report -i perf.jit.data -F+period,srcline1112use std::fmt::Debug;13use std::fs::{File, OpenOptions};14use std::io::{self, Write};15use std::path::Path;16use std::ptr;17use std::string::String;18use std::vec::Vec;19use std::{mem, process};2021/// Defines jitdump record types22#[repr(u32)]23pub enum RecordId {24/// Value 0: JIT_CODE_LOAD: record describing a jitted function25JitCodeLoad = 0,26/// Value 1: JIT_CODE_MOVE: record describing an already jitted function which is moved27JitCodeMove = 1,28/// Value 2: JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function29JitCodeDebugInfo = 2,30/// Value 3: JIT_CODE_CLOSE: record marking the end of the jit runtime (optional)31JitCodeClose = 3,32/// Value 4: JIT_CODE_UNWINDING_INFO: record describing a function unwinding information33JitCodeUnwindingInfo = 4,34}3536/// Each record starts with this fixed size record header which describes the record that follows37#[derive(Debug, Default, Clone, Copy)]38#[repr(C)]39pub struct RecordHeader {40/// uint32_t id: a value identifying the record type (see below)41pub id: u32,42/// uint32_t total_size: the size in bytes of the record including the header.43pub record_size: u32,44/// uint64_t timestamp: a timestamp of when the record was created.45pub timestamp: u64,46}4748unsafe impl object::Pod for RecordHeader {}4950/// The CodeLoadRecord is used for describing jitted functions51#[derive(Debug, Default, Clone, Copy)]52#[repr(C)]53pub struct CodeLoadRecord {54/// 
Fixed sized header that describes this record55pub header: RecordHeader,56/// `uint32_t pid`: OS process id of the runtime generating the jitted code57pub pid: u32,58/// `uint32_t tid`: OS thread identification of the runtime thread generating the jitted code59pub tid: u32,60/// `uint64_t vma`: virtual address of jitted code start61pub virtual_address: u64,62/// `uint64_t code_addr`: code start address for the jitted code. By default vma = code_addr63pub address: u64,64/// `uint64_t code_size`: size in bytes of the generated jitted code65pub size: u64,66/// `uint64_t code_index`: unique identifier for the jitted code (see below)67pub index: u64,68}6970unsafe impl object::Pod for CodeLoadRecord {}7172/// Describes source line information for a jitted function73#[derive(Debug, Default)]74#[repr(C)]75pub struct DebugEntry {76/// `uint64_t code_addr`: address of function for which the debug information is generated77pub address: u64,78/// `uint32_t line`: source file line number (starting at 1)79pub line: u32,80/// `uint32_t discrim`: column discriminator, 0 is default81pub discriminator: u32,82/// `char name[n]`: source file name in ASCII, including null termination83pub filename: String,84}8586/// Describes debug information for a jitted function. An array of debug entries are87/// appended to this record during writing. 
Note, this record must precede the code88/// load record that describes the same jitted function.89#[derive(Debug, Default, Clone, Copy)]90#[repr(C)]91pub struct DebugInfoRecord {92/// Fixed sized header that describes this record93pub header: RecordHeader,94/// `uint64_t code_addr`: address of function for which the debug information is generated95pub address: u64,96/// `uint64_t nr_entry`: number of debug entries for the function appended to this record97pub count: u64,98}99100unsafe impl object::Pod for DebugInfoRecord {}101102/// Fixed-sized header for each jitdump file103#[derive(Debug, Default, Clone, Copy)]104#[repr(C)]105pub struct FileHeader {106/// `uint32_t magic`: a magic number tagging the file type. The value is 4-byte long and represents the107/// string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can108/// be used to detect the endianness of the file109pub magic: u32,110/// `uint32_t version`: a 4-byte value representing the format version. It is currently set to 2111pub version: u32,112/// `uint32_t total_size`: size in bytes of file header113pub size: u32,114/// `uint32_t elf_mach`: ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)115pub e_machine: u32,116/// `uint32_t pad1`: padding. 
Reserved for future use117pub pad1: u32,118/// `uint32_t pid`: JIT runtime process identification (OS specific)119pub pid: u32,120/// `uint64_t timestamp`: timestamp of when the file was created121pub timestamp: u64,122/// `uint64_t flags`: a bitmask of flags123pub flags: u64,124}125126unsafe impl object::Pod for FileHeader {}127128/// Interface for driving the creation of jitdump files129pub struct JitDumpFile {130/// File instance for the jit dump file131jitdump_file: File,132133map_addr: usize,134map_len: usize,135136/// Unique identifier for jitted code137code_index: u64,138139e_machine: u32,140}141142impl JitDumpFile {143/// Initialize a JitDumpAgent and write out the header144pub fn new(filename: impl AsRef<Path>, e_machine: u32) -> io::Result<Self> {145// Note that the file here is opened in `append` mode to handle the case146// that multiple JIT engines in the same process are all writing to the147// same jitdump file. In this situation we want to append new records148// with what Wasmtime reports and we ideally don't want to interfere149// with anything else.150let jitdump_file = OpenOptions::new()151.read(true)152.write(true)153.create(true)154.append(true)155.open(filename.as_ref())?;156157// After we make our `*.dump` file we execute an `mmap` syscall,158// specifically with executable permissions, to map it into our address159// space. This is required so `perf inject` will work later. 
The `perf160// inject` command will see that an mmap syscall happened, and it'll see161// the filename we mapped, and that'll trigger it to actually read and162// parse the file.163//164// To match what some perf examples are doing we keep this `mmap` alive165// until this agent goes away.166let map_len = 1024;167let map_addr = unsafe {168let ptr = rustix::mm::mmap(169ptr::null_mut(),170map_len,171rustix::mm::ProtFlags::EXEC | rustix::mm::ProtFlags::READ,172rustix::mm::MapFlags::PRIVATE,173&jitdump_file,1740,175)?;176ptr as usize177};178let state = JitDumpFile {179jitdump_file,180map_addr,181map_len,182code_index: 0,183e_machine,184};185state.maybe_write_file_header()?;186Ok(state)187}188}189190impl JitDumpFile {191/// Returns timestamp from a single source192pub fn get_time_stamp(&self) -> u64 {193// We need to use `CLOCK_MONOTONIC` on Linux which is what `Instant`194// conveniently also uses, but `Instant` doesn't allow us to get access195// to nanoseconds as an internal detail, so we calculate the nanoseconds196// ourselves here.197let ts = rustix::time::clock_gettime(rustix::time::ClockId::Monotonic);198// TODO: What does it mean for either sec or nsec to be negative?199(ts.tv_sec * 1_000_000_000 + ts.tv_nsec) as u64200}201202/// Returns the next code index203pub fn next_code_index(&mut self) -> u64 {204let code_index = self.code_index;205self.code_index += 1;206code_index207}208209/// Helper function to write `bytes` to the jitdump file.210///211/// This is effectively a workaround for the limitation of the jitdump file212/// format. Ideally Wasmtime would be writing to its own personal file and213/// wouldn't have to worry about concurrent modifications, but we don't have214/// the luxury of doing that. The jitdump file format requires that there's215/// a single file-per-process with records in it. 
Additionally there might216/// be multiple JIT engines in the same process all writing to this file.217///218/// To handle this situation a best effort is made to write the entirety of219/// `bytes` to the file in one go. The file itself is opened with `O_APPEND`220/// meaning that this should work out just fine if the bytes are written in221/// one call to the `write` syscall. The problem though is what happens on a222/// partial write?223///224/// If there are parallel actors in the same process then a partial write225/// may mean that the file is now corrupted. For example we could write most226/// of `bytes`, but not all, then some other thread writes to the file. The227/// question then is what to do in this situation? On one hand an error228/// could be returned to inform the user that it's corrupt. On the other229/// hand though it's a pretty niche case to have multiple JIT engines in one230/// process and it'd be a bummer if we failed to profile functions that231/// happened to be big enough to require two calls to `write`.232///233/// In the end this for now uses the `write_all` helper in the standard234/// library. That means that this will produce corrupt files in the face of235/// partial writes when there are other engines also writing to the file. In236/// lieu of some actual synchronization protocol between engines though this237/// is about the best that we can do.238fn maybe_atomic_write_all(&self, bytes: &[u8]) -> io::Result<()> {239(&self.jitdump_file).write_all(bytes)?;240Ok(())241}242243fn maybe_write_file_header(&self) -> io::Result<()> {244let header = FileHeader {245timestamp: self.get_time_stamp(),246e_machine: self.e_machine,247magic: 0x4A695444,248version: 1,249size: mem::size_of::<FileHeader>() as u32,250pad1: 0,251pid: process::id(),252flags: 0,253};254255// If it looks like some other engine in the same process has opened the256// file and added data already then assume that they were the ones to257// add the file header. 
If it's empty, though, assume we're the ones to258// add the file header.259//260// This is subject to a TOCTOU-style race condition but there's not261// really anything we can do about that. That'd require higher-level262// coordination in the application to boot up profiling agents serially263// or something like that. Either that or a better dump format where we264// can place output in our own engine-specific file. Alas.265if self.jitdump_file.metadata()?.len() == 0 {266self.maybe_atomic_write_all(object::bytes_of(&header))?;267}268Ok(())269}270271/// Get raw access to the underlying file that is being written to.272pub fn file(&self) -> &File {273&self.jitdump_file274}275276/// Get raw mutable access to the underlying file that is being written to.277pub fn file_mut(&mut self) -> &mut File {278&mut self.jitdump_file279}280281pub fn dump_code_load_record(282&mut self,283method_name: &str,284code: &[u8],285timestamp: u64,286pid: u32,287tid: u32,288) -> io::Result<()> {289let name_len = method_name.len() + 1;290let size_limit = mem::size_of::<CodeLoadRecord>();291292let rh = RecordHeader {293id: RecordId::JitCodeLoad as u32,294record_size: size_limit as u32 + name_len as u32 + code.len() as u32,295timestamp,296};297298let clr = CodeLoadRecord {299header: rh,300pid,301tid,302virtual_address: code.as_ptr() as u64,303address: code.as_ptr() as u64,304size: code.len() as u64,305index: self.next_code_index(),306};307308let mut record = Vec::new();309record.extend_from_slice(object::bytes_of(&clr));310record.extend_from_slice(method_name.as_bytes());311record.push(0); // null terminator for the method name312record.extend_from_slice(code);313self.maybe_atomic_write_all(&record)?;314Ok(())315}316}317318impl Drop for JitDumpFile {319fn drop(&mut self) {320unsafe {321rustix::mm::munmap(self.map_addr as *mut _, self.map_len).unwrap();322}323}324}325326327