Path: blob/main/crates/wasi-preview1-component-adapter/build.rs
1690 views
use std::env;1use std::path::PathBuf;23fn main() {4let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());56let wasm = build_raw_intrinsics();7let archive = build_archive(&wasm);89std::fs::write(out_dir.join("libwasm-raw-intrinsics.a"), &archive).unwrap();10println!("cargo:rustc-link-lib=static=wasm-raw-intrinsics");11println!(12"cargo:rustc-link-search=native={}",13out_dir.to_str().unwrap()14);1516// Some specific flags to `wasm-ld` to inform the shape of this adapter.17// Notably we're importing memory from the main module and additionally our18// own module has no stack at all since it's specifically allocated at19// startup.20println!("cargo:rustc-link-arg=--import-memory");21println!("cargo:rustc-link-arg=-zstack-size=0");22}2324/// This function will produce a wasm module which is itself an object file25/// that is the basic equivalent of:26///27/// ```rust28/// std::arch::global_asm!(29/// "30/// .globaltype internal_state_ptr, i3231/// internal_state_ptr:32/// "33/// );34///35/// #[unsafe(no_mangle)]36/// extern "C" fn get_state_ptr() -> *mut u8 {37/// unsafe {38/// let ret: *mut u8;39/// std::arch::asm!(40/// "41/// global.get internal_state_ptr42/// ",43/// out(local) ret,44/// options(nostack, readonly)45/// );46/// ret47/// }48/// }49///50/// #[unsafe(no_mangle)]51/// extern "C" fn set_state_ptr(val: *mut u8) {52/// unsafe {53/// std::arch::asm!(54/// "55/// local.get {}56/// global.set internal_state_ptr57/// ",58/// in(local) val,59/// options(nostack, readonly)60/// );61/// }62/// }63///64/// // And likewise for `allocation_state`, `get_allocation_state`, and `set_allocation_state`65/// ```66///67/// The main trickiness here is getting the `reloc.CODE` and `linking` sections68/// right.69fn build_raw_intrinsics() -> Vec<u8> {70use wasm_encoder::Instruction::*;71use wasm_encoder::*;7273let mut module = Module::new();7475let mut types = TypeSection::new();76types.ty().function([], [ValType::I32]);77types.ty().function([ValType::I32], []);78module.section(&types);7980// Declare the functions, using the type we just added.81let mut funcs = FunctionSection::new();82funcs.function(0);83funcs.function(1);84funcs.function(0);85funcs.function(1);86module.section(&funcs);8788// Declare the globals.89let mut globals = GlobalSection::new();90// internal_state_ptr91globals.global(92GlobalType {93val_type: ValType::I32,94mutable: true,95shared: false,96},97&ConstExpr::i32_const(0),98);99// allocation_state100globals.global(101GlobalType {102val_type: ValType::I32,103mutable: true,104shared: false,105},106&ConstExpr::i32_const(0),107);108module.section(&globals);109110// Here the `code` section is defined. This is tricky because an offset is111// needed within the code section itself for the `reloc.CODE` section112// later. At this time `wasm-encoder` doesn't give enough functionality to113// use the high-level APIs. so everything is done manually here.114//115// First the function bodies are created and then they're appended into a116// code section.117118let mut code = Vec::new();1194u32.encode(&mut code); // number of functions120121let global_get = 0x23;122let global_set = 0x24;123124let encode = |code: &mut _, global, instruction| {125assert!(global < 0x7F);126127let mut body = Vec::new();1280u32.encode(&mut body); // no locals129if instruction == global_set {130LocalGet(0).encode(&mut body);131}132let global_offset = body.len() + 1;133// global.get $global ;; but with maximal encoding of $global134body.extend_from_slice(&[instruction, 0x80u8 + global, 0x80, 0x80, 0x80, 0x00]);135End.encode(&mut body);136body.len().encode(code); // length of the function137let offset = code.len() + global_offset;138code.extend_from_slice(&body); // the function itself139offset140};141142let internal_state_ptr_ref1 = encode(&mut code, 0, global_get); // get_state_ptr143let internal_state_ptr_ref2 = encode(&mut code, 0, global_set); // set_state_ptr144let allocation_state_ref1 = encode(&mut code, 1, global_get); // get_allocation_state145let allocation_state_ref2 = encode(&mut code, 1, global_set); // set_allocation_state146147module.section(&RawSection {148id: SectionId::Code as u8,149data: &code,150});151152// Here the linking section is constructed. There is one symbol for each function and global. The injected153// globals here are referenced in the relocations below.154//155// More information about this format is at156// https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md157{158let mut linking = Vec::new();159linking.push(0x02); // version160161linking.push(0x08); // `WASM_SYMBOL_TABLE`162let mut subsection = Vec::new();1636u32.encode(&mut subsection); // 6 symbols (4 functions + 2 globals)164165subsection.push(0x00); // SYMTAB_FUNCTION1660x00.encode(&mut subsection); // flags1670u32.encode(&mut subsection); // function index168"get_state_ptr".encode(&mut subsection); // symbol name169170subsection.push(0x00); // SYMTAB_FUNCTION1710x00.encode(&mut subsection); // flags1721u32.encode(&mut subsection); // function index173"set_state_ptr".encode(&mut subsection); // symbol name174175subsection.push(0x00); // SYMTAB_FUNCTION1760x00.encode(&mut subsection); // flags1772u32.encode(&mut subsection); // function index178"get_allocation_state".encode(&mut subsection); // symbol name179180subsection.push(0x00); // SYMTAB_FUNCTION1810x00.encode(&mut subsection); // flags1823u32.encode(&mut subsection); // function index183"set_allocation_state".encode(&mut subsection); // symbol name184185subsection.push(0x02); // SYMTAB_GLOBAL1860x02.encode(&mut subsection); // flags (WASM_SYM_BINDING_LOCAL)1870u32.encode(&mut subsection); // global index188"internal_state_ptr".encode(&mut subsection); // symbol name189190subsection.push(0x02); // SYMTAB_GLOBAL1910x00.encode(&mut subsection); // flags1921u32.encode(&mut subsection); // global index193"allocation_state".encode(&mut subsection); // symbol name194195subsection.encode(&mut linking);196module.section(&CustomSection {197name: "linking".into(),198data: linking.into(),199});200}201202// A `reloc.CODE` section is appended here with relocations for the203// `global`-referencing instructions that were added.204{205let mut reloc = Vec::new();2063u32.encode(&mut reloc); // target section (code is the 4th section, 3 when 0-indexed)2074u32.encode(&mut reloc); // 4 relocations208209reloc.push(0x07); // R_WASM_GLOBAL_INDEX_LEB210internal_state_ptr_ref1.encode(&mut reloc); // offset2114u32.encode(&mut reloc); // symbol index212213reloc.push(0x07); // R_WASM_GLOBAL_INDEX_LEB214internal_state_ptr_ref2.encode(&mut reloc); // offset2154u32.encode(&mut reloc); // symbol index216217reloc.push(0x07); // R_WASM_GLOBAL_INDEX_LEB218allocation_state_ref1.encode(&mut reloc); // offset2195u32.encode(&mut reloc); // symbol index220221reloc.push(0x07); // R_WASM_GLOBAL_INDEX_LEB222allocation_state_ref2.encode(&mut reloc); // offset2235u32.encode(&mut reloc); // symbol index224225module.section(&CustomSection {226name: "reloc.CODE".into(),227data: reloc.into(),228});229}230231module.finish()232}233234/// This function produces the output of `llvm-ar crus libfoo.a foo.o` given235/// the object file above as input. The archive is what's eventually fed to236/// LLD.237///238/// Like above this is still tricky, mainly around the production of the symbol239/// table.240fn build_archive(wasm: &[u8]) -> Vec<u8> {241use object::{U32Bytes, bytes_of, endian::BigEndian};242243let mut archive = Vec::new();244archive.extend_from_slice(&object::archive::MAGIC);245246// The symbol table is in the "GNU" format which means it has a structure247// that looks like:248//249// * a big-endian 32-bit integer for the number of symbols250// * N big-endian 32-bit integers for the offset to the object file, within251// the entire archive, for which object has the symbol252// * N nul-delimited strings for each symbol253//254// Here we're building an archive with just a few symbols so it's a bit255// easier. Note though we don't know the offset of our `intrinsics.o` up256// front so it's left as 0 for now and filled in later.257258let syms = [259"get_state_ptr",260"set_state_ptr",261"get_allocation_state",262"set_allocation_state",263"allocation_state",264];265266let mut symbol_table = Vec::new();267symbol_table.extend_from_slice(bytes_of(&U32Bytes::new(BigEndian, syms.len() as u32)));268for _ in syms.iter() {269symbol_table.extend_from_slice(bytes_of(&U32Bytes::new(BigEndian, 0)));270}271for s in syms.iter() {272symbol_table.extend_from_slice(&std::ffi::CString::new(*s).unwrap().into_bytes_with_nul());273}274275archive.extend_from_slice(bytes_of(&object::archive::Header {276name: *b"/ ",277date: *b"0 ",278uid: *b"0 ",279gid: *b"0 ",280mode: *b"0 ",281size: format!("{:<10}", symbol_table.len())282.as_bytes()283.try_into()284.unwrap(),285terminator: object::archive::TERMINATOR,286}));287let symtab_offset = archive.len();288archive.extend_from_slice(&symbol_table);289290// All archive members must start on even offsets291if archive.len() & 1 == 1 {292archive.push(0x00);293}294295// Now that we have the starting offset of the `intrinsics.o` file go back296// and fill in the offset within the symbol table generated earlier.297let member_offset = archive.len();298for (index, _) in syms.iter().enumerate() {299let index = index + 1;300archive[symtab_offset + (index * 4)..][..4].copy_from_slice(bytes_of(&U32Bytes::new(301BigEndian,302member_offset.try_into().unwrap(),303)));304}305306archive.extend_from_slice(object::bytes_of(&object::archive::Header {307name: *b"intrinsics.o ",308date: *b"0 ",309uid: *b"0 ",310gid: *b"0 ",311mode: *b"644 ",312size: format!("{:<10}", wasm.len()).as_bytes().try_into().unwrap(),313terminator: object::archive::TERMINATOR,314}));315archive.extend_from_slice(&wasm);316archive317}318319320