Path: blob/main/crates/fuzzing/src/single_module_fuzzer.rs
1693 views
//! Support for maintaining the usefulness of a corpus over time.1//!2//! Wasmtime's fuzzing strategy in general is to use `wasm-smith` to generate3//! modules which interprets fuzz input from libFuzzer as a sort of "DNA". This4//! works to generate pretty interesting modules but falls down over time5//! because the DNA to generate the same module over time can change. This6//! means that maintaining a corpus for Wasmtime is not the most useful thing7//! in the world unfortunately and any historical discoveries of coverage need8//! to be rediscovered every time the DNA changes.9//!10//! To help with this the module here implements a scheme where Wasmtime's fuzz11//! inputs are highly likely to be of the form:12//!13//! ```text14//! [ ... wasm module ... ][ .. fuzz custom section .. ]15//! ```16//!17//! The `fuzz custom section` here contains the original fuzz input used to18//! generate the `wasm module`, and if the DNA hasn't changed then it should19//! still be possible to do that as well. The benefit of this format, though,20//! is that if the DNA is changed then the interpretation of the `fuzz custom21//! section` will change but the original `wasm module` will not. This enables22//! us to populate the corpus, ideally, with a set of interesting `wasm module`23//! entries.24//!25//! Over time the `fuzz custom section` will "bitrot" and will be no longer able26//! to generate the original `wasm module`. The main consequence of this is that27//! when the original test case is mutated the generated wasm module from the28//! mutation will be nothing alike from the original test case's wasm module.29//! This means libFuzzer will have to rediscover ways to mutate into30//! interesting modules, but we're no worse off than before hopefully.31//! Additionally this more easily opens the door to integrate `wasm-mutate` one32//! day into mutation here as well.33//!34//! Currently this is all supported via two methods:35//!36//! 1. A custom mutator is registered with libfuzzer. This means that all37//! inputs generated by the mutator, so long as they fit, will be the38//! "envelope" format of this module. This means that the corpus will39//! hopefully naturally get populated with wasm files rather than random40//! inputs. Note that this is not guaranteed to succeed since sometimes the41//! buffer to store the fuzz input in the mutator is not big enough to store42//! the final wasm module, in which case a non-enveloped wasm module is43//! stored.44//!45//! 2. If the environment variable `WRITE_FUZZ_INPUT_TO is set then the fuzz46//! input, in its envelope format, will be written to the specified file.47//! This can be useful in case an input is in its binary form or if a48//! preexisting corpus is being rewritten.4950use std::borrow::Cow;5152use arbitrary::{Arbitrary, Result, Unstructured};53use wasm_encoder::Section;5455/// Helper macro for fuzz targets that are single-module fuzzers.56///57/// This combines the features of this module into one macro invocation to58/// generate the fuzz entry point and mutator in tandem.59#[macro_export]60macro_rules! single_module_fuzzer {61($execute:ident $generate:ident) => {62libfuzzer_sys::fuzz_target!(|data: &[u8]| {63$crate::init_fuzzing();64drop($crate::single_module_fuzzer::execute(65data, $execute, $generate,66));67});6869libfuzzer_sys::fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {70$crate::single_module_fuzzer::mutate(71data,72size,73max_size,74$generate,75libfuzzer_sys::fuzzer_mutate,76)77});78};79}8081/// Executes a "single module fuzzer" given the raw `input` from libfuzzer.82///83/// This will use the `input` to generate `T`, some configuration, which is84/// then used by `gen_module` to generate a WebAssembly module. The module is85/// then passed to `run` along with the configuration and remaining data that86/// can be used as fuzz input.87///88/// The main purpose of this function is to handle when `input` is actually a89/// WebAssembly module "envelope". If the `input` is a valid wasm module and90/// ends with a specific trailing custom section then the module generated by91/// `gen_module` is actually discarded. The purpose of this is to handle the92/// case where the input used to generate a module may change over time but93/// we're still interested in the historical coverage of the original wasm94/// module.95pub fn execute<'a, T, U>(96input: &'a [u8],97run: fn(&[u8], KnownValid, T, &mut Unstructured<'a>) -> Result<U>,98gen_module: fn(&mut T, &mut Unstructured<'a>) -> Result<(Vec<u8>, KnownValid)>,99) -> Result<U>100where101T: Arbitrary<'a>,102{103let (fuzz_data, module_in_input) = match extract_fuzz_input(input) {104Ok(input) => {105log::debug!("fuzz input was a valid module with trailing custom section");106(input.fuzz_data, Some(input.module))107}108Err(e) => {109log::debug!("fuzz input not a valid module: {e:?}");110(input, None)111}112};113let mut u = Unstructured::new(fuzz_data);114let mut config = u.arbitrary()?;115let (generated, known_valid) = gen_module(&mut config, &mut u)?;116let module = module_in_input.unwrap_or(&generated);117if let Ok(file) = std::env::var("WRITE_FUZZ_INPUT_TO") {118std::fs::write(file, encode_module(&module, &fuzz_data)).unwrap();119}120let known_valid = if module_in_input.is_some() {121KnownValid::No122} else {123known_valid124};125run(module, known_valid, config, &mut u)126}127128/// Used as part of `execute` above to determine whether a module is known to129/// be valid ahead of time.130#[derive(Debug, PartialEq, Eq, Copy, Clone)]131pub enum KnownValid {132/// This module is known to be valid so it should assert compilation133/// succeeds for example.134Yes,135/// This module is not known to be valid and it may not compile136/// successfully. Note that it's also not known to compile unsuccessfully.137No,138}139140const SECTION_NAME: &str = "wasmtime-fuzz-input";141142/// Implementation of a libfuzzer custom mutator for a single-module-fuzzer.143///144/// This mutator will take the seed specified in `data` and attempt to mutate145/// it with the provided `mutate` function. The `mutate` function may not146/// receive the `data` as-specified, but instead may receive only the seed147/// that was used to generate `data`.148pub fn mutate<T>(149data: &mut [u8],150mut size: usize,151max_size: usize,152gen_module: fn(&mut T, &mut Unstructured<'_>) -> Result<(Vec<u8>, KnownValid)>,153mutate: fn(&mut [u8], usize, usize) -> usize,154) -> usize155where156T: for<'a> Arbitrary<'a>,157{158// If `data` is a valid wasm module with the fuzz seed at the end, then159// discard the wasm module portion and instead shuffle the seed into the160// beginning of the `data` slice. This is the "de-envelope" part of the161// seed management here.162//163// After this the `data` array should contain the raw contents used to164// produce the module and is ripe for mutation/minimization/etc.165if let Ok(input) = extract_fuzz_input(&data[..size]) {166let start = input.fuzz_data.as_ptr() as usize - data.as_ptr() as usize;167size = input.fuzz_data.len();168data.copy_within(start..start + input.fuzz_data.len(), 0);169}170171// Delegate to the provided mutation function for standard mutations to172// apply.173let new_size = mutate(data, size, max_size);174175// Next the goal of this function is to produce a test case which is an176// actual wasm module. To that end this will run module generation over the177// input provided. If this is all successful then the custom section178// representing the seed is appended to the module, making it a sort of179// self-referential module.180//181// After all this it's copied into `data` if the it fits. If the module182// doesn't fit then the seed is left un-perturbed since there's not much183// that we can do about that.184let mut u = Unstructured::new(&data[..new_size]);185match u186.arbitrary()187.and_then(|mut config| gen_module(&mut config, &mut u))188{189Ok((module, _known_valid)) => {190let module = encode_module(&module, &data[..new_size]);191192if module.len() < max_size {193log::debug!(194"successfully generated mutated module with \195appended input section"196);197data[..module.len()].copy_from_slice(&module);198return module.len();199} else {200log::debug!("mutated module doesn't fit in original slice");201}202}203204// If our new seed can't generate a new module then that's something205// for the fuzzer to figure out later when it "officially" executes206// this fuzz input. For the purposes of this function it's not too207// useful to try to put it in an envelope otherwise so ignore it.208Err(e) => {209log::debug!("failed to generate module from mutated seed {e:?}");210}211}212213new_size214}215216fn encode_module(module: &[u8], fuzz_data: &[u8]) -> Vec<u8> {217let mut module = module.to_vec();218wasm_encoder::CustomSection {219name: SECTION_NAME.into(),220data: Cow::Borrowed(&fuzz_data),221}222.append_to(&mut module);223module224}225226struct FuzzInput<'a> {227/// The module extracted from the input, without the fuzz input custom228/// section.229module: &'a [u8],230231/// The contents of the fuzz input custom section.232fuzz_data: &'a [u8],233}234235/// Attempts to extract a fuzz input from the `data` provided.236///237/// This will attempt to read `data` as a WebAssembly binary. If successful238/// and the module ends with a custom section indicating it's a fuzz input239/// then the contents of the custom section are returned along with the240/// contents of the original module.241fn extract_fuzz_input(data: &[u8]) -> anyhow::Result<FuzzInput<'_>> {242use wasmparser::{Parser, Payload};243let mut prev_end = 8;244for section in Parser::new(0).parse_all(data) {245let section = section?;246247// If this is a custom section, the end of the section is the end of248// the entire module, and it's got the expected name, then this section249// is assumed to be the input seed to the fuzzer.250//251// The section's contents are returned through `fuzz_data` and the wasm252// binary format means that we can simply chop off the last custom253// section and still have a valid module.254if let Payload::CustomSection(s) = §ion {255if s.name() == SECTION_NAME && s.range().end == data.len() {256return Ok(FuzzInput {257module: &data[..prev_end],258fuzz_data: s.data(),259});260}261}262263// Record each section's end to record what the end of the module is264// up to this point.265if let Some((_, range)) = section.as_section() {266prev_end = range.end;267}268}269anyhow::bail!("no input found")270}271272#[cfg(test)]273mod tests {274use super::*;275use rand::rngs::SmallRng;276use rand::{RngCore, SeedableRng};277278#[test]279fn changing_configuration_does_not_change_module() {280drop(env_logger::try_init());281282// This test asserts that if the static configuration associated with a283// module changes then the generated module, as sourced from the284// original fuzz input, does not change. That's the whole purpose of285// this module, to enable our fuzz inputs to be in a format that's286// resilient to changes in configuration over time (or at least the287// module part of the input).288//289// This test will execute N=200 iterations where each iteration will290// attempt to, with some fresh random data, generate a module. This291// module is then "mutated" with a noop mutation to effectively292// serialize it into the envelope where the module is preserved. The293// now-mutated input, which should be a wasm module, is then passed294// as the seed to a second execution which has a different static input.295//296// This simulates having a fuzzer one day produce an interesting test297// case through mutation, and then the next day the configuration of298// the fuzzer changes. On both days the module input to the function299// should have been the same.300301let mut rng = SmallRng::seed_from_u64(0);302let max_size = 4096;303let seed_size = 128;304let mut buf = vec![0; max_size];305let mut compares = 0;306for _ in 0..200 {307rng.fill_bytes(&mut buf[..seed_size]);308309let run1 = run_config::<u32>;310let mutate = mutate::<u32>;311let run2 = run_config::<(u32, u32)>;312313if let Ok((module, known_valid)) = execute(&buf[..seed_size], run1, generate) {314assert_eq!(known_valid, KnownValid::Yes);315let new_size = mutate(&mut buf, seed_size, max_size, generate, noop_mutate);316if let Ok((module2, known_valid)) = execute(&buf[..new_size], run2, generate) {317assert_eq!(known_valid, KnownValid::No);318compares += 1;319if module != module2 {320panic!("modules differ");321}322}323}324}325326// At least one iteration should have succeeded in the fuzz generation327// above.328assert!(compares > 0);329330fn run_config<T>(331data: &[u8],332known_valid: KnownValid,333_: T,334_: &mut Unstructured<'_>,335) -> Result<(Vec<u8>, KnownValid)>336where337T: for<'a> Arbitrary<'a>,338{339Ok((data.to_vec(), known_valid))340}341342fn generate<T>(_: &mut T, u: &mut Unstructured<'_>) -> Result<(Vec<u8>, KnownValid)>343where344T: for<'a> Arbitrary<'a>,345{346Ok((347u.arbitrary::<wasm_smith::Module>()?.to_bytes(),348KnownValid::Yes,349))350}351352fn noop_mutate(_buf: &mut [u8], size: usize, _new_size: usize) -> usize {353size354}355}356}357358359