Path: blob/main/fuzz/fuzz_targets/cranelift-fuzzgen.rs
1690 views
#![no_main]12use cranelift_codegen::Context;3use cranelift_codegen::ir::Function;4use cranelift_codegen::ir::Signature;5use cranelift_codegen::ir::UserExternalName;6use cranelift_codegen::ir::UserFuncName;7use cranelift_control::ControlPlane;8use libfuzzer_sys::arbitrary;9use libfuzzer_sys::arbitrary::Arbitrary;10use libfuzzer_sys::arbitrary::Unstructured;11use libfuzzer_sys::fuzz_target;12use std::collections::HashMap;13use std::fmt;14use std::sync::LazyLock;15use std::sync::atomic::AtomicU64;16use std::sync::atomic::Ordering;1718use cranelift_codegen::data_value::DataValue;19use cranelift_codegen::ir::{LibCall, TrapCode};20use cranelift_codegen::isa;21use cranelift_filetests::function_runner::{CompiledTestFile, TestFileCompiler, Trampoline};22use cranelift_fuzzgen::*;23use cranelift_interpreter::environment::FuncIndex;24use cranelift_interpreter::environment::FunctionStore;25use cranelift_interpreter::interpreter::{26Interpreter, InterpreterError, InterpreterState, LibCallValues,27};28use cranelift_interpreter::step::ControlFlow;29use cranelift_interpreter::step::CraneliftTrap;30use cranelift_native::builder_with_options;31use smallvec::smallvec;3233const INTERPRETER_FUEL: u64 = 4096;3435/// Gather statistics about the fuzzer executions36struct Statistics {37/// Inputs that fuzzgen can build a function with38/// This is also how many compiles we executed39pub valid_inputs: AtomicU64,40/// How many times did we generate an invalid format?41pub invalid_inputs: AtomicU64,4243/// Total amount of runs that we tried in the interpreter44/// One fuzzer input can have many runs45pub total_runs: AtomicU64,46/// How many runs were successful?47/// This is also how many runs were run in the backend48pub run_result_success: AtomicU64,49/// How many runs resulted in a timeout?50pub run_result_timeout: AtomicU64,51/// How many runs ended with a trap?52pub run_result_trap: HashMap<CraneliftTrap, AtomicU64>,53}5455impl Statistics {56pub fn print(&self, valid_inputs: u64) {57// We get valid_inputs as a param since we already loaded it previously.58let total_runs = self.total_runs.load(Ordering::SeqCst);59let invalid_inputs = self.invalid_inputs.load(Ordering::SeqCst);60let run_result_success = self.run_result_success.load(Ordering::SeqCst);61let run_result_timeout = self.run_result_timeout.load(Ordering::SeqCst);6263println!("== FuzzGen Statistics ====================");64println!("Valid Inputs: {valid_inputs}");65println!(66"Invalid Inputs: {} ({:.1}% of Total Inputs)",67invalid_inputs,68(invalid_inputs as f64 / (valid_inputs + invalid_inputs) as f64) * 100.069);70println!("Total Runs: {total_runs}");71println!(72"Successful Runs: {} ({:.1}% of Total Runs)",73run_result_success,74(run_result_success as f64 / total_runs as f64) * 100.075);76println!(77"Timed out Runs: {} ({:.1}% of Total Runs)",78run_result_timeout,79(run_result_timeout as f64 / total_runs as f64) * 100.080);81println!("Traps:");82// Load and filter out empty trap codes.83let mut traps = self84.run_result_trap85.iter()86.map(|(trap, count)| (trap, count.load(Ordering::SeqCst)))87.filter(|(_, count)| *count != 0)88.collect::<Vec<_>>();8990// Sort traps by count in a descending order91traps.sort_by_key(|(_, count)| -(*count as i64));9293for (trap, count) in traps.into_iter() {94println!(95"\t{}: {} ({:.1}% of Total Runs)",96trap,97count,98(count as f64 / total_runs as f64) * 100.099);100}101}102}103104impl Default for Statistics {105fn default() -> Self {106// Pre-Register all trap codes since we can't modify this hashmap atomically.107let mut run_result_trap = HashMap::new();108run_result_trap.insert(CraneliftTrap::Debug, AtomicU64::new(0));109run_result_trap.insert(CraneliftTrap::BadSignature, AtomicU64::new(0));110run_result_trap.insert(CraneliftTrap::UnreachableCodeReached, AtomicU64::new(0));111run_result_trap.insert(CraneliftTrap::HeapMisaligned, AtomicU64::new(0));112for trapcode in TrapCode::non_user_traps() {113run_result_trap.insert(CraneliftTrap::User(*trapcode), AtomicU64::new(0));114}115116Self {117valid_inputs: AtomicU64::new(0),118invalid_inputs: AtomicU64::new(0),119total_runs: AtomicU64::new(0),120run_result_success: AtomicU64::new(0),121run_result_timeout: AtomicU64::new(0),122run_result_trap,123}124}125}126127#[derive(Debug)]128enum RunResult {129Success(Vec<DataValue>),130Trap(CraneliftTrap),131Timeout,132Error(Box<dyn std::error::Error>),133}134135impl PartialEq for RunResult {136fn eq(&self, other: &Self) -> bool {137match (self, other) {138(RunResult::Success(l), RunResult::Success(r)) => {139l.len() == r.len() && l.iter().zip(r).all(|(l, r)| l.bitwise_eq(r))140}141(RunResult::Trap(l), RunResult::Trap(r)) => l == r,142(RunResult::Timeout, RunResult::Timeout) => true,143(RunResult::Error(_), RunResult::Error(_)) => unimplemented!(),144_ => false,145}146}147}148149pub struct TestCase {150/// TargetIsa to use when compiling this test case151pub isa: isa::OwnedTargetIsa,152/// Functions under test153/// By convention the first function is the main function.154pub functions: Vec<Function>,155/// Control planes for function compilation.156/// There should be an equal amount as functions to compile.157pub ctrl_planes: Vec<ControlPlane>,158/// Generate multiple test inputs for each test case.159/// This allows us to get more coverage per compilation, which may be somewhat expensive.160pub inputs: Vec<TestCaseInput>,161/// Should this `TestCase` be tested after optimizations.162pub compare_against_host: bool,163}164165impl fmt::Debug for TestCase {166fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {167if !self.compare_against_host {168writeln!(f, ";; Testing against optimized version")?;169}170PrintableTestCase::run(&self.isa, &self.functions, &self.inputs).fmt(f)171}172}173174impl<'a> Arbitrary<'a> for TestCase {175fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {176let _ = env_logger::try_init();177Self::generate(u).map_err(|_| {178STATISTICS.invalid_inputs.fetch_add(1, Ordering::SeqCst);179arbitrary::Error::IncorrectFormat180})181}182}183184impl TestCase {185pub fn generate(u: &mut Unstructured) -> anyhow::Result<Self> {186let mut generator = FuzzGen::new(u);187188let compare_against_host = generator.u.arbitrary()?;189190// TestCase is meant to be consumed by a runner, so we make the assumption here that we're191// generating a TargetIsa for the host.192let mut builder =193builder_with_options(true).expect("Unable to build a TargetIsa for the current host");194let flags = generator.generate_flags(builder.triple().architecture)?;195generator.set_isa_flags(&mut builder, IsaFlagGen::Host)?;196let isa = builder.finish(flags)?;197198// When generating functions, we allow each function to call any function that has199// already been generated. This guarantees that we never have loops in the call graph.200// We generate these backwards, and then reverse them so that the main function is at201// the start.202let func_count = generator203.u204.int_in_range(generator.config.testcase_funcs.clone())?;205let mut functions: Vec<Function> = Vec::with_capacity(func_count);206let mut ctrl_planes: Vec<ControlPlane> = Vec::with_capacity(func_count);207for i in (0..func_count).rev() {208// Function name must be in a different namespace than TESTFILE_NAMESPACE (0)209let fname = UserFuncName::user(1, i as u32);210211let usercalls: Vec<(UserExternalName, Signature)> = functions212.iter()213.map(|f| {214(215f.name.get_user().unwrap().clone(),216f.stencil.signature.clone(),217)218})219.collect();220221let func = generator.generate_func(222fname,223isa.clone(),224usercalls,225ALLOWED_LIBCALLS.to_vec(),226)?;227functions.push(func);228229ctrl_planes.push(ControlPlane::arbitrary(generator.u)?);230}231// Now reverse the functions so that the main function is at the start.232functions.reverse();233234let main = &functions[0];235let inputs = generator.generate_test_inputs(&main.signature)?;236237Ok(TestCase {238isa,239functions,240ctrl_planes,241inputs,242compare_against_host,243})244}245246fn to_optimized(&self) -> Self {247let mut ctrl_planes = self.ctrl_planes.clone();248let optimized_functions: Vec<Function> = self249.functions250.iter()251.zip(ctrl_planes.iter_mut())252.map(|(func, ctrl_plane)| {253let mut ctx = Context::for_function(func.clone());254ctx.optimize(self.isa.as_ref(), ctrl_plane).unwrap();255ctx.func256})257.collect();258259TestCase {260isa: self.isa.clone(),261functions: optimized_functions,262ctrl_planes,263inputs: self.inputs.clone(),264compare_against_host: false,265}266}267268/// Returns the main function of this test case.269pub fn main(&self) -> &Function {270&self.functions[0]271}272}273274fn run_in_interpreter(interpreter: &mut Interpreter, args: &[DataValue]) -> RunResult {275// The entrypoint function is always 0276let index = FuncIndex::from_u32(0);277let res = interpreter.call_by_index(index, args);278279match res {280Ok(ControlFlow::Return(results)) => RunResult::Success(results.to_vec()),281Ok(ControlFlow::Trap(trap)) => RunResult::Trap(trap),282Ok(cf) => RunResult::Error(format!("Unrecognized exit ControlFlow: {cf:?}").into()),283Err(InterpreterError::FuelExhausted) => RunResult::Timeout,284Err(e) => RunResult::Error(e.into()),285}286}287288fn run_in_host(289compiled: &CompiledTestFile,290trampoline: &Trampoline,291args: &[DataValue],292) -> RunResult {293let res = trampoline.call(compiled, args);294RunResult::Success(res)295}296297/// These libcalls need a interpreter implementation in `build_interpreter`298const ALLOWED_LIBCALLS: &'static [LibCall] = &[299LibCall::CeilF32,300LibCall::CeilF64,301LibCall::FloorF32,302LibCall::FloorF64,303LibCall::TruncF32,304LibCall::TruncF64,305];306307fn build_interpreter(testcase: &TestCase) -> Interpreter<'_> {308let mut env = FunctionStore::default();309for func in testcase.functions.iter() {310env.add(func.name.to_string(), &func);311}312313let state = InterpreterState::default()314.with_function_store(env)315.with_libcall_handler(|libcall: LibCall, args: LibCallValues| {316use LibCall::*;317Ok(smallvec![match (libcall, &args[..]) {318(CeilF32, [DataValue::F32(a)]) => DataValue::F32(a.ceil()),319(CeilF64, [DataValue::F64(a)]) => DataValue::F64(a.ceil()),320(FloorF32, [DataValue::F32(a)]) => DataValue::F32(a.floor()),321(FloorF64, [DataValue::F64(a)]) => DataValue::F64(a.floor()),322(TruncF32, [DataValue::F32(a)]) => DataValue::F32(a.trunc()),323(TruncF64, [DataValue::F64(a)]) => DataValue::F64(a.trunc()),324_ => unreachable!(),325}])326});327328let interpreter = Interpreter::new(state).with_fuel(Some(INTERPRETER_FUEL));329interpreter330}331332static STATISTICS: LazyLock<Statistics> = LazyLock::new(Statistics::default);333334fn run_test_inputs(testcase: &TestCase, run: impl Fn(&[DataValue]) -> RunResult) {335for args in &testcase.inputs {336STATISTICS.total_runs.fetch_add(1, Ordering::SeqCst);337338// We rebuild the interpreter every run so that we don't accidentally carry over any state339// between runs, such as fuel remaining.340let mut interpreter = build_interpreter(&testcase);341let int_res = run_in_interpreter(&mut interpreter, args);342match int_res {343RunResult::Success(_) => {344STATISTICS.run_result_success.fetch_add(1, Ordering::SeqCst);345}346RunResult::Trap(trap) => {347STATISTICS.run_result_trap[&trap].fetch_add(1, Ordering::SeqCst);348// If this input traps, skip it and continue trying other inputs349// for this function. We've already compiled it anyway.350//351// We could catch traps in the host run and compare them to the352// interpreter traps, but since we already test trap cases with353// wasm tests and wasm-level fuzzing, the amount of effort does354// not justify implementing it again here.355continue;356}357RunResult::Timeout => {358// We probably generated an infinite loop, we should drop this entire input.359// We could `continue` like we do on traps, but timeouts are *really* expensive.360STATISTICS.run_result_timeout.fetch_add(1, Ordering::SeqCst);361return;362}363RunResult::Error(e) => panic!("interpreter failed: {e:?}"),364}365366let res = run(args);367368// This situation can happen when we are comparing the interpreter against the interpreter, and369// one of the optimization passes has increased the number of instructions in the function.370// This can cause the interpreter to run out of fuel in the second run, but not the first.371// We should ignore these cases.372// Running in the host should never return a timeout, so that should be ok.373if res == RunResult::Timeout {374return;375}376377assert_eq!(int_res, res);378}379}380381fuzz_target!(|testcase: TestCase| {382let mut testcase = testcase;383let fuel: u8 = std::env::args()384.find_map(|arg| arg.strip_prefix("--fuel=").map(|s| s.to_owned()))385.map(|fuel| fuel.parse().expect("fuel should be a valid integer"))386.unwrap_or_default();387for i in 0..testcase.ctrl_planes.len() {388testcase.ctrl_planes[i].set_fuel(fuel)389}390let testcase = testcase;391392// This is the default, but we should ensure that it wasn't accidentally turned off anywhere.393assert!(testcase.isa.flags().enable_verifier());394395// Periodically print statistics396let valid_inputs = STATISTICS.valid_inputs.fetch_add(1, Ordering::SeqCst);397if valid_inputs != 0 && valid_inputs % 10000 == 0 {398STATISTICS.print(valid_inputs);399}400401if !testcase.compare_against_host {402let opt_testcase = testcase.to_optimized();403404run_test_inputs(&testcase, |args| {405// We rebuild the interpreter every run so that we don't accidentally carry over any state406// between runs, such as fuel remaining.407let mut interpreter = build_interpreter(&opt_testcase);408409run_in_interpreter(&mut interpreter, args)410});411} else {412let mut compiler = TestFileCompiler::new(testcase.isa.clone());413compiler414.add_functions(&testcase.functions[..], testcase.ctrl_planes.clone())415.unwrap();416let compiled = compiler.compile().unwrap();417let trampoline = compiled.get_trampoline(testcase.main()).unwrap();418419run_test_inputs(&testcase, |args| run_in_host(&compiled, &trampoline, args));420}421});422423424