CoCalc -- cranelift-fuzzgen.rs

GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/fuzz/fuzz_targets/cranelift-fuzzgen.rs
¹⁶⁹⁰ views
1
#![no_main]
2

3
use cranelift_codegen::Context;
4
use cranelift_codegen::ir::Function;
5
use cranelift_codegen::ir::Signature;
6
use cranelift_codegen::ir::UserExternalName;
7
use cranelift_codegen::ir::UserFuncName;
8
use cranelift_control::ControlPlane;
9
use libfuzzer_sys::arbitrary;
10
use libfuzzer_sys::arbitrary::Arbitrary;
11
use libfuzzer_sys::arbitrary::Unstructured;
12
use libfuzzer_sys::fuzz_target;
13
use std::collections::HashMap;
14
use std::fmt;
15
use std::sync::LazyLock;
16
use std::sync::atomic::AtomicU64;
17
use std::sync::atomic::Ordering;
18

19
use cranelift_codegen::data_value::DataValue;
20
use cranelift_codegen::ir::{LibCall, TrapCode};
21
use cranelift_codegen::isa;
22
use cranelift_filetests::function_runner::{CompiledTestFile, TestFileCompiler, Trampoline};
23
use cranelift_fuzzgen::*;
24
use cranelift_interpreter::environment::FuncIndex;
25
use cranelift_interpreter::environment::FunctionStore;
26
use cranelift_interpreter::interpreter::{
27
    Interpreter, InterpreterError, InterpreterState, LibCallValues,
28
};
29
use cranelift_interpreter::step::ControlFlow;
30
use cranelift_interpreter::step::CraneliftTrap;
31
use cranelift_native::builder_with_options;
32
use smallvec::smallvec;
33

34
/// Fuel budget given to every interpreter created by `build_interpreter`.
/// A run that exhausts it is reported as `RunResult::Timeout`.
const INTERPRETER_FUEL: u64 = 4096;
35

36
/// Gather statistics about the fuzzer executions
37
struct Statistics {
38
    /// Inputs that fuzzgen can build a function with
39
    /// This is also how many compiles we executed
40
    pub valid_inputs: AtomicU64,
41
    /// How many times did we generate an invalid format?
42
    pub invalid_inputs: AtomicU64,
43

44
    /// Total amount of runs that we tried in the interpreter
45
    /// One fuzzer input can have many runs
46
    pub total_runs: AtomicU64,
47
    /// How many runs were successful?
48
    /// This is also how many runs were run in the backend
49
    pub run_result_success: AtomicU64,
50
    /// How many runs resulted in a timeout?
51
    pub run_result_timeout: AtomicU64,
52
    /// How many runs ended with a trap?
53
    pub run_result_trap: HashMap<CraneliftTrap, AtomicU64>,
54
}
55

56
impl Statistics {
57
    pub fn print(&self, valid_inputs: u64) {
58
        // We get valid_inputs as a param since we already loaded it previously.
59
        let total_runs = self.total_runs.load(Ordering::SeqCst);
60
        let invalid_inputs = self.invalid_inputs.load(Ordering::SeqCst);
61
        let run_result_success = self.run_result_success.load(Ordering::SeqCst);
62
        let run_result_timeout = self.run_result_timeout.load(Ordering::SeqCst);
63

64
        println!("== FuzzGen Statistics  ====================");
65
        println!("Valid Inputs: {valid_inputs}");
66
        println!(
67
            "Invalid Inputs: {} ({:.1}% of Total Inputs)",
68
            invalid_inputs,
69
            (invalid_inputs as f64 / (valid_inputs + invalid_inputs) as f64) * 100.0
70
        );
71
        println!("Total Runs: {total_runs}");
72
        println!(
73
            "Successful Runs: {} ({:.1}% of Total Runs)",
74
            run_result_success,
75
            (run_result_success as f64 / total_runs as f64) * 100.0
76
        );
77
        println!(
78
            "Timed out Runs: {} ({:.1}% of Total Runs)",
79
            run_result_timeout,
80
            (run_result_timeout as f64 / total_runs as f64) * 100.0
81
        );
82
        println!("Traps:");
83
        // Load and filter out empty trap codes.
84
        let mut traps = self
85
            .run_result_trap
86
            .iter()
87
            .map(|(trap, count)| (trap, count.load(Ordering::SeqCst)))
88
            .filter(|(_, count)| *count != 0)
89
            .collect::<Vec<_>>();
90

91
        // Sort traps by count in a descending order
92
        traps.sort_by_key(|(_, count)| -(*count as i64));
93

94
        for (trap, count) in traps.into_iter() {
95
            println!(
96
                "\t{}: {} ({:.1}% of Total Runs)",
97
                trap,
98
                count,
99
                (count as f64 / total_runs as f64) * 100.0
100
            );
101
        }
102
    }
103
}
104

105
impl Default for Statistics {
106
    fn default() -> Self {
107
        // Pre-Register all trap codes since we can't modify this hashmap atomically.
108
        let mut run_result_trap = HashMap::new();
109
        run_result_trap.insert(CraneliftTrap::Debug, AtomicU64::new(0));
110
        run_result_trap.insert(CraneliftTrap::BadSignature, AtomicU64::new(0));
111
        run_result_trap.insert(CraneliftTrap::UnreachableCodeReached, AtomicU64::new(0));
112
        run_result_trap.insert(CraneliftTrap::HeapMisaligned, AtomicU64::new(0));
113
        for trapcode in TrapCode::non_user_traps() {
114
            run_result_trap.insert(CraneliftTrap::User(*trapcode), AtomicU64::new(0));
115
        }
116

117
        Self {
118
            valid_inputs: AtomicU64::new(0),
119
            invalid_inputs: AtomicU64::new(0),
120
            total_runs: AtomicU64::new(0),
121
            run_result_success: AtomicU64::new(0),
122
            run_result_timeout: AtomicU64::new(0),
123
            run_result_trap,
124
        }
125
    }
126
}
127

128
#[derive(Debug)]
129
enum RunResult {
130
    Success(Vec<DataValue>),
131
    Trap(CraneliftTrap),
132
    Timeout,
133
    Error(Box<dyn std::error::Error>),
134
}
135

136
impl PartialEq for RunResult {
137
    fn eq(&self, other: &Self) -> bool {
138
        match (self, other) {
139
            (RunResult::Success(l), RunResult::Success(r)) => {
140
                l.len() == r.len() && l.iter().zip(r).all(|(l, r)| l.bitwise_eq(r))
141
            }
142
            (RunResult::Trap(l), RunResult::Trap(r)) => l == r,
143
            (RunResult::Timeout, RunResult::Timeout) => true,
144
            (RunResult::Error(_), RunResult::Error(_)) => unimplemented!(),
145
            _ => false,
146
        }
147
    }
148
}
149

150
pub struct TestCase {
151
    /// TargetIsa to use when compiling this test case
152
    pub isa: isa::OwnedTargetIsa,
153
    /// Functions under test
154
    /// By convention the first function is the main function.
155
    pub functions: Vec<Function>,
156
    /// Control planes for function compilation.
157
    /// There should be an equal amount as functions to compile.
158
    pub ctrl_planes: Vec<ControlPlane>,
159
    /// Generate multiple test inputs for each test case.
160
    /// This allows us to get more coverage per compilation, which may be somewhat expensive.
161
    pub inputs: Vec<TestCaseInput>,
162
    /// Should this `TestCase` be tested after optimizations.
163
    pub compare_against_host: bool,
164
}
165

166
impl fmt::Debug for TestCase {
167
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
168
        if !self.compare_against_host {
169
            writeln!(f, ";; Testing against optimized version")?;
170
        }
171
        PrintableTestCase::run(&self.isa, &self.functions, &self.inputs).fmt(f)
172
    }
173
}
174

175
impl<'a> Arbitrary<'a> for TestCase {
176
    fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
177
        let _ = env_logger::try_init();
178
        Self::generate(u).map_err(|_| {
179
            STATISTICS.invalid_inputs.fetch_add(1, Ordering::SeqCst);
180
            arbitrary::Error::IncorrectFormat
181
        })
182
    }
183
}
184

185
impl TestCase {
186
    pub fn generate(u: &mut Unstructured) -> anyhow::Result<Self> {
187
        let mut generator = FuzzGen::new(u);
188

189
        let compare_against_host = generator.u.arbitrary()?;
190

191
        // TestCase is meant to be consumed by a runner, so we make the assumption here that we're
192
        // generating a TargetIsa for the host.
193
        let mut builder =
194
            builder_with_options(true).expect("Unable to build a TargetIsa for the current host");
195
        let flags = generator.generate_flags(builder.triple().architecture)?;
196
        generator.set_isa_flags(&mut builder, IsaFlagGen::Host)?;
197
        let isa = builder.finish(flags)?;
198

199
        // When generating functions, we allow each function to call any function that has
200
        // already been generated. This guarantees that we never have loops in the call graph.
201
        // We generate these backwards, and then reverse them so that the main function is at
202
        // the start.
203
        let func_count = generator
204
            .u
205
            .int_in_range(generator.config.testcase_funcs.clone())?;
206
        let mut functions: Vec<Function> = Vec::with_capacity(func_count);
207
        let mut ctrl_planes: Vec<ControlPlane> = Vec::with_capacity(func_count);
208
        for i in (0..func_count).rev() {
209
            // Function name must be in a different namespace than TESTFILE_NAMESPACE (0)
210
            let fname = UserFuncName::user(1, i as u32);
211

212
            let usercalls: Vec<(UserExternalName, Signature)> = functions
213
                .iter()
214
                .map(|f| {
215
                    (
216
                        f.name.get_user().unwrap().clone(),
217
                        f.stencil.signature.clone(),
218
                    )
219
                })
220
                .collect();
221

222
            let func = generator.generate_func(
223
                fname,
224
                isa.clone(),
225
                usercalls,
226
                ALLOWED_LIBCALLS.to_vec(),
227
            )?;
228
            functions.push(func);
229

230
            ctrl_planes.push(ControlPlane::arbitrary(generator.u)?);
231
        }
232
        // Now reverse the functions so that the main function is at the start.
233
        functions.reverse();
234

235
        let main = &functions[0];
236
        let inputs = generator.generate_test_inputs(&main.signature)?;
237

238
        Ok(TestCase {
239
            isa,
240
            functions,
241
            ctrl_planes,
242
            inputs,
243
            compare_against_host,
244
        })
245
    }
246

247
    fn to_optimized(&self) -> Self {
248
        let mut ctrl_planes = self.ctrl_planes.clone();
249
        let optimized_functions: Vec<Function> = self
250
            .functions
251
            .iter()
252
            .zip(ctrl_planes.iter_mut())
253
            .map(|(func, ctrl_plane)| {
254
                let mut ctx = Context::for_function(func.clone());
255
                ctx.optimize(self.isa.as_ref(), ctrl_plane).unwrap();
256
                ctx.func
257
            })
258
            .collect();
259

260
        TestCase {
261
            isa: self.isa.clone(),
262
            functions: optimized_functions,
263
            ctrl_planes,
264
            inputs: self.inputs.clone(),
265
            compare_against_host: false,
266
        }
267
    }
268

269
    /// Returns the main function of this test case.
270
    pub fn main(&self) -> &Function {
271
        &self.functions[0]
272
    }
273
}
274

275
fn run_in_interpreter(interpreter: &mut Interpreter, args: &[DataValue]) -> RunResult {
276
    // The entrypoint function is always 0
277
    let index = FuncIndex::from_u32(0);
278
    let res = interpreter.call_by_index(index, args);
279

280
    match res {
281
        Ok(ControlFlow::Return(results)) => RunResult::Success(results.to_vec()),
282
        Ok(ControlFlow::Trap(trap)) => RunResult::Trap(trap),
283
        Ok(cf) => RunResult::Error(format!("Unrecognized exit ControlFlow: {cf:?}").into()),
284
        Err(InterpreterError::FuelExhausted) => RunResult::Timeout,
285
        Err(e) => RunResult::Error(e.into()),
286
    }
287
}
288

289
fn run_in_host(
290
    compiled: &CompiledTestFile,
291
    trampoline: &Trampoline,
292
    args: &[DataValue],
293
) -> RunResult {
294
    let res = trampoline.call(compiled, args);
295
    RunResult::Success(res)
296
}
297

298
/// These libcalls need a interpreter implementation in `build_interpreter`
299
const ALLOWED_LIBCALLS: &'static [LibCall] = &[
300
    LibCall::CeilF32,
301
    LibCall::CeilF64,
302
    LibCall::FloorF32,
303
    LibCall::FloorF64,
304
    LibCall::TruncF32,
305
    LibCall::TruncF64,
306
];
307

308
fn build_interpreter(testcase: &TestCase) -> Interpreter<'_> {
309
    let mut env = FunctionStore::default();
310
    for func in testcase.functions.iter() {
311
        env.add(func.name.to_string(), &func);
312
    }
313

314
    let state = InterpreterState::default()
315
        .with_function_store(env)
316
        .with_libcall_handler(|libcall: LibCall, args: LibCallValues| {
317
            use LibCall::*;
318
            Ok(smallvec![match (libcall, &args[..]) {
319
                (CeilF32, [DataValue::F32(a)]) => DataValue::F32(a.ceil()),
320
                (CeilF64, [DataValue::F64(a)]) => DataValue::F64(a.ceil()),
321
                (FloorF32, [DataValue::F32(a)]) => DataValue::F32(a.floor()),
322
                (FloorF64, [DataValue::F64(a)]) => DataValue::F64(a.floor()),
323
                (TruncF32, [DataValue::F32(a)]) => DataValue::F32(a.trunc()),
324
                (TruncF64, [DataValue::F64(a)]) => DataValue::F64(a.trunc()),
325
                _ => unreachable!(),
326
            }])
327
        });
328

329
    let interpreter = Interpreter::new(state).with_fuel(Some(INTERPRETER_FUEL));
330
    interpreter
331
}
332

333
/// Statistics shared by all executions of the fuzz target in this process,
/// created with `Statistics::default` on first access.
static STATISTICS: LazyLock<Statistics> = LazyLock::new(Statistics::default);
334

335
fn run_test_inputs(testcase: &TestCase, run: impl Fn(&[DataValue]) -> RunResult) {
336
    for args in &testcase.inputs {
337
        STATISTICS.total_runs.fetch_add(1, Ordering::SeqCst);
338

339
        // We rebuild the interpreter every run so that we don't accidentally carry over any state
340
        // between runs, such as fuel remaining.
341
        let mut interpreter = build_interpreter(&testcase);
342
        let int_res = run_in_interpreter(&mut interpreter, args);
343
        match int_res {
344
            RunResult::Success(_) => {
345
                STATISTICS.run_result_success.fetch_add(1, Ordering::SeqCst);
346
            }
347
            RunResult::Trap(trap) => {
348
                STATISTICS.run_result_trap[&trap].fetch_add(1, Ordering::SeqCst);
349
                // If this input traps, skip it and continue trying other inputs
350
                // for this function. We've already compiled it anyway.
351
                //
352
                // We could catch traps in the host run and compare them to the
353
                // interpreter traps, but since we already test trap cases with
354
                // wasm tests and wasm-level fuzzing, the amount of effort does
355
                // not justify implementing it again here.
356
                continue;
357
            }
358
            RunResult::Timeout => {
359
                // We probably generated an infinite loop, we should drop this entire input.
360
                // We could `continue` like we do on traps, but timeouts are *really* expensive.
361
                STATISTICS.run_result_timeout.fetch_add(1, Ordering::SeqCst);
362
                return;
363
            }
364
            RunResult::Error(e) => panic!("interpreter failed: {e:?}"),
365
        }
366

367
        let res = run(args);
368

369
        // This situation can happen when we are comparing the interpreter against the interpreter, and
370
        // one of the optimization passes has increased the number of instructions in the function.
371
        // This can cause the interpreter to run out of fuel in the second run, but not the first.
372
        // We should ignore these cases.
373
        // Running in the host should never return a timeout, so that should be ok.
374
        if res == RunResult::Timeout {
375
            return;
376
        }
377

378
        assert_eq!(int_res, res);
379
    }
380
}
381

382
fuzz_target!(|testcase: TestCase| {
    let mut testcase = testcase;

    // An optional `--fuel=N` command line argument sets the fuel of every
    // control plane; without it the default value of `u8` is used.
    let fuel: u8 = std::env::args()
        .find_map(|arg| {
            arg.strip_prefix("--fuel=")
                .map(|value| value.parse().expect("fuel should be a valid integer"))
        })
        .unwrap_or_default();
    for ctrl_plane in testcase.ctrl_planes.iter_mut() {
        ctrl_plane.set_fuel(fuel);
    }
    // No further mutation is needed from here on.
    let testcase = testcase;

    // This is the default, but we should ensure that it wasn't accidentally turned off anywhere.
    assert!(testcase.isa.flags().enable_verifier());

    // Periodically print statistics
    let valid_inputs = STATISTICS.valid_inputs.fetch_add(1, Ordering::SeqCst);
    let should_print = valid_inputs != 0 && valid_inputs % 10000 == 0;
    if should_print {
        STATISTICS.print(valid_inputs);
    }

    if testcase.compare_against_host {
        // Compile everything for the host and compare native execution
        // against the interpreter.
        let mut compiler = TestFileCompiler::new(testcase.isa.clone());
        compiler
            .add_functions(&testcase.functions[..], testcase.ctrl_planes.clone())
            .unwrap();
        let compiled = compiler.compile().unwrap();
        let trampoline = compiled.get_trampoline(testcase.main()).unwrap();

        run_test_inputs(&testcase, |args| run_in_host(&compiled, &trampoline, args));
    } else {
        // Compare the interpreter on the original functions against the
        // interpreter on their optimized versions.
        let opt_testcase = testcase.to_optimized();

        run_test_inputs(&testcase, |args| {
            // We rebuild the interpreter every run so that we don't accidentally carry over any state
            // between runs, such as fuel remaining.
            let mut opt_interpreter = build_interpreter(&opt_testcase);

            run_in_interpreter(&mut opt_interpreter, args)
        });
    }
});
423

424
Product

Resources

Company