//! A fuzz testing oracle for roundtrip assembly-disassembly.
//!
//! This contains manual implementations of the `Arbitrary` trait for types
//! throughout this crate to avoid depending on the `arbitrary` crate
//! unconditionally (use the `fuzz` feature instead).

use std::string::{String, ToString};
use std::vec::Vec;
use std::{format, println};

use crate::{
    AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, CodeSink, DeferredTarget, Fixed, Gpr, Inst,
    KnownOffset, NonRspGpr, Registers, TrapCode, Xmm,
};
use arbitrary::{Arbitrary, Result, Unstructured};
use capstone::{Capstone, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, arch::x86};

/// Take a random assembly instruction and check its encoding and
/// pretty-printing against a known-good disassembler.
///
/// # Panics
///
/// This function panics to express failure as expected by the `arbitrary`
/// fuzzer infrastructure. It may fail during assembly, disassembly, or when
/// comparing the disassembled strings.
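///
/// # Example
///
/// A minimal sketch of direct usage, mirroring the `callq` test at the bottom
/// of this file (a fuzz target would feed arbitrary `Inst`s instead):
///
/// ```ignore
/// let inst: Inst<FuzzRegs> = crate::inst::callq_d::new(0).into();
/// roundtrip(&inst);
/// ```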
pub fn roundtrip(inst: &Inst<FuzzRegs>) {
    // Check that we can actually assemble this instruction.
    let assembled = assemble(inst);
    let expected = disassemble(&assembled, inst);

    // Check that our pretty-printed output matches the known-good output. Trim
    // off the instruction offset first.
    let expected = expected.split_once(' ').unwrap().1;
    let actual = inst.to_string();
    if expected != actual && expected.trim() != fix_up(&actual) {
        println!("> {inst}");
        println!(" debug: {inst:x?}");
        println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
        println!(" expected (capstone): {expected}");
        println!(" actual (to_string): {actual}");
        assert_eq!(expected, &actual);
    }
}

/// Use this assembler to emit machine code into a byte buffer.
///
/// This will skip any traps or label registrations, but this is fine for the
/// single-instruction disassembly we're doing here.
fn assemble(inst: &Inst<FuzzRegs>) -> Vec<u8> {
    let mut sink = TestCodeSink::default();
    inst.encode(&mut sink);
    sink.patch_labels_as_if_they_referred_to_end();
    sink.buf
}

#[derive(Default)]
struct TestCodeSink {
    buf: Vec<u8>,
    offsets_using_label: Vec<usize>,
}

impl TestCodeSink {
    /// A reference to a label (e.g. in RIP-relative addressing) is stored with
    /// an adjustment accounting for the distance from the relative offset to
    /// the end of the instruction, which is what the offset is relative to.
    /// To make the offset truly relative to the end of the instruction, which
    /// is where we pretend all labels are bound, that adjustment has to be
    /// taken into account here.
    ///
    /// This function iterates over all label uses recorded in this code sink
    /// and pretends each label is bound at the end of `buf`. That means the
    /// distance from the end of the 4-byte offset field to the end of `buf` is
    /// added to what's already present in the encoding buffer.
    ///
    /// This effectively undoes the `bytes_at_end` adjustment that's part of
    /// `Amode::RipRelative` addressing.
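    ///
    /// As a purely illustrative sketch (numbers invented for this comment): if
    /// `buf` is 11 bytes long and a 4-byte offset field was recorded starting
    /// at offset 3, the field ends at offset `3 + 4 = 7`, so `rel_distance` is
    /// `11 - 7 = 4` and 4 gets added to the value already stored in those
    /// bytes, matching a `bytes_at_end` of 4 (e.g. a 4-byte immediate
    /// following the offset field).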
    fn patch_labels_as_if_they_referred_to_end(&mut self) {
        let len = i32::try_from(self.buf.len()).unwrap();
        for offset in self.offsets_using_label.iter() {
            let range = self.buf[*offset..].first_chunk_mut::<4>().unwrap();
            let offset = i32::try_from(*offset).unwrap() + 4;
            let rel_distance = len - offset;
            *range = (i32::from_le_bytes(*range) + rel_distance).to_le_bytes();
        }
    }
}

impl CodeSink for TestCodeSink {
    fn put1(&mut self, v: u8) {
        self.buf.extend_from_slice(&[v]);
    }

    fn put2(&mut self, v: u16) {
        self.buf.extend_from_slice(&v.to_le_bytes());
    }

    fn put4(&mut self, v: u32) {
        self.buf.extend_from_slice(&v.to_le_bytes());
    }

    fn put8(&mut self, v: u64) {
        self.buf.extend_from_slice(&v.to_le_bytes());
    }

    fn add_trap(&mut self, _: TrapCode) {}

    fn use_target(&mut self, _: DeferredTarget) {
        let offset = self.buf.len();
        self.offsets_using_label.push(offset);
    }

    fn known_offset(&self, target: KnownOffset) -> i32 {
        panic!("unsupported known target {target:?}")
    }
}

/// Building a new `Capstone` each time is suboptimal (TODO).
fn disassemble(assembled: &[u8], original: &Inst<FuzzRegs>) -> String {
    let cs = Capstone::new()
        .x86()
        .mode(x86::ArchMode::Mode64)
        .syntax(x86::ArchSyntax::Att)
        .detail(true)
        .build()
        .expect("failed to create Capstone object");
    let insts = cs
        .disasm_all(assembled, 0x0)
        .expect("failed to disassemble");

    if insts.len() != 1 {
        println!("> {original}");
        println!(" debug: {original:x?}");
        println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
        assert_eq!(insts.len(), 1, "not a single instruction");
    }

    let inst = insts.first().expect("at least one instruction");
    if assembled.len() != inst.len() {
        println!("> {original}");
        println!(" debug: {original:x?}");
        println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
        println!(
            " capstone-assembled: {}",
            pretty_print_hexadecimal(inst.bytes())
        );
        assert_eq!(assembled.len(), inst.len(), "extra bytes not disassembled");
    }

    inst.to_string()
}

fn pretty_print_hexadecimal(hex: &[u8]) -> String {
    use core::fmt::Write;
    let mut s = String::with_capacity(hex.len() * 2);
    for b in hex {
        write!(&mut s, "{b:02X}").unwrap();
    }
    s
}

/// See `replace_signed_immediates`.
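///
/// For example, a hypothetical invocation `hex_print_signed_imm!("ffffff9a", u32 => i32)`
/// evaluates to the string `-0x66`.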
macro_rules! hex_print_signed_imm {
    ($hex:expr, $from:ty => $to:ty) => {{
        let imm = <$from>::from_str_radix($hex, 16).unwrap() as $to;
        let mut simm = String::new();
        if imm < 0 {
            simm.push_str("-");
        }
        let abs = match imm.checked_abs() {
            Some(i) => i,
            None => <$to>::MIN,
        };
        if imm > -10 && imm < 10 {
            simm.push_str(&format!("{:x}", abs));
        } else {
            simm.push_str(&format!("0x{:x}", abs));
        }
        simm
    }};
}

/// Rewrite the hexadecimal immediates in the disassembly to the form
/// `capstone` prints. This is only necessary to match `capstone`'s complex
/// pretty-printing rules; e.g. `capstone` will:
/// - omit the `0x` prefix when printing `0x0` as `0`.
/// - omit the `0x` prefix when printing small values (less than 10)
/// - print negative values as `-0x...` (signed hex) instead of `0xff...`
///   (normal hex)
/// - print `mov` immediates as base-10 instead of base-16 (?!).
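///
/// For example (taken from the `replace` test below), `$0xffffff9a` in this
/// crate's output becomes `$-0x66` to match `capstone`.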
fn replace_signed_immediates(dis: &str) -> alloc::borrow::Cow<'_, str> {
    match dis.find('$') {
        None => dis.into(),
        Some(idx) => {
            let (prefix, rest) = dis.split_at(idx + 1); // Skip the '$'.
            let (_, rest) = chomp("-", rest); // Skip the '-' if it's there.
            let (_, rest) = chomp("0x", rest); // Skip the '0x' if it's there.
            let n = rest.chars().take_while(char::is_ascii_hexdigit).count();
            let (hex, rest) = rest.split_at(n); // Split at next non-hex character.
            let simm = if dis.starts_with("mov") {
                u64::from_str_radix(hex, 16).unwrap().to_string()
            } else {
                match hex.len() {
                    1 | 2 => hex_print_signed_imm!(hex, u8 => i8),
                    4 => hex_print_signed_imm!(hex, u16 => i16),
                    8 => hex_print_signed_imm!(hex, u32 => i32),
                    16 => hex_print_signed_imm!(hex, u64 => i64),
                    _ => panic!("unexpected length for hex: {hex}"),
                }
            };
            format!("{prefix}{simm}{rest}").into()
        }
    }
}

// See `replace_signed_immediates`.
fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) {
    if s.starts_with(pat) {
        s.split_at(pat.len())
    } else {
        ("", s)
    }
}

#[test]
fn replace() {
    assert_eq!(
        replace_signed_immediates("andl $0xffffff9a, %r11d"),
        "andl $-0x66, %r11d"
    );
    assert_eq!(
        replace_signed_immediates("xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)"),
        "xorq $-0x44, 0x7f139ecc(%r9)"
    );
    assert_eq!(
        replace_signed_immediates("subl $0x3ca77a19, -0x1a030f40(%r14)"),
        "subl $0x3ca77a19, -0x1a030f40(%r14)"
    );
    assert_eq!(
        replace_signed_immediates("movq $0xffffffff864ae103, %rsi"),
        "movq $18446744071667638531, %rsi"
    );
}

/// Remove everything after the first semicolon in the disassembly and trim any
/// trailing spaces. This is necessary to remove the implicit operands we end up
/// printing for Cranelift's sake.
fn remove_after_semicolon(dis: &str) -> &str {
    match dis.find(';') {
        None => dis,
        Some(idx) => {
            let (prefix, _) = dis.split_at(idx);
            prefix.trim()
        }
    }
}

#[test]
fn remove_after_semicolon_test() {
    assert_eq!(
        remove_after_semicolon("imulb 0x7658eddd(%rcx) ;; implicit: %ax"),
        "imulb 0x7658eddd(%rcx)"
    );
}

/// Run some post-processing on the disassembly to make it match Capstone.
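///
/// For example (combining the test cases above), an input like
/// `imulb 0x7658eddd(%rcx) ;; implicit: %ax` comes back with the
/// `;; implicit: ...` suffix stripped and any `$...` immediate rewritten.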
fn fix_up(dis: &str) -> alloc::borrow::Cow<'_, str> {
    let dis = remove_after_semicolon(dis);
    replace_signed_immediates(&dis)
}

/// Fuzz-specific registers.
///
/// For the fuzzer, we do not need any fancy register types; see [`FuzzReg`].
#[derive(Clone, Arbitrary, Debug)]
pub struct FuzzRegs;

impl Registers for FuzzRegs {
    type ReadGpr = FuzzReg;
    type ReadWriteGpr = FuzzReg;
    type WriteGpr = FuzzReg;
    type ReadXmm = FuzzReg;
    type ReadWriteXmm = FuzzReg;
    type WriteXmm = FuzzReg;
}

/// A simple `u8` register type for fuzzing only.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct FuzzReg(u8);

impl<'a> Arbitrary<'a> for FuzzReg {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self(u.int_in_range(0..=15)?))
    }
}

impl AsReg for FuzzReg {
    fn new(enc: u8) -> Self {
        Self(enc)
    }
    fn enc(&self) -> u8 {
        self.0
    }
}

impl Arbitrary<'_> for AmodeOffset {
    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
        // Custom implementation to try to generate some "interesting" offsets.
        // For example choose either an arbitrary 8-bit or 32-bit number as the
        // base, and then optionally shift that number to the left to create
        // multiples of constants. This can help stress some of the more
        // interesting encodings in EVEX instructions for example.
        let base = if u.arbitrary()? {
            i32::from(u.arbitrary::<i8>()?)
        } else {
            u.arbitrary::<i32>()?
        };
        Ok(match u.int_in_range(0..=5)? {
            0 => AmodeOffset::ZERO,
            n => AmodeOffset::new(base << (n - 1)),
        })
    }
}

impl Arbitrary<'_> for AmodeOffsetPlusKnownOffset {
    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
        // For now, we don't generate offsets (TODO).
        Ok(Self {
            simm32: AmodeOffset::arbitrary(u)?,
            offset: None,
        })
    }
}

impl<R: AsReg, const E: u8> Arbitrary<'_> for Fixed<R, E> {
    fn arbitrary(_: &mut Unstructured<'_>) -> Result<Self> {
        Ok(Self::new(E))
    }
}

impl<R: AsReg> Arbitrary<'_> for NonRspGpr<R> {
    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
        use crate::gpr::enc::*;
        let gpr = u.choose(&[
            RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
        ])?;
        Ok(Self::new(R::new(*gpr)))
    }
}
impl<'a, R: AsReg> Arbitrary<'a> for Gpr<R> {
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        Ok(Self(R::new(u.int_in_range(0..=15)?)))
    }
}
impl<'a, R: AsReg> Arbitrary<'a> for Xmm<R> {
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        Ok(Self(R::new(u.int_in_range(0..=15)?)))
    }
}

/// Helper trait that is the same as `Registers` except with an extra
/// `for<'a> Arbitrary<'a>` bound on all of the associated types.
pub trait RegistersArbitrary:
    Registers<
        ReadGpr: for<'a> Arbitrary<'a>,
        ReadWriteGpr: for<'a> Arbitrary<'a>,
        WriteGpr: for<'a> Arbitrary<'a>,
        ReadXmm: for<'a> Arbitrary<'a>,
        ReadWriteXmm: for<'a> Arbitrary<'a>,
        WriteXmm: for<'a> Arbitrary<'a>,
    >
{
}

impl<R> RegistersArbitrary for R
where
    R: Registers,
    R::ReadGpr: for<'a> Arbitrary<'a>,
    R::ReadWriteGpr: for<'a> Arbitrary<'a>,
    R::WriteGpr: for<'a> Arbitrary<'a>,
    R::ReadXmm: for<'a> Arbitrary<'a>,
    R::ReadWriteXmm: for<'a> Arbitrary<'a>,
    R::WriteXmm: for<'a> Arbitrary<'a>,
{
}

#[cfg(test)]
mod test {
    use super::*;
    use arbtest::arbtest;
    use std::sync::atomic::{AtomicUsize, Ordering};

    #[test]
    fn smoke() {
        let count = AtomicUsize::new(0);
        arbtest(|u| {
            let inst: Inst<FuzzRegs> = u.arbitrary()?;
            roundtrip(&inst);
            println!("#{}: {inst}", count.fetch_add(1, Ordering::SeqCst));
            Ok(())
        })
        .budget_ms(1_000);

        // This will run the `roundtrip` fuzzer for one second. To repeatably
        // test a single input, append `.seed(0x<failing seed>)`.
    }

    #[test]
    fn callq() {
        for i in -500..500 {
            println!("immediate: {i}");
            let inst = crate::inst::callq_d::new(i);
            roundtrip(&inst.into());
        }
    }
}