//! Computation of basic block order in emitted code.
//!
//! This module handles the translation from CLIF BBs to VCode BBs.
//!
//! The basic idea is that we compute a sequence of "lowered blocks" that
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
//! moves for phi-nodes on every block-to-block transfer; these blocks always
//! conceptually exist, but may be merged with an "original" CLIF block (and
//! hence not actually exist; this is equivalent to inserting the blocks only on
//! critical edges).
//!
//! In other words, starting from a CFG like this (where each "CLIF block" and
//! "(edge N->M)" is a separate basic block):
//!
//! ```plain
//!
//!              CLIF block 0
//!               /         \
//!       (edge 0->1)    (edge 0->2)
//!              |            |
//!       CLIF block 1   CLIF block 2
//!              \            /
//!        (edge 1->3)   (edge 2->3)
//!               \          /
//!              CLIF block 3
//! ```
//!
//! We can produce a CFG of lowered blocks like so:
//!
//! ```plain
//!            +--------------+
//!            | CLIF block 0 |
//!            +--------------+
//!              /          \
//!   +--------------+  +--------------+
//!   | (edge 0->1)  |  | (edge 0->2)  |
//!   | CLIF block 1 |  | CLIF block 2 |
//!   | (edge 1->3)  |  | (edge 2->3)  |
//!   +--------------+  +--------------+
//!              \          /
//!               \        /
//!             +------------+
//!             |CLIF block 3|
//!             +------------+
//! ```
//!
//! Each `LoweredBlock` names just an original CLIF block, or just an edge
//! block.
//!
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
//! (never actually materialized, just defined by a "successors" function), and
//! compute the reverse postorder.
//!
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
//! example, consider any information about whether edge blocks will actually
//! have content, because this computation happens as part of lowering *before*
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
//! particular edge. But it works relatively well and is conceptually simple.
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
//! branch editing that in practice elides empty blocks and simplifies some of
//! the other redundancies that this scheme produces.
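//!
//! A quick usage sketch (illustrative only; it assumes a `Function` `func`,
//! its `DominatorTree` `domtree`, and a `ControlPlane` `ctrl_plane` have been
//! built elsewhere):
//!
//! ```ignore
//! let order = BlockLoweringOrder::new(&func, &domtree, &mut ctrl_plane);
//! for (ix, lb) in order.lowered_order().iter().enumerate() {
//!     let bindex = BlockIndex::new(ix);
//!     // `lb` is either `LoweredBlock::Orig` or `LoweredBlock::CriticalEdge`;
//!     // `succs` holds indices back into the lowered order, and `branch` is
//!     // the original block's final branch instruction, if any.
//!     let (branch, succs) = order.succ_indices(bindex);
//! }
//! ```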

use crate::dominator_tree::DominatorTree;
use crate::entity::SecondaryMap;
use crate::inst_predicates::visit_block_succs;
use crate::ir::{Block, Function, Inst, Opcode};
use crate::{machinst::*, trace};
use rustc_hash::{FxHashMap, FxHashSet};

/// Mapping from CLIF BBs to VCode BBs.
#[derive(Debug)]
pub struct BlockLoweringOrder {
    /// Lowered blocks, in BlockIndex order. Each entry is either an original
    /// CLIF block or an inserted critical-edge block; see [LoweredBlock] for
    /// details.
    lowered_order: Vec<LoweredBlock>,
    /// BlockIndex values for successors of all lowered blocks, indexing `lowered_order`.
    lowered_succ_indices: Vec<BlockIndex>,
    /// Ranges in `lowered_succ_indices` giving the successor lists for each lowered
    /// block. Indexed by lowering-order index (`BlockIndex`).
    lowered_succ_ranges: Vec<(Option<Inst>, std::ops::Range<usize>)>,
    /// BlockIndex for each original Block.
    blockindex_by_block: SecondaryMap<Block, BlockIndex>,
    /// Cold blocks. These blocks are not reordered in the
    /// `lowered_order` above; the lowered order must respect RPO
    /// (uses after defs) in order for lowering to be
    /// correct. Instead, this set is used to provide `is_cold()`,
    /// which is used by VCode emission to sink the blocks at the last
    /// moment (when we actually emit bytes into the MachBuffer).
    cold_blocks: FxHashSet<BlockIndex>,
    /// Lowered blocks that are indirect branch targets.
    indirect_branch_targets: FxHashSet<BlockIndex>,
}

/// A block in the lowered order: either an original CLIF block, or a block
/// inserted to split a critical edge.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoweredBlock {
    /// Block in original CLIF.
    Orig {
        /// Original CLIF block.
        block: Block,
    },

    /// Critical edge between two CLIF blocks.
    CriticalEdge {
        /// The predecessor block.
        pred: Block,

        /// The successor block.
        succ: Block,

        /// The index of this branch in the successor edges from `pred`, following the same
        /// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish
        /// multiple edges between the same CLIF blocks.
        succ_idx: u32,
    },
}

impl LoweredBlock {
    /// Unwrap an `Orig` block.
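    ///
    /// A small sketch (`b0` and `b1` stand for arbitrary CLIF `Block` values):
    ///
    /// ```ignore
    /// assert_eq!(LoweredBlock::Orig { block: b0 }.orig_block(), Some(b0));
    /// let edge = LoweredBlock::CriticalEdge { pred: b0, succ: b1, succ_idx: 0 };
    /// assert_eq!(edge.orig_block(), None);
    /// assert_eq!(edge.out_edge(), Some(b1));
    /// ```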
    pub fn orig_block(&self) -> Option<Block> {
        match self {
            &LoweredBlock::Orig { block } => Some(block),
            &LoweredBlock::CriticalEdge { .. } => None,
        }
    }

    /// The associated in-edge predecessor, if this is a critical edge.
    #[cfg(test)]
    pub fn in_edge(&self) -> Option<Block> {
        match self {
            &LoweredBlock::CriticalEdge { pred, .. } => Some(pred),
            &LoweredBlock::Orig { .. } => None,
        }
    }

    /// The associated out-edge successor, if this is a critical edge.
    pub fn out_edge(&self) -> Option<Block> {
        match self {
            &LoweredBlock::CriticalEdge { succ, .. } => Some(succ),
            &LoweredBlock::Orig { .. } => None,
        }
    }
}

impl BlockLoweringOrder {
    /// Compute and return a lowered block order for `f`.
    pub fn new(
        f: &Function,
        domtree: &DominatorTree,
        ctrl_plane: &mut ControlPlane,
    ) -> BlockLoweringOrder {
        trace!("BlockLoweringOrder: function body {:?}", f);

        // Step 1: compute the in-edge and out-edge count of every block.
        let mut block_in_count = SecondaryMap::with_default(0);
        let mut block_out_count = SecondaryMap::with_default(0);

        // Block successors are stored as `LoweredBlock`s to simplify the construction of
        // `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are
        // updated to be `CriticalEdge` when those cases are identified in step 2 below.
        let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new();
        let mut block_succ_range = SecondaryMap::with_default(0..0);

        let mut indirect_branch_target_clif_blocks = FxHashSet::default();

        for block in f.layout.blocks() {
            let start = block_succs.len();
            visit_block_succs(f, block, |_, succ, from_table| {
                block_out_count[block] += 1;
                block_in_count[succ] += 1;
                block_succs.push(LoweredBlock::Orig { block: succ });

                if from_table {
                    indirect_branch_target_clif_blocks.insert(succ);
                }
            });

            // Ensure that blocks terminated by br_table instructions
            // with an empty jump table are still treated like
            // conditional blocks from the point of view of critical
            // edge splitting. Also do the same for TryCall and
            // TryCallIndirect: we cannot have edge moves before the
            // branch, even if they have empty handler tables and thus
            // would otherwise have only one successor.
            if let Some(inst) = f.layout.last_inst(block) {
                match f.dfg.insts[inst].opcode() {
                    Opcode::BrTable | Opcode::TryCall | Opcode::TryCallIndirect => {
                        block_out_count[block] = block_out_count[block].max(2);
                    }
                    _ => {}
                }
            }

            let end = block_succs.len();
            block_succ_range[block] = start..end;
        }
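
        // (Purely illustrative: for the diamond CFG in the module docs,
        // 0 -> {1, 2} -> 3, this loop computes out-counts [2, 1, 1, 0] and
        // in-counts [0, 1, 1, 2], so no edge has both a multi-out predecessor
        // and a multi-in successor, and step 2 below will split nothing.)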

        // Step 2: walk the postorder from the domtree in reverse to produce our desired node
        // lowering order, identifying critical edges to split along the way.

        let mut lowered_order = Vec::new();
        let mut blockindex_by_block = SecondaryMap::with_default(BlockIndex::invalid());
        for &block in domtree.cfg_rpo() {
            let idx = BlockIndex::new(lowered_order.len());
            lowered_order.push(LoweredBlock::Orig { block });
            blockindex_by_block[block] = idx;

            if block_out_count[block] > 1 {
                let range = block_succ_range[block].clone();

                // If chaos-mode is enabled in the control plane, iterate over
                // the successors in an arbitrary order, which should have no
                // impact on correctness. The order of the blocks is generally
                // relevant: Uses must be seen before defs for dead-code
                // elimination.
                let succs = ctrl_plane.shuffled(block_succs[range].iter_mut().enumerate());

                for (succ_ix, lb) in succs {
                    let succ = lb.orig_block().unwrap();
                    if block_in_count[succ] > 1 {
                        // Mutate the successor to be a critical edge, as `block` has multiple
                        // edges leaving it, and `succ` has multiple edges entering it.
                        *lb = LoweredBlock::CriticalEdge {
                            pred: block,
                            succ,
                            succ_idx: succ_ix as u32,
                        };
                        lowered_order.push(*lb);
                    }
                }
            }
        }

        let lb_to_bindex = FxHashMap::from_iter(
            lowered_order
                .iter()
                .enumerate()
                .map(|(i, &lb)| (lb, BlockIndex::new(i))),
        );

        // Step 3: build the successor tables given the lowering order. We can't perform this step
        // during the creation of `lowered_order`, as we need `lb_to_bindex` to be fully populated
        // first.
        let mut lowered_succ_indices = Vec::new();
        let mut cold_blocks = FxHashSet::default();
        let mut indirect_branch_targets = FxHashSet::default();
        let lowered_succ_ranges =
            Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| {
                let bindex = BlockIndex::new(ix);
                let start = lowered_succ_indices.len();
                let opt_inst = match lb {
                    // Block successors are pulled directly over, as they'll have been mutated when
                    // determining the block order already.
                    &LoweredBlock::Orig { block } => {
                        let range = block_succ_range[block].clone();
                        lowered_succ_indices
                            .extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb]));

                        if f.layout.is_cold(block) {
                            cold_blocks.insert(bindex);
                        }

                        if indirect_branch_target_clif_blocks.contains(&block) {
                            indirect_branch_targets.insert(bindex);
                        }

                        let last = f.layout.last_inst(block).unwrap();
                        let opcode = f.dfg.insts[last].opcode();

                        assert!(opcode.is_terminator());

                        opcode.is_branch().then_some(last)
                    }

                    // Critical edges won't have successor information in block_succ_range, but
                    // they only have a single known successor to record anyway.
                    &LoweredBlock::CriticalEdge { succ, .. } => {
                        let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }];
                        lowered_succ_indices.push(succ_index);

                        // Edges inherit indirect branch and cold block metadata from their
                        // successor.

                        if f.layout.is_cold(succ) {
                            cold_blocks.insert(bindex);
                        }

                        if indirect_branch_target_clif_blocks.contains(&succ) {
                            indirect_branch_targets.insert(bindex);
                        }

                        None
                    }
                };
                let end = lowered_succ_indices.len();
                (opt_inst, start..end)
            }));

        let result = BlockLoweringOrder {
            lowered_order,
            lowered_succ_indices,
            lowered_succ_ranges,
            blockindex_by_block,
            cold_blocks,
            indirect_branch_targets,
        };

        trace!("BlockLoweringOrder: {:#?}", result);
        result
    }

    /// Get the lowered order of blocks.
    pub fn lowered_order(&self) -> &[LoweredBlock] {
        &self.lowered_order[..]
    }

    /// Get the BlockIndex, if any, for a given Block.
    ///
    /// The result will be `None` if the given Block is unreachable
    /// (and thus does not appear in the lowered order).
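    ///
    /// A small sketch (assuming `order` was built for a function whose entry
    /// block is `entry`; the entry block is always reachable):
    ///
    /// ```ignore
    /// assert!(order.lowered_index_for_block(entry).is_some());
    /// ```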
    pub fn lowered_index_for_block(&self, block: Block) -> Option<BlockIndex> {
        let idx = self.blockindex_by_block[block];
        if idx.is_valid() { Some(idx) } else { None }
    }

    /// Get the successor indices for a lowered block.
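    ///
    /// The returned `Option<Inst>` is the block's final branch instruction
    /// when this lowered block is an original CLIF block ending in a branch;
    /// it is `None` for critical-edge blocks and for blocks ending in
    /// non-branch terminators (e.g. returns). The slice holds the successors'
    /// `BlockIndex`es in the lowered order. For example (sketch):
    ///
    /// ```ignore
    /// let (_branch, succs) = order.succ_indices(bindex);
    /// for &succ in succs {
    ///     // `succ` indexes into `lowered_order()`.
    /// }
    /// ```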
    pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) {
        let (opt_inst, range) = &self.lowered_succ_ranges[block.index()];
        (*opt_inst, &self.lowered_succ_indices[range.clone()])
    }

    /// Determine whether the given lowered-block index is cold.
    pub fn is_cold(&self, block: BlockIndex) -> bool {
        self.cold_blocks.contains(&block)
    }

    /// Determine whether the given lowered block index is an indirect branch
    /// target.
    pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool {
        self.indirect_branch_targets.contains(&block)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::flowgraph::ControlFlowGraph;
    use crate::ir::UserFuncName;
    use crate::ir::types::*;
    use crate::ir::{AbiParam, InstBuilder, Signature};
    use crate::isa::CallConv;

    fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder {
        assert!(n_blocks > 0);

        let name = UserFuncName::testcase("test0");
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        let mut func = Function::with_name_signature(name, sig);
        let blocks = (0..n_blocks)
            .map(|i| {
                let bb = func.dfg.make_block();
                assert!(bb.as_u32() == i as u32);
                bb
            })
            .collect::<Vec<_>>();

        let arg0 = func.dfg.append_block_param(blocks[0], I32);

        let mut pos = FuncCursor::new(&mut func);

        let mut edge = 0;
        for i in 0..n_blocks {
            pos.insert_block(blocks[i]);
            let mut succs = vec![];
            while edge < edges.len() && edges[edge].0 == i {
                succs.push(edges[edge].1);
                edge += 1;
            }
            if succs.len() == 0 {
                pos.ins().return_(&[arg0]);
            } else if succs.len() == 1 {
                pos.ins().jump(blocks[succs[0]], &[]);
            } else if succs.len() == 2 {
                pos.ins()
                    .brif(arg0, blocks[succs[0]], &[], blocks[succs[1]], &[]);
            } else {
                panic!("Too many successors");
            }
        }

        let mut cfg = ControlFlowGraph::new();
        cfg.compute(&func);
        let dom_tree = DominatorTree::with_function(&func, &cfg);

        BlockLoweringOrder::new(&func, &dom_tree, &mut Default::default())
    }

    #[test]
    fn test_blockorder_diamond() {
        let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);

        // This test case doesn't need to introduce any critical edges, as all regalloc allocations
        // can sit on either the entry or exit of blocks 1 and 2.
        assert_eq!(order.lowered_order.len(), 4);
    }
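
    // A minimal straight-line case (a small sanity check added in the spirit
    // of the tests above): with a single out-edge per block there are no
    // critical edges, so the lowered order is exactly the original blocks in
    // RPO.
    #[test]
    fn test_blockorder_straightline() {
        let order = build_test_func(2, &[(0, 1)]);

        assert_eq!(order.lowered_order.len(), 2);
        assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0);
        assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 1);
    }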

    #[test]
    fn test_blockorder_critedge() {
        //     0
        //    / \
        //   1   2
        //  / \   \
        // 3   4   |
        // |\ _|___|
        // | \/ |
        // | /\ |
        // 5    6
        //
        // (3 -> 5 and 3 -> 6 are critical edges and must be split)
        //
        let order = build_test_func(
            7,
            &[
                (0, 1),
                (0, 2),
                (1, 3),
                (1, 4),
                (2, 5),
                (3, 5),
                (3, 6),
                (4, 6),
            ],
        );

        assert_eq!(order.lowered_order.len(), 9);
        println!("ordered = {:?}", order.lowered_order);

        // block 0
        assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0);
        assert!(order.lowered_order[0].in_edge().is_none());
        assert!(order.lowered_order[0].out_edge().is_none());

        // block 2
        assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 2);
        assert!(order.lowered_order[1].in_edge().is_none());
        assert!(order.lowered_order[1].out_edge().is_none());

        // block 1
        assert_eq!(order.lowered_order[2].orig_block().unwrap().as_u32(), 1);
        assert!(order.lowered_order[2].in_edge().is_none());
        assert!(order.lowered_order[2].out_edge().is_none());

        // block 4
        assert_eq!(order.lowered_order[3].orig_block().unwrap().as_u32(), 4);
        assert!(order.lowered_order[3].in_edge().is_none());
        assert!(order.lowered_order[3].out_edge().is_none());

        // block 3
        assert_eq!(order.lowered_order[4].orig_block().unwrap().as_u32(), 3);
        assert!(order.lowered_order[4].in_edge().is_none());
        assert!(order.lowered_order[4].out_edge().is_none());

        // critical edge 3 -> 5
        assert!(order.lowered_order[5].orig_block().is_none());
        assert_eq!(order.lowered_order[5].in_edge().unwrap().as_u32(), 3);
        assert_eq!(order.lowered_order[5].out_edge().unwrap().as_u32(), 5);

        // critical edge 3 -> 6
        assert!(order.lowered_order[6].orig_block().is_none());
        assert_eq!(order.lowered_order[6].in_edge().unwrap().as_u32(), 3);
        assert_eq!(order.lowered_order[6].out_edge().unwrap().as_u32(), 6);

        // block 6
        assert_eq!(order.lowered_order[7].orig_block().unwrap().as_u32(), 6);
        assert!(order.lowered_order[7].in_edge().is_none());
        assert!(order.lowered_order[7].out_edge().is_none());

        // block 5
        assert_eq!(order.lowered_order[8].orig_block().unwrap().as_u32(), 5);
        assert!(order.lowered_order[8].in_edge().is_none());
        assert!(order.lowered_order[8].out_edge().is_none());
    }
}