Path: blob/main/cranelift/codegen/src/machinst/blockorder.rs
//! Computation of basic block order in emitted code.
//!
//! This module handles the translation from CLIF BBs to VCode BBs.
//!
//! The basic idea is that we compute a sequence of "lowered blocks" that
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
//! moves for phi-nodes on every block-to-block transfer; these blocks always
//! conceptually exist, but may be merged with an "original" CLIF block (and
//! hence not actually exist; this is equivalent to inserting the blocks only on
//! critical edges).
//!
//! In other words, starting from a CFG like this (where each "CLIF block" and
//! "(edge N->M)" is a separate basic block):
//!
//! ```plain
//!
//!              CLIF block 0
//!               /       \
//!        (edge 0->1)   (edge 0->2)
//!              |            |
//!        CLIF block 1   CLIF block 2
//!               \          /
//!          (edge 1->3)  (edge 2->3)
//!                 \       /
//!                CLIF block 3
//! ```
//!
//! We can produce a CFG of lowered blocks like so:
//!
//! ```plain
//!            +--------------+
//!            | CLIF block 0 |
//!            +--------------+
//!              /          \
//!   +--------------+    +--------------+
//!   | (edge 0->1)  |    | (edge 0->2)  |
//!   | CLIF block 1 |    | CLIF block 2 |
//!   | (edge 1->3)  |    | (edge 2->3)  |
//!   +--------------+    +--------------+
//!              \          /
//!               \        /
//!             +------------+
//!             |CLIF block 3|
//!             +------------+
//! ```
//!
//! Each `LoweredBlock` names just an original CLIF block, or just an edge block.
//!
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
//! (never actually materialized, just defined by a "successors" function), and
//! compute the reverse postorder.
//!
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
//! example, consider any information about whether edge blocks will actually
//! have content, because this computation happens as part of lowering *before*
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
//! particular edge. But it works relatively well and is conceptually simple.
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
//! branch editing that in practice elides empty blocks and simplifies some of
//! the other redundancies that this scheme produces.
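//!
//! As a rough usage sketch (hypothetical driver code; the real call sites live
//! in the lowering and VCode-emission pipeline), a consumer of the computed
//! order walks the lowered blocks and queries their successors:
//!
//! ```ignore
//! let order = BlockLoweringOrder::new(&func, &domtree, &mut ctrl_plane);
//! for (ix, lb) in order.lowered_order().iter().enumerate() {
//!     let bindex = BlockIndex::new(ix);
//!     let (_terminator, succs) = order.succ_indices(bindex);
//!     // Lower `lb` (an original CLIF block or an inserted edge block),
//!     // then emit branches to each lowered successor in `succs`.
//! }
//! ```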

use crate::dominator_tree::DominatorTree;
use crate::entity::SecondaryMap;
use crate::inst_predicates::visit_block_succs;
use crate::ir::{Block, Function, Inst, Opcode};
use crate::{machinst::*, trace};
use rustc_hash::{FxHashMap, FxHashSet};

/// Mapping from CLIF BBs to VCode BBs.
#[derive(Debug)]
pub struct BlockLoweringOrder {
    /// Lowered blocks, in BlockIndex order. Each block is some combination of
    /// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
    /// see [LoweredBlock] for details.
    lowered_order: Vec<LoweredBlock>,
    /// BlockIndex values for successors for all lowered blocks, indexing `lowered_order`.
    lowered_succ_indices: Vec<BlockIndex>,
    /// Ranges in `lowered_succ_indices` giving the successor lists for each lowered
    /// block. Indexed by lowering-order index (`BlockIndex`).
    lowered_succ_ranges: Vec<(Option<Inst>, std::ops::Range<usize>)>,
    /// BlockIndex for each original Block.
    blockindex_by_block: SecondaryMap<Block, BlockIndex>,
    /// Cold blocks. These blocks are not reordered in the
    /// `lowered_order` above; the lowered order must respect RPO
    /// (uses after defs) in order for lowering to be
    /// correct. Instead, this set is used to provide `is_cold()`,
    /// which is used by VCode emission to sink the blocks at the last
    /// moment (when we actually emit bytes into the MachBuffer).
    cold_blocks: FxHashSet<BlockIndex>,
    /// Lowered blocks that are indirect branch targets.
    indirect_branch_targets: FxHashSet<BlockIndex>,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoweredBlock {
    /// Block in original CLIF.
    Orig {
        /// Original CLIF block.
        block: Block,
    },

    /// Critical edge between two CLIF blocks.
    CriticalEdge {
        /// The predecessor block.
        pred: Block,

        /// The successor block.
        succ: Block,

        /// The index of this branch in the successor edges from `pred`, following the same
        /// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish
        /// multiple edges between the same CLIF blocks.
        succ_idx: u32,
    },
}

impl LoweredBlock {
    /// Unwrap an `Orig` block.
    pub fn orig_block(&self) -> Option<Block> {
        match self {
            &LoweredBlock::Orig { block } => Some(block),
            &LoweredBlock::CriticalEdge { .. } => None,
        }
    }

    /// The associated in-edge predecessor, if this is a critical edge.
    #[cfg(test)]
    pub fn in_edge(&self) -> Option<Block> {
        match self {
            &LoweredBlock::CriticalEdge { pred, .. } => Some(pred),
            &LoweredBlock::Orig { .. } => None,
        }
    }

    /// The associated out-edge successor, if this is a critical edge.
    pub fn out_edge(&self) -> Option<Block> {
        match self {
            &LoweredBlock::CriticalEdge { succ, .. } => Some(succ),
            &LoweredBlock::Orig { .. } => None,
        }
    }
}

impl BlockLoweringOrder {
    /// Compute and return a lowered block order for `f`.
    pub fn new(
        f: &Function,
        domtree: &DominatorTree,
        ctrl_plane: &mut ControlPlane,
    ) -> BlockLoweringOrder {
        trace!("BlockLoweringOrder: function body {:?}", f);

        // Step 1: compute the in-edge and out-edge count of every block.
        let mut block_in_count = SecondaryMap::with_default(0);
        let mut block_out_count = SecondaryMap::with_default(0);

        // Block successors are stored as `LoweredBlocks` to simplify the construction of
        // `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are
        // updated to be `CriticalEdge` when those cases are identified in step 2 below.
        let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new();
        let mut block_succ_range = SecondaryMap::with_default(0..0);

        let mut indirect_branch_target_clif_blocks = FxHashSet::default();

        for block in f.layout.blocks() {
            let start = block_succs.len();
            visit_block_succs(f, block, |_, succ, from_table| {
                block_out_count[block] += 1;
                block_in_count[succ] += 1;
                block_succs.push(LoweredBlock::Orig { block: succ });

                if from_table {
                    indirect_branch_target_clif_blocks.insert(succ);
                }
            });

            // Ensure that blocks terminated by br_table instructions
            // with an empty jump table are still treated like
            // conditional blocks from the point of view of critical
            // edge splitting. Also do the same for TryCall and
            // TryCallIndirect: we cannot have edge moves before the
            // branch, even if they have empty handler tables and thus
            // would otherwise have only one successor.
            if let Some(inst) = f.layout.last_inst(block) {
                match f.dfg.insts[inst].opcode() {
                    Opcode::BrTable | Opcode::TryCall | Opcode::TryCallIndirect => {
                        block_out_count[block] = block_out_count[block].max(2);
                    }
                    _ => {}
                }
            }

            let end = block_succs.len();
            block_succ_range[block] = start..end;
        }
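
        // An edge `pred -> succ` is "critical" exactly when
        // `block_out_count[pred] > 1` and `block_in_count[succ] > 1`: neither
        // endpoint can then host the edge's parallel moves on its own. In the
        // diamond CFG from the module docs (0 -> {1, 2}, {1, 2} -> 3) no edge
        // qualifies, so the lowered order is just the four original blocks.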

        // Step 2: walk the postorder from the domtree in reverse to produce our desired node
        // lowering order, identifying critical edges to split along the way.

        let mut lowered_order = Vec::new();
        let mut blockindex_by_block = SecondaryMap::with_default(BlockIndex::invalid());
        for &block in domtree.cfg_rpo() {
            let idx = BlockIndex::new(lowered_order.len());
            lowered_order.push(LoweredBlock::Orig { block });
            blockindex_by_block[block] = idx;

            if block_out_count[block] > 1 {
                let range = block_succ_range[block].clone();

                // If chaos-mode is enabled in the control plane, iterate over
                // the successors in an arbitrary order, which should have no
                // impact on correctness. The order of the blocks is generally
                // relevant: Uses must be seen before defs for dead-code
                // elimination.
                let succs = ctrl_plane.shuffled(block_succs[range].iter_mut().enumerate());

                for (succ_ix, lb) in succs {
                    let succ = lb.orig_block().unwrap();
                    if block_in_count[succ] > 1 {
                        // Mutate the successor to be a critical edge, as `block` has multiple
                        // edges leaving it, and `succ` has multiple edges entering it.
                        *lb = LoweredBlock::CriticalEdge {
                            pred: block,
                            succ,
                            succ_idx: succ_ix as u32,
                        };
                        lowered_order.push(*lb);
                    }
                }
            }
        }
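
        // Reverse mapping from lowered block to its index in `lowered_order`.
        // Keying on the full `LoweredBlock` value (including `succ_idx` for
        // critical edges) keeps parallel edges between the same pair of CLIF
        // blocks distinct.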
        let lb_to_bindex = FxHashMap::from_iter(
            lowered_order
                .iter()
                .enumerate()
                .map(|(i, &lb)| (lb, BlockIndex::new(i))),
        );

        // Step 3: build the successor tables given the lowering order. We can't perform this step
        // during the creation of `lowering_order`, as we need `lb_to_bindex` to be fully populated
        // first.
        let mut lowered_succ_indices = Vec::new();
        let mut cold_blocks = FxHashSet::default();
        let mut indirect_branch_targets = FxHashSet::default();
        let lowered_succ_ranges =
            Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| {
                let bindex = BlockIndex::new(ix);
                let start = lowered_succ_indices.len();
                let opt_inst = match lb {
                    // Block successors are pulled directly over, as they'll have been mutated when
                    // determining the block order already.
                    &LoweredBlock::Orig { block } => {
                        let range = block_succ_range[block].clone();
                        lowered_succ_indices
                            .extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb]));

                        if f.layout.is_cold(block) {
                            cold_blocks.insert(bindex);
                        }

                        if indirect_branch_target_clif_blocks.contains(&block) {
                            indirect_branch_targets.insert(bindex);
                        }

                        let last = f.layout.last_inst(block).unwrap();
                        let opcode = f.dfg.insts[last].opcode();

                        assert!(opcode.is_terminator());

                        opcode.is_branch().then_some(last)
                    }

                    // Critical edges won't have successor information in block_succ_range, but
                    // they only have a single known successor to record anyway.
                    &LoweredBlock::CriticalEdge { succ, .. } => {
                        let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }];
                        lowered_succ_indices.push(succ_index);

                        // Edges inherit indirect branch and cold block metadata from their
                        // successor.

                        if f.layout.is_cold(succ) {
                            cold_blocks.insert(bindex);
                        }

                        if indirect_branch_target_clif_blocks.contains(&succ) {
                            indirect_branch_targets.insert(bindex);
                        }

                        None
                    }
                };
                let end = lowered_succ_indices.len();
                (opt_inst, start..end)
            }));

        let result = BlockLoweringOrder {
            lowered_order,
            lowered_succ_indices,
            lowered_succ_ranges,
            blockindex_by_block,
            cold_blocks,
            indirect_branch_targets,
        };

        trace!("BlockLoweringOrder: {:#?}", result);
        result
    }

    /// Get the lowered order of blocks.
    pub fn lowered_order(&self) -> &[LoweredBlock] {
        &self.lowered_order[..]
    }

    /// Get the BlockIndex, if any, for a given Block.
    ///
    /// The result will be `None` if the given Block is unreachable
    /// (and thus does not appear in the lowered order).
    pub fn lowered_index_for_block(&self, block: Block) -> Option<BlockIndex> {
        let idx = self.blockindex_by_block[block];
        if idx.is_valid() { Some(idx) } else { None }
    }

    /// Get the successor indices for a lowered block.
    pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) {
        let (opt_inst, range) = &self.lowered_succ_ranges[block.index()];
        (*opt_inst, &self.lowered_succ_indices[range.clone()])
    }

    /// Determine whether the given lowered-block index is cold.
    pub fn is_cold(&self, block: BlockIndex) -> bool {
        self.cold_blocks.contains(&block)
    }

    /// Determine whether the given lowered block index is an indirect branch
    /// target.
    pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool {
        self.indirect_branch_targets.contains(&block)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::flowgraph::ControlFlowGraph;
    use crate::ir::UserFuncName;
    use crate::ir::types::*;
    use crate::ir::{AbiParam, InstBuilder, Signature};
    use crate::isa::CallConv;

    fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder {
        assert!(n_blocks > 0);

        let name = UserFuncName::testcase("test0");
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        let mut func = Function::with_name_signature(name, sig);
        let blocks = (0..n_blocks)
            .map(|i| {
                let bb = func.dfg.make_block();
                assert!(bb.as_u32() == i as u32);
                bb
            })
            .collect::<Vec<_>>();

        let arg0 = func.dfg.append_block_param(blocks[0], I32);

        let mut pos = FuncCursor::new(&mut func);

        let mut edge = 0;
        for i in 0..n_blocks {
            pos.insert_block(blocks[i]);
            let mut succs = vec![];
            while edge < edges.len() && edges[edge].0 == i {
                succs.push(edges[edge].1);
                edge += 1;
            }
            if succs.len() == 0 {
                pos.ins().return_(&[arg0]);
            } else if succs.len() == 1 {
                pos.ins().jump(blocks[succs[0]], &[]);
            } else if succs.len() == 2 {
                pos.ins()
                    .brif(arg0, blocks[succs[0]], &[], blocks[succs[1]], &[]);
            } else {
                panic!("Too many successors");
            }
        }

        let mut cfg = ControlFlowGraph::new();
        cfg.compute(&func);
        let dom_tree = DominatorTree::with_function(&func, &cfg);

        BlockLoweringOrder::new(&func, &dom_tree, &mut Default::default())
    }

    #[test]
    fn test_blockorder_diamond() {
        let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);

        // This test case doesn't need to introduce any critical edges, as all regalloc allocations
        // can sit on either the entry or exit of blocks 1 and 2.
        assert_eq!(order.lowered_order.len(), 4);
    }

    #[test]
    fn test_blockorder_critedge() {
        //      0
        //     / \
        //    1   2
        //   / \   \
        //  3   4   |
        //  |\  _|__|
        //  | \/ |
        //  | /\ |
        //  5    6
        //
        // (3 -> 5, and 3 -> 6 are critical edges and must be split)
        //
        let order = build_test_func(
            7,
            &[
                (0, 1),
                (0, 2),
                (1, 3),
                (1, 4),
                (2, 5),
                (3, 5),
                (3, 6),
                (4, 6),
            ],
        );

        assert_eq!(order.lowered_order.len(), 9);
        println!("ordered = {:?}", order.lowered_order);

        // block 0
        assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0);
        assert!(order.lowered_order[0].in_edge().is_none());
        assert!(order.lowered_order[0].out_edge().is_none());

        // block 2
        assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 2);
        assert!(order.lowered_order[1].in_edge().is_none());
        assert!(order.lowered_order[1].out_edge().is_none());

        // block 1
        assert_eq!(order.lowered_order[2].orig_block().unwrap().as_u32(), 1);
        assert!(order.lowered_order[2].in_edge().is_none());
        assert!(order.lowered_order[2].out_edge().is_none());

        // block 4
        assert_eq!(order.lowered_order[3].orig_block().unwrap().as_u32(), 4);
        assert!(order.lowered_order[3].in_edge().is_none());
        assert!(order.lowered_order[3].out_edge().is_none());

        // block 3
        assert_eq!(order.lowered_order[4].orig_block().unwrap().as_u32(), 3);
        assert!(order.lowered_order[4].in_edge().is_none());
        assert!(order.lowered_order[4].out_edge().is_none());

        // critical edge 3 -> 5
        assert!(order.lowered_order[5].orig_block().is_none());
        assert_eq!(order.lowered_order[5].in_edge().unwrap().as_u32(), 3);
        assert_eq!(order.lowered_order[5].out_edge().unwrap().as_u32(), 5);

        // critical edge 3 -> 6
        assert!(order.lowered_order[6].orig_block().is_none());
        assert_eq!(order.lowered_order[6].in_edge().unwrap().as_u32(), 3);
        assert_eq!(order.lowered_order[6].out_edge().unwrap().as_u32(), 6);

        // block 6
        assert_eq!(order.lowered_order[7].orig_block().unwrap().as_u32(), 6);
        assert!(order.lowered_order[7].in_edge().is_none());
        assert!(order.lowered_order[7].out_edge().is_none());

        // block 5
        assert_eq!(order.lowered_order[8].orig_block().unwrap().as_u32(), 5);
        assert!(order.lowered_order[8].in_edge().is_none());
        assert!(order.lowered_order[8].out_edge().is_none());
    }
}