// Path: blob/main/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
// 35266 views
//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a pass that removes irreducible control flow.
/// Irreducible control flow means multiple-entry loops, which this pass
/// transforms to have a single entry.
///
/// Note that LLVM has a generic pass that lowers irreducible control flow, but
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
/// The big picture: We recursively process each "region", defined as a group
/// of blocks with a single entry and no branches back to that entry. A region
/// may be the entire function body, or the inner part of a loop, i.e., the
/// loop's body without branches back to the loop entry. In each region we fix
/// up multi-entry loops by adding a new block that can dispatch to each of the
/// loop entries, based on the value of a label "helper" variable, and we
/// replace direct branches to the entries with assignments to the label
/// variable and a branch to the dispatch block. Then the dispatch block is the
/// single entry in the loop containing the previous multiple entries. After
/// ensuring all the loops in a region are reducible, we recurse into them. The
/// total time complexity of this pass is:
///
///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
///     NumLoops * NumLoops)
///
/// This pass is similar to what the Relooper [1] does. Both identify looping
/// code that requires multiple entries, and resolve it in a similar way (in
/// Relooper terminology, we implement a Multiple shape in a Loop shape).
Note35/// also that like the Relooper, we implement a "minimal" intervention: we only36/// use the "label" helper for the blocks we absolutely must and no others. We37/// also prioritize code size and do not duplicate code in order to resolve38/// irreducibility. The graph algorithms for finding loops and entries and so39/// forth are also similar to the Relooper. The main differences between this40/// pass and the Relooper are:41///42/// * We just care about irreducibility, so we just look at loops.43/// * The Relooper emits structured control flow (with ifs etc.), while we44/// emit a CFG.45///46/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In47/// Proceedings of the ACM international conference companion on Object oriented48/// programming systems languages and applications companion (SPLASH '11). ACM,49/// New York, NY, USA, 301-312. DOI=10.1145/2048147.204822450/// http://doi.acm.org/10.1145/2048147.204822451///52//===----------------------------------------------------------------------===//5354#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"55#include "WebAssembly.h"56#include "WebAssemblySubtarget.h"57#include "llvm/CodeGen/MachineFunctionPass.h"58#include "llvm/CodeGen/MachineInstrBuilder.h"59#include "llvm/Support/Debug.h"60using namespace llvm;6162#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"6364namespace {6566using BlockVector = SmallVector<MachineBasicBlock *, 4>;67using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;6869static BlockVector getSortedEntries(const BlockSet &Entries) {70BlockVector SortedEntries(Entries.begin(), Entries.end());71llvm::sort(SortedEntries,72[](const MachineBasicBlock *A, const MachineBasicBlock *B) {73auto ANum = A->getNumber();74auto BNum = B->getNumber();75return ANum < BNum;76});77return SortedEntries;78}7980// Calculates reachability in a region. 
Ignores branches to blocks outside of81// the region, and ignores branches to the region entry (for the case where82// the region is the inner part of a loop).83class ReachabilityGraph {84public:85ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)86: Entry(Entry), Blocks(Blocks) {87#ifndef NDEBUG88// The region must have a single entry.89for (auto *MBB : Blocks) {90if (MBB != Entry) {91for (auto *Pred : MBB->predecessors()) {92assert(inRegion(Pred));93}94}95}96#endif97calculate();98}99100bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {101assert(inRegion(From) && inRegion(To));102auto I = Reachable.find(From);103if (I == Reachable.end())104return false;105return I->second.count(To);106}107108// "Loopers" are blocks that are in a loop. We detect these by finding blocks109// that can reach themselves.110const BlockSet &getLoopers() const { return Loopers; }111112// Get all blocks that are loop entries.113const BlockSet &getLoopEntries() const { return LoopEntries; }114115// Get all blocks that enter a particular loop from outside.116const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {117assert(inRegion(LoopEntry));118auto I = LoopEnterers.find(LoopEntry);119assert(I != LoopEnterers.end());120return I->second;121}122123private:124MachineBasicBlock *Entry;125const BlockSet &Blocks;126127BlockSet Loopers, LoopEntries;128DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;129130bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }131132// Maps a block to all the other blocks it can reach.133DenseMap<MachineBasicBlock *, BlockSet> Reachable;134135void calculate() {136// Reachability computation work list. 
Contains pairs of recent additions137// (A, B) where we just added a link A => B.138using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;139SmallVector<BlockPair, 4> WorkList;140141// Add all relevant direct branches.142for (auto *MBB : Blocks) {143for (auto *Succ : MBB->successors()) {144if (Succ != Entry && inRegion(Succ)) {145Reachable[MBB].insert(Succ);146WorkList.emplace_back(MBB, Succ);147}148}149}150151while (!WorkList.empty()) {152MachineBasicBlock *MBB, *Succ;153std::tie(MBB, Succ) = WorkList.pop_back_val();154assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));155if (MBB != Entry) {156// We recently added MBB => Succ, and that means we may have enabled157// Pred => MBB => Succ.158for (auto *Pred : MBB->predecessors()) {159if (Reachable[Pred].insert(Succ).second) {160WorkList.emplace_back(Pred, Succ);161}162}163}164}165166// Blocks that can return to themselves are in a loop.167for (auto *MBB : Blocks) {168if (canReach(MBB, MBB)) {169Loopers.insert(MBB);170}171}172assert(!Loopers.count(Entry));173174// Find the loop entries - loopers reachable from blocks not in that loop -175// and those outside blocks that reach them, the "loop enterers".176for (auto *Looper : Loopers) {177for (auto *Pred : Looper->predecessors()) {178// Pred can reach Looper. 
If Looper can reach Pred, it is in the loop;179// otherwise, it is a block that enters into the loop.180if (!canReach(Looper, Pred)) {181LoopEntries.insert(Looper);182LoopEnterers[Looper].insert(Pred);183}184}185}186}187};188189// Finds the blocks in a single-entry loop, given the loop entry and the190// list of blocks that enter the loop.191class LoopBlocks {192public:193LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)194: Entry(Entry), Enterers(Enterers) {195calculate();196}197198BlockSet &getBlocks() { return Blocks; }199200private:201MachineBasicBlock *Entry;202const BlockSet &Enterers;203204BlockSet Blocks;205206void calculate() {207// Going backwards from the loop entry, if we ignore the blocks entering208// from outside, we will traverse all the blocks in the loop.209BlockVector WorkList;210BlockSet AddedToWorkList;211Blocks.insert(Entry);212for (auto *Pred : Entry->predecessors()) {213if (!Enterers.count(Pred)) {214WorkList.push_back(Pred);215AddedToWorkList.insert(Pred);216}217}218219while (!WorkList.empty()) {220auto *MBB = WorkList.pop_back_val();221assert(!Enterers.count(MBB));222if (Blocks.insert(MBB).second) {223for (auto *Pred : MBB->predecessors()) {224if (AddedToWorkList.insert(Pred).second)225WorkList.push_back(Pred);226}227}228}229}230};231232class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {233StringRef getPassName() const override {234return "WebAssembly Fix Irreducible Control Flow";235}236237bool runOnMachineFunction(MachineFunction &MF) override;238239bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,240MachineFunction &MF);241242void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,243MachineFunction &MF, const ReachabilityGraph &Graph);244245public:246static char ID; // Pass identification, replacement for typeid247WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}248};249250bool WebAssemblyFixIrreducibleControlFlow::processRegion(251MachineBasicBlock *Entry, 
BlockSet &Blocks, MachineFunction &MF) {252bool Changed = false;253// Remove irreducibility before processing child loops, which may take254// multiple iterations.255while (true) {256ReachabilityGraph Graph(Entry, Blocks);257258bool FoundIrreducibility = false;259260for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) {261// Find mutual entries - all entries which can reach this one, and262// are reached by it (that always includes LoopEntry itself). All mutual263// entries must be in the same loop, so if we have more than one, then we264// have irreducible control flow.265//266// (Note that we need to sort the entries here, as otherwise the order can267// matter: being mutual is a symmetric relationship, and each set of268// mutuals will be handled properly no matter which we see first. However,269// there can be multiple disjoint sets of mutuals, and which we process270// first changes the output.)271//272// Note that irreducibility may involve inner loops, e.g. imagine A273// starts one loop, and it has B inside it which starts an inner loop.274// If we add a branch from all the way on the outside to B, then in a275// sense B is no longer an "inner" loop, semantically speaking. We will276// fix that irreducibility by adding a block that dispatches to either277// either A or B, so B will no longer be an inner loop in our output.278// (A fancier approach might try to keep it as such.)279//280// Note that we still need to recurse into inner loops later, to handle281// the case where the irreducibility is entirely nested - we would not282// be able to identify that at this point, since the enclosing loop is283// a group of blocks all of whom can reach each other. 
(We'll see the284// irreducibility after removing branches to the top of that enclosing285// loop.)286BlockSet MutualLoopEntries;287MutualLoopEntries.insert(LoopEntry);288for (auto *OtherLoopEntry : Graph.getLoopEntries()) {289if (OtherLoopEntry != LoopEntry &&290Graph.canReach(LoopEntry, OtherLoopEntry) &&291Graph.canReach(OtherLoopEntry, LoopEntry)) {292MutualLoopEntries.insert(OtherLoopEntry);293}294}295296if (MutualLoopEntries.size() > 1) {297makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);298FoundIrreducibility = true;299Changed = true;300break;301}302}303// Only go on to actually process the inner loops when we are done304// removing irreducible control flow and changing the graph. Modifying305// the graph as we go is possible, and that might let us avoid looking at306// the already-fixed loops again if we are careful, but all that is307// complex and bug-prone. Since irreducible loops are rare, just starting308// another iteration is best.309if (FoundIrreducibility) {310continue;311}312313for (auto *LoopEntry : Graph.getLoopEntries()) {314LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));315// Each of these calls to processRegion may change the graph, but are316// guaranteed not to interfere with each other. The only changes we make317// to the graph are to add blocks on the way to a loop entry. As the318// loops are disjoint, that means we may only alter branches that exit319// another loop, which are ignored when recursing into that other loop320// anyhow.321if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {322Changed = true;323}324}325326return Changed;327}328}329330// Given a set of entries to a single loop, create a single entry for that331// loop by creating a dispatch block for them, routing control flow using332// a helper variable. Also updates Blocks with any new blocks created, so333// that we properly track all the blocks in the region. 
But this does not update334// ReachabilityGraph; this will be updated in the caller of this function as335// needed.336void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(337BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,338const ReachabilityGraph &Graph) {339assert(Entries.size() >= 2);340341// Sort the entries to ensure a deterministic build.342BlockVector SortedEntries = getSortedEntries(Entries);343344#ifndef NDEBUG345for (auto *Block : SortedEntries)346assert(Block->getNumber() != -1);347if (SortedEntries.size() > 1) {348for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;349++I) {350auto ANum = (*I)->getNumber();351auto BNum = (*(std::next(I)))->getNumber();352assert(ANum != BNum);353}354}355#endif356357// Create a dispatch block which will contain a jump table to the entries.358MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();359MF.insert(MF.end(), Dispatch);360Blocks.insert(Dispatch);361362// Add the jump table.363const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();364MachineInstrBuilder MIB =365BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));366367// Add the register which will be used to tell the jump table which block to368// jump to.369MachineRegisterInfo &MRI = MF.getRegInfo();370Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);371MIB.addReg(Reg);372373// Compute the indices in the superheader, one for each bad block, and374// add them as successors.375DenseMap<MachineBasicBlock *, unsigned> Indices;376for (auto *Entry : SortedEntries) {377auto Pair = Indices.insert(std::make_pair(Entry, 0));378assert(Pair.second);379380unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;381Pair.first->second = Index;382383MIB.addMBB(Entry);384Dispatch->addSuccessor(Entry);385}386387// Rewrite the problematic successors for every block that wants to reach388// the bad blocks. 
For simplicity, we just introduce a new block for every389// edge we need to rewrite. (Fancier things are possible.)390391BlockVector AllPreds;392for (auto *Entry : SortedEntries) {393for (auto *Pred : Entry->predecessors()) {394if (Pred != Dispatch) {395AllPreds.push_back(Pred);396}397}398}399400// This set stores predecessors within this loop.401DenseSet<MachineBasicBlock *> InLoop;402for (auto *Pred : AllPreds) {403for (auto *Entry : Pred->successors()) {404if (!Entries.count(Entry))405continue;406if (Graph.canReach(Entry, Pred)) {407InLoop.insert(Pred);408break;409}410}411}412413// Record if each entry has a layout predecessor. This map stores414// <<loop entry, Predecessor is within the loop?>, layout predecessor>415DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>416EntryToLayoutPred;417for (auto *Pred : AllPreds) {418bool PredInLoop = InLoop.count(Pred);419for (auto *Entry : Pred->successors())420if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))421EntryToLayoutPred[{Entry, PredInLoop}] = Pred;422}423424// We need to create at most two routing blocks per entry: one for425// predecessors outside the loop and one for predecessors inside the loop.426// This map stores427// <<loop entry, Predecessor is within the loop?>, routing block>428DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>429Map;430for (auto *Pred : AllPreds) {431bool PredInLoop = InLoop.count(Pred);432for (auto *Entry : Pred->successors()) {433if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))434continue;435// If there exists a layout predecessor of this entry and this predecessor436// is not that, we rather create a routing block after that layout437// predecessor to save a branch.438if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))439if (OtherPred != Pred)440continue;441442// This is a successor we need to rewrite.443MachineBasicBlock *Routing = 
MF.CreateMachineBasicBlock();444MF.insert(Pred->isLayoutSuccessor(Entry)445? MachineFunction::iterator(Entry)446: MF.end(),447Routing);448Blocks.insert(Routing);449450// Set the jump table's register of the index of the block we wish to451// jump to, and jump to the jump table.452BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)453.addImm(Indices[Entry]);454BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);455Routing->addSuccessor(Dispatch);456Map[{Entry, PredInLoop}] = Routing;457}458}459460for (auto *Pred : AllPreds) {461bool PredInLoop = InLoop.count(Pred);462// Remap the terminator operands and the successor list.463for (MachineInstr &Term : Pred->terminators())464for (auto &Op : Term.explicit_uses())465if (Op.isMBB() && Indices.count(Op.getMBB()))466Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);467468for (auto *Succ : Pred->successors()) {469if (!Entries.count(Succ))470continue;471auto *Routing = Map[{Succ, PredInLoop}];472Pred->replaceSuccessor(Succ, Routing);473}474}475476// Create a fake default label, because br_table requires one.477MIB.addMBB(MIB.getInstr()478->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)479.getMBB());480}481482} // end anonymous namespace483484char WebAssemblyFixIrreducibleControlFlow::ID = 0;485INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,486"Removes irreducible control flow", false, false)487488FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {489return new WebAssemblyFixIrreducibleControlFlow();490}491492// Test whether the given register has an ARGUMENT def.493static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {494for (const auto &Def : MRI.def_instructions(Reg))495if (WebAssembly::isArgument(Def.getOpcode()))496return true;497return false;498}499500// Add a register definition with IMPLICIT_DEFs for every register to cover for501// register uses that don't have defs in every possible path.502// TODO: This is fairly heavy-handed; 
find a better approach.503static void addImplicitDefs(MachineFunction &MF) {504const MachineRegisterInfo &MRI = MF.getRegInfo();505const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();506MachineBasicBlock &Entry = *MF.begin();507for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {508Register Reg = Register::index2VirtReg(I);509510// Skip unused registers.511if (MRI.use_nodbg_empty(Reg))512continue;513514// Skip registers that have an ARGUMENT definition.515if (hasArgumentDef(Reg, MRI))516continue;517518BuildMI(Entry, Entry.begin(), DebugLoc(),519TII.get(WebAssembly::IMPLICIT_DEF), Reg);520}521522// Move ARGUMENT_* instructions to the top of the entry block, so that their523// liveness reflects the fact that these really are live-in values.524for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {525if (WebAssembly::isArgument(MI.getOpcode())) {526MI.removeFromParent();527Entry.insert(Entry.begin(), &MI);528}529}530}531532bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(533MachineFunction &MF) {534LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"535"********** Function: "536<< MF.getName() << '\n');537538// Start the recursive process on the entire function body.539BlockSet AllBlocks;540for (auto &MBB : MF) {541AllBlocks.insert(&MBB);542}543544if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {545// We rewrote part of the function; recompute relevant things.546MF.RenumberBlocks();547// Now we've inserted dispatch blocks, some register uses can have incoming548// paths without a def. For example, before this pass register %a was549// defined in BB1 and used in BB2, and there was only one path from BB1 and550// BB2. But if this pass inserts a dispatch block having multiple551// predecessors between the two BBs, now there are paths to BB2 without552// visiting BB1, and %a's use in BB2 is not dominated by its def. 
Adding553// IMPLICIT_DEFs to all regs is one simple way to fix it.554addImplicitDefs(MF);555return true;556}557558return false;559}560561562