CoCalc -- WebAssemblyFixIrreducibleControlFlow.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
³⁵²⁶⁶ views
1
//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file implements a pass that removes irreducible control flow.
11
/// Irreducible control flow means multiple-entry loops, which this pass
12
/// transforms to have a single entry.
13
///
14
/// Note that LLVM has a generic pass that lowers irreducible control flow, but
15
/// it linearizes control flow, turning diamonds into two triangles, which is
16
/// both unnecessary and undesirable for WebAssembly.
17
///
18
/// The big picture: We recursively process each "region", defined as a group
19
/// of blocks with a single entry and no branches back to that entry. A region
20
/// may be the entire function body, or the inner part of a loop, i.e., the
21
/// loop's body without branches back to the loop entry. In each region we fix
22
/// up multi-entry loops by adding a new block that can dispatch to each of the
23
/// loop entries, based on the value of a label "helper" variable, and we
24
/// replace direct branches to the entries with assignments to the label
25
/// variable and a branch to the dispatch block. Then the dispatch block is the
26
/// single entry in the loop containing the previous multiple entries. After
27
/// ensuring all the loops in a region are reducible, we recurse into them. The
28
/// total time complexity of this pass is:
29
///
30
///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31
///     NumLoops * NumLoops)
32
///
33
/// This pass is similar to what the Relooper [1] does. Both identify looping
34
/// code that requires multiple entries, and resolve it in a similar way (in
35
/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36
/// also that like the Relooper, we implement a "minimal" intervention: we only
37
/// use the "label" helper for the blocks we absolutely must and no others. We
38
/// also prioritize code size and do not duplicate code in order to resolve
39
/// irreducibility. The graph algorithms for finding loops and entries and so
40
/// forth are also similar to the Relooper. The main differences between this
41
/// pass and the Relooper are:
42
///
43
///  * We just care about irreducibility, so we just look at loops.
44
///  * The Relooper emits structured control flow (with ifs etc.), while we
45
///    emit a CFG.
46
///
47
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48
/// Proceedings of the ACM international conference companion on Object oriented
49
/// programming systems languages and applications companion (SPLASH '11). ACM,
50
/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51
/// http://doi.acm.org/10.1145/2048147.2048224
52
///
53
//===----------------------------------------------------------------------===//
54

55
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56
#include "WebAssembly.h"
57
#include "WebAssemblySubtarget.h"
58
#include "llvm/CodeGen/MachineFunctionPass.h"
59
#include "llvm/CodeGen/MachineInstrBuilder.h"
60
#include "llvm/Support/Debug.h"
61
using namespace llvm;
62

63
#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
64

65
namespace {
66

67
using BlockVector = SmallVector<MachineBasicBlock *, 4>;
68
using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
69

70
static BlockVector getSortedEntries(const BlockSet &Entries) {
71
  BlockVector SortedEntries(Entries.begin(), Entries.end());
72
  llvm::sort(SortedEntries,
73
             [](const MachineBasicBlock *A, const MachineBasicBlock *B) {
74
               auto ANum = A->getNumber();
75
               auto BNum = B->getNumber();
76
               return ANum < BNum;
77
             });
78
  return SortedEntries;
79
}
80

81
// Calculates reachability in a region. Ignores branches to blocks outside of
82
// the region, and ignores branches to the region entry (for the case where
83
// the region is the inner part of a loop).
84
class ReachabilityGraph {
85
public:
86
  ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
87
      : Entry(Entry), Blocks(Blocks) {
88
#ifndef NDEBUG
89
    // The region must have a single entry.
90
    for (auto *MBB : Blocks) {
91
      if (MBB != Entry) {
92
        for (auto *Pred : MBB->predecessors()) {
93
          assert(inRegion(Pred));
94
        }
95
      }
96
    }
97
#endif
98
    calculate();
99
  }
100

101
  bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
102
    assert(inRegion(From) && inRegion(To));
103
    auto I = Reachable.find(From);
104
    if (I == Reachable.end())
105
      return false;
106
    return I->second.count(To);
107
  }
108

109
  // "Loopers" are blocks that are in a loop. We detect these by finding blocks
110
  // that can reach themselves.
111
  const BlockSet &getLoopers() const { return Loopers; }
112

113
  // Get all blocks that are loop entries.
114
  const BlockSet &getLoopEntries() const { return LoopEntries; }
115

116
  // Get all blocks that enter a particular loop from outside.
117
  const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
118
    assert(inRegion(LoopEntry));
119
    auto I = LoopEnterers.find(LoopEntry);
120
    assert(I != LoopEnterers.end());
121
    return I->second;
122
  }
123

124
private:
125
  MachineBasicBlock *Entry;
126
  const BlockSet &Blocks;
127

128
  BlockSet Loopers, LoopEntries;
129
  DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
130

131
  bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
132

133
  // Maps a block to all the other blocks it can reach.
134
  DenseMap<MachineBasicBlock *, BlockSet> Reachable;
135

136
  void calculate() {
137
    // Reachability computation work list. Contains pairs of recent additions
138
    // (A, B) where we just added a link A => B.
139
    using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
140
    SmallVector<BlockPair, 4> WorkList;
141

142
    // Add all relevant direct branches.
143
    for (auto *MBB : Blocks) {
144
      for (auto *Succ : MBB->successors()) {
145
        if (Succ != Entry && inRegion(Succ)) {
146
          Reachable[MBB].insert(Succ);
147
          WorkList.emplace_back(MBB, Succ);
148
        }
149
      }
150
    }
151

152
    while (!WorkList.empty()) {
153
      MachineBasicBlock *MBB, *Succ;
154
      std::tie(MBB, Succ) = WorkList.pop_back_val();
155
      assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
156
      if (MBB != Entry) {
157
        // We recently added MBB => Succ, and that means we may have enabled
158
        // Pred => MBB => Succ.
159
        for (auto *Pred : MBB->predecessors()) {
160
          if (Reachable[Pred].insert(Succ).second) {
161
            WorkList.emplace_back(Pred, Succ);
162
          }
163
        }
164
      }
165
    }
166

167
    // Blocks that can return to themselves are in a loop.
168
    for (auto *MBB : Blocks) {
169
      if (canReach(MBB, MBB)) {
170
        Loopers.insert(MBB);
171
      }
172
    }
173
    assert(!Loopers.count(Entry));
174

175
    // Find the loop entries - loopers reachable from blocks not in that loop -
176
    // and those outside blocks that reach them, the "loop enterers".
177
    for (auto *Looper : Loopers) {
178
      for (auto *Pred : Looper->predecessors()) {
179
        // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
180
        // otherwise, it is a block that enters into the loop.
181
        if (!canReach(Looper, Pred)) {
182
          LoopEntries.insert(Looper);
183
          LoopEnterers[Looper].insert(Pred);
184
        }
185
      }
186
    }
187
  }
188
};
189

190
// Finds the blocks in a single-entry loop, given the loop entry and the
191
// list of blocks that enter the loop.
192
class LoopBlocks {
193
public:
194
  LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
195
      : Entry(Entry), Enterers(Enterers) {
196
    calculate();
197
  }
198

199
  BlockSet &getBlocks() { return Blocks; }
200

201
private:
202
  MachineBasicBlock *Entry;
203
  const BlockSet &Enterers;
204

205
  BlockSet Blocks;
206

207
  void calculate() {
208
    // Going backwards from the loop entry, if we ignore the blocks entering
209
    // from outside, we will traverse all the blocks in the loop.
210
    BlockVector WorkList;
211
    BlockSet AddedToWorkList;
212
    Blocks.insert(Entry);
213
    for (auto *Pred : Entry->predecessors()) {
214
      if (!Enterers.count(Pred)) {
215
        WorkList.push_back(Pred);
216
        AddedToWorkList.insert(Pred);
217
      }
218
    }
219

220
    while (!WorkList.empty()) {
221
      auto *MBB = WorkList.pop_back_val();
222
      assert(!Enterers.count(MBB));
223
      if (Blocks.insert(MBB).second) {
224
        for (auto *Pred : MBB->predecessors()) {
225
          if (AddedToWorkList.insert(Pred).second)
226
            WorkList.push_back(Pred);
227
        }
228
      }
229
    }
230
  }
231
};
232

233
class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
234
  StringRef getPassName() const override {
235
    return "WebAssembly Fix Irreducible Control Flow";
236
  }
237

238
  bool runOnMachineFunction(MachineFunction &MF) override;
239

240
  bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
241
                     MachineFunction &MF);
242

243
  void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
244
                           MachineFunction &MF, const ReachabilityGraph &Graph);
245

246
public:
247
  static char ID; // Pass identification, replacement for typeid
248
  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
249
};
250

251
bool WebAssemblyFixIrreducibleControlFlow::processRegion(
252
    MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
253
  bool Changed = false;
254
  // Remove irreducibility before processing child loops, which may take
255
  // multiple iterations.
256
  while (true) {
257
    ReachabilityGraph Graph(Entry, Blocks);
258

259
    bool FoundIrreducibility = false;
260

261
    for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) {
262
      // Find mutual entries - all entries which can reach this one, and
263
      // are reached by it (that always includes LoopEntry itself). All mutual
264
      // entries must be in the same loop, so if we have more than one, then we
265
      // have irreducible control flow.
266
      //
267
      // (Note that we need to sort the entries here, as otherwise the order can
268
      // matter: being mutual is a symmetric relationship, and each set of
269
      // mutuals will be handled properly no matter which we see first. However,
270
      // there can be multiple disjoint sets of mutuals, and which we process
271
      // first changes the output.)
272
      //
273
      // Note that irreducibility may involve inner loops, e.g. imagine A
274
      // starts one loop, and it has B inside it which starts an inner loop.
275
      // If we add a branch from all the way on the outside to B, then in a
276
      // sense B is no longer an "inner" loop, semantically speaking. We will
277
      // fix that irreducibility by adding a block that dispatches to either
278
      // either A or B, so B will no longer be an inner loop in our output.
279
      // (A fancier approach might try to keep it as such.)
280
      //
281
      // Note that we still need to recurse into inner loops later, to handle
282
      // the case where the irreducibility is entirely nested - we would not
283
      // be able to identify that at this point, since the enclosing loop is
284
      // a group of blocks all of whom can reach each other. (We'll see the
285
      // irreducibility after removing branches to the top of that enclosing
286
      // loop.)
287
      BlockSet MutualLoopEntries;
288
      MutualLoopEntries.insert(LoopEntry);
289
      for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
290
        if (OtherLoopEntry != LoopEntry &&
291
            Graph.canReach(LoopEntry, OtherLoopEntry) &&
292
            Graph.canReach(OtherLoopEntry, LoopEntry)) {
293
          MutualLoopEntries.insert(OtherLoopEntry);
294
        }
295
      }
296

297
      if (MutualLoopEntries.size() > 1) {
298
        makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
299
        FoundIrreducibility = true;
300
        Changed = true;
301
        break;
302
      }
303
    }
304
    // Only go on to actually process the inner loops when we are done
305
    // removing irreducible control flow and changing the graph. Modifying
306
    // the graph as we go is possible, and that might let us avoid looking at
307
    // the already-fixed loops again if we are careful, but all that is
308
    // complex and bug-prone. Since irreducible loops are rare, just starting
309
    // another iteration is best.
310
    if (FoundIrreducibility) {
311
      continue;
312
    }
313

314
    for (auto *LoopEntry : Graph.getLoopEntries()) {
315
      LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
316
      // Each of these calls to processRegion may change the graph, but are
317
      // guaranteed not to interfere with each other. The only changes we make
318
      // to the graph are to add blocks on the way to a loop entry. As the
319
      // loops are disjoint, that means we may only alter branches that exit
320
      // another loop, which are ignored when recursing into that other loop
321
      // anyhow.
322
      if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
323
        Changed = true;
324
      }
325
    }
326

327
    return Changed;
328
  }
329
}
330

331
// Given a set of entries to a single loop, create a single entry for that
332
// loop by creating a dispatch block for them, routing control flow using
333
// a helper variable. Also updates Blocks with any new blocks created, so
334
// that we properly track all the blocks in the region. But this does not update
335
// ReachabilityGraph; this will be updated in the caller of this function as
336
// needed.
337
void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
338
    BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
339
    const ReachabilityGraph &Graph) {
340
  assert(Entries.size() >= 2);
341

342
  // Sort the entries to ensure a deterministic build.
343
  BlockVector SortedEntries = getSortedEntries(Entries);
344

345
#ifndef NDEBUG
346
  for (auto *Block : SortedEntries)
347
    assert(Block->getNumber() != -1);
348
  if (SortedEntries.size() > 1) {
349
    for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
350
         ++I) {
351
      auto ANum = (*I)->getNumber();
352
      auto BNum = (*(std::next(I)))->getNumber();
353
      assert(ANum != BNum);
354
    }
355
  }
356
#endif
357

358
  // Create a dispatch block which will contain a jump table to the entries.
359
  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
360
  MF.insert(MF.end(), Dispatch);
361
  Blocks.insert(Dispatch);
362

363
  // Add the jump table.
364
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
365
  MachineInstrBuilder MIB =
366
      BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
367

368
  // Add the register which will be used to tell the jump table which block to
369
  // jump to.
370
  MachineRegisterInfo &MRI = MF.getRegInfo();
371
  Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
372
  MIB.addReg(Reg);
373

374
  // Compute the indices in the superheader, one for each bad block, and
375
  // add them as successors.
376
  DenseMap<MachineBasicBlock *, unsigned> Indices;
377
  for (auto *Entry : SortedEntries) {
378
    auto Pair = Indices.insert(std::make_pair(Entry, 0));
379
    assert(Pair.second);
380

381
    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
382
    Pair.first->second = Index;
383

384
    MIB.addMBB(Entry);
385
    Dispatch->addSuccessor(Entry);
386
  }
387

388
  // Rewrite the problematic successors for every block that wants to reach
389
  // the bad blocks. For simplicity, we just introduce a new block for every
390
  // edge we need to rewrite. (Fancier things are possible.)
391

392
  BlockVector AllPreds;
393
  for (auto *Entry : SortedEntries) {
394
    for (auto *Pred : Entry->predecessors()) {
395
      if (Pred != Dispatch) {
396
        AllPreds.push_back(Pred);
397
      }
398
    }
399
  }
400

401
  // This set stores predecessors within this loop.
402
  DenseSet<MachineBasicBlock *> InLoop;
403
  for (auto *Pred : AllPreds) {
404
    for (auto *Entry : Pred->successors()) {
405
      if (!Entries.count(Entry))
406
        continue;
407
      if (Graph.canReach(Entry, Pred)) {
408
        InLoop.insert(Pred);
409
        break;
410
      }
411
    }
412
  }
413

414
  // Record if each entry has a layout predecessor. This map stores
415
  // <<loop entry, Predecessor is within the loop?>, layout predecessor>
416
  DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
417
      EntryToLayoutPred;
418
  for (auto *Pred : AllPreds) {
419
    bool PredInLoop = InLoop.count(Pred);
420
    for (auto *Entry : Pred->successors())
421
      if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
422
        EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
423
  }
424

425
  // We need to create at most two routing blocks per entry: one for
426
  // predecessors outside the loop and one for predecessors inside the loop.
427
  // This map stores
428
  // <<loop entry, Predecessor is within the loop?>, routing block>
429
  DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
430
      Map;
431
  for (auto *Pred : AllPreds) {
432
    bool PredInLoop = InLoop.count(Pred);
433
    for (auto *Entry : Pred->successors()) {
434
      if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))
435
        continue;
436
      // If there exists a layout predecessor of this entry and this predecessor
437
      // is not that, we rather create a routing block after that layout
438
      // predecessor to save a branch.
439
      if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))
440
        if (OtherPred != Pred)
441
          continue;
442

443
      // This is a successor we need to rewrite.
444
      MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
445
      MF.insert(Pred->isLayoutSuccessor(Entry)
446
                    ? MachineFunction::iterator(Entry)
447
                    : MF.end(),
448
                Routing);
449
      Blocks.insert(Routing);
450

451
      // Set the jump table's register of the index of the block we wish to
452
      // jump to, and jump to the jump table.
453
      BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
454
          .addImm(Indices[Entry]);
455
      BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
456
      Routing->addSuccessor(Dispatch);
457
      Map[{Entry, PredInLoop}] = Routing;
458
    }
459
  }
460

461
  for (auto *Pred : AllPreds) {
462
    bool PredInLoop = InLoop.count(Pred);
463
    // Remap the terminator operands and the successor list.
464
    for (MachineInstr &Term : Pred->terminators())
465
      for (auto &Op : Term.explicit_uses())
466
        if (Op.isMBB() && Indices.count(Op.getMBB()))
467
          Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);
468

469
    for (auto *Succ : Pred->successors()) {
470
      if (!Entries.count(Succ))
471
        continue;
472
      auto *Routing = Map[{Succ, PredInLoop}];
473
      Pred->replaceSuccessor(Succ, Routing);
474
    }
475
  }
476

477
  // Create a fake default label, because br_table requires one.
478
  MIB.addMBB(MIB.getInstr()
479
                 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
480
                 .getMBB());
481
}
482

483
} // end anonymous namespace
484

485
char WebAssemblyFixIrreducibleControlFlow::ID = 0;
486
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
487
                "Removes irreducible control flow", false, false)
488

489
FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
490
  return new WebAssemblyFixIrreducibleControlFlow();
491
}
492

493
// Test whether the given register has an ARGUMENT def.
494
static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
495
  for (const auto &Def : MRI.def_instructions(Reg))
496
    if (WebAssembly::isArgument(Def.getOpcode()))
497
      return true;
498
  return false;
499
}
500

501
// Add a register definition with IMPLICIT_DEFs for every register to cover for
502
// register uses that don't have defs in every possible path.
503
// TODO: This is fairly heavy-handed; find a better approach.
504
static void addImplicitDefs(MachineFunction &MF) {
505
  const MachineRegisterInfo &MRI = MF.getRegInfo();
506
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
507
  MachineBasicBlock &Entry = *MF.begin();
508
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
509
    Register Reg = Register::index2VirtReg(I);
510

511
    // Skip unused registers.
512
    if (MRI.use_nodbg_empty(Reg))
513
      continue;
514

515
    // Skip registers that have an ARGUMENT definition.
516
    if (hasArgumentDef(Reg, MRI))
517
      continue;
518

519
    BuildMI(Entry, Entry.begin(), DebugLoc(),
520
            TII.get(WebAssembly::IMPLICIT_DEF), Reg);
521
  }
522

523
  // Move ARGUMENT_* instructions to the top of the entry block, so that their
524
  // liveness reflects the fact that these really are live-in values.
525
  for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
526
    if (WebAssembly::isArgument(MI.getOpcode())) {
527
      MI.removeFromParent();
528
      Entry.insert(Entry.begin(), &MI);
529
    }
530
  }
531
}
532

533
bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
534
    MachineFunction &MF) {
535
  LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
536
                       "********** Function: "
537
                    << MF.getName() << '\n');
538

539
  // Start the recursive process on the entire function body.
540
  BlockSet AllBlocks;
541
  for (auto &MBB : MF) {
542
    AllBlocks.insert(&MBB);
543
  }
544

545
  if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
546
    // We rewrote part of the function; recompute relevant things.
547
    MF.RenumberBlocks();
548
    // Now we've inserted dispatch blocks, some register uses can have incoming
549
    // paths without a def. For example, before this pass register %a was
550
    // defined in BB1 and used in BB2, and there was only one path from BB1 and
551
    // BB2. But if this pass inserts a dispatch block having multiple
552
    // predecessors between the two BBs, now there are paths to BB2 without
553
    // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
554
    // IMPLICIT_DEFs to all regs is one simple way to fix it.
555
    addImplicitDefs(MF);
556
    return true;
557
  }
558

559
  return false;
560
}
561

562
Product

Resources

Company