CoCalc -- X86FlagsCopyLowering.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
³⁵²⁶⁹ views
1
//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
/// \file
9
///
10
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
11
/// flag bits.
12
///
13
/// We have to do this by carefully analyzing and rewriting the usage of the
14
/// copied EFLAGS register because there is no general way to rematerialize the
15
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
16
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
17
/// and other non-status flags being overwritten. Using sequences involving
18
/// SAHF don't work on all x86 processors and are often quite slow compared to
19
/// directly testing a single status preserved in its own GPR.
20
///
21
//===----------------------------------------------------------------------===//
22

23
#include "X86.h"
24
#include "X86InstrBuilder.h"
25
#include "X86InstrInfo.h"
26
#include "X86Subtarget.h"
27
#include "llvm/ADT/DepthFirstIterator.h"
28
#include "llvm/ADT/PostOrderIterator.h"
29
#include "llvm/ADT/STLExtras.h"
30
#include "llvm/ADT/ScopeExit.h"
31
#include "llvm/ADT/SmallPtrSet.h"
32
#include "llvm/ADT/SmallVector.h"
33
#include "llvm/ADT/Statistic.h"
34
#include "llvm/CodeGen/MachineBasicBlock.h"
35
#include "llvm/CodeGen/MachineConstantPool.h"
36
#include "llvm/CodeGen/MachineDominators.h"
37
#include "llvm/CodeGen/MachineFunction.h"
38
#include "llvm/CodeGen/MachineFunctionPass.h"
39
#include "llvm/CodeGen/MachineInstr.h"
40
#include "llvm/CodeGen/MachineInstrBuilder.h"
41
#include "llvm/CodeGen/MachineModuleInfo.h"
42
#include "llvm/CodeGen/MachineOperand.h"
43
#include "llvm/CodeGen/MachineRegisterInfo.h"
44
#include "llvm/CodeGen/MachineSSAUpdater.h"
45
#include "llvm/CodeGen/TargetInstrInfo.h"
46
#include "llvm/CodeGen/TargetRegisterInfo.h"
47
#include "llvm/CodeGen/TargetSchedule.h"
48
#include "llvm/CodeGen/TargetSubtargetInfo.h"
49
#include "llvm/IR/DebugLoc.h"
50
#include "llvm/MC/MCSchedule.h"
51
#include "llvm/Pass.h"
52
#include "llvm/Support/CommandLine.h"
53
#include "llvm/Support/Debug.h"
54
#include "llvm/Support/raw_ostream.h"
55
#include <algorithm>
56
#include <cassert>
57
#include <iterator>
58
#include <utility>
59

60
using namespace llvm;
61

62
#define PASS_KEY "x86-flags-copy-lowering"
63
#define DEBUG_TYPE PASS_KEY
64

65
STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
66
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
67
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
68
STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
69
STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");
70

71
namespace {
72

73
// Convenient array type for storing registers associated with each condition.
74
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
75

76
class X86FlagsCopyLoweringPass : public MachineFunctionPass {
77
public:
78
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}
79

80
  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
81
  bool runOnMachineFunction(MachineFunction &MF) override;
82
  void getAnalysisUsage(AnalysisUsage &AU) const override;
83

84
  /// Pass identification, replacement for typeid.
85
  static char ID;
86

87
private:
88
  MachineRegisterInfo *MRI = nullptr;
89
  const X86Subtarget *Subtarget = nullptr;
90
  const X86InstrInfo *TII = nullptr;
91
  const TargetRegisterInfo *TRI = nullptr;
92
  const TargetRegisterClass *PromoteRC = nullptr;
93
  MachineDominatorTree *MDT = nullptr;
94

95
  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
96
                                  MachineBasicBlock::iterator CopyDefI);
97

98
  Register promoteCondToReg(MachineBasicBlock &MBB,
99
                            MachineBasicBlock::iterator TestPos,
100
                            const DebugLoc &TestLoc, X86::CondCode Cond);
101
  std::pair<unsigned, bool> getCondOrInverseInReg(
102
      MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
103
      const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
104
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
105
                  const DebugLoc &Loc, unsigned Reg);
106

107
  void rewriteSetCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
108
                    const DebugLoc &Loc, MachineInstr &MI,
109
                    CondRegArray &CondRegs);
110
  void rewriteArithmetic(MachineBasicBlock &MBB,
111
                         MachineBasicBlock::iterator Pos, const DebugLoc &Loc,
112
                         MachineInstr &MI, CondRegArray &CondRegs);
113
  void rewriteMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
114
                 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs);
115
};
116

117
} // end anonymous namespace
118

119
INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
120
                      "X86 EFLAGS copy lowering", false, false)
121
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
122
                    "X86 EFLAGS copy lowering", false, false)
123

124
FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
125
  return new X86FlagsCopyLoweringPass();
126
}
127

128
char X86FlagsCopyLoweringPass::ID = 0;
129

130
void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
131
  AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
132
  MachineFunctionPass::getAnalysisUsage(AU);
133
}
134

135
static bool isArithmeticOp(unsigned Opc) {
136
  return X86::isADC(Opc) || X86::isSBB(Opc) || X86::isRCL(Opc) ||
137
         X86::isRCR(Opc) || (Opc == X86::SETB_C32r || Opc == X86::SETB_C64r);
138
}
139

140
static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
141
                                     MachineInstr &SplitI,
142
                                     const X86InstrInfo &TII) {
143
  MachineFunction &MF = *MBB.getParent();
144

145
  assert(SplitI.getParent() == &MBB &&
146
         "Split instruction must be in the split block!");
147
  assert(SplitI.isBranch() &&
148
         "Only designed to split a tail of branch instructions!");
149
  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
150
         "Must split on an actual jCC instruction!");
151

152
  // Dig out the previous instruction to the split point.
153
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
154
  assert(PrevI.isBranch() && "Must split after a branch!");
155
  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
156
         "Must split after an actual jCC instruction!");
157
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
158
         "Must only have this one terminator prior to the split!");
159

160
  // Grab the one successor edge that will stay in `MBB`.
161
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
162

163
  // Analyze the original block to see if we are actually splitting an edge
164
  // into two edges. This can happen when we have multiple conditional jumps to
165
  // the same successor.
166
  bool IsEdgeSplit =
167
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
168
                  [&](MachineInstr &MI) {
169
                    assert(MI.isTerminator() &&
170
                           "Should only have spliced terminators!");
171
                    return llvm::any_of(
172
                        MI.operands(), [&](MachineOperand &MOp) {
173
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
174
                        });
175
                  }) ||
176
      MBB.getFallThrough() == &UnsplitSucc;
177

178
  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
179

180
  // Insert the new block immediately after the current one. Any existing
181
  // fallthrough will be sunk into this new block anyways.
182
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
183

184
  // Splice the tail of instructions into the new block.
185
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
186

187
  // Copy the necessary succesors (and their probability info) into the new
188
  // block.
189
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
190
    if (IsEdgeSplit || *SI != &UnsplitSucc)
191
      NewMBB.copySuccessor(&MBB, SI);
192
  // Normalize the probabilities if we didn't end up splitting the edge.
193
  if (!IsEdgeSplit)
194
    NewMBB.normalizeSuccProbs();
195

196
  // Now replace all of the moved successors in the original block with the new
197
  // block. This will merge their probabilities.
198
  for (MachineBasicBlock *Succ : NewMBB.successors())
199
    if (Succ != &UnsplitSucc)
200
      MBB.replaceSuccessor(Succ, &NewMBB);
201

202
  // We should always end up replacing at least one successor.
203
  assert(MBB.isSuccessor(&NewMBB) &&
204
         "Failed to make the new block a successor!");
205

206
  // Now update all the PHIs.
207
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
208
    for (MachineInstr &MI : *Succ) {
209
      if (!MI.isPHI())
210
        break;
211

212
      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
213
           OpIdx += 2) {
214
        MachineOperand &OpV = MI.getOperand(OpIdx);
215
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
216
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
217
        if (OpMBB.getMBB() != &MBB)
218
          continue;
219

220
        // Replace the operand for unsplit successors
221
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
222
          OpMBB.setMBB(&NewMBB);
223

224
          // We have to continue scanning as there may be multiple entries in
225
          // the PHI.
226
          continue;
227
        }
228

229
        // When we have split the edge append a new successor.
230
        MI.addOperand(MF, OpV);
231
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
232
        break;
233
      }
234
    }
235
  }
236

237
  return NewMBB;
238
}
239

240
enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };
241

242
static EFLAGSClobber getClobberType(const MachineInstr &MI) {
243
  const MachineOperand *FlagDef =
244
      MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
245
  if (!FlagDef)
246
    return NoClobber;
247
  if (FlagDef->isDead() && X86::getNFVariant(MI.getOpcode()))
248
    return EvitableClobber;
249

250
  return InevitableClobber;
251
}
252

253
bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
254
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
255
                    << " **********\n");
256

257
  Subtarget = &MF.getSubtarget<X86Subtarget>();
258
  MRI = &MF.getRegInfo();
259
  TII = Subtarget->getInstrInfo();
260
  TRI = Subtarget->getRegisterInfo();
261
  PromoteRC = &X86::GR8RegClass;
262

263
  if (MF.empty())
264
    // Nothing to do for a degenerate empty function...
265
    return false;
266

267
  if (none_of(MRI->def_instructions(X86::EFLAGS), [](const MachineInstr &MI) {
268
        return MI.getOpcode() == TargetOpcode::COPY;
269
      }))
270
    return false;
271

272
  // We change the code, so we don't preserve the dominator tree anyway. If we
273
  // got a valid MDT from the pass manager, use that, otherwise construct one
274
  // now. This is an optimization that avoids unnecessary MDT construction for
275
  // functions that have no flag copies.
276

277
  auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
278
  std::unique_ptr<MachineDominatorTree> OwnedMDT;
279
  if (MDTWrapper) {
280
    MDT = &MDTWrapper->getDomTree();
281
  } else {
282
    OwnedMDT = std::make_unique<MachineDominatorTree>();
283
    OwnedMDT->getBase().recalculate(MF);
284
    MDT = OwnedMDT.get();
285
  }
286

287
  // Collect the copies in RPO so that when there are chains where a copy is in
288
  // turn copied again we visit the first one first. This ensures we can find
289
  // viable locations for testing the original EFLAGS that dominate all the
290
  // uses across complex CFGs.
291
  SmallSetVector<MachineInstr *, 4> Copies;
292
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
293
  for (MachineBasicBlock *MBB : RPOT)
294
    for (MachineInstr &MI : *MBB)
295
      if (MI.getOpcode() == TargetOpcode::COPY &&
296
          MI.getOperand(0).getReg() == X86::EFLAGS)
297
        Copies.insert(&MI);
298

299
  // Try to elminate the copys by transform the instructions between copy and
300
  // copydef to the NF (no flags update) variants, e.g.
301
  //
302
  // %1:gr64 = COPY $eflags
303
  // OP1 implicit-def dead $eflags
304
  // $eflags = COPY %1
305
  // OP2 cc, implicit $eflags
306
  //
307
  // ->
308
  //
309
  // OP1_NF
310
  // OP2 implicit $eflags
311
  if (Subtarget->hasNF()) {
312
    SmallSetVector<MachineInstr *, 4> RemovedCopies;
313
    // CopyIIt may be invalidated by removing copies.
314
    auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
315
    while (CopyIIt != CopyIEnd) {
316
      auto NCopyIIt = std::next(CopyIIt);
317
      SmallSetVector<MachineInstr *, 4> EvitableClobbers;
318
      MachineInstr *CopyI = *CopyIIt;
319
      MachineOperand &VOp = CopyI->getOperand(1);
320
      MachineInstr *CopyDefI = MRI->getVRegDef(VOp.getReg());
321
      MachineBasicBlock *CopyIMBB = CopyI->getParent();
322
      MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
323
      // Walk all basic blocks reachable in depth-first iteration on the inverse
324
      // CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
325
      // may be executed between the execution of CopyDefIMBB and CopyIMBB. On
326
      // all execution paths, instructions from CopyDefI to CopyI (exclusive)
327
      // has to be NF-convertible if it clobbers flags.
328
      for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
329
           ++BI) {
330
        MachineBasicBlock *MBB = *BI;
331
        for (auto I = (MBB != CopyDefIMBB)
332
                          ? MBB->begin()
333
                          : std::next(MachineBasicBlock::iterator(CopyDefI)),
334
                  E = (MBB != CopyIMBB) ? MBB->end()
335
                                        : MachineBasicBlock::iterator(CopyI);
336
             I != E; ++I) {
337
          MachineInstr &MI = *I;
338
          EFLAGSClobber ClobberType = getClobberType(MI);
339
          if (ClobberType == NoClobber)
340
            continue;
341

342
          if (ClobberType == InevitableClobber)
343
            goto ProcessNextCopyI;
344

345
          assert(ClobberType == EvitableClobber && "unexpected workflow");
346
          EvitableClobbers.insert(&MI);
347
        }
348
      }
349
      // Covert evitable clobbers into NF variants and remove the copyies.
350
      RemovedCopies.insert(CopyI);
351
      CopyI->eraseFromParent();
352
      if (MRI->use_nodbg_empty(CopyDefI->getOperand(0).getReg())) {
353
        RemovedCopies.insert(CopyDefI);
354
        CopyDefI->eraseFromParent();
355
      }
356
      ++NumCopiesEliminated;
357
      for (auto *Clobber : EvitableClobbers) {
358
        unsigned NewOpc = X86::getNFVariant(Clobber->getOpcode());
359
        assert(NewOpc && "evitable clobber must have a NF variant");
360
        Clobber->setDesc(TII->get(NewOpc));
361
        Clobber->removeOperand(
362
            Clobber->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)
363
                ->getOperandNo());
364
        ++NumNFsConvertedTo;
365
      }
366
      // Update liveins for basic blocks in the path
367
      for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
368
           ++BI)
369
        if (*BI != CopyDefIMBB)
370
          BI->addLiveIn(X86::EFLAGS);
371
    ProcessNextCopyI:
372
      CopyIIt = NCopyIIt;
373
    }
374
    Copies.set_subtract(RemovedCopies);
375
  }
376

377
  // For the rest of copies that cannot be eliminated by NF transform, we use
378
  // setcc to preserve the flags in GPR32 before OP1, and recheck its value
379
  // before using the flags, e.g.
380
  //
381
  // %1:gr64 = COPY $eflags
382
  // OP1 implicit-def dead $eflags
383
  // $eflags = COPY %1
384
  // OP2 cc, implicit $eflags
385
  //
386
  // ->
387
  //
388
  // %1:gr8 = SETCCr cc, implicit $eflags
389
  // OP1 implicit-def dead $eflags
390
  // TEST8rr %1, %1, implicit-def $eflags
391
  // OP2 ne, implicit $eflags
392
  for (MachineInstr *CopyI : Copies) {
393
    MachineBasicBlock &MBB = *CopyI->getParent();
394

395
    MachineOperand &VOp = CopyI->getOperand(1);
396
    assert(VOp.isReg() &&
397
           "The input to the copy for EFLAGS should always be a register!");
398
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
399
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
400
      // FIXME: The big likely candidate here are PHI nodes. We could in theory
401
      // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
402
      // enough that it is probably better to change every other part of LLVM
403
      // to avoid creating them. The issue is that once we have PHIs we won't
404
      // know which original EFLAGS value we need to capture with our setCCs
405
      // below. The end result will be computing a complete set of setCCs that
406
      // we *might* want, computing them in every place where we copy *out* of
407
      // EFLAGS and then doing SSA formation on all of them to insert necessary
408
      // PHI nodes and consume those here. Then hoping that somehow we DCE the
409
      // unnecessary ones. This DCE seems very unlikely to be successful and so
410
      // we will almost certainly end up with a glut of dead setCC
411
      // instructions. Until we have a motivating test case and fail to avoid
412
      // it by changing other parts of LLVM's lowering, we refuse to handle
413
      // this complex case here.
414
      LLVM_DEBUG(
415
          dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
416
          CopyDefI.dump());
417
      report_fatal_error(
418
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
419
    }
420

421
    auto Cleanup = make_scope_exit([&] {
422
      // All uses of the EFLAGS copy are now rewritten, kill the copy into
423
      // eflags and if dead the copy from.
424
      CopyI->eraseFromParent();
425
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
426
        CopyDefI.eraseFromParent();
427
      ++NumCopiesEliminated;
428
    });
429

430
    MachineOperand &DOp = CopyI->getOperand(0);
431
    assert(DOp.isDef() && "Expected register def!");
432
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
433
    if (DOp.isDead())
434
      continue;
435

436
    MachineBasicBlock *TestMBB = CopyDefI.getParent();
437
    auto TestPos = CopyDefI.getIterator();
438
    DebugLoc TestLoc = CopyDefI.getDebugLoc();
439

440
    LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
441

442
    // Walk up across live-in EFLAGS to find where they were actually def'ed.
443
    //
444
    // This copy's def may just be part of a region of blocks covered by
445
    // a single def of EFLAGS and we want to find the top of that region where
446
    // possible.
447
    //
448
    // This is essentially a search for a *candidate* reaching definition
449
    // location. We don't need to ever find the actual reaching definition here,
450
    // but we want to walk up the dominator tree to find the highest point which
451
    // would be viable for such a definition.
452
    auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
453
                                MachineBasicBlock::iterator End) {
454
      // Scan backwards as we expect these to be relatively short and often find
455
      // a clobber near the end.
456
      return llvm::any_of(
457
          llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
458
            // Flag any instruction (other than the copy we are
459
            // currently rewriting) that defs EFLAGS.
460
            return &MI != CopyI &&
461
                   MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
462
          });
463
    };
464
    auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
465
                                    MachineBasicBlock *EndMBB) {
466
      assert(MDT->dominates(BeginMBB, EndMBB) &&
467
             "Only support paths down the dominator tree!");
468
      SmallPtrSet<MachineBasicBlock *, 4> Visited;
469
      SmallVector<MachineBasicBlock *, 4> Worklist;
470
      // We terminate at the beginning. No need to scan it.
471
      Visited.insert(BeginMBB);
472
      Worklist.push_back(EndMBB);
473
      do {
474
        auto *MBB = Worklist.pop_back_val();
475
        for (auto *PredMBB : MBB->predecessors()) {
476
          if (!Visited.insert(PredMBB).second)
477
            continue;
478
          if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
479
            return true;
480
          // Enqueue this block to walk its predecessors.
481
          Worklist.push_back(PredMBB);
482
        }
483
      } while (!Worklist.empty());
484
      // No clobber found along a path from the begin to end.
485
      return false;
486
    };
487
    while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
488
           !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
489
      // Find the nearest common dominator of the predecessors, as
490
      // that will be the best candidate to hoist into.
491
      MachineBasicBlock *HoistMBB =
492
          std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
493
                          *TestMBB->pred_begin(),
494
                          [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
495
                            return MDT->findNearestCommonDominator(LHS, RHS);
496
                          });
497

498
      // Now we need to scan all predecessors that may be reached along paths to
499
      // the hoist block. A clobber anywhere in any of these blocks the hoist.
500
      // Note that this even handles loops because we require *no* clobbers.
501
      if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
502
        break;
503

504
      // We also need the terminators to not sneakily clobber flags.
505
      if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
506
                           HoistMBB->instr_end()))
507
        break;
508

509
      // We found a viable location, hoist our test position to it.
510
      TestMBB = HoistMBB;
511
      TestPos = TestMBB->getFirstTerminator()->getIterator();
512
      // Clear the debug location as it would just be confusing after hoisting.
513
      TestLoc = DebugLoc();
514
    }
515
    LLVM_DEBUG({
516
      auto DefIt = llvm::find_if(
517
          llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
518
          [&](MachineInstr &MI) {
519
            return MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
520
          });
521
      if (DefIt.base() != TestMBB->instr_begin()) {
522
        dbgs() << "  Using EFLAGS defined by: ";
523
        DefIt->dump();
524
      } else {
525
        dbgs() << "  Using live-in flags for BB:\n";
526
        TestMBB->dump();
527
      }
528
    });
529

530
    // While rewriting uses, we buffer jumps and rewrite them in a second pass
531
    // because doing so will perturb the CFG that we are walking to find the
532
    // uses in the first place.
533
    SmallVector<MachineInstr *, 4> JmpIs;
534

535
    // Gather the condition flags that have already been preserved in
536
    // registers. We do this from scratch each time as we expect there to be
537
    // very few of them and we expect to not revisit the same copy definition
538
    // many times. If either of those change sufficiently we could build a map
539
    // of these up front instead.
540
    CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);
541

542
    // Collect the basic blocks we need to scan. Typically this will just be
543
    // a single basic block but we may have to scan multiple blocks if the
544
    // EFLAGS copy lives into successors.
545
    SmallVector<MachineBasicBlock *, 2> Blocks;
546
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
547
    Blocks.push_back(&MBB);
548

549
    do {
550
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
551

552
      // Track when if/when we find a kill of the flags in this block.
553
      bool FlagsKilled = false;
554

555
      // In most cases, we walk from the beginning to the end of the block. But
556
      // when the block is the same block as the copy is from, we will visit it
557
      // twice. The first time we start from the copy and go to the end. The
558
      // second time we start from the beginning and go to the copy. This lets
559
      // us handle copies inside of cycles.
560
      // FIXME: This loop is *super* confusing. This is at least in part
561
      // a symptom of all of this routine needing to be refactored into
562
      // documentable components. Once done, there may be a better way to write
563
      // this loop.
564
      for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
565
                          ? std::next(CopyI->getIterator())
566
                          : UseMBB.instr_begin(),
567
                MIE = UseMBB.instr_end();
568
           MII != MIE;) {
569
        MachineInstr &MI = *MII++;
570
        // If we are in the original copy block and encounter either the copy
571
        // def or the copy itself, break so that we don't re-process any part of
572
        // the block or process the instructions in the range that was copied
573
        // over.
574
        if (&MI == CopyI || &MI == &CopyDefI) {
575
          assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
576
                 "Should only encounter these on the second pass over the "
577
                 "original block.");
578
          break;
579
        }
580

581
        MachineOperand *FlagUse =
582
            MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr);
583
        FlagsKilled = MI.modifiesRegister(X86::EFLAGS, TRI);
584

585
        if (!FlagUse && FlagsKilled)
586
          break;
587
        else if (!FlagUse)
588
          continue;
589

590
        LLVM_DEBUG(dbgs() << "  Rewriting use: "; MI.dump());
591

592
        // Check the kill flag before we rewrite as that may change it.
593
        if (FlagUse->isKill())
594
          FlagsKilled = true;
595

596
        // Once we encounter a branch, the rest of the instructions must also be
597
        // branches. We can't rewrite in place here, so we handle them below.
598
        //
599
        // Note that we don't have to handle tail calls here, even conditional
600
        // tail calls, as those are not introduced into the X86 MI until post-RA
601
        // branch folding or black placement. As a consequence, we get to deal
602
        // with the simpler formulation of conditional branches followed by tail
603
        // calls.
604
        if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
605
          auto JmpIt = MI.getIterator();
606
          do {
607
            JmpIs.push_back(&*JmpIt);
608
            ++JmpIt;
609
          } while (JmpIt != UseMBB.instr_end() &&
610
                   X86::getCondFromBranch(*JmpIt) != X86::COND_INVALID);
611
          break;
612
        }
613

614
        // Otherwise we can just rewrite in-place.
615
        unsigned Opc = MI.getOpcode();
616
        if (Opc == TargetOpcode::COPY) {
617
          // Just replace this copy with the original copy def.
618
          MRI->replaceRegWith(MI.getOperand(0).getReg(),
619
                              CopyDefI.getOperand(0).getReg());
620
          MI.eraseFromParent();
621
        } else if (X86::isSETCC(Opc)) {
622
          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, CondRegs);
623
        } else if (isArithmeticOp(Opc)) {
624
          rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, CondRegs);
625
        } else {
626
          rewriteMI(*TestMBB, TestPos, TestLoc, MI, CondRegs);
627
        }
628

629
        // If this was the last use of the flags, we're done.
630
        if (FlagsKilled)
631
          break;
632
      }
633

634
      // If the flags were killed, we're done with this block.
635
      if (FlagsKilled)
636
        continue;
637

638
      // Otherwise we need to scan successors for ones where the flags live-in
639
      // and queue those up for processing.
640
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
641
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
642
            VisitedBlocks.insert(SuccMBB).second) {
643
          // We currently don't do any PHI insertion and so we require that the
644
          // test basic block dominates all of the use basic blocks. Further, we
645
          // can't have a cycle from the test block back to itself as that would
646
          // create a cycle requiring a PHI to break it.
647
          //
648
          // We could in theory do PHI insertion here if it becomes useful by
649
          // just taking undef values in along every edge that we don't trace
650
          // this EFLAGS copy along. This isn't as bad as fully general PHI
651
          // insertion, but still seems like a great deal of complexity.
652
          //
653
          // Because it is theoretically possible that some earlier MI pass or
654
          // other lowering transformation could induce this to happen, we do
655
          // a hard check even in non-debug builds here.
656
          if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
657
            LLVM_DEBUG({
658
              dbgs()
659
                  << "ERROR: Encountered use that is not dominated by our test "
660
                     "basic block! Rewriting this would require inserting PHI "
661
                     "nodes to track the flag state across the CFG.\n\nTest "
662
                     "block:\n";
663
              TestMBB->dump();
664
              dbgs() << "Use block:\n";
665
              SuccMBB->dump();
666
            });
667
            report_fatal_error(
668
                "Cannot lower EFLAGS copy when original copy def "
669
                "does not dominate all uses.");
670
          }
671

672
          Blocks.push_back(SuccMBB);
673

674
          // After this, EFLAGS will be recreated before each use.
675
          SuccMBB->removeLiveIn(X86::EFLAGS);
676
        }
677
    } while (!Blocks.empty());
678

679
    // Now rewrite the jumps that use the flags. These we handle specially
680
    // because if there are multiple jumps in a single basic block we'll have
681
    // to do surgery on the CFG.
682
    MachineBasicBlock *LastJmpMBB = nullptr;
683
    for (MachineInstr *JmpI : JmpIs) {
684
      // Past the first jump within a basic block we need to split the blocks
685
      // apart.
686
      if (JmpI->getParent() == LastJmpMBB)
687
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
688
      else
689
        LastJmpMBB = JmpI->getParent();
690

691
      rewriteMI(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
692
    }
693

694
    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
695
    // the copy's def operand is itself a kill.
696
  }
697

698
#ifndef NDEBUG
699
  for (MachineBasicBlock &MBB : MF)
700
    for (MachineInstr &MI : MBB)
701
      if (MI.getOpcode() == TargetOpcode::COPY &&
702
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
703
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
704
        LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
705
                   MI.dump());
706
        llvm_unreachable("Unlowered EFLAGS copy!");
707
      }
708
#endif
709

710
  return true;
711
}
712

713
/// Collect any conditions that have already been set in registers so that we
714
/// can re-use them rather than adding duplicates.
715
CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
716
    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
717
  CondRegArray CondRegs = {};
718

719
  // Scan backwards across the range of instructions with live EFLAGS.
720
  for (MachineInstr &MI :
721
       llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
722
    X86::CondCode Cond = X86::getCondFromSETCC(MI);
723
    if (Cond != X86::COND_INVALID && !MI.mayStore() &&
724
        MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
725
      assert(MI.getOperand(0).isDef() &&
726
             "A non-storing SETcc should always define a register!");
727
      CondRegs[Cond] = MI.getOperand(0).getReg();
728
    }
729

730
    // Stop scanning when we see the first definition of the EFLAGS as prior to
731
    // this we would potentially capture the wrong flag state.
732
    if (MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr))
733
      break;
734
  }
735
  return CondRegs;
736
}
737

738
Register X86FlagsCopyLoweringPass::promoteCondToReg(
739
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
740
    const DebugLoc &TestLoc, X86::CondCode Cond) {
741
  Register Reg = MRI->createVirtualRegister(PromoteRC);
742
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
743
                  .addImm(Cond);
744
  (void)SetI;
745
  LLVM_DEBUG(dbgs() << "    save cond: "; SetI->dump());
746
  ++NumSetCCsInserted;
747
  return Reg;
748
}
749

750
std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
751
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
752
    const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
753
  unsigned &CondReg = CondRegs[Cond];
754
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
755
  if (!CondReg && !InvCondReg)
756
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
757

758
  if (CondReg)
759
    return {CondReg, false};
760
  else
761
    return {InvCondReg, true};
762
}
763

764
void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
765
                                          MachineBasicBlock::iterator Pos,
766
                                          const DebugLoc &Loc, unsigned Reg) {
767
  auto TestI =
768
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
769
  (void)TestI;
770
  LLVM_DEBUG(dbgs() << "    test cond: "; TestI->dump());
771
  ++NumTestsInserted;
772
}
773

774
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
775
                                            MachineBasicBlock::iterator Pos,
776
                                            const DebugLoc &Loc,
777
                                            MachineInstr &MI,
778
                                            CondRegArray &CondRegs) {
779
  X86::CondCode Cond = X86::getCondFromSETCC(MI);
780
  // Note that we can't usefully rewrite this to the inverse without complex
781
  // analysis of the users of the setCC. Largely we rely on duplicates which
782
  // could have been avoided already being avoided here.
783
  unsigned &CondReg = CondRegs[Cond];
784
  if (!CondReg)
785
    CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
786

787
  // Rewriting a register def is trivial: we just replace the register and
788
  // remove the setcc.
789
  if (!MI.mayStore()) {
790
    assert(MI.getOperand(0).isReg() &&
791
           "Cannot have a non-register defined operand to SETcc!");
792
    Register OldReg = MI.getOperand(0).getReg();
793
    // Drop Kill flags on the old register before replacing. CondReg may have
794
    // a longer live range.
795
    MRI->clearKillFlags(OldReg);
796
    MRI->replaceRegWith(OldReg, CondReg);
797
    MI.eraseFromParent();
798
    return;
799
  }
800

801
  // Otherwise, we need to emit a store.
802
  auto MIB = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
803
                     TII->get(X86::MOV8mr));
804
  // Copy the address operands.
805
  for (int i = 0; i < X86::AddrNumOperands; ++i)
806
    MIB.add(MI.getOperand(i));
807

808
  MIB.addReg(CondReg);
809
  MIB.setMemRefs(MI.memoperands());
810
  MI.eraseFromParent();
811
}
812

813
void X86FlagsCopyLoweringPass::rewriteArithmetic(
814
    MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
815
    const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs) {
816
  // Arithmetic is either reading CF or OF.
817
  X86::CondCode Cond = X86::COND_B; // CF == 1
818
  // The addend to use to reset CF or OF when added to the flag value.
819
  // Set up an addend that when one is added will need a carry due to not
820
  // having a higher bit available.
821
  int Addend = 255;
822

823
  // Now get a register that contains the value of the flag input to the
824
  // arithmetic. We require exactly this flag to simplify the arithmetic
825
  // required to materialize it back into the flag.
826
  unsigned &CondReg = CondRegs[Cond];
827
  if (!CondReg)
828
    CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
829

830
  // Insert an instruction that will set the flag back to the desired value.
831
  Register TmpReg = MRI->createVirtualRegister(PromoteRC);
832
  auto AddI =
833
      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
834
              TII->get(Subtarget->hasNDD() ? X86::ADD8ri_ND : X86::ADD8ri))
835
          .addDef(TmpReg, RegState::Dead)
836
          .addReg(CondReg)
837
          .addImm(Addend);
838
  (void)AddI;
839
  LLVM_DEBUG(dbgs() << "    add cond: "; AddI->dump());
840
  ++NumAddsInserted;
841
  MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
842
}
843

844
static X86::CondCode getImplicitCondFromMI(unsigned Opc) {
845
#define FROM_TO(A, B)                                                          \
846
  case X86::CMOV##A##_Fp32:                                                    \
847
  case X86::CMOV##A##_Fp64:                                                    \
848
  case X86::CMOV##A##_Fp80:                                                    \
849
    return X86::COND_##B;
850

851
  switch (Opc) {
852
  default:
853
    return X86::COND_INVALID;
854
    FROM_TO(B, B)
855
    FROM_TO(E, E)
856
    FROM_TO(P, P)
857
    FROM_TO(BE, BE)
858
    FROM_TO(NB, AE)
859
    FROM_TO(NE, NE)
860
    FROM_TO(NP, NP)
861
    FROM_TO(NBE, A)
862
  }
863
#undef FROM_TO
864
}
865

866
static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC) {
867
  assert((CC == X86::COND_E || CC == X86::COND_NE) && "Unexpected CC");
868
#define CASE(A)                                                                \
869
  case X86::CMOVB_##A:                                                         \
870
  case X86::CMOVE_##A:                                                         \
871
  case X86::CMOVP_##A:                                                         \
872
  case X86::CMOVBE_##A:                                                        \
873
  case X86::CMOVNB_##A:                                                        \
874
  case X86::CMOVNE_##A:                                                        \
875
  case X86::CMOVNP_##A:                                                        \
876
  case X86::CMOVNBE_##A:                                                       \
877
    return (CC == X86::COND_E) ? X86::CMOVE_##A : X86::CMOVNE_##A;
878
  switch (Opc) {
879
  default:
880
    llvm_unreachable("Unexpected opcode");
881
    CASE(Fp32)
882
    CASE(Fp64)
883
    CASE(Fp80)
884
  }
885
#undef CASE
886
}
887

888
void X86FlagsCopyLoweringPass::rewriteMI(MachineBasicBlock &MBB,
889
                                         MachineBasicBlock::iterator Pos,
890
                                         const DebugLoc &Loc, MachineInstr &MI,
891
                                         CondRegArray &CondRegs) {
892
  // First get the register containing this specific condition.
893
  bool IsImplicitCC = false;
894
  X86::CondCode CC = X86::getCondFromMI(MI);
895
  if (CC == X86::COND_INVALID) {
896
    CC = getImplicitCondFromMI(MI.getOpcode());
897
    IsImplicitCC = true;
898
  }
899
  assert(CC != X86::COND_INVALID && "Unknown EFLAG user!");
900
  unsigned CondReg;
901
  bool Inverted;
902
  std::tie(CondReg, Inverted) =
903
      getCondOrInverseInReg(MBB, Pos, Loc, CC, CondRegs);
904

905
  // Insert a direct test of the saved register.
906
  insertTest(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), CondReg);
907

908
  // Rewrite the instruction to use the !ZF flag from the test, and then kill
909
  // its use of the flags afterward.
910
  X86::CondCode NewCC = Inverted ? X86::COND_E : X86::COND_NE;
911
  if (IsImplicitCC)
912
    MI.setDesc(TII->get(getOpcodeWithCC(MI.getOpcode(), NewCC)));
913
  else
914
    MI.getOperand(MI.getDesc().getNumOperands() - 1).setImm(NewCC);
915

916
  MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
917
  LLVM_DEBUG(dbgs() << "    fixed instruction: "; MI.dump());
918
}
919

920
Product

Resources

Company