GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
/// flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of add instructions inserted");
STATISTIC(NumNFsConvertedTo, "Number of instructions converted to NF variants");

namespace {

// Convenient array type for storing registers associated with each condition.
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;

class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const X86Subtarget *Subtarget = nullptr;
  const X86InstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  const TargetRegisterClass *PromoteRC = nullptr;
  MachineDominatorTree *MDT = nullptr;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator CopyDefI);

  Register promoteCondToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator TestPos,
                            const DebugLoc &TestLoc, X86::CondCode Cond);
  std::pair<unsigned, bool> getCondOrInverseInReg(
      MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
      const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                  const DebugLoc &Loc, unsigned Reg);

  void rewriteSetCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                    const DebugLoc &Loc, MachineInstr &MI,
                    CondRegArray &CondRegs);
  void rewriteArithmetic(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator Pos, const DebugLoc &Loc,
                         MachineInstr &MI, CondRegArray &CondRegs);
  void rewriteMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs);
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                      "X86 EFLAGS copy lowering", false, false)
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                    "X86 EFLAGS copy lowering", false, false)

FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  return new X86FlagsCopyLoweringPass();
}

char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

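// Identify EFLAGS consumers that read the carry flag as a data input: ADC,
// SBB, the rotate-through-carry instructions, and the SETB_C pseudos. These
// are handled by rewriteArithmetic rather than the generic condition rewrite.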
static bool isArithmeticOp(unsigned Opc) {
  return X86::isADC(Opc) || X86::isSBB(Opc) || X86::isRCL(Opc) ||
         X86::isRCR(Opc) || (Opc == X86::SETB_C32r || Opc == X86::SETB_C64r);
}

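// Split `MBB` at the conditional branch `SplitI`, moving `SplitI` and every
// following terminator into a new block. Successor edges, branch
// probabilities, and PHI operands in the successors are updated to reflect
// the split, and the new block is returned.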
static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the previous instruction to the split point.
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyway.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());

  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);
  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors.
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge, append a new successor.
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}

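// Classification of how an instruction affects EFLAGS: it either leaves the
// flags alone, clobbers them in a way we can avoid (the def is dead and an NF
// variant of the opcode exists), or clobbers them unavoidably.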
enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };

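// Classify `MI` according to the EFLAGSClobber categories above.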
static EFLAGSClobber getClobberType(const MachineInstr &MI) {
  const MachineOperand *FlagDef =
      MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
  if (!FlagDef)
    return NoClobber;
  if (FlagDef->isDead() && X86::getNFVariant(MI.getOpcode()))
    return EvitableClobber;

  return InevitableClobber;
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  PromoteRC = &X86::GR8RegClass;

  if (MF.empty())
    // Nothing to do for a degenerate empty function...
    return false;

  if (none_of(MRI->def_instructions(X86::EFLAGS), [](const MachineInstr &MI) {
        return MI.getOpcode() == TargetOpcode::COPY;
      }))
    return false;

  // We change the code, so we don't preserve the dominator tree anyway. If we
  // got a valid MDT from the pass manager, use that, otherwise construct one
  // now. This is an optimization that avoids unnecessary MDT construction for
  // functions that have no flag copies.

  auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
  std::unique_ptr<MachineDominatorTree> OwnedMDT;
  if (MDTWrapper) {
    MDT = &MDTWrapper->getDomTree();
  } else {
    OwnedMDT = std::make_unique<MachineDominatorTree>();
    OwnedMDT->getBase().recalculate(MF);
    MDT = OwnedMDT.get();
  }

  // Collect the copies in RPO so that when there are chains where a copy is in
  // turn copied again we visit the first one first. This ensures we can find
  // viable locations for testing the original EFLAGS that dominate all the
  // uses across complex CFGs.
  SmallSetVector<MachineInstr *, 4> Copies;
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT)
    for (MachineInstr &MI : *MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          MI.getOperand(0).getReg() == X86::EFLAGS)
        Copies.insert(&MI);

  // Try to eliminate the copies by transforming the instructions between the
  // copy and its def to the NF (no flags update) variants, e.g.
  //
  // %1:gr64 = COPY $eflags
  // OP1 implicit-def dead $eflags
  // $eflags = COPY %1
  // OP2 cc, implicit $eflags
  //
  // ->
  //
  // OP1_NF
  // OP2 implicit $eflags
  if (Subtarget->hasNF()) {
    SmallSetVector<MachineInstr *, 4> RemovedCopies;
    // CopyIIt may be invalidated by removing copies.
    auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
    while (CopyIIt != CopyIEnd) {
      auto NCopyIIt = std::next(CopyIIt);
      SmallSetVector<MachineInstr *, 4> EvitableClobbers;
      MachineInstr *CopyI = *CopyIIt;
      MachineOperand &VOp = CopyI->getOperand(1);
      MachineInstr *CopyDefI = MRI->getVRegDef(VOp.getReg());
      MachineBasicBlock *CopyIMBB = CopyI->getParent();
      MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
      // Walk all basic blocks reachable in depth-first iteration on the inverse
      // CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
      // may be executed between the execution of CopyDefIMBB and CopyIMBB. On
      // all execution paths, instructions from CopyDefI to CopyI (exclusive)
      // have to be NF-convertible if they clobber flags.
      for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
           ++BI) {
        MachineBasicBlock *MBB = *BI;
        for (auto I = (MBB != CopyDefIMBB)
                          ? MBB->begin()
                          : std::next(MachineBasicBlock::iterator(CopyDefI)),
                  E = (MBB != CopyIMBB) ? MBB->end()
                                        : MachineBasicBlock::iterator(CopyI);
             I != E; ++I) {
          MachineInstr &MI = *I;
          EFLAGSClobber ClobberType = getClobberType(MI);
          if (ClobberType == NoClobber)
            continue;

          if (ClobberType == InevitableClobber)
            goto ProcessNextCopyI;

          assert(ClobberType == EvitableClobber && "unexpected workflow");
          EvitableClobbers.insert(&MI);
        }
      }
      // Convert evitable clobbers into NF variants and remove the copies.
      RemovedCopies.insert(CopyI);
      CopyI->eraseFromParent();
      if (MRI->use_nodbg_empty(CopyDefI->getOperand(0).getReg())) {
        RemovedCopies.insert(CopyDefI);
        CopyDefI->eraseFromParent();
      }
      ++NumCopiesEliminated;
      for (auto *Clobber : EvitableClobbers) {
        unsigned NewOpc = X86::getNFVariant(Clobber->getOpcode());
        assert(NewOpc && "evitable clobber must have a NF variant");
        Clobber->setDesc(TII->get(NewOpc));
        Clobber->removeOperand(
            Clobber->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)
                ->getOperandNo());
        ++NumNFsConvertedTo;
      }
      // Update live-ins for basic blocks on the path.
      for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
           ++BI)
        if (*BI != CopyDefIMBB)
          BI->addLiveIn(X86::EFLAGS);
    ProcessNextCopyI:
      CopyIIt = NCopyIIt;
    }
    Copies.set_subtract(RemovedCopies);
  }

  // For the remaining copies that cannot be eliminated by the NF transform, we
  // use setcc to preserve the flags in a GPR before OP1, and recheck its value
  // before using the flags, e.g.
  //
  // %1:gr64 = COPY $eflags
  // OP1 implicit-def dead $eflags
  // $eflags = COPY %1
  // OP2 cc, implicit $eflags
  //
  // ->
  //
  // %1:gr8 = SETCCr cc, implicit $eflags
  // OP1 implicit-def dead $eflags
  // TEST8rr %1, %1, implicit-def $eflags
  // OP2 ne, implicit $eflags
  for (MachineInstr *CopyI : Copies) {
    MachineBasicBlock &MBB = *CopyI->getParent();

    MachineOperand &VOp = CopyI->getOperand(1);
    assert(VOp.isReg() &&
           "The input to the copy for EFLAGS should always be a register!");
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
      // FIXME: The big likely candidates here are PHI nodes. We could in theory
      // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
      // enough that it is probably better to change every other part of LLVM
      // to avoid creating them. The issue is that once we have PHIs we won't
      // know which original EFLAGS value we need to capture with our setCCs
      // below. The end result will be computing a complete set of setCCs that
      // we *might* want, computing them in every place where we copy *out* of
      // EFLAGS and then doing SSA formation on all of them to insert necessary
      // PHI nodes and consume those here. Then hoping that somehow we DCE the
      // unnecessary ones. This DCE seems very unlikely to be successful and so
      // we will almost certainly end up with a glut of dead setCC
      // instructions. Until we have a motivating test case and fail to avoid
      // it by changing other parts of LLVM's lowering, we refuse to handle
      // this complex case here.
      LLVM_DEBUG(
          dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
          CopyDefI.dump());
      report_fatal_error(
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    }

    auto Cleanup = make_scope_exit([&] {
      // All uses of the EFLAGS copy are now rewritten; kill the copy into
      // EFLAGS and, if it is now dead, the copy it was made from.
      CopyI->eraseFromParent();
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
        CopyDefI.eraseFromParent();
      ++NumCopiesEliminated;
    });

    MachineOperand &DOp = CopyI->getOperand(0);
    assert(DOp.isDef() && "Expected register def!");
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    if (DOp.isDead())
      continue;

    MachineBasicBlock *TestMBB = CopyDefI.getParent();
    auto TestPos = CopyDefI.getIterator();
    DebugLoc TestLoc = CopyDefI.getDebugLoc();

    LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());

    // Walk up across live-in EFLAGS to find where they were actually def'ed.
    //
    // This copy's def may just be part of a region of blocks covered by
    // a single def of EFLAGS and we want to find the top of that region where
    // possible.
    //
    // This is essentially a search for a *candidate* reaching definition
    // location. We don't need to ever find the actual reaching definition here,
    // but we want to walk up the dominator tree to find the highest point which
    // would be viable for such a definition.
    auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End) {
      // Scan backwards as we expect these to be relatively short and often find
      // a clobber near the end.
      return llvm::any_of(
          llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
            // Flag any instruction (other than the copy we are
            // currently rewriting) that defs EFLAGS.
            return &MI != CopyI &&
                   MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
          });
    };
    auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
                                    MachineBasicBlock *EndMBB) {
      assert(MDT->dominates(BeginMBB, EndMBB) &&
             "Only support paths down the dominator tree!");
      SmallPtrSet<MachineBasicBlock *, 4> Visited;
      SmallVector<MachineBasicBlock *, 4> Worklist;
      // We terminate at the beginning. No need to scan it.
      Visited.insert(BeginMBB);
      Worklist.push_back(EndMBB);
      do {
        auto *MBB = Worklist.pop_back_val();
        for (auto *PredMBB : MBB->predecessors()) {
          if (!Visited.insert(PredMBB).second)
            continue;
          if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
            return true;
          // Enqueue this block to walk its predecessors.
          Worklist.push_back(PredMBB);
        }
      } while (!Worklist.empty());
      // No clobber found along a path from the begin to end.
      return false;
    };
    while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
           !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
      // Find the nearest common dominator of the predecessors, as
      // that will be the best candidate to hoist into.
      MachineBasicBlock *HoistMBB =
          std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
                          *TestMBB->pred_begin(),
                          [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
                            return MDT->findNearestCommonDominator(LHS, RHS);
                          });

      // Now we need to scan all predecessors that may be reached along paths to
      // the hoist block. A clobber anywhere in any of these blocks prevents the
      // hoist. Note that this even handles loops because we require *no*
      // clobbers.
      if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
        break;

      // We also need the terminators to not sneakily clobber flags.
      if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
                           HoistMBB->instr_end()))
        break;

      // We found a viable location, hoist our test position to it.
      TestMBB = HoistMBB;
      TestPos = TestMBB->getFirstTerminator()->getIterator();
      // Clear the debug location as it would just be confusing after hoisting.
      TestLoc = DebugLoc();
    }
    LLVM_DEBUG({
      auto DefIt = llvm::find_if(
          llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
          [&](MachineInstr &MI) {
            return MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
          });
      if (DefIt.base() != TestMBB->instr_begin()) {
        dbgs() << "  Using EFLAGS defined by: ";
        DefIt->dump();
      } else {
        dbgs() << "  Using live-in flags for BB:\n";
        TestMBB->dump();
      }
    });

    // While rewriting uses, we buffer jumps and rewrite them in a second pass
    // because doing so will perturb the CFG that we are walking to find the
    // uses in the first place.
    SmallVector<MachineInstr *, 4> JmpIs;

    // Gather the condition flags that have already been preserved in
    // registers. We do this from scratch each time as we expect there to be
    // very few of them and we expect to not revisit the same copy definition
    // many times. If either of those change sufficiently we could build a map
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);

    // Collect the basic blocks we need to scan. Typically this will just be
    // a single basic block but we may have to scan multiple blocks if the
    // EFLAGS copy lives into successors.
    SmallVector<MachineBasicBlock *, 2> Blocks;
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    Blocks.push_back(&MBB);

    do {
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();

      // Track if/when we find a kill of the flags in this block.
      bool FlagsKilled = false;

      // In most cases, we walk from the beginning to the end of the block. But
      // when the block is the same block as the copy is from, we will visit it
      // twice. The first time we start from the copy and go to the end. The
      // second time we start from the beginning and go to the copy. This lets
      // us handle copies inside of cycles.
      // FIXME: This loop is *super* confusing. This is at least in part
      // a symptom of all of this routine needing to be refactored into
      // documentable components. Once done, there may be a better way to write
      // this loop.
      for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
                          ? std::next(CopyI->getIterator())
                          : UseMBB.instr_begin(),
                MIE = UseMBB.instr_end();
           MII != MIE;) {
        MachineInstr &MI = *MII++;
        // If we are in the original copy block and encounter either the copy
        // def or the copy itself, break so that we don't re-process any part of
        // the block or process the instructions in the range that was copied
        // over.
        if (&MI == CopyI || &MI == &CopyDefI) {
          assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
                 "Should only encounter these on the second pass over the "
                 "original block.");
          break;
        }

        MachineOperand *FlagUse =
            MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr);
        FlagsKilled = MI.modifiesRegister(X86::EFLAGS, TRI);

        if (!FlagUse && FlagsKilled)
          break;
        else if (!FlagUse)
          continue;

        LLVM_DEBUG(dbgs() << "  Rewriting use: "; MI.dump());

        // Check the kill flag before we rewrite as that may change it.
        if (FlagUse->isKill())
          FlagsKilled = true;

        // Once we encounter a branch, the rest of the instructions must also be
        // branches. We can't rewrite in place here, so we handle them below.
        //
        // Note that we don't have to handle tail calls here, even conditional
        // tail calls, as those are not introduced into the X86 MI until post-RA
        // branch folding or block placement. As a consequence, we get to deal
        // with the simpler formulation of conditional branches followed by tail
        // calls.
        if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
          auto JmpIt = MI.getIterator();
          do {
            JmpIs.push_back(&*JmpIt);
            ++JmpIt;
          } while (JmpIt != UseMBB.instr_end() &&
                   X86::getCondFromBranch(*JmpIt) != X86::COND_INVALID);
          break;
        }

        // Otherwise we can just rewrite in-place.
        unsigned Opc = MI.getOpcode();
        if (Opc == TargetOpcode::COPY) {
          // Just replace this copy with the original copy def.
          MRI->replaceRegWith(MI.getOperand(0).getReg(),
                              CopyDefI.getOperand(0).getReg());
          MI.eraseFromParent();
        } else if (X86::isSETCC(Opc)) {
          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        } else if (isArithmeticOp(Opc)) {
          rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        } else {
          rewriteMI(*TestMBB, TestPos, TestLoc, MI, CondRegs);
        }

        // If this was the last use of the flags, we're done.
        if (FlagsKilled)
          break;
      }

      // If the flags were killed, we're done with this block.
      if (FlagsKilled)
        continue;

      // Otherwise we need to scan successors where the flags are live-in and
      // queue those up for processing.
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
            VisitedBlocks.insert(SuccMBB).second) {
          // We currently don't do any PHI insertion and so we require that the
          // test basic block dominates all of the use basic blocks. Further, we
          // can't have a cycle from the test block back to itself as that would
          // create a cycle requiring a PHI to break it.
          //
          // We could in theory do PHI insertion here if it becomes useful by
          // just taking undef values in along every edge that we don't trace
          // this EFLAGS copy along. This isn't as bad as fully general PHI
          // insertion, but still seems like a great deal of complexity.
          //
          // Because it is theoretically possible that some earlier MI pass or
          // other lowering transformation could induce this to happen, we do
          // a hard check even in non-debug builds here.
          if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
            LLVM_DEBUG({
              dbgs()
                  << "ERROR: Encountered use that is not dominated by our test "
                     "basic block! Rewriting this would require inserting PHI "
                     "nodes to track the flag state across the CFG.\n\nTest "
                     "block:\n";
              TestMBB->dump();
              dbgs() << "Use block:\n";
              SuccMBB->dump();
            });
            report_fatal_error(
                "Cannot lower EFLAGS copy when original copy def "
                "does not dominate all uses.");
          }

          Blocks.push_back(SuccMBB);

          // After this, EFLAGS will be recreated before each use.
          SuccMBB->removeLiveIn(X86::EFLAGS);
        }
    } while (!Blocks.empty());

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps in a single basic block we'll have
    // to do surgery on the CFG.
    MachineBasicBlock *LastJmpMBB = nullptr;
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump within a basic block we need to split the blocks
      // apart.
      if (JmpI->getParent() == LastJmpMBB)
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
      else
        LastJmpMBB = JmpI->getParent();

      rewriteMI(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }

    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    // the copy's def operand is itself a kill.
  }

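  // In debug builds, verify that no COPY into or out of EFLAGS survived the
  // lowering above.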
#ifndef NDEBUG
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
        LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
                   MI.dump());
        llvm_unreachable("Unlowered EFLAGS copy!");
      }
#endif

  return true;
}

/// Collect any conditions that have already been set in registers so that we
/// can re-use them rather than adding duplicates.
CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
  CondRegArray CondRegs = {};

  // Scan backwards across the range of instructions with live EFLAGS.
  for (MachineInstr &MI :
       llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
    X86::CondCode Cond = X86::getCondFromSETCC(MI);
    if (Cond != X86::COND_INVALID && !MI.mayStore() &&
        MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
      assert(MI.getOperand(0).isDef() &&
             "A non-storing SETcc should always define a register!");
      CondRegs[Cond] = MI.getOperand(0).getReg();
    }

    // Stop scanning when we see the first definition of EFLAGS, as prior to
    // this we would potentially capture the wrong flag state.
    if (MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr))
      break;
  }
  return CondRegs;
}

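// Materialize the given condition from the live EFLAGS into a fresh GR8
// virtual register by inserting a SETcc at the chosen test position.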
Register X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, X86::CondCode Cond) {
  Register Reg = MRI->createVirtualRegister(PromoteRC);
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
                  .addImm(Cond);
  (void)SetI;
  LLVM_DEBUG(dbgs() << "  save cond: "; SetI->dump());
  ++NumSetCCsInserted;
  return Reg;
}

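// Return a register holding either `Cond` or its inverse, creating one at the
// test position if neither has been captured yet. The bool result is true
// when the returned register holds the inverted condition.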
std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &CondReg = CondRegs[Cond];
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
  if (!CondReg && !InvCondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  if (CondReg)
    return {CondReg, false};
  else
    return {InvCondReg, true};
}

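// Insert a TEST of the saved condition byte against itself so that ZF is set
// exactly when the byte is zero, i.e. when the captured condition was false.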
void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          const DebugLoc &Loc, unsigned Reg) {
  auto TestI =
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
  (void)TestI;
  LLVM_DEBUG(dbgs() << "  test cond: "; TestI->dump());
  ++NumTestsInserted;
}

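// Rewrite a SETcc that consumed the copied EFLAGS to instead reuse the GR8
// register (created here if needed) that already holds that condition.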
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator Pos,
                                            const DebugLoc &Loc,
                                            MachineInstr &MI,
                                            CondRegArray &CondRegs) {
  X86::CondCode Cond = X86::getCondFromSETCC(MI);
  // Note that we can't usefully rewrite this to the inverse without complex
  // analysis of the users of the setCC. Largely we rely on any avoidable
  // duplicates having already been avoided by this point.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);

  // Rewriting a register def is trivial: we just replace the register and
  // remove the setcc.
  if (!MI.mayStore()) {
    assert(MI.getOperand(0).isReg() &&
           "Cannot have a non-register defined operand to SETcc!");
    Register OldReg = MI.getOperand(0).getReg();
    // Drop kill flags on the old register before replacing. CondReg may have
    // a longer live range.
    MRI->clearKillFlags(OldReg);
    MRI->replaceRegWith(OldReg, CondReg);
    MI.eraseFromParent();
    return;
  }

  // Otherwise, we need to emit a store.
  auto MIB = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                     TII->get(X86::MOV8mr));
  // Copy the address operands.
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(MI.getOperand(i));

  MIB.addReg(CondReg);
  MIB.setMemRefs(MI.memoperands());
  MI.eraseFromParent();
}

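// Rewrite an instruction that consumes CF as data (see isArithmeticOp) by
// re-materializing CF just before it: adding 255 to the saved condition byte
// produces a carry exactly when the byte is one.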
void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
    const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs) {
  // Arithmetic is either reading CF or OF.
  X86::CondCode Cond = X86::COND_B; // CF == 1
  // The addend to use to reset CF or OF when added to the flag value. Use 255
  // so that adding a flag value of one produces a carry out of the byte, as
  // there is no higher bit available to absorb it.
  int Addend = 255;

  // Now get a register that contains the value of the flag input to the
  // arithmetic. We require exactly this flag to simplify the arithmetic
  // required to materialize it back into the flag.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);

  // Insert an instruction that will set the flag back to the desired value.
  Register TmpReg = MRI->createVirtualRegister(PromoteRC);
  auto AddI =
      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
              TII->get(Subtarget->hasNDD() ? X86::ADD8ri_ND : X86::ADD8ri))
          .addDef(TmpReg, RegState::Dead)
          .addReg(CondReg)
          .addImm(Addend);
  (void)AddI;
  LLVM_DEBUG(dbgs() << "  add cond: "; AddI->dump());
  ++NumAddsInserted;
  MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
}

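// Map the X87 floating-point CMOV pseudo opcodes, which encode their
// condition in the opcode rather than in an operand, to the corresponding
// condition code.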
static X86::CondCode getImplicitCondFromMI(unsigned Opc) {
#define FROM_TO(A, B) \
  case X86::CMOV##A##_Fp32: \
  case X86::CMOV##A##_Fp64: \
  case X86::CMOV##A##_Fp80: \
    return X86::COND_##B;

  switch (Opc) {
  default:
    return X86::COND_INVALID;
    FROM_TO(B, B)
    FROM_TO(E, E)
    FROM_TO(P, P)
    FROM_TO(BE, BE)
    FROM_TO(NB, AE)
    FROM_TO(NE, NE)
    FROM_TO(NP, NP)
    FROM_TO(NBE, A)
  }
#undef FROM_TO
}

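// Rewrite an X87 floating-point CMOV pseudo opcode to its COND_E or COND_NE
// form while preserving the operand size.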
static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC) {
  assert((CC == X86::COND_E || CC == X86::COND_NE) && "Unexpected CC");
#define CASE(A) \
  case X86::CMOVB_##A: \
  case X86::CMOVE_##A: \
  case X86::CMOVP_##A: \
  case X86::CMOVBE_##A: \
  case X86::CMOVNB_##A: \
  case X86::CMOVNE_##A: \
  case X86::CMOVNP_##A: \
  case X86::CMOVNBE_##A: \
    return (CC == X86::COND_E) ? X86::CMOVE_##A : X86::CMOVNE_##A;
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected opcode");
    CASE(Fp32)
    CASE(Fp64)
    CASE(Fp80)
  }
#undef CASE
}

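// Rewrite a generic EFLAGS-consuming instruction (Jcc, CMOV, etc.) to test
// the saved condition byte and key off ZF instead of the original condition.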
void X86FlagsCopyLoweringPass::rewriteMI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator Pos,
                                         const DebugLoc &Loc, MachineInstr &MI,
                                         CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  bool IsImplicitCC = false;
  X86::CondCode CC = X86::getCondFromMI(MI);
  if (CC == X86::COND_INVALID) {
    CC = getImplicitCondFromMI(MI.getOpcode());
    IsImplicitCC = true;
  }
  assert(CC != X86::COND_INVALID && "Unknown EFLAG user!");
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(MBB, Pos, Loc, CC, CondRegs);

  // Insert a direct test of the saved register.
  insertTest(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), CondReg);

  // Rewrite the instruction to use the !ZF flag from the test, and then kill
  // its use of the flags afterward.
  X86::CondCode NewCC = Inverted ? X86::COND_E : X86::COND_NE;
  if (IsImplicitCC)
    MI.setDesc(TII->get(getOpcodeWithCC(MI.getOpcode(), NewCC)));
  else
    MI.getOperand(MI.getDesc().getNumOperands() - 1).setImm(NewCC);

  MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
  LLVM_DEBUG(dbgs() << "  fixed instruction: "; MI.dump());
}