Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
35294 views
1
//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass identifies loops where we can generate the Hexagon hardware
10
// loop instruction. The hardware loop can perform loop branches with a
11
// zero-cycle overhead.
12
//
13
// The pattern that defines the induction variable can changed depending on
14
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15
// normalizes induction variables, and the Loop Strength Reduction pass
16
// run by 'llc' may also make changes to the induction variable.
17
// The pattern detected by this phase is due to running Strength Reduction.
18
//
19
// Criteria for hardware loops:
20
// - Countable loops (w/ ind. var for a trip count)
21
// - Assumes loops are normalized by IndVarSimplify
22
// - Try inner-most loops first
23
// - No function calls in loops.
24
//
25
//===----------------------------------------------------------------------===//
26
27
#include "HexagonInstrInfo.h"
28
#include "HexagonSubtarget.h"
29
#include "llvm/ADT/ArrayRef.h"
30
#include "llvm/ADT/STLExtras.h"
31
#include "llvm/ADT/SmallSet.h"
32
#include "llvm/ADT/SmallVector.h"
33
#include "llvm/ADT/Statistic.h"
34
#include "llvm/ADT/StringRef.h"
35
#include "llvm/CodeGen/MachineBasicBlock.h"
36
#include "llvm/CodeGen/MachineDominators.h"
37
#include "llvm/CodeGen/MachineFunction.h"
38
#include "llvm/CodeGen/MachineFunctionPass.h"
39
#include "llvm/CodeGen/MachineInstr.h"
40
#include "llvm/CodeGen/MachineInstrBuilder.h"
41
#include "llvm/CodeGen/MachineLoopInfo.h"
42
#include "llvm/CodeGen/MachineOperand.h"
43
#include "llvm/CodeGen/MachineRegisterInfo.h"
44
#include "llvm/CodeGen/TargetRegisterInfo.h"
45
#include "llvm/IR/Constants.h"
46
#include "llvm/IR/DebugLoc.h"
47
#include "llvm/InitializePasses.h"
48
#include "llvm/Pass.h"
49
#include "llvm/Support/CommandLine.h"
50
#include "llvm/Support/Debug.h"
51
#include "llvm/Support/ErrorHandling.h"
52
#include "llvm/Support/MathExtras.h"
53
#include "llvm/Support/raw_ostream.h"
54
#include <cassert>
55
#include <cstdint>
56
#include <cstdlib>
57
#include <iterator>
58
#include <map>
59
#include <set>
60
#include <string>
61
#include <utility>
62
#include <vector>
63
64
using namespace llvm;
65
66
#define DEBUG_TYPE "hwloops"
67
68
#ifndef NDEBUG
69
static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
70
71
// Option to create preheader only for a specific function.
72
static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
73
cl::init(""));
74
#endif
75
76
// Option to create a preheader if one doesn't exist.
77
static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
78
cl::Hidden, cl::init(true),
79
cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
80
81
// Turn it off by default. If a preheader block is not created here, the
82
// software pipeliner may be unable to find a block suitable to serve as
83
// a preheader. In that case SWP will not run.
84
static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden,
85
cl::desc("Allow speculation of preheader "
86
"instructions"));
87
88
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
89
90
namespace llvm {
91
92
FunctionPass *createHexagonHardwareLoops();
93
void initializeHexagonHardwareLoopsPass(PassRegistry&);
94
95
} // end namespace llvm
96
97
namespace {
98
99
class CountValue;
100
101
struct HexagonHardwareLoops : public MachineFunctionPass {
102
MachineLoopInfo *MLI;
103
MachineRegisterInfo *MRI;
104
MachineDominatorTree *MDT;
105
const HexagonInstrInfo *TII;
106
const HexagonRegisterInfo *TRI;
107
#ifndef NDEBUG
108
static int Counter;
109
#endif
110
111
public:
112
static char ID;
113
114
HexagonHardwareLoops() : MachineFunctionPass(ID) {}
115
116
bool runOnMachineFunction(MachineFunction &MF) override;
117
118
StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
119
120
void getAnalysisUsage(AnalysisUsage &AU) const override {
121
AU.addRequired<MachineDominatorTreeWrapperPass>();
122
AU.addRequired<MachineLoopInfoWrapperPass>();
123
MachineFunctionPass::getAnalysisUsage(AU);
124
}
125
126
private:
127
using LoopFeederMap = std::map<Register, MachineInstr *>;
128
129
/// Kinds of comparisons in the compare instructions.
130
struct Comparison {
131
enum Kind {
132
EQ = 0x01,
133
NE = 0x02,
134
L = 0x04,
135
G = 0x08,
136
U = 0x40,
137
LTs = L,
138
LEs = L | EQ,
139
GTs = G,
140
GEs = G | EQ,
141
LTu = L | U,
142
LEu = L | EQ | U,
143
GTu = G | U,
144
GEu = G | EQ | U
145
};
146
147
static Kind getSwappedComparison(Kind Cmp) {
148
assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
149
if ((Cmp & L) || (Cmp & G))
150
return (Kind)(Cmp ^ (L|G));
151
return Cmp;
152
}
153
154
static Kind getNegatedComparison(Kind Cmp) {
155
if ((Cmp & L) || (Cmp & G))
156
return (Kind)((Cmp ^ (L | G)) ^ EQ);
157
if ((Cmp & NE) || (Cmp & EQ))
158
return (Kind)(Cmp ^ (EQ | NE));
159
return (Kind)0;
160
}
161
162
static bool isSigned(Kind Cmp) {
163
return (Cmp & (L | G) && !(Cmp & U));
164
}
165
166
static bool isUnsigned(Kind Cmp) {
167
return (Cmp & U);
168
}
169
};
170
171
/// Find the register that contains the loop controlling
172
/// induction variable.
173
/// If successful, it will return true and set the \p Reg, \p IVBump
174
/// and \p IVOp arguments. Otherwise it will return false.
175
/// The returned induction register is the register R that follows the
176
/// following induction pattern:
177
/// loop:
178
/// R = phi ..., [ R.next, LatchBlock ]
179
/// R.next = R + #bump
180
/// if (R.next < #N) goto loop
181
/// IVBump is the immediate value added to R, and IVOp is the instruction
182
/// "R.next = R + #bump".
183
bool findInductionRegister(MachineLoop *L, Register &Reg,
184
int64_t &IVBump, MachineInstr *&IVOp) const;
185
186
/// Return the comparison kind for the specified opcode.
187
Comparison::Kind getComparisonKind(unsigned CondOpc,
188
MachineOperand *InitialValue,
189
const MachineOperand *Endvalue,
190
int64_t IVBump) const;
191
192
/// Analyze the statements in a loop to determine if the loop
193
/// has a computable trip count and, if so, return a value that represents
194
/// the trip count expression.
195
CountValue *getLoopTripCount(MachineLoop *L,
196
SmallVectorImpl<MachineInstr *> &OldInsts);
197
198
/// Return the expression that represents the number of times
199
/// a loop iterates. The function takes the operands that represent the
200
/// loop start value, loop end value, and induction value. Based upon
201
/// these operands, the function attempts to compute the trip count.
202
/// If the trip count is not directly available (as an immediate value,
203
/// or a register), the function will attempt to insert computation of it
204
/// to the loop's preheader.
205
CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
206
const MachineOperand *End, Register IVReg,
207
int64_t IVBump, Comparison::Kind Cmp) const;
208
209
/// Return true if the instruction is not valid within a hardware
210
/// loop.
211
bool isInvalidLoopOperation(const MachineInstr *MI,
212
bool IsInnerHWLoop) const;
213
214
/// Return true if the loop contains an instruction that inhibits
215
/// using the hardware loop.
216
bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
217
218
/// Given a loop, check if we can convert it to a hardware loop.
219
/// If so, then perform the conversion and return true.
220
bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
221
222
/// Return true if the instruction is now dead.
223
bool isDead(const MachineInstr *MI,
224
SmallVectorImpl<MachineInstr *> &DeadPhis) const;
225
226
/// Remove the instruction if it is now dead.
227
void removeIfDead(MachineInstr *MI);
228
229
/// Make sure that the "bump" instruction executes before the
230
/// compare. We need that for the IV fixup, so that the compare
231
/// instruction would not use a bumped value that has not yet been
232
/// defined. If the instructions are out of order, try to reorder them.
233
bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
234
235
/// Return true if MO and MI pair is visited only once. If visited
236
/// more than once, this indicates there is recursion. In such a case,
237
/// return false.
238
bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
239
const MachineOperand *MO,
240
LoopFeederMap &LoopFeederPhi) const;
241
242
/// Return true if the Phi may generate a value that may underflow,
243
/// or may wrap.
244
bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
245
MachineBasicBlock *MBB, MachineLoop *L,
246
LoopFeederMap &LoopFeederPhi) const;
247
248
/// Return true if the induction variable may underflow an unsigned
249
/// value in the first iteration.
250
bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
251
const MachineOperand *EndVal,
252
MachineBasicBlock *MBB, MachineLoop *L,
253
LoopFeederMap &LoopFeederPhi) const;
254
255
/// Check if the given operand has a compile-time known constant
256
/// value. Return true if yes, and false otherwise. When returning true, set
257
/// Val to the corresponding constant value.
258
bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
259
260
/// Check if the operand has a compile-time known constant value.
261
bool isImmediate(const MachineOperand &MO) const {
262
int64_t V;
263
return checkForImmediate(MO, V);
264
}
265
266
/// Return the immediate for the specified operand.
267
int64_t getImmediate(const MachineOperand &MO) const {
268
int64_t V;
269
if (!checkForImmediate(MO, V))
270
llvm_unreachable("Invalid operand");
271
return V;
272
}
273
274
/// Reset the given machine operand to now refer to a new immediate
275
/// value. Assumes that the operand was already referencing an immediate
276
/// value, either directly, or via a register.
277
void setImmediate(MachineOperand &MO, int64_t Val);
278
279
/// Fix the data flow of the induction variable.
280
/// The desired flow is: phi ---> bump -+-> comparison-in-latch.
281
/// |
282
/// +-> back to phi
283
/// where "bump" is the increment of the induction variable:
284
/// iv = iv + #const.
285
/// Due to some prior code transformations, the actual flow may look
286
/// like this:
287
/// phi -+-> bump ---> back to phi
288
/// |
289
/// +-> comparison-in-latch (against upper_bound-bump),
290
/// i.e. the comparison that controls the loop execution may be using
291
/// the value of the induction variable from before the increment.
292
///
293
/// Return true if the loop's flow is the desired one (i.e. it's
294
/// either been fixed, or no fixing was necessary).
295
/// Otherwise, return false. This can happen if the induction variable
296
/// couldn't be identified, or if the value in the latch's comparison
297
/// cannot be adjusted to reflect the post-bump value.
298
bool fixupInductionVariable(MachineLoop *L);
299
300
/// Given a loop, if it does not have a preheader, create one.
301
/// Return the block that is the preheader.
302
MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
303
};
304
305
char HexagonHardwareLoops::ID = 0;
306
#ifndef NDEBUG
307
int HexagonHardwareLoops::Counter = 0;
308
#endif
309
310
/// Abstraction for a trip count of a loop. A smaller version
311
/// of the MachineOperand class without the concerns of changing the
312
/// operand representation.
313
class CountValue {
314
public:
315
enum CountValueType {
316
CV_Register,
317
CV_Immediate
318
};
319
320
private:
321
CountValueType Kind;
322
union Values {
323
Values() : R{Register(), 0} {}
324
Values(const Values&) = default;
325
struct {
326
Register Reg;
327
unsigned Sub;
328
} R;
329
unsigned ImmVal;
330
} Contents;
331
332
public:
333
explicit CountValue(CountValueType t, Register v, unsigned u = 0) {
334
Kind = t;
335
if (Kind == CV_Register) {
336
Contents.R.Reg = v;
337
Contents.R.Sub = u;
338
} else {
339
Contents.ImmVal = v;
340
}
341
}
342
343
bool isReg() const { return Kind == CV_Register; }
344
bool isImm() const { return Kind == CV_Immediate; }
345
346
Register getReg() const {
347
assert(isReg() && "Wrong CountValue accessor");
348
return Contents.R.Reg;
349
}
350
351
unsigned getSubReg() const {
352
assert(isReg() && "Wrong CountValue accessor");
353
return Contents.R.Sub;
354
}
355
356
unsigned getImm() const {
357
assert(isImm() && "Wrong CountValue accessor");
358
return Contents.ImmVal;
359
}
360
361
void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
362
if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }
363
if (isImm()) { OS << Contents.ImmVal; }
364
}
365
};
366
367
} // end anonymous namespace
368
369
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
370
"Hexagon Hardware Loops", false, false)
371
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
372
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
373
INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
374
"Hexagon Hardware Loops", false, false)
375
376
FunctionPass *llvm::createHexagonHardwareLoops() {
377
return new HexagonHardwareLoops();
378
}
379
380
bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
381
LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
382
if (skipFunction(MF.getFunction()))
383
return false;
384
385
bool Changed = false;
386
387
MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
388
MRI = &MF.getRegInfo();
389
MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
390
const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
391
TII = HST.getInstrInfo();
392
TRI = HST.getRegisterInfo();
393
394
for (auto &L : *MLI)
395
if (L->isOutermost()) {
396
bool L0Used = false;
397
bool L1Used = false;
398
Changed |= convertToHardwareLoop(L, L0Used, L1Used);
399
}
400
401
return Changed;
402
}
403
404
bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
405
Register &Reg,
406
int64_t &IVBump,
407
MachineInstr *&IVOp
408
) const {
409
MachineBasicBlock *Header = L->getHeader();
410
MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
411
MachineBasicBlock *Latch = L->getLoopLatch();
412
MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
413
if (!Header || !Preheader || !Latch || !ExitingBlock)
414
return false;
415
416
// This pair represents an induction register together with an immediate
417
// value that will be added to it in each loop iteration.
418
using RegisterBump = std::pair<Register, int64_t>;
419
420
// Mapping: R.next -> (R, bump), where R, R.next and bump are derived
421
// from an induction operation
422
// R.next = R + bump
423
// where bump is an immediate value.
424
using InductionMap = std::map<Register, RegisterBump>;
425
426
InductionMap IndMap;
427
428
using instr_iterator = MachineBasicBlock::instr_iterator;
429
430
for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
431
I != E && I->isPHI(); ++I) {
432
MachineInstr *Phi = &*I;
433
434
// Have a PHI instruction. Get the operand that corresponds to the
435
// latch block, and see if is a result of an addition of form "reg+imm",
436
// where the "reg" is defined by the PHI node we are looking at.
437
for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
438
if (Phi->getOperand(i+1).getMBB() != Latch)
439
continue;
440
441
Register PhiOpReg = Phi->getOperand(i).getReg();
442
MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
443
444
if (DI->getDesc().isAdd()) {
445
// If the register operand to the add is the PHI we're looking at, this
446
// meets the induction pattern.
447
Register IndReg = DI->getOperand(1).getReg();
448
MachineOperand &Opnd2 = DI->getOperand(2);
449
int64_t V;
450
if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
451
Register UpdReg = DI->getOperand(0).getReg();
452
IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
453
}
454
}
455
} // for (i)
456
} // for (instr)
457
458
SmallVector<MachineOperand,2> Cond;
459
MachineBasicBlock *TB = nullptr, *FB = nullptr;
460
bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
461
if (NotAnalyzed)
462
return false;
463
464
Register PredR;
465
unsigned PredPos, PredRegFlags;
466
if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
467
return false;
468
469
MachineInstr *PredI = MRI->getVRegDef(PredR);
470
if (!PredI->isCompare())
471
return false;
472
473
Register CmpReg1, CmpReg2;
474
int64_t CmpImm = 0, CmpMask = 0;
475
bool CmpAnalyzed =
476
TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
477
// Fail if the compare was not analyzed, or it's not comparing a register
478
// with an immediate value. Not checking the mask here, since we handle
479
// the individual compare opcodes (including A4_cmpb*) later on.
480
if (!CmpAnalyzed)
481
return false;
482
483
// Exactly one of the input registers to the comparison should be among
484
// the induction registers.
485
InductionMap::iterator IndMapEnd = IndMap.end();
486
InductionMap::iterator F = IndMapEnd;
487
if (CmpReg1 != 0) {
488
InductionMap::iterator F1 = IndMap.find(CmpReg1);
489
if (F1 != IndMapEnd)
490
F = F1;
491
}
492
if (CmpReg2 != 0) {
493
InductionMap::iterator F2 = IndMap.find(CmpReg2);
494
if (F2 != IndMapEnd) {
495
if (F != IndMapEnd)
496
return false;
497
F = F2;
498
}
499
}
500
if (F == IndMapEnd)
501
return false;
502
503
Reg = F->second.first;
504
IVBump = F->second.second;
505
IVOp = MRI->getVRegDef(F->first);
506
return true;
507
}
508
509
// Return the comparison kind for the specified opcode.
510
HexagonHardwareLoops::Comparison::Kind
511
HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
512
MachineOperand *InitialValue,
513
const MachineOperand *EndValue,
514
int64_t IVBump) const {
515
Comparison::Kind Cmp = (Comparison::Kind)0;
516
switch (CondOpc) {
517
case Hexagon::C2_cmpeq:
518
case Hexagon::C2_cmpeqi:
519
case Hexagon::C2_cmpeqp:
520
Cmp = Comparison::EQ;
521
break;
522
case Hexagon::C4_cmpneq:
523
case Hexagon::C4_cmpneqi:
524
Cmp = Comparison::NE;
525
break;
526
case Hexagon::C2_cmplt:
527
Cmp = Comparison::LTs;
528
break;
529
case Hexagon::C2_cmpltu:
530
Cmp = Comparison::LTu;
531
break;
532
case Hexagon::C4_cmplte:
533
case Hexagon::C4_cmpltei:
534
Cmp = Comparison::LEs;
535
break;
536
case Hexagon::C4_cmplteu:
537
case Hexagon::C4_cmplteui:
538
Cmp = Comparison::LEu;
539
break;
540
case Hexagon::C2_cmpgt:
541
case Hexagon::C2_cmpgti:
542
case Hexagon::C2_cmpgtp:
543
Cmp = Comparison::GTs;
544
break;
545
case Hexagon::C2_cmpgtu:
546
case Hexagon::C2_cmpgtui:
547
case Hexagon::C2_cmpgtup:
548
Cmp = Comparison::GTu;
549
break;
550
case Hexagon::C2_cmpgei:
551
Cmp = Comparison::GEs;
552
break;
553
case Hexagon::C2_cmpgeui:
554
Cmp = Comparison::GEs;
555
break;
556
default:
557
return (Comparison::Kind)0;
558
}
559
return Cmp;
560
}
561
562
/// Analyze the statements in a loop to determine if the loop has
563
/// a computable trip count and, if so, return a value that represents
564
/// the trip count expression.
565
///
566
/// This function iterates over the phi nodes in the loop to check for
567
/// induction variable patterns that are used in the calculation for
568
/// the number of time the loop is executed.
569
CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
570
SmallVectorImpl<MachineInstr *> &OldInsts) {
571
MachineBasicBlock *TopMBB = L->getTopBlock();
572
MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
573
assert(PI != TopMBB->pred_end() &&
574
"Loop must have more than one incoming edge!");
575
MachineBasicBlock *Backedge = *PI++;
576
if (PI == TopMBB->pred_end()) // dead loop?
577
return nullptr;
578
MachineBasicBlock *Incoming = *PI++;
579
if (PI != TopMBB->pred_end()) // multiple backedges?
580
return nullptr;
581
582
// Make sure there is one incoming and one backedge and determine which
583
// is which.
584
if (L->contains(Incoming)) {
585
if (L->contains(Backedge))
586
return nullptr;
587
std::swap(Incoming, Backedge);
588
} else if (!L->contains(Backedge))
589
return nullptr;
590
591
// Look for the cmp instruction to determine if we can get a useful trip
592
// count. The trip count can be either a register or an immediate. The
593
// location of the value depends upon the type (reg or imm).
594
MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
595
if (!ExitingBlock)
596
return nullptr;
597
598
Register IVReg = 0;
599
int64_t IVBump = 0;
600
MachineInstr *IVOp;
601
bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
602
if (!FoundIV)
603
return nullptr;
604
605
MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
606
607
MachineOperand *InitialValue = nullptr;
608
MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
609
MachineBasicBlock *Latch = L->getLoopLatch();
610
for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
611
MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
612
if (MBB == Preheader)
613
InitialValue = &IV_Phi->getOperand(i);
614
else if (MBB == Latch)
615
IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
616
}
617
if (!InitialValue)
618
return nullptr;
619
620
SmallVector<MachineOperand,2> Cond;
621
MachineBasicBlock *TB = nullptr, *FB = nullptr;
622
bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
623
if (NotAnalyzed)
624
return nullptr;
625
626
MachineBasicBlock *Header = L->getHeader();
627
// TB must be non-null. If FB is also non-null, one of them must be
628
// the header. Otherwise, branch to TB could be exiting the loop, and
629
// the fall through can go to the header.
630
assert (TB && "Exit block without a branch?");
631
if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
632
MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
633
SmallVector<MachineOperand,2> LCond;
634
bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
635
if (NotAnalyzed)
636
return nullptr;
637
if (TB == Latch)
638
TB = (LTB == Header) ? LTB : LFB;
639
else
640
FB = (LTB == Header) ? LTB: LFB;
641
}
642
assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
643
if (!TB || (FB && TB != Header && FB != Header))
644
return nullptr;
645
646
// Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch
647
// to put imm(0), followed by P in the vector Cond.
648
// If TB is not the header, it means that the "not-taken" path must lead
649
// to the header.
650
bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
651
Register PredReg;
652
unsigned PredPos, PredRegFlags;
653
if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
654
return nullptr;
655
MachineInstr *CondI = MRI->getVRegDef(PredReg);
656
unsigned CondOpc = CondI->getOpcode();
657
658
Register CmpReg1, CmpReg2;
659
int64_t Mask = 0, ImmValue = 0;
660
bool AnalyzedCmp =
661
TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
662
if (!AnalyzedCmp)
663
return nullptr;
664
665
// The comparison operator type determines how we compute the loop
666
// trip count.
667
OldInsts.push_back(CondI);
668
OldInsts.push_back(IVOp);
669
670
// Sadly, the following code gets information based on the position
671
// of the operands in the compare instruction. This has to be done
672
// this way, because the comparisons check for a specific relationship
673
// between the operands (e.g. is-less-than), rather than to find out
674
// what relationship the operands are in (as on PPC).
675
Comparison::Kind Cmp;
676
bool isSwapped = false;
677
const MachineOperand &Op1 = CondI->getOperand(1);
678
const MachineOperand &Op2 = CondI->getOperand(2);
679
const MachineOperand *EndValue = nullptr;
680
681
if (Op1.isReg()) {
682
if (Op2.isImm() || Op1.getReg() == IVReg)
683
EndValue = &Op2;
684
else {
685
EndValue = &Op1;
686
isSwapped = true;
687
}
688
}
689
690
if (!EndValue)
691
return nullptr;
692
693
Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
694
if (!Cmp)
695
return nullptr;
696
if (Negated)
697
Cmp = Comparison::getNegatedComparison(Cmp);
698
if (isSwapped)
699
Cmp = Comparison::getSwappedComparison(Cmp);
700
701
if (InitialValue->isReg()) {
702
Register R = InitialValue->getReg();
703
MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
704
if (!MDT->properlyDominates(DefBB, Header)) {
705
int64_t V;
706
if (!checkForImmediate(*InitialValue, V))
707
return nullptr;
708
}
709
OldInsts.push_back(MRI->getVRegDef(R));
710
}
711
if (EndValue->isReg()) {
712
Register R = EndValue->getReg();
713
MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
714
if (!MDT->properlyDominates(DefBB, Header)) {
715
int64_t V;
716
if (!checkForImmediate(*EndValue, V))
717
return nullptr;
718
}
719
OldInsts.push_back(MRI->getVRegDef(R));
720
}
721
722
return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
723
}
724
725
/// Helper function that returns the expression that represents the
726
/// number of times a loop iterates. The function takes the operands that
727
/// represent the loop start value, loop end value, and induction value.
728
/// Based upon these operands, the function attempts to compute the trip count.
729
CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
730
const MachineOperand *Start,
731
const MachineOperand *End,
732
Register IVReg,
733
int64_t IVBump,
734
Comparison::Kind Cmp) const {
735
// Cannot handle comparison EQ, i.e. while (A == B).
736
if (Cmp == Comparison::EQ)
737
return nullptr;
738
739
// Check if either the start or end values are an assignment of an immediate.
740
// If so, use the immediate value rather than the register.
741
if (Start->isReg()) {
742
const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
743
if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
744
StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
745
Start = &StartValInstr->getOperand(1);
746
}
747
if (End->isReg()) {
748
const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
749
if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
750
EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
751
End = &EndValInstr->getOperand(1);
752
}
753
754
if (!Start->isReg() && !Start->isImm())
755
return nullptr;
756
if (!End->isReg() && !End->isImm())
757
return nullptr;
758
759
bool CmpLess = Cmp & Comparison::L;
760
bool CmpGreater = Cmp & Comparison::G;
761
bool CmpHasEqual = Cmp & Comparison::EQ;
762
763
// Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
764
if (CmpLess && IVBump < 0)
765
// Loop going while iv is "less" with the iv value going down. Must wrap.
766
return nullptr;
767
768
if (CmpGreater && IVBump > 0)
769
// Loop going while iv is "greater" with the iv value going up. Must wrap.
770
return nullptr;
771
772
// Phis that may feed into the loop.
773
LoopFeederMap LoopFeederPhi;
774
775
// Check if the initial value may be zero and can be decremented in the first
776
// iteration. If the value is zero, the endloop instruction will not decrement
777
// the loop counter, so we shouldn't generate a hardware loop in this case.
778
if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
779
LoopFeederPhi))
780
return nullptr;
781
782
if (Start->isImm() && End->isImm()) {
783
// Both, start and end are immediates.
784
int64_t StartV = Start->getImm();
785
int64_t EndV = End->getImm();
786
int64_t Dist = EndV - StartV;
787
if (Dist == 0)
788
return nullptr;
789
790
bool Exact = (Dist % IVBump) == 0;
791
792
if (Cmp == Comparison::NE) {
793
if (!Exact)
794
return nullptr;
795
if ((Dist < 0) ^ (IVBump < 0))
796
return nullptr;
797
}
798
799
// For comparisons that include the final value (i.e. include equality
800
// with the final value), we need to increase the distance by 1.
801
if (CmpHasEqual)
802
Dist = Dist > 0 ? Dist+1 : Dist-1;
803
804
// For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
805
// CmpGreater should imply Dist < 0. These conditions could actually
806
// fail, for example, in unreachable code (which may still appear to be
807
// reachable in the CFG).
808
if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
809
return nullptr;
810
811
// "Normalized" distance, i.e. with the bump set to +-1.
812
int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
813
: (-Dist + (-IVBump - 1)) / (-IVBump);
814
assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
815
816
uint64_t Count = Dist1;
817
818
if (Count > 0xFFFFFFFFULL)
819
return nullptr;
820
821
return new CountValue(CountValue::CV_Immediate, Count);
822
}
823
824
// A general case: Start and End are some values, but the actual
825
// iteration count may not be available. If it is not, insert
826
// a computation of it into the preheader.
827
828
// If the induction variable bump is not a power of 2, quit.
829
// Othwerise we'd need a general integer division.
830
if (!isPowerOf2_64(std::abs(IVBump)))
831
return nullptr;
832
833
MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
834
assert (PH && "Should have a preheader by now");
835
MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
836
DebugLoc DL;
837
if (InsertPos != PH->end())
838
DL = InsertPos->getDebugLoc();
839
840
// If Start is an immediate and End is a register, the trip count
841
// will be "reg - imm". Hexagon's "subtract immediate" instruction
842
// is actually "reg + -imm".
843
844
// If the loop IV is going downwards, i.e. if the bump is negative,
845
// then the iteration count (computed as End-Start) will need to be
846
// negated. To avoid the negation, just swap Start and End.
847
if (IVBump < 0) {
848
std::swap(Start, End);
849
IVBump = -IVBump;
850
}
851
// Cmp may now have a wrong direction, e.g. LEs may now be GEs.
852
// Signedness, and "including equality" are preserved.
853
854
bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
855
bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
856
857
int64_t StartV = 0, EndV = 0;
858
if (Start->isImm())
859
StartV = Start->getImm();
860
if (End->isImm())
861
EndV = End->getImm();
862
863
int64_t AdjV = 0;
864
// To compute the iteration count, we would need this computation:
865
// Count = (End - Start + (IVBump-1)) / IVBump
866
// or, when CmpHasEqual:
867
// Count = (End - Start + (IVBump-1)+1) / IVBump
868
// The "IVBump-1" part is the adjustment (AdjV). We can avoid
869
// generating an instruction specifically to add it if we can adjust
870
// the immediate values for Start or End.
871
872
if (CmpHasEqual) {
873
// Need to add 1 to the total iteration count.
874
if (Start->isImm())
875
StartV--;
876
else if (End->isImm())
877
EndV++;
878
else
879
AdjV += 1;
880
}
881
882
if (Cmp != Comparison::NE) {
883
if (Start->isImm())
884
StartV -= (IVBump-1);
885
else if (End->isImm())
886
EndV += (IVBump-1);
887
else
888
AdjV += (IVBump-1);
889
}
890
891
Register R = 0;
892
unsigned SR = 0;
893
if (Start->isReg()) {
894
R = Start->getReg();
895
SR = Start->getSubReg();
896
} else {
897
R = End->getReg();
898
SR = End->getSubReg();
899
}
900
const TargetRegisterClass *RC = MRI->getRegClass(R);
901
// Hardware loops cannot handle 64-bit registers. If it's a double
902
// register, it has to have a subregister.
903
if (!SR && RC == &Hexagon::DoubleRegsRegClass)
904
return nullptr;
905
const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
906
907
// Compute DistR (register with the distance between Start and End).
908
Register DistR;
909
unsigned DistSR;
910
911
// Avoid special case, where the start value is an imm(0).
912
if (Start->isImm() && StartV == 0) {
913
DistR = End->getReg();
914
DistSR = End->getSubReg();
915
} else {
916
const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
917
(RegToImm ? TII->get(Hexagon::A2_subri) :
918
TII->get(Hexagon::A2_addi));
919
if (RegToReg || RegToImm) {
920
Register SubR = MRI->createVirtualRegister(IntRC);
921
MachineInstrBuilder SubIB =
922
BuildMI(*PH, InsertPos, DL, SubD, SubR);
923
924
if (RegToReg)
925
SubIB.addReg(End->getReg(), 0, End->getSubReg())
926
.addReg(Start->getReg(), 0, Start->getSubReg());
927
else
928
SubIB.addImm(EndV)
929
.addReg(Start->getReg(), 0, Start->getSubReg());
930
DistR = SubR;
931
} else {
932
// If the loop has been unrolled, we should use the original loop count
933
// instead of recalculating the value. This will avoid additional
934
// 'Add' instruction.
935
const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
936
if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
937
EndValInstr->getOperand(1).getSubReg() == 0 &&
938
EndValInstr->getOperand(2).getImm() == StartV) {
939
DistR = EndValInstr->getOperand(1).getReg();
940
} else {
941
Register SubR = MRI->createVirtualRegister(IntRC);
942
MachineInstrBuilder SubIB =
943
BuildMI(*PH, InsertPos, DL, SubD, SubR);
944
SubIB.addReg(End->getReg(), 0, End->getSubReg())
945
.addImm(-StartV);
946
DistR = SubR;
947
}
948
}
949
DistSR = 0;
950
}
951
952
// From DistR, compute AdjR (register with the adjusted distance).
953
Register AdjR;
954
unsigned AdjSR;
955
956
if (AdjV == 0) {
957
AdjR = DistR;
958
AdjSR = DistSR;
959
} else {
960
// Generate CountR = ADD DistR, AdjVal
961
Register AddR = MRI->createVirtualRegister(IntRC);
962
MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
963
BuildMI(*PH, InsertPos, DL, AddD, AddR)
964
.addReg(DistR, 0, DistSR)
965
.addImm(AdjV);
966
967
AdjR = AddR;
968
AdjSR = 0;
969
}
970
971
// From AdjR, compute CountR (register with the final count).
972
Register CountR;
973
unsigned CountSR;
974
975
if (IVBump == 1) {
976
CountR = AdjR;
977
CountSR = AdjSR;
978
} else {
979
// The IV bump is a power of two. Log_2(IV bump) is the shift amount.
980
unsigned Shift = Log2_32(IVBump);
981
982
// Generate NormR = LSR DistR, Shift.
983
Register LsrR = MRI->createVirtualRegister(IntRC);
984
const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
985
BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
986
.addReg(AdjR, 0, AdjSR)
987
.addImm(Shift);
988
989
CountR = LsrR;
990
CountSR = 0;
991
}
992
993
return new CountValue(CountValue::CV_Register, CountR, CountSR);
994
}
995
996
/// Return true if the operation is invalid within hardware loop.
997
bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
998
bool IsInnerHWLoop) const {
999
// Call is not allowed because the callee may use a hardware loop except for
1000
// the case when the call never returns.
1001
if (MI->getDesc().isCall())
1002
return !TII->doesNotReturn(*MI);
1003
1004
// Check if the instruction defines a hardware loop register.
1005
using namespace Hexagon;
1006
1007
static const Register Regs01[] = { LC0, SA0, LC1, SA1 };
1008
static const Register Regs1[] = { LC1, SA1 };
1009
auto CheckRegs = IsInnerHWLoop ? ArrayRef(Regs01) : ArrayRef(Regs1);
1010
for (Register R : CheckRegs)
1011
if (MI->modifiesRegister(R, TRI))
1012
return true;
1013
1014
return false;
1015
}
1016
1017
/// Return true if the loop contains an instruction that inhibits
1018
/// the use of the hardware loop instruction.
1019
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1020
bool IsInnerHWLoop) const {
1021
LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1022
<< printMBBReference(**L->block_begin()));
1023
for (MachineBasicBlock *MBB : L->getBlocks()) {
1024
for (const MachineInstr &MI : *MBB) {
1025
if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) {
1026
LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1027
MI.dump(););
1028
return true;
1029
}
1030
}
1031
}
1032
return false;
1033
}
1034
1035
/// Returns true if the instruction is dead. This was essentially
1036
/// copied from DeadMachineInstructionElim::isDead, but with special cases
1037
/// for inline asm, physical registers and instructions with side effects
1038
/// removed.
1039
bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1040
SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1041
// Examine each operand.
1042
for (const MachineOperand &MO : MI->operands()) {
1043
if (!MO.isReg() || !MO.isDef())
1044
continue;
1045
1046
Register Reg = MO.getReg();
1047
if (MRI->use_nodbg_empty(Reg))
1048
continue;
1049
1050
using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1051
1052
// This instruction has users, but if the only user is the phi node for the
1053
// parent block, and the only use of that phi node is this instruction, then
1054
// this instruction is dead: both it (and the phi node) can be removed.
1055
use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1056
use_nodbg_iterator End = MRI->use_nodbg_end();
1057
if (std::next(I) != End || !I->getParent()->isPHI())
1058
return false;
1059
1060
MachineInstr *OnePhi = I->getParent();
1061
for (const MachineOperand &OPO : OnePhi->operands()) {
1062
if (!OPO.isReg() || !OPO.isDef())
1063
continue;
1064
1065
Register OPReg = OPO.getReg();
1066
use_nodbg_iterator nextJ;
1067
for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1068
J != End; J = nextJ) {
1069
nextJ = std::next(J);
1070
MachineOperand &Use = *J;
1071
MachineInstr *UseMI = Use.getParent();
1072
1073
// If the phi node has a user that is not MI, bail.
1074
if (MI != UseMI)
1075
return false;
1076
}
1077
}
1078
DeadPhis.push_back(OnePhi);
1079
}
1080
1081
// If there are no defs with uses, the instruction is dead.
1082
return true;
1083
}
1084
1085
void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1086
// This procedure was essentially copied from DeadMachineInstructionElim.
1087
1088
SmallVector<MachineInstr*, 1> DeadPhis;
1089
if (isDead(MI, DeadPhis)) {
1090
LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1091
1092
// It is possible that some DBG_VALUE instructions refer to this
1093
// instruction. Examine each def operand for such references;
1094
// if found, mark the DBG_VALUE as undef (but don't delete it).
1095
for (const MachineOperand &MO : MI->operands()) {
1096
if (!MO.isReg() || !MO.isDef())
1097
continue;
1098
Register Reg = MO.getReg();
1099
// We use make_early_inc_range here because setReg below invalidates the
1100
// iterator.
1101
for (MachineOperand &MO :
1102
llvm::make_early_inc_range(MRI->use_operands(Reg))) {
1103
MachineInstr *UseMI = MO.getParent();
1104
if (UseMI == MI)
1105
continue;
1106
if (MO.isDebug())
1107
MO.setReg(0U);
1108
}
1109
}
1110
1111
MI->eraseFromParent();
1112
for (unsigned i = 0; i < DeadPhis.size(); ++i)
1113
DeadPhis[i]->eraseFromParent();
1114
}
1115
}
1116
1117
/// Check if the loop is a candidate for converting to a hardware
1118
/// loop. If so, then perform the transformation.
1119
///
1120
/// This function works on innermost loops first. A loop can be converted
1121
/// if it is a counting loop; either a register value or an immediate.
1122
///
1123
/// The code makes several assumptions about the representation of the loop
1124
/// in llvm.
1125
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1126
bool &RecL0used,
1127
bool &RecL1used) {
1128
// This is just to confirm basic correctness.
1129
assert(L->getHeader() && "Loop without a header?");
1130
1131
bool Changed = false;
1132
bool L0Used = false;
1133
bool L1Used = false;
1134
1135
// Process nested loops first.
1136
for (MachineLoop *I : *L) {
1137
Changed |= convertToHardwareLoop(I, RecL0used, RecL1used);
1138
L0Used |= RecL0used;
1139
L1Used |= RecL1used;
1140
}
1141
1142
// If a nested loop has been converted, then we can't convert this loop.
1143
if (Changed && L0Used && L1Used)
1144
return Changed;
1145
1146
unsigned LOOP_i;
1147
unsigned LOOP_r;
1148
unsigned ENDLOOP;
1149
1150
// Flag used to track loopN instruction:
1151
// 1 - Hardware loop is being generated for the inner most loop.
1152
// 0 - Hardware loop is being generated for the outer loop.
1153
unsigned IsInnerHWLoop = 1;
1154
1155
if (L0Used) {
1156
LOOP_i = Hexagon::J2_loop1i;
1157
LOOP_r = Hexagon::J2_loop1r;
1158
ENDLOOP = Hexagon::ENDLOOP1;
1159
IsInnerHWLoop = 0;
1160
} else {
1161
LOOP_i = Hexagon::J2_loop0i;
1162
LOOP_r = Hexagon::J2_loop0r;
1163
ENDLOOP = Hexagon::ENDLOOP0;
1164
}
1165
1166
#ifndef NDEBUG
1167
// Stop trying after reaching the limit (if any).
1168
int Limit = HWLoopLimit;
1169
if (Limit >= 0) {
1170
if (Counter >= HWLoopLimit)
1171
return false;
1172
Counter++;
1173
}
1174
#endif
1175
1176
// Does the loop contain any invalid instructions?
1177
if (containsInvalidInstruction(L, IsInnerHWLoop))
1178
return false;
1179
1180
MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1181
// Don't generate hw loop if the loop has more than one exit.
1182
if (!LastMBB)
1183
return false;
1184
1185
MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
1186
if (LastI == LastMBB->end())
1187
return false;
1188
1189
// Is the induction variable bump feeding the latch condition?
1190
if (!fixupInductionVariable(L))
1191
return false;
1192
1193
// Ensure the loop has a preheader: the loop instruction will be
1194
// placed there.
1195
MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1196
if (!Preheader) {
1197
Preheader = createPreheaderForLoop(L);
1198
if (!Preheader)
1199
return false;
1200
}
1201
1202
MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1203
1204
SmallVector<MachineInstr*, 2> OldInsts;
1205
// Are we able to determine the trip count for the loop?
1206
CountValue *TripCount = getLoopTripCount(L, OldInsts);
1207
if (!TripCount)
1208
return false;
1209
1210
// Is the trip count available in the preheader?
1211
if (TripCount->isReg()) {
1212
// There will be a use of the register inserted into the preheader,
1213
// so make sure that the register is actually defined at that point.
1214
MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1215
MachineBasicBlock *BBDef = TCDef->getParent();
1216
if (!MDT->dominates(BBDef, Preheader))
1217
return false;
1218
}
1219
1220
// Determine the loop start.
1221
MachineBasicBlock *TopBlock = L->getTopBlock();
1222
MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1223
MachineBasicBlock *LoopStart = nullptr;
1224
if (ExitingBlock != L->getLoopLatch()) {
1225
MachineBasicBlock *TB = nullptr, *FB = nullptr;
1226
SmallVector<MachineOperand, 2> Cond;
1227
1228
if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1229
return false;
1230
1231
if (L->contains(TB))
1232
LoopStart = TB;
1233
else if (L->contains(FB))
1234
LoopStart = FB;
1235
else
1236
return false;
1237
}
1238
else
1239
LoopStart = TopBlock;
1240
1241
// Convert the loop to a hardware loop.
1242
LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1243
DebugLoc DL;
1244
if (InsertPos != Preheader->end())
1245
DL = InsertPos->getDebugLoc();
1246
1247
if (TripCount->isReg()) {
1248
// Create a copy of the loop count register.
1249
Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1250
BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1251
.addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1252
// Add the Loop instruction to the beginning of the loop.
1253
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1254
.addReg(CountReg);
1255
} else {
1256
assert(TripCount->isImm() && "Expecting immediate value for trip count");
1257
// Add the Loop immediate instruction to the beginning of the loop,
1258
// if the immediate fits in the instructions. Otherwise, we need to
1259
// create a new virtual register.
1260
int64_t CountImm = TripCount->getImm();
1261
if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1262
Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1263
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1264
.addImm(CountImm);
1265
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1266
.addMBB(LoopStart).addReg(CountReg);
1267
} else
1268
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1269
.addMBB(LoopStart).addImm(CountImm);
1270
}
1271
1272
// Make sure the loop start always has a reference in the CFG.
1273
LoopStart->setMachineBlockAddressTaken();
1274
1275
// Replace the loop branch with an endloop instruction.
1276
DebugLoc LastIDL = LastI->getDebugLoc();
1277
BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1278
1279
// The loop ends with either:
1280
// - a conditional branch followed by an unconditional branch, or
1281
// - a conditional branch to the loop start.
1282
if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1283
LastI->getOpcode() == Hexagon::J2_jumpf) {
1284
// Delete one and change/add an uncond. branch to out of the loop.
1285
MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1286
LastI = LastMBB->erase(LastI);
1287
if (!L->contains(BranchTarget)) {
1288
if (LastI != LastMBB->end())
1289
LastI = LastMBB->erase(LastI);
1290
SmallVector<MachineOperand, 0> Cond;
1291
TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1292
}
1293
} else {
1294
// Conditional branch to loop start; just delete it.
1295
LastMBB->erase(LastI);
1296
}
1297
delete TripCount;
1298
1299
// The induction operation and the comparison may now be
1300
// unneeded. If these are unneeded, then remove them.
1301
for (unsigned i = 0; i < OldInsts.size(); ++i)
1302
removeIfDead(OldInsts[i]);
1303
1304
++NumHWLoops;
1305
1306
// Set RecL1used and RecL0used only after hardware loop has been
1307
// successfully generated. Doing it earlier can cause wrong loop instruction
1308
// to be used.
1309
if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1310
RecL1used = true;
1311
else
1312
RecL0used = true;
1313
1314
return true;
1315
}
1316
1317
bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1318
MachineInstr *CmpI) {
1319
assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1320
1321
MachineBasicBlock *BB = BumpI->getParent();
1322
if (CmpI->getParent() != BB)
1323
return false;
1324
1325
using instr_iterator = MachineBasicBlock::instr_iterator;
1326
1327
// Check if things are in order to begin with.
1328
for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1329
if (&*I == CmpI)
1330
return true;
1331
1332
// Out of order.
1333
Register PredR = CmpI->getOperand(0).getReg();
1334
bool FoundBump = false;
1335
instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1336
for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1337
MachineInstr *In = &*I;
1338
for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1339
MachineOperand &MO = In->getOperand(i);
1340
if (MO.isReg() && MO.isUse()) {
1341
if (MO.getReg() == PredR) // Found an intervening use of PredR.
1342
return false;
1343
}
1344
}
1345
1346
if (In == BumpI) {
1347
BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1348
FoundBump = true;
1349
break;
1350
}
1351
}
1352
assert (FoundBump && "Cannot determine instruction order");
1353
return FoundBump;
1354
}
1355
1356
/// This function is required to break recursion. Visiting phis in a loop may
1357
/// result in recursion during compilation. We break the recursion by making
1358
/// sure that we visit a MachineOperand and its definition in a
1359
/// MachineInstruction only once. If we attempt to visit more than once, then
1360
/// there is recursion, and will return false.
1361
bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1362
MachineInstr *MI,
1363
const MachineOperand *MO,
1364
LoopFeederMap &LoopFeederPhi) const {
1365
if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1366
LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1367
<< printMBBReference(**L->block_begin()));
1368
// Ignore all BBs that form Loop.
1369
if (llvm::is_contained(L->getBlocks(), A))
1370
return false;
1371
MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1372
LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1373
return true;
1374
} else
1375
// Already visited node.
1376
return false;
1377
}
1378
1379
/// Return true if a Phi may generate a value that can underflow.
1380
/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1381
bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1382
MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1383
MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1384
assert(Phi->isPHI() && "Expecting a Phi.");
1385
// Walk through each Phi, and its used operands. Make sure that
1386
// if there is recursion in Phi, we won't generate hardware loops.
1387
for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1388
if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1389
if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1390
Phi->getParent(), L, LoopFeederPhi))
1391
return true;
1392
return false;
1393
}
1394
1395
/// Return true if the induction variable can underflow in the first iteration.
1396
/// An example, is an initial unsigned value that is 0 and is decrement in the
1397
/// first itertion of a do-while loop. In this case, we cannot generate a
1398
/// hardware loop because the endloop instruction does not decrement the loop
1399
/// counter if it is <= 1. We only need to perform this analysis if the
1400
/// initial value is a register.
1401
///
1402
/// This function assumes the initial value may underfow unless proven
1403
/// otherwise. If the type is signed, then we don't care because signed
1404
/// underflow is undefined. We attempt to prove the initial value is not
1405
/// zero by perfoming a crude analysis of the loop counter. This function
1406
/// checks if the initial value is used in any comparison prior to the loop
1407
/// and, if so, assumes the comparison is a range check. This is inexact,
1408
/// but will catch the simple cases.
1409
bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1410
const MachineOperand *InitVal, const MachineOperand *EndVal,
1411
MachineBasicBlock *MBB, MachineLoop *L,
1412
LoopFeederMap &LoopFeederPhi) const {
1413
// Only check register values since they are unknown.
1414
if (!InitVal->isReg())
1415
return false;
1416
1417
if (!EndVal->isImm())
1418
return false;
1419
1420
// A register value that is assigned an immediate is a known value, and it
1421
// won't underflow in the first iteration.
1422
int64_t Imm;
1423
if (checkForImmediate(*InitVal, Imm))
1424
return (EndVal->getImm() == Imm);
1425
1426
Register Reg = InitVal->getReg();
1427
1428
// We don't know the value of a physical register.
1429
if (!Reg.isVirtual())
1430
return true;
1431
1432
MachineInstr *Def = MRI->getVRegDef(Reg);
1433
if (!Def)
1434
return true;
1435
1436
// If the initial value is a Phi or copy and the operands may not underflow,
1437
// then the definition cannot be underflow either.
1438
if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1439
L, LoopFeederPhi))
1440
return false;
1441
if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1442
EndVal, Def->getParent(),
1443
L, LoopFeederPhi))
1444
return false;
1445
1446
// Iterate over the uses of the initial value. If the initial value is used
1447
// in a compare, then we assume this is a range check that ensures the loop
1448
// doesn't underflow. This is not an exact test and should be improved.
1449
for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1450
E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1451
MachineInstr *MI = &*I;
1452
Register CmpReg1, CmpReg2;
1453
int64_t CmpMask = 0, CmpValue = 0;
1454
1455
if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1456
continue;
1457
1458
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1459
SmallVector<MachineOperand, 2> Cond;
1460
if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1461
continue;
1462
1463
Comparison::Kind Cmp =
1464
getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1465
if (Cmp == 0)
1466
continue;
1467
if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1468
Cmp = Comparison::getNegatedComparison(Cmp);
1469
if (CmpReg2 != 0 && CmpReg2 == Reg)
1470
Cmp = Comparison::getSwappedComparison(Cmp);
1471
1472
// Signed underflow is undefined.
1473
if (Comparison::isSigned(Cmp))
1474
return false;
1475
1476
// Check if there is a comparison of the initial value. If the initial value
1477
// is greater than or not equal to another value, then assume this is a
1478
// range check.
1479
if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1480
return false;
1481
}
1482
1483
// OK - this is a hack that needs to be improved. We really need to analyze
1484
// the instructions performed on the initial value. This works on the simplest
1485
// cases only.
1486
if (!Def->isCopy() && !Def->isPHI())
1487
return false;
1488
1489
return true;
1490
}
1491
1492
bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1493
int64_t &Val) const {
1494
if (MO.isImm()) {
1495
Val = MO.getImm();
1496
return true;
1497
}
1498
if (!MO.isReg())
1499
return false;
1500
1501
// MO is a register. Check whether it is defined as an immediate value,
1502
// and if so, get the value of it in TV. That value will then need to be
1503
// processed to handle potential subregisters in MO.
1504
int64_t TV;
1505
1506
Register R = MO.getReg();
1507
if (!R.isVirtual())
1508
return false;
1509
MachineInstr *DI = MRI->getVRegDef(R);
1510
unsigned DOpc = DI->getOpcode();
1511
switch (DOpc) {
1512
case TargetOpcode::COPY:
1513
case Hexagon::A2_tfrsi:
1514
case Hexagon::A2_tfrpi:
1515
case Hexagon::CONST32:
1516
case Hexagon::CONST64:
1517
// Call recursively to avoid an extra check whether operand(1) is
1518
// indeed an immediate (it could be a global address, for example),
1519
// plus we can handle COPY at the same time.
1520
if (!checkForImmediate(DI->getOperand(1), TV))
1521
return false;
1522
break;
1523
case Hexagon::A2_combineii:
1524
case Hexagon::A4_combineir:
1525
case Hexagon::A4_combineii:
1526
case Hexagon::A4_combineri:
1527
case Hexagon::A2_combinew: {
1528
const MachineOperand &S1 = DI->getOperand(1);
1529
const MachineOperand &S2 = DI->getOperand(2);
1530
int64_t V1, V2;
1531
if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1532
return false;
1533
TV = V2 | (static_cast<uint64_t>(V1) << 32);
1534
break;
1535
}
1536
case TargetOpcode::REG_SEQUENCE: {
1537
const MachineOperand &S1 = DI->getOperand(1);
1538
const MachineOperand &S3 = DI->getOperand(3);
1539
int64_t V1, V3;
1540
if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1541
return false;
1542
unsigned Sub2 = DI->getOperand(2).getImm();
1543
unsigned Sub4 = DI->getOperand(4).getImm();
1544
if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1545
TV = V1 | (V3 << 32);
1546
else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1547
TV = V3 | (V1 << 32);
1548
else
1549
llvm_unreachable("Unexpected form of REG_SEQUENCE");
1550
break;
1551
}
1552
1553
default:
1554
return false;
1555
}
1556
1557
// By now, we should have successfully obtained the immediate value defining
1558
// the register referenced in MO. Handle a potential use of a subregister.
1559
switch (MO.getSubReg()) {
1560
case Hexagon::isub_lo:
1561
Val = TV & 0xFFFFFFFFULL;
1562
break;
1563
case Hexagon::isub_hi:
1564
Val = (TV >> 32) & 0xFFFFFFFFULL;
1565
break;
1566
default:
1567
Val = TV;
1568
break;
1569
}
1570
return true;
1571
}
1572
1573
void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1574
if (MO.isImm()) {
1575
MO.setImm(Val);
1576
return;
1577
}
1578
1579
assert(MO.isReg());
1580
Register R = MO.getReg();
1581
MachineInstr *DI = MRI->getVRegDef(R);
1582
1583
const TargetRegisterClass *RC = MRI->getRegClass(R);
1584
Register NewR = MRI->createVirtualRegister(RC);
1585
MachineBasicBlock &B = *DI->getParent();
1586
DebugLoc DL = DI->getDebugLoc();
1587
BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1588
MO.setReg(NewR);
1589
}
1590
1591
bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1592
MachineBasicBlock *Header = L->getHeader();
1593
MachineBasicBlock *Latch = L->getLoopLatch();
1594
MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1595
1596
if (!(Header && Latch && ExitingBlock))
1597
return false;
1598
1599
// These data structures follow the same concept as the corresponding
1600
// ones in findInductionRegister (where some comments are).
1601
using RegisterBump = std::pair<Register, int64_t>;
1602
using RegisterInduction = std::pair<Register, RegisterBump>;
1603
using RegisterInductionSet = std::set<RegisterInduction>;
1604
1605
// Register candidates for induction variables, with their associated bumps.
1606
RegisterInductionSet IndRegs;
1607
1608
// Look for induction patterns:
1609
// %1 = PHI ..., [ latch, %2 ]
1610
// %2 = ADD %1, imm
1611
using instr_iterator = MachineBasicBlock::instr_iterator;
1612
1613
for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1614
I != E && I->isPHI(); ++I) {
1615
MachineInstr *Phi = &*I;
1616
1617
// Have a PHI instruction.
1618
for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1619
if (Phi->getOperand(i+1).getMBB() != Latch)
1620
continue;
1621
1622
Register PhiReg = Phi->getOperand(i).getReg();
1623
MachineInstr *DI = MRI->getVRegDef(PhiReg);
1624
1625
if (DI->getDesc().isAdd()) {
1626
// If the register operand to the add/sub is the PHI we are looking
1627
// at, this meets the induction pattern.
1628
Register IndReg = DI->getOperand(1).getReg();
1629
MachineOperand &Opnd2 = DI->getOperand(2);
1630
int64_t V;
1631
if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1632
Register UpdReg = DI->getOperand(0).getReg();
1633
IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1634
}
1635
}
1636
} // for (i)
1637
} // for (instr)
1638
1639
if (IndRegs.empty())
1640
return false;
1641
1642
MachineBasicBlock *TB = nullptr, *FB = nullptr;
1643
SmallVector<MachineOperand,2> Cond;
1644
// analyzeBranch returns true if it fails to analyze branch.
1645
bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1646
if (NotAnalyzed || Cond.empty())
1647
return false;
1648
1649
if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1650
MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1651
SmallVector<MachineOperand,2> LCond;
1652
bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1653
if (NotAnalyzed)
1654
return false;
1655
1656
// Since latch is not the exiting block, the latch branch should be an
1657
// unconditional branch to the loop header.
1658
if (TB == Latch)
1659
TB = (LTB == Header) ? LTB : LFB;
1660
else
1661
FB = (LTB == Header) ? LTB : LFB;
1662
}
1663
if (TB != Header) {
1664
if (FB != Header) {
1665
// The latch/exit block does not go back to the header.
1666
return false;
1667
}
1668
// FB is the header (i.e., uncond. jump to branch header)
1669
// In this case, the LoopBody -> TB should not be a back edge otherwise
1670
// it could result in an infinite loop after conversion to hw_loop.
1671
// This case can happen when the Latch has two jumps like this:
1672
// Jmp_c OuterLoopHeader <-- TB
1673
// Jmp InnerLoopHeader <-- FB
1674
if (MDT->dominates(TB, FB))
1675
return false;
1676
}
1677
1678
// Expecting a predicate register as a condition. It won't be a hardware
1679
// predicate register at this point yet, just a vreg.
1680
// HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0)
1681
// into Cond, followed by the predicate register. For non-negated branches
1682
// it's just the register.
1683
unsigned CSz = Cond.size();
1684
if (CSz != 1 && CSz != 2)
1685
return false;
1686
1687
if (!Cond[CSz-1].isReg())
1688
return false;
1689
1690
Register P = Cond[CSz - 1].getReg();
1691
MachineInstr *PredDef = MRI->getVRegDef(P);
1692
1693
if (!PredDef->isCompare())
1694
return false;
1695
1696
SmallSet<Register,2> CmpRegs;
1697
MachineOperand *CmpImmOp = nullptr;
1698
1699
// Go over all operands to the compare and look for immediate and register
1700
// operands. Assume that if the compare has a single register use and a
1701
// single immediate operand, then the register is being compared with the
1702
// immediate value.
1703
for (MachineOperand &MO : PredDef->operands()) {
1704
if (MO.isReg()) {
1705
// Skip all implicit references. In one case there was:
1706
// %140 = FCMPUGT32_rr %138, %139, implicit %usr
1707
if (MO.isImplicit())
1708
continue;
1709
if (MO.isUse()) {
1710
if (!isImmediate(MO)) {
1711
CmpRegs.insert(MO.getReg());
1712
continue;
1713
}
1714
// Consider the register to be the "immediate" operand.
1715
if (CmpImmOp)
1716
return false;
1717
CmpImmOp = &MO;
1718
}
1719
} else if (MO.isImm()) {
1720
if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1721
return false;
1722
CmpImmOp = &MO;
1723
}
1724
}
1725
1726
if (CmpRegs.empty())
1727
return false;
1728
1729
// Check if the compared register follows the order we want. Fix if needed.
1730
for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1731
I != E; ++I) {
1732
// This is a success. If the register used in the comparison is one that
1733
// we have identified as a bumped (updated) induction register, there is
1734
// nothing to do.
1735
if (CmpRegs.count(I->first))
1736
return true;
1737
1738
// Otherwise, if the register being compared comes out of a PHI node,
1739
// and has been recognized as following the induction pattern, and is
1740
// compared against an immediate, we can fix it.
1741
const RegisterBump &RB = I->second;
1742
if (CmpRegs.count(RB.first)) {
1743
if (!CmpImmOp) {
1744
// If both operands to the compare instruction are registers, see if
1745
// it can be changed to use induction register as one of the operands.
1746
MachineInstr *IndI = nullptr;
1747
MachineInstr *nonIndI = nullptr;
1748
MachineOperand *IndMO = nullptr;
1749
MachineOperand *nonIndMO = nullptr;
1750
1751
for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1752
MachineOperand &MO = PredDef->getOperand(i);
1753
if (MO.isReg() && MO.getReg() == RB.first) {
1754
LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1755
<< ") = " << *(MRI->getVRegDef(I->first)));
1756
if (IndI)
1757
return false;
1758
1759
IndI = MRI->getVRegDef(I->first);
1760
IndMO = &MO;
1761
} else if (MO.isReg()) {
1762
LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1763
<< ") = " << *(MRI->getVRegDef(MO.getReg())));
1764
if (nonIndI)
1765
return false;
1766
1767
nonIndI = MRI->getVRegDef(MO.getReg());
1768
nonIndMO = &MO;
1769
}
1770
}
1771
if (IndI && nonIndI &&
1772
nonIndI->getOpcode() == Hexagon::A2_addi &&
1773
nonIndI->getOperand(2).isImm() &&
1774
nonIndI->getOperand(2).getImm() == - RB.second) {
1775
bool Order = orderBumpCompare(IndI, PredDef);
1776
if (Order) {
1777
IndMO->setReg(I->first);
1778
nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1779
return true;
1780
}
1781
}
1782
return false;
1783
}
1784
1785
// It is not valid to do this transformation on an unsigned comparison
1786
// because it may underflow.
1787
Comparison::Kind Cmp =
1788
getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1789
if (!Cmp || Comparison::isUnsigned(Cmp))
1790
return false;
1791
1792
// If the register is being compared against an immediate, try changing
1793
// the compare instruction to use induction register and adjust the
1794
// immediate operand.
1795
int64_t CmpImm = getImmediate(*CmpImmOp);
1796
int64_t V = RB.second;
1797
// Handle Overflow (64-bit).
1798
if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1799
((V < 0) && (CmpImm < INT64_MIN - V)))
1800
return false;
1801
CmpImm += V;
1802
// Most comparisons of register against an immediate value allow
1803
// the immediate to be constant-extended. There are some exceptions
1804
// though. Make sure the new combination will work.
1805
if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) &&
1806
!TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false))
1807
return false;
1808
1809
// Make sure that the compare happens after the bump. Otherwise,
1810
// after the fixup, the compare would use a yet-undefined register.
1811
MachineInstr *BumpI = MRI->getVRegDef(I->first);
1812
bool Order = orderBumpCompare(BumpI, PredDef);
1813
if (!Order)
1814
return false;
1815
1816
// Finally, fix the compare instruction.
1817
setImmediate(*CmpImmOp, CmpImm);
1818
for (MachineOperand &MO : PredDef->operands()) {
1819
if (MO.isReg() && MO.getReg() == RB.first) {
1820
MO.setReg(I->first);
1821
return true;
1822
}
1823
}
1824
}
1825
}
1826
1827
return false;
1828
}
1829
1830
/// createPreheaderForLoop - Create a preheader for a given loop.
1831
MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1832
MachineLoop *L) {
1833
if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1834
return TmpPH;
1835
if (!HWCreatePreheader)
1836
return nullptr;
1837
1838
MachineBasicBlock *Header = L->getHeader();
1839
MachineBasicBlock *Latch = L->getLoopLatch();
1840
MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1841
MachineFunction *MF = Header->getParent();
1842
DebugLoc DL;
1843
1844
#ifndef NDEBUG
1845
if ((!PHFn.empty()) && (PHFn != MF->getName()))
1846
return nullptr;
1847
#endif
1848
1849
if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1850
return nullptr;
1851
1852
using instr_iterator = MachineBasicBlock::instr_iterator;
1853
1854
// Verify that all existing predecessors have analyzable branches
1855
// (or no branches at all).
1856
using MBBVector = std::vector<MachineBasicBlock *>;
1857
1858
MBBVector Preds(Header->pred_begin(), Header->pred_end());
1859
SmallVector<MachineOperand,2> Tmp1;
1860
MachineBasicBlock *TB = nullptr, *FB = nullptr;
1861
1862
if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1863
return nullptr;
1864
1865
for (MachineBasicBlock *PB : Preds) {
1866
bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1867
if (NotAnalyzed)
1868
return nullptr;
1869
}
1870
1871
MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1872
MF->insert(Header->getIterator(), NewPH);
1873
1874
if (Header->pred_size() > 2) {
1875
// Ensure that the header has only two predecessors: the preheader and
1876
// the loop latch. Any additional predecessors of the header should
1877
// join at the newly created preheader. Inspect all PHI nodes from the
1878
// header and create appropriate corresponding PHI nodes in the preheader.
1879
1880
for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1881
I != E && I->isPHI(); ++I) {
1882
MachineInstr *PN = &*I;
1883
1884
const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1885
MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1886
NewPH->insert(NewPH->end(), NewPN);
1887
1888
Register PR = PN->getOperand(0).getReg();
1889
const TargetRegisterClass *RC = MRI->getRegClass(PR);
1890
Register NewPR = MRI->createVirtualRegister(RC);
1891
NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1892
1893
// Copy all non-latch operands of a header's PHI node to the newly
1894
// created PHI node in the preheader.
1895
for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1896
Register PredR = PN->getOperand(i).getReg();
1897
unsigned PredRSub = PN->getOperand(i).getSubReg();
1898
MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1899
if (PredB == Latch)
1900
continue;
1901
1902
MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1903
MO.setSubReg(PredRSub);
1904
NewPN->addOperand(MO);
1905
NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1906
}
1907
1908
// Remove copied operands from the old PHI node and add the value
1909
// coming from the preheader's PHI.
1910
for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1911
MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1912
if (PredB != Latch) {
1913
PN->removeOperand(i+1);
1914
PN->removeOperand(i);
1915
}
1916
}
1917
PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1918
PN->addOperand(MachineOperand::CreateMBB(NewPH));
1919
}
1920
} else {
1921
assert(Header->pred_size() == 2);
1922
1923
// The header has only two predecessors, but the non-latch predecessor
1924
// is not a preheader (e.g. it has other successors, etc.)
1925
// In such a case we don't need any extra PHI nodes in the new preheader,
1926
// all we need is to adjust existing PHIs in the header to now refer to
1927
// the new preheader.
1928
for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1929
I != E && I->isPHI(); ++I) {
1930
MachineInstr *PN = &*I;
1931
for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1932
MachineOperand &MO = PN->getOperand(i+1);
1933
if (MO.getMBB() != Latch)
1934
MO.setMBB(NewPH);
1935
}
1936
}
1937
}
1938
1939
// "Reroute" the CFG edges to link in the new preheader.
1940
// If any of the predecessors falls through to the header, insert a branch
1941
// to the new preheader in that place.
1942
SmallVector<MachineOperand,1> Tmp2;
1943
SmallVector<MachineOperand,1> EmptyCond;
1944
1945
TB = FB = nullptr;
1946
1947
for (MachineBasicBlock *PB : Preds) {
1948
if (PB != Latch) {
1949
Tmp2.clear();
1950
bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1951
(void)NotAnalyzed; // suppress compiler warning
1952
assert (!NotAnalyzed && "Should be analyzable!");
1953
if (TB != Header && (Tmp2.empty() || FB != Header))
1954
TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1955
PB->ReplaceUsesOfBlockWith(Header, NewPH);
1956
}
1957
}
1958
1959
// It can happen that the latch block will fall through into the header.
1960
// Insert an unconditional branch to the header.
1961
TB = FB = nullptr;
1962
bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1963
(void)LatchNotAnalyzed; // suppress compiler warning
1964
assert (!LatchNotAnalyzed && "Should be analyzable!");
1965
if (!TB && !FB)
1966
TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1967
1968
// Finally, the branch from the preheader to the header.
1969
TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1970
NewPH->addSuccessor(Header);
1971
1972
MachineLoop *ParentLoop = L->getParentLoop();
1973
if (ParentLoop)
1974
ParentLoop->addBasicBlockToLoop(NewPH, *MLI);
1975
1976
// Update the dominator information with the new preheader.
1977
if (MDT) {
1978
if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1979
if (MachineDomTreeNode *DHN = HN->getIDom()) {
1980
MDT->addNewBlock(NewPH, DHN->getBlock());
1981
MDT->changeImmediateDominator(Header, NewPH);
1982
}
1983
}
1984
}
1985
1986
return NewPH;
1987
}
1988
1989