GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
1
//===- HexagonBitSimplify.cpp ---------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "BitTracker.h"
10
#include "HexagonBitTracker.h"
11
#include "HexagonInstrInfo.h"
12
#include "HexagonRegisterInfo.h"
13
#include "HexagonSubtarget.h"
14
#include "llvm/ADT/BitVector.h"
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/GraphTraits.h"
17
#include "llvm/ADT/STLExtras.h"
18
#include "llvm/ADT/SmallVector.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/CodeGen/MachineBasicBlock.h"
21
#include "llvm/CodeGen/MachineDominators.h"
22
#include "llvm/CodeGen/MachineFunction.h"
23
#include "llvm/CodeGen/MachineFunctionPass.h"
24
#include "llvm/CodeGen/MachineInstr.h"
25
#include "llvm/CodeGen/MachineInstrBuilder.h"
26
#include "llvm/CodeGen/MachineOperand.h"
27
#include "llvm/CodeGen/MachineRegisterInfo.h"
28
#include "llvm/CodeGen/TargetRegisterInfo.h"
29
#include "llvm/IR/DebugLoc.h"
30
#include "llvm/InitializePasses.h"
31
#include "llvm/MC/MCInstrDesc.h"
32
#include "llvm/Pass.h"
33
#include "llvm/Support/CommandLine.h"
34
#include "llvm/Support/Compiler.h"
35
#include "llvm/Support/Debug.h"
36
#include "llvm/Support/ErrorHandling.h"
37
#include "llvm/Support/MathExtras.h"
38
#include "llvm/Support/raw_ostream.h"
39
#include <algorithm>
40
#include <cassert>
41
#include <cstdint>
42
#include <deque>
43
#include <iterator>
44
#include <limits>
45
#include <utility>
46
#include <vector>
47
48
#define DEBUG_TYPE "hexbit"
49
50
using namespace llvm;
51
52
static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
53
cl::init(true), cl::desc("Preserve subregisters in tied operands"));
54
static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
55
cl::init(true), cl::desc("Generate extract instructions"));
56
static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
57
cl::init(true), cl::desc("Generate bitsplit instructions"));
58
59
static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
60
cl::init(std::numeric_limits<unsigned>::max()));
61
static unsigned CountExtract = 0;
62
static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
63
cl::init(std::numeric_limits<unsigned>::max()));
64
static unsigned CountBitSplit = 0;
65
66
static cl::opt<unsigned> RegisterSetLimit("hexbit-registerset-limit",
67
cl::Hidden, cl::init(1000));
68
69
namespace llvm {
70
71
void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
72
FunctionPass *createHexagonBitSimplify();
73
74
} // end namespace llvm
75
76
namespace {
77
78
// Set of virtual registers, based on BitVector.
79
struct RegisterSet {
80
RegisterSet() = default;
81
explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {}
82
RegisterSet(const RegisterSet &RS) = default;
83
84
void clear() {
85
Bits.clear();
86
LRU.clear();
87
}
88
89
unsigned count() const {
90
return Bits.count();
91
}
92
93
unsigned find_first() const {
94
int First = Bits.find_first();
95
if (First < 0)
96
return 0;
97
return x2v(First);
98
}
99
100
unsigned find_next(unsigned Prev) const {
101
int Next = Bits.find_next(v2x(Prev));
102
if (Next < 0)
103
return 0;
104
return x2v(Next);
105
}
106
107
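// Insert R. The set is bounded: once more than RegisterSetLimit registers
// are tracked, the least recently inserted one is evicted.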
RegisterSet &insert(unsigned R) {
108
unsigned Idx = v2x(R);
109
ensure(Idx);
110
bool Exists = Bits.test(Idx);
111
Bits.set(Idx);
112
if (!Exists) {
113
LRU.push_back(Idx);
114
if (LRU.size() > RegisterSetLimit) {
115
unsigned T = LRU.front();
116
Bits.reset(T);
117
LRU.pop_front();
118
}
119
}
120
return *this;
121
}
122
RegisterSet &remove(unsigned R) {
123
unsigned Idx = v2x(R);
124
if (Idx < Bits.size()) {
125
bool Exists = Bits.test(Idx);
126
Bits.reset(Idx);
127
if (Exists) {
128
auto F = llvm::find(LRU, Idx);
129
assert(F != LRU.end());
130
LRU.erase(F);
131
}
132
}
133
return *this;
134
}
135
136
RegisterSet &insert(const RegisterSet &Rs) {
137
for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
138
insert(R);
139
return *this;
140
}
141
RegisterSet &remove(const RegisterSet &Rs) {
142
for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
143
remove(R);
144
return *this;
145
}
146
147
bool operator[](unsigned R) const {
148
unsigned Idx = v2x(R);
149
return Idx < Bits.size() ? Bits[Idx] : false;
150
}
151
bool has(unsigned R) const {
152
unsigned Idx = v2x(R);
153
if (Idx >= Bits.size())
154
return false;
155
return Bits.test(Idx);
156
}
157
158
bool empty() const {
159
return !Bits.any();
160
}
161
bool includes(const RegisterSet &Rs) const {
162
// A.test(B) <=> A-B != {}
163
return !Rs.Bits.test(Bits);
164
}
165
bool intersects(const RegisterSet &Rs) const {
166
return Bits.anyCommon(Rs.Bits);
167
}
168
169
private:
170
BitVector Bits;
171
std::deque<unsigned> LRU;
172
173
void ensure(unsigned Idx) {
174
if (Bits.size() <= Idx)
175
Bits.resize(std::max(Idx+1, 32U));
176
}
177
178
static inline unsigned v2x(unsigned v) {
179
return Register::virtReg2Index(v);
180
}
181
182
static inline unsigned x2v(unsigned x) {
183
return Register::index2VirtReg(x);
184
}
185
};
186
187
struct PrintRegSet {
188
PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
189
: RS(S), TRI(RI) {}
190
191
friend raw_ostream &operator<< (raw_ostream &OS,
192
const PrintRegSet &P);
193
194
private:
195
const RegisterSet &RS;
196
const TargetRegisterInfo *TRI;
197
};
198
199
raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
200
LLVM_ATTRIBUTE_UNUSED;
201
raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
202
OS << '{';
203
for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
204
OS << ' ' << printReg(R, P.TRI);
205
OS << " }";
206
return OS;
207
}
208
209
class Transformation;
210
211
class HexagonBitSimplify : public MachineFunctionPass {
212
public:
213
static char ID;
214
215
HexagonBitSimplify() : MachineFunctionPass(ID) {}
216
217
StringRef getPassName() const override {
218
return "Hexagon bit simplification";
219
}
220
221
void getAnalysisUsage(AnalysisUsage &AU) const override {
222
AU.addRequired<MachineDominatorTreeWrapperPass>();
223
AU.addPreserved<MachineDominatorTreeWrapperPass>();
224
MachineFunctionPass::getAnalysisUsage(AU);
225
}
226
227
bool runOnMachineFunction(MachineFunction &MF) override;
228
229
static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
230
static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
231
static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
232
const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
233
static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
234
uint16_t W);
235
static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
236
uint16_t W, uint64_t &U);
237
static bool replaceReg(Register OldR, Register NewR,
238
MachineRegisterInfo &MRI);
239
static bool getSubregMask(const BitTracker::RegisterRef &RR,
240
unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
241
static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR,
242
MachineRegisterInfo &MRI);
243
static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR,
244
unsigned NewSR, MachineRegisterInfo &MRI);
245
static bool parseRegSequence(const MachineInstr &I,
246
BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
247
const MachineRegisterInfo &MRI);
248
249
static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
250
uint16_t Begin);
251
static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
252
uint16_t Begin, const HexagonInstrInfo &HII);
253
254
static const TargetRegisterClass *getFinalVRegClass(
255
const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
256
static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
257
const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
258
259
private:
260
MachineDominatorTree *MDT = nullptr;
261
262
bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
263
static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
264
unsigned NewSub = Hexagon::NoSubRegister);
265
};
266
267
using HBS = HexagonBitSimplify;
268
269
// The purpose of this class is to provide a common facility to traverse
270
// the function top-down or bottom-up via the dominator tree, and keep
271
// track of the available registers.
272
class Transformation {
273
public:
274
bool TopDown;
275
276
Transformation(bool TD) : TopDown(TD) {}
277
virtual ~Transformation() = default;
278
279
virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
280
};
281
282
} // end anonymous namespace
283
284
char HexagonBitSimplify::ID = 0;
285
286
INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify",
287
"Hexagon bit simplification", false, false)
288
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
289
INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify",
290
"Hexagon bit simplification", false, false)
291
292
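// Visit block B and all of its dominator-tree descendants. AVs is the set of
// registers whose definitions are available on entry to B; the registers
// defined in B are added to it before recursing into the children.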
bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
293
RegisterSet &AVs) {
294
bool Changed = false;
295
296
if (T.TopDown)
297
Changed = T.processBlock(B, AVs);
298
299
RegisterSet Defs;
300
for (auto &I : B)
301
getInstrDefs(I, Defs);
302
RegisterSet NewAVs = AVs;
303
NewAVs.insert(Defs);
304
305
for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
306
Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);
307
308
if (!T.TopDown)
309
Changed |= T.processBlock(B, AVs);
310
311
return Changed;
312
}
313
314
//
315
// Utility functions:
316
//
317
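// Collect all virtual registers defined by MI into Defs.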
void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
318
RegisterSet &Defs) {
319
for (auto &Op : MI.operands()) {
320
if (!Op.isReg() || !Op.isDef())
321
continue;
322
Register R = Op.getReg();
323
if (!R.isVirtual())
324
continue;
325
Defs.insert(R);
326
}
327
}
328
329
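// Collect all virtual registers used by MI into Uses.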
void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
330
RegisterSet &Uses) {
331
for (auto &Op : MI.operands()) {
332
if (!Op.isReg() || !Op.isUse())
333
continue;
334
Register R = Op.getReg();
335
if (!R.isVirtual())
336
continue;
337
Uses.insert(R);
338
}
339
}
340
341
// Check if the W bits of RC1 starting at B1 are equal to the W bits of RC2
// starting at B2.
342
bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
343
uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
344
uint16_t W) {
345
for (uint16_t i = 0; i < W; ++i) {
346
// If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
347
if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
348
return false;
349
// Same for RC2[i].
350
if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
351
return false;
352
if (RC1[B1+i] != RC2[B2+i])
353
return false;
354
}
355
return true;
356
}
357
358
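// Check if all bits in the range [B, B+W) of RC are known to be zero.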
bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
359
uint16_t B, uint16_t W) {
360
assert(B < RC.width() && B+W <= RC.width());
361
for (uint16_t i = B; i < B+W; ++i)
362
if (!RC[i].is(0))
363
return false;
364
return true;
365
}
366
367
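// If every bit in the range [B, B+W) of RC has a known constant value,
// assemble those bits into U and return true; otherwise return false.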
bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
368
uint16_t B, uint16_t W, uint64_t &U) {
369
assert(B < RC.width() && B+W <= RC.width());
370
int64_t T = 0;
371
for (uint16_t i = B+W; i > B; --i) {
372
const BitTracker::BitValue &BV = RC[i-1];
373
T <<= 1;
374
if (BV.is(1))
375
T |= 1;
376
else if (!BV.is(0))
377
return false;
378
}
379
U = T;
380
return true;
381
}
382
383
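// Replace all uses of OldR with NewR. Both registers must be virtual.
// Return true if any use was rewritten.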
bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR,
384
MachineRegisterInfo &MRI) {
385
if (!OldR.isVirtual() || !NewR.isVirtual())
386
return false;
387
auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
388
decltype(End) NextI;
389
for (auto I = Begin; I != End; I = NextI) {
390
NextI = std::next(I);
391
I->setReg(NewR);
392
}
393
return Begin != End;
394
}
395
396
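// Replace all uses of OldR with NewR:NewSR, unless OldR has a tied use
// that the new subregister would conflict with.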
bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR,
397
unsigned NewSR,
398
MachineRegisterInfo &MRI) {
399
if (!OldR.isVirtual() || !NewR.isVirtual())
400
return false;
401
if (hasTiedUse(OldR, MRI, NewSR))
402
return false;
403
auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
404
decltype(End) NextI;
405
for (auto I = Begin; I != End; I = NextI) {
406
NextI = std::next(I);
407
I->setReg(NewR);
408
I->setSubReg(NewSR);
409
}
410
return Begin != End;
411
}
412
413
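// Replace uses of the pair OldR:OldSR with NewR:NewSR. Uses of OldR with
// a subregister other than OldSR are left unchanged.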
bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR,
414
Register NewR, unsigned NewSR,
415
MachineRegisterInfo &MRI) {
416
if (!OldR.isVirtual() || !NewR.isVirtual())
417
return false;
418
if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
419
return false;
420
auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
421
decltype(End) NextI;
422
for (auto I = Begin; I != End; I = NextI) {
423
NextI = std::next(I);
424
if (I->getSubReg() != OldSR)
425
continue;
426
I->setReg(NewR);
427
I->setSubReg(NewSR);
428
}
429
return Begin != End;
430
}
431
432
// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
433
// of Sub in Reg, and set Width to the size of Sub in bits. Return true
434
// if this succeeded, otherwise return false.
435
bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
436
unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
437
const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
438
if (RR.Sub == 0) {
439
Begin = 0;
440
Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC);
441
return true;
442
}
443
444
Begin = 0;
445
446
switch (RC->getID()) {
447
case Hexagon::DoubleRegsRegClassID:
448
case Hexagon::HvxWRRegClassID:
449
Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2;
450
if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)
451
Begin = Width;
452
break;
453
default:
454
return false;
455
}
456
return true;
457
}
458
459
460
// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
461
// subregister.
462
bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
463
BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
464
const MachineRegisterInfo &MRI) {
465
assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
466
unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
467
auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg());
468
auto &HRI = static_cast<const HexagonRegisterInfo&>(
469
*MRI.getTargetRegisterInfo());
470
unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo);
471
unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi);
472
assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));
473
if (Sub1 == SubLo && Sub2 == SubHi) {
474
SL = I.getOperand(1);
475
SH = I.getOperand(3);
476
return true;
477
}
478
if (Sub1 == SubHi && Sub2 == SubLo) {
479
SH = I.getOperand(1);
480
SL = I.getOperand(3);
481
return true;
482
}
483
return false;
484
}
485
486
// All stores (except 64-bit stores) take a 32-bit register as the source
487
// of the value to be stored. If the instruction stores into a location
488
// that is shorter than 32 bits, some bits of the source register are not
489
// used. For each store instruction, calculate the set of used bits in
490
// the source register, and set appropriate bits in Bits. Return true if
491
// the bits are calculated, false otherwise.
492
bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
493
uint16_t Begin) {
494
using namespace Hexagon;
495
496
switch (Opc) {
497
// Store byte
498
case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32
499
case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new
500
case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32
501
case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
502
case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
503
case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
504
case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
505
case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
506
case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
507
case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
508
case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32
509
case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new
510
case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32
511
case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
512
case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
513
case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
514
case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
515
case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
516
case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
517
case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
518
case S4_storerb_ap: // memb(Re32=#U6)=Rt32
519
case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new
520
case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32
521
case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new
522
case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32
523
case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new
524
case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32
525
case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new
526
case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
527
case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
528
case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32
529
case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new
530
case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32
531
case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new
532
case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
533
case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
534
case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
535
case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
536
case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
537
case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
538
case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
539
case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
540
case S2_storerbgp: // memb(gp+#u16:0)=Rt32
541
case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new
542
case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32
543
case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32
544
case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32
545
case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32
546
case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new
547
case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new
548
case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new
549
case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new
550
Bits.set(Begin, Begin+8);
551
return true;
552
553
// Store low half
554
case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32
555
case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new
556
case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32
557
case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
558
case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
559
case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
560
case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
561
case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
562
case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
563
case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
564
case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32
565
case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new
566
case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32
567
case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
568
case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
569
case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
570
case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
571
case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
572
case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
573
case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
574
case S4_storerh_ap: // memh(Re32=#U6)=Rt32
575
case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new
576
case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32
577
case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new
578
case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32
579
case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new
580
case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32
581
case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new
582
case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
583
case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
584
case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32
585
case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new
586
case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32
587
case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
588
case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
589
case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
590
case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
591
case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new
592
case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
593
case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
594
case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
595
case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
596
case S2_storerhgp: // memh(gp+#u16:1)=Rt32
597
case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new
598
case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32
599
case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32
600
case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32
601
case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32
602
case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new
603
case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new
604
case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new
605
case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new
606
Bits.set(Begin, Begin+16);
607
return true;
608
609
// Store high half
610
case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32
611
case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
612
case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
613
case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
614
case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
615
case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32
616
case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
617
case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
618
case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
619
case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
620
case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32
621
case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32
622
case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32
623
case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32
624
case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
625
case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32
626
case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32
627
case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
628
case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
629
case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
630
case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
631
case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32
632
case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32
633
case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32
634
case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32
635
case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32
636
Bits.set(Begin+16, Begin+32);
637
return true;
638
}
639
640
return false;
641
}
642
643
// For an instruction with opcode Opc, calculate the set of bits that it
644
// uses in a register in operand OpN. This only calculates the set of used
645
// bits for cases where it does not depend on any operands (as is the case
646
// in shifts, for example). For concrete instructions from a program, the
647
// operand may be a subregister of a larger register, while Bits would
648
// correspond to the larger register in its entirety. Because of that,
649
// the parameter Begin can be used to indicate which bit of Bits should be
650
// considered the LSB of the operand.
651
bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
652
BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
653
using namespace Hexagon;
654
655
const MCInstrDesc &D = HII.get(Opc);
656
if (D.mayStore()) {
657
if (OpN == D.getNumOperands()-1)
658
return getUsedBitsInStore(Opc, Bits, Begin);
659
return false;
660
}
661
662
switch (Opc) {
663
// One register source. Used bits: R1[0-7].
664
case A2_sxtb:
665
case A2_zxtb:
666
case A4_cmpbeqi:
667
case A4_cmpbgti:
668
case A4_cmpbgtui:
669
if (OpN == 1) {
670
Bits.set(Begin, Begin+8);
671
return true;
672
}
673
break;
674
675
// One register source. Used bits: R1[0-15].
676
case A2_aslh:
677
case A2_sxth:
678
case A2_zxth:
679
case A4_cmpheqi:
680
case A4_cmphgti:
681
case A4_cmphgtui:
682
if (OpN == 1) {
683
Bits.set(Begin, Begin+16);
684
return true;
685
}
686
break;
687
688
// One register source. Used bits: R1[16-31].
689
case A2_asrh:
690
if (OpN == 1) {
691
Bits.set(Begin+16, Begin+32);
692
return true;
693
}
694
break;
695
696
// Two register sources. Used bits: R1[0-7], R2[0-7].
697
case A4_cmpbeq:
698
case A4_cmpbgt:
699
case A4_cmpbgtu:
700
if (OpN == 1) {
701
Bits.set(Begin, Begin+8);
702
return true;
703
}
704
break;
705
706
// Two register sources. Used bits: R1[0-15], R2[0-15].
707
case A4_cmpheq:
708
case A4_cmphgt:
709
case A4_cmphgtu:
710
case A2_addh_h16_ll:
711
case A2_addh_h16_sat_ll:
712
case A2_addh_l16_ll:
713
case A2_addh_l16_sat_ll:
714
case A2_combine_ll:
715
case A2_subh_h16_ll:
716
case A2_subh_h16_sat_ll:
717
case A2_subh_l16_ll:
718
case A2_subh_l16_sat_ll:
719
case M2_mpy_acc_ll_s0:
720
case M2_mpy_acc_ll_s1:
721
case M2_mpy_acc_sat_ll_s0:
722
case M2_mpy_acc_sat_ll_s1:
723
case M2_mpy_ll_s0:
724
case M2_mpy_ll_s1:
725
case M2_mpy_nac_ll_s0:
726
case M2_mpy_nac_ll_s1:
727
case M2_mpy_nac_sat_ll_s0:
728
case M2_mpy_nac_sat_ll_s1:
729
case M2_mpy_rnd_ll_s0:
730
case M2_mpy_rnd_ll_s1:
731
case M2_mpy_sat_ll_s0:
732
case M2_mpy_sat_ll_s1:
733
case M2_mpy_sat_rnd_ll_s0:
734
case M2_mpy_sat_rnd_ll_s1:
735
case M2_mpyd_acc_ll_s0:
736
case M2_mpyd_acc_ll_s1:
737
case M2_mpyd_ll_s0:
738
case M2_mpyd_ll_s1:
739
case M2_mpyd_nac_ll_s0:
740
case M2_mpyd_nac_ll_s1:
741
case M2_mpyd_rnd_ll_s0:
742
case M2_mpyd_rnd_ll_s1:
743
case M2_mpyu_acc_ll_s0:
744
case M2_mpyu_acc_ll_s1:
745
case M2_mpyu_ll_s0:
746
case M2_mpyu_ll_s1:
747
case M2_mpyu_nac_ll_s0:
748
case M2_mpyu_nac_ll_s1:
749
case M2_mpyud_acc_ll_s0:
750
case M2_mpyud_acc_ll_s1:
751
case M2_mpyud_ll_s0:
752
case M2_mpyud_ll_s1:
753
case M2_mpyud_nac_ll_s0:
754
case M2_mpyud_nac_ll_s1:
755
if (OpN == 1 || OpN == 2) {
756
Bits.set(Begin, Begin+16);
757
return true;
758
}
759
break;
760
761
// Two register sources. Used bits: R1[0-15], R2[16-31].
762
case A2_addh_h16_lh:
763
case A2_addh_h16_sat_lh:
764
case A2_combine_lh:
765
case A2_subh_h16_lh:
766
case A2_subh_h16_sat_lh:
767
case M2_mpy_acc_lh_s0:
768
case M2_mpy_acc_lh_s1:
769
case M2_mpy_acc_sat_lh_s0:
770
case M2_mpy_acc_sat_lh_s1:
771
case M2_mpy_lh_s0:
772
case M2_mpy_lh_s1:
773
case M2_mpy_nac_lh_s0:
774
case M2_mpy_nac_lh_s1:
775
case M2_mpy_nac_sat_lh_s0:
776
case M2_mpy_nac_sat_lh_s1:
777
case M2_mpy_rnd_lh_s0:
778
case M2_mpy_rnd_lh_s1:
779
case M2_mpy_sat_lh_s0:
780
case M2_mpy_sat_lh_s1:
781
case M2_mpy_sat_rnd_lh_s0:
782
case M2_mpy_sat_rnd_lh_s1:
783
case M2_mpyd_acc_lh_s0:
784
case M2_mpyd_acc_lh_s1:
785
case M2_mpyd_lh_s0:
786
case M2_mpyd_lh_s1:
787
case M2_mpyd_nac_lh_s0:
788
case M2_mpyd_nac_lh_s1:
789
case M2_mpyd_rnd_lh_s0:
790
case M2_mpyd_rnd_lh_s1:
791
case M2_mpyu_acc_lh_s0:
792
case M2_mpyu_acc_lh_s1:
793
case M2_mpyu_lh_s0:
794
case M2_mpyu_lh_s1:
795
case M2_mpyu_nac_lh_s0:
796
case M2_mpyu_nac_lh_s1:
797
case M2_mpyud_acc_lh_s0:
798
case M2_mpyud_acc_lh_s1:
799
case M2_mpyud_lh_s0:
800
case M2_mpyud_lh_s1:
801
case M2_mpyud_nac_lh_s0:
802
case M2_mpyud_nac_lh_s1:
803
// These four are actually LH.
804
case A2_addh_l16_hl:
805
case A2_addh_l16_sat_hl:
806
case A2_subh_l16_hl:
807
case A2_subh_l16_sat_hl:
808
if (OpN == 1) {
809
Bits.set(Begin, Begin+16);
810
return true;
811
}
812
if (OpN == 2) {
813
Bits.set(Begin+16, Begin+32);
814
return true;
815
}
816
break;
817
818
// Two register sources, used bits: R1[16-31], R2[0-15].
819
case A2_addh_h16_hl:
820
case A2_addh_h16_sat_hl:
821
case A2_combine_hl:
822
case A2_subh_h16_hl:
823
case A2_subh_h16_sat_hl:
824
case M2_mpy_acc_hl_s0:
825
case M2_mpy_acc_hl_s1:
826
case M2_mpy_acc_sat_hl_s0:
827
case M2_mpy_acc_sat_hl_s1:
828
case M2_mpy_hl_s0:
829
case M2_mpy_hl_s1:
830
case M2_mpy_nac_hl_s0:
831
case M2_mpy_nac_hl_s1:
832
case M2_mpy_nac_sat_hl_s0:
833
case M2_mpy_nac_sat_hl_s1:
834
case M2_mpy_rnd_hl_s0:
835
case M2_mpy_rnd_hl_s1:
836
case M2_mpy_sat_hl_s0:
837
case M2_mpy_sat_hl_s1:
838
case M2_mpy_sat_rnd_hl_s0:
839
case M2_mpy_sat_rnd_hl_s1:
840
case M2_mpyd_acc_hl_s0:
841
case M2_mpyd_acc_hl_s1:
842
case M2_mpyd_hl_s0:
843
case M2_mpyd_hl_s1:
844
case M2_mpyd_nac_hl_s0:
845
case M2_mpyd_nac_hl_s1:
846
case M2_mpyd_rnd_hl_s0:
847
case M2_mpyd_rnd_hl_s1:
848
case M2_mpyu_acc_hl_s0:
849
case M2_mpyu_acc_hl_s1:
850
case M2_mpyu_hl_s0:
851
case M2_mpyu_hl_s1:
852
case M2_mpyu_nac_hl_s0:
853
case M2_mpyu_nac_hl_s1:
854
case M2_mpyud_acc_hl_s0:
855
case M2_mpyud_acc_hl_s1:
856
case M2_mpyud_hl_s0:
857
case M2_mpyud_hl_s1:
858
case M2_mpyud_nac_hl_s0:
859
case M2_mpyud_nac_hl_s1:
860
if (OpN == 1) {
861
Bits.set(Begin+16, Begin+32);
862
return true;
863
}
864
if (OpN == 2) {
865
Bits.set(Begin, Begin+16);
866
return true;
867
}
868
break;
869
870
// Two register sources, used bits: R1[16-31], R2[16-31].
871
case A2_addh_h16_hh:
872
case A2_addh_h16_sat_hh:
873
case A2_combine_hh:
874
case A2_subh_h16_hh:
875
case A2_subh_h16_sat_hh:
876
case M2_mpy_acc_hh_s0:
877
case M2_mpy_acc_hh_s1:
878
case M2_mpy_acc_sat_hh_s0:
879
case M2_mpy_acc_sat_hh_s1:
880
case M2_mpy_hh_s0:
881
case M2_mpy_hh_s1:
882
case M2_mpy_nac_hh_s0:
883
case M2_mpy_nac_hh_s1:
884
case M2_mpy_nac_sat_hh_s0:
885
case M2_mpy_nac_sat_hh_s1:
886
case M2_mpy_rnd_hh_s0:
887
case M2_mpy_rnd_hh_s1:
888
case M2_mpy_sat_hh_s0:
889
case M2_mpy_sat_hh_s1:
890
case M2_mpy_sat_rnd_hh_s0:
891
case M2_mpy_sat_rnd_hh_s1:
892
case M2_mpyd_acc_hh_s0:
893
case M2_mpyd_acc_hh_s1:
894
case M2_mpyd_hh_s0:
895
case M2_mpyd_hh_s1:
896
case M2_mpyd_nac_hh_s0:
897
case M2_mpyd_nac_hh_s1:
898
case M2_mpyd_rnd_hh_s0:
899
case M2_mpyd_rnd_hh_s1:
900
case M2_mpyu_acc_hh_s0:
901
case M2_mpyu_acc_hh_s1:
902
case M2_mpyu_hh_s0:
903
case M2_mpyu_hh_s1:
904
case M2_mpyu_nac_hh_s0:
905
case M2_mpyu_nac_hh_s1:
906
case M2_mpyud_acc_hh_s0:
907
case M2_mpyud_acc_hh_s1:
908
case M2_mpyud_hh_s0:
909
case M2_mpyud_hh_s1:
910
case M2_mpyud_nac_hh_s0:
911
case M2_mpyud_nac_hh_s1:
912
if (OpN == 1 || OpN == 2) {
913
Bits.set(Begin+16, Begin+32);
914
return true;
915
}
916
break;
917
}
918
919
return false;
920
}
921
922
// Calculate the register class that matches Reg:Sub. For example, if
923
// %1 is a double register, then %1:isub_hi would match the "int"
924
// register class.
925
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
926
const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
927
if (!RR.Reg.isVirtual())
928
return nullptr;
929
auto *RC = MRI.getRegClass(RR.Reg);
930
if (RR.Sub == 0)
931
return RC;
932
auto &HRI = static_cast<const HexagonRegisterInfo&>(
933
*MRI.getTargetRegisterInfo());
934
935
auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
936
(void)HRI;
937
assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) ||
938
Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi));
939
};
940
941
switch (RC->getID()) {
942
case Hexagon::DoubleRegsRegClassID:
943
VerifySR(RC, RR.Sub);
944
return &Hexagon::IntRegsRegClass;
945
case Hexagon::HvxWRRegClassID:
946
VerifySR(RC, RR.Sub);
947
return &Hexagon::HvxVRRegClass;
948
}
949
return nullptr;
950
}
951
952
// Check if RD could be replaced with RS at any possible use of RD.
953
// For example, a predicate register cannot be replaced with an integer
954
// register, but a 64-bit register with a subregister can be replaced
955
// with a 32-bit register.
956
bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
957
const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
958
if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual())
959
return false;
960
// Return false if one (or both) classes are nullptr.
961
auto *DRC = getFinalVRegClass(RD, MRI);
962
if (!DRC)
963
return false;
964
965
return DRC == getFinalVRegClass(RS, MRI);
966
}
967
968
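// Check if Reg has a tied use whose subregister differs from NewSub.
// Always returns false when -hexbit-keep-tied is disabled.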
bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
969
unsigned NewSub) {
970
if (!PreserveTiedOps)
971
return false;
972
return llvm::any_of(MRI.use_operands(Reg),
973
[NewSub] (const MachineOperand &Op) -> bool {
974
return Op.getSubReg() != NewSub && Op.isTied();
975
});
976
}
977
978
namespace {
979
980
class DeadCodeElimination {
981
public:
982
DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
983
: MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
984
MDT(mdt), MRI(mf.getRegInfo()) {}
985
986
bool run() {
987
return runOnNode(MDT.getRootNode());
988
}
989
990
private:
991
bool isDead(unsigned R) const;
992
bool runOnNode(MachineDomTreeNode *N);
993
994
MachineFunction &MF;
995
const HexagonInstrInfo &HII;
996
MachineDominatorTree &MDT;
997
MachineRegisterInfo &MRI;
998
};
999
1000
} // end anonymous namespace
1001
1002
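// A register R is considered dead if every non-debug use of it is a PHI
// that defines R itself (i.e. the value only feeds back into its own cycle).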
bool DeadCodeElimination::isDead(unsigned R) const {
1003
for (const MachineOperand &MO : MRI.use_operands(R)) {
1004
const MachineInstr *UseI = MO.getParent();
1005
if (UseI->isDebugInstr())
1006
continue;
1007
if (UseI->isPHI()) {
1008
assert(!UseI->getOperand(0).getSubReg());
1009
Register DR = UseI->getOperand(0).getReg();
1010
if (DR == R)
1011
continue;
1012
}
1013
return false;
1014
}
1015
return true;
1016
}
1017
1018
bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
1019
bool Changed = false;
1020
1021
for (auto *DTN : children<MachineDomTreeNode*>(N))
1022
Changed |= runOnNode(DTN);
1023
1024
MachineBasicBlock *B = N->getBlock();
1025
std::vector<MachineInstr*> Instrs;
1026
for (MachineInstr &MI : llvm::reverse(*B))
1027
Instrs.push_back(&MI);
1028
1029
for (auto *MI : Instrs) {
1030
unsigned Opc = MI->getOpcode();
1031
// Do not touch lifetime markers. This is why the target-independent DCE
1032
// cannot be used.
1033
if (Opc == TargetOpcode::LIFETIME_START ||
1034
Opc == TargetOpcode::LIFETIME_END)
1035
continue;
1036
bool Store = false;
1037
if (MI->isInlineAsm())
1038
continue;
1039
// Delete PHIs if possible.
1040
if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
1041
continue;
1042
1043
bool AllDead = true;
1044
SmallVector<unsigned,2> Regs;
1045
for (auto &Op : MI->operands()) {
1046
if (!Op.isReg() || !Op.isDef())
1047
continue;
1048
Register R = Op.getReg();
1049
if (!R.isVirtual() || !isDead(R)) {
1050
AllDead = false;
1051
break;
1052
}
1053
Regs.push_back(R);
1054
}
1055
if (!AllDead)
1056
continue;
1057
1058
B->erase(MI);
1059
for (unsigned Reg : Regs)
1060
MRI.markUsesInDebugValueAsUndef(Reg);
1061
Changed = true;
1062
}
1063
1064
return Changed;
1065
}
1066
1067
namespace {
1068
1069
// Eliminate redundant instructions
1070
//
1071
// This transformation will identify instructions where the output register
1072
// is the same as one of its input registers. This only works on instructions
1073
// that define a single register (unlike post-increment loads, for example).
1074
// The equality check is actually more detailed: the code calculates which
1075
// bits of the output are used, and only compares these bits with the input
1076
// registers.
1077
// If the output matches an input, the instruction is replaced with COPY.
1078
// The copies will be removed by another transformation.
1079
class RedundantInstrElimination : public Transformation {
1080
public:
1081
RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
1082
const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1083
: Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
1084
1085
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1086
1087
private:
1088
bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
1089
unsigned &LostB, unsigned &LostE);
1090
bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
1091
unsigned &LostB, unsigned &LostE);
1092
bool computeUsedBits(unsigned Reg, BitVector &Bits);
1093
bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
1094
uint16_t Begin);
1095
bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
1096
1097
const HexagonInstrInfo &HII;
1098
const HexagonRegisterInfo &HRI;
1099
MachineRegisterInfo &MRI;
1100
BitTracker &BT;
1101
};
1102
1103
} // end anonymous namespace
1104
1105
// Check if the instruction is a lossy shift left, where the input being
1106
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1107
// of bit indices that are lost.
1108
bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
1109
unsigned OpN, unsigned &LostB, unsigned &LostE) {
1110
using namespace Hexagon;
1111
1112
unsigned Opc = MI.getOpcode();
1113
unsigned ImN, RegN, Width;
1114
switch (Opc) {
1115
case S2_asl_i_p:
1116
ImN = 2;
1117
RegN = 1;
1118
Width = 64;
1119
break;
1120
case S2_asl_i_p_acc:
1121
case S2_asl_i_p_and:
1122
case S2_asl_i_p_nac:
1123
case S2_asl_i_p_or:
1124
case S2_asl_i_p_xacc:
1125
ImN = 3;
1126
RegN = 2;
1127
Width = 64;
1128
break;
1129
case S2_asl_i_r:
1130
ImN = 2;
1131
RegN = 1;
1132
Width = 32;
1133
break;
1134
case S2_addasl_rrri:
1135
case S4_andi_asl_ri:
1136
case S4_ori_asl_ri:
1137
case S4_addi_asl_ri:
1138
case S4_subi_asl_ri:
1139
case S2_asl_i_r_acc:
1140
case S2_asl_i_r_and:
1141
case S2_asl_i_r_nac:
1142
case S2_asl_i_r_or:
1143
case S2_asl_i_r_sat:
1144
case S2_asl_i_r_xacc:
1145
ImN = 3;
1146
RegN = 2;
1147
Width = 32;
1148
break;
1149
default:
1150
return false;
1151
}
1152
1153
if (RegN != OpN)
1154
return false;
1155
1156
assert(MI.getOperand(ImN).isImm());
1157
unsigned S = MI.getOperand(ImN).getImm();
1158
if (S == 0)
1159
return false;
1160
LostB = Width-S;
1161
LostE = Width;
1162
return true;
1163
}
1164
1165
// Check if the instruction is a lossy shift right, where the input being
1166
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1167
// of bit indices that are lost.
1168
bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
1169
unsigned OpN, unsigned &LostB, unsigned &LostE) {
1170
using namespace Hexagon;
1171
1172
unsigned Opc = MI.getOpcode();
1173
unsigned ImN, RegN;
1174
switch (Opc) {
1175
case S2_asr_i_p:
1176
case S2_lsr_i_p:
1177
ImN = 2;
1178
RegN = 1;
1179
break;
1180
case S2_asr_i_p_acc:
1181
case S2_asr_i_p_and:
1182
case S2_asr_i_p_nac:
1183
case S2_asr_i_p_or:
1184
case S2_lsr_i_p_acc:
1185
case S2_lsr_i_p_and:
1186
case S2_lsr_i_p_nac:
1187
case S2_lsr_i_p_or:
1188
case S2_lsr_i_p_xacc:
1189
ImN = 3;
1190
RegN = 2;
1191
break;
1192
case S2_asr_i_r:
1193
case S2_lsr_i_r:
1194
ImN = 2;
1195
RegN = 1;
1196
break;
1197
case S4_andi_lsr_ri:
1198
case S4_ori_lsr_ri:
1199
case S4_addi_lsr_ri:
1200
case S4_subi_lsr_ri:
1201
case S2_asr_i_r_acc:
1202
case S2_asr_i_r_and:
1203
case S2_asr_i_r_nac:
1204
case S2_asr_i_r_or:
1205
case S2_lsr_i_r_acc:
1206
case S2_lsr_i_r_and:
1207
case S2_lsr_i_r_nac:
1208
case S2_lsr_i_r_or:
1209
case S2_lsr_i_r_xacc:
1210
ImN = 3;
1211
RegN = 2;
1212
break;
1213
1214
default:
1215
return false;
1216
}
1217
1218
if (RegN != OpN)
1219
return false;
1220
1221
assert(MI.getOperand(ImN).isImm());
1222
unsigned S = MI.getOperand(ImN).getImm();
1223
LostB = 0;
1224
LostE = S;
1225
return true;
1226
}
1227
1228
// Calculate the bit vector that corresponds to the used bits of register Reg.
1229
// The vector Bits has as many entries as Reg has bits. If the
1230
// calculation fails (i.e. the used bits are unknown), it returns false.
1231
// Otherwise, it returns true and sets the corresponding bits in Bits.
1232
bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
1233
BitVector Used(Bits.size());
1234
RegisterSet Visited;
1235
std::vector<unsigned> Pending;
1236
Pending.push_back(Reg);
1237
1238
for (unsigned i = 0; i < Pending.size(); ++i) {
1239
unsigned R = Pending[i];
1240
if (Visited.has(R))
1241
continue;
1242
Visited.insert(R);
1243
for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
1244
BitTracker::RegisterRef UR = *I;
1245
unsigned B, W;
1246
if (!HBS::getSubregMask(UR, B, W, MRI))
1247
return false;
1248
MachineInstr &UseI = *I->getParent();
1249
if (UseI.isPHI() || UseI.isCopy()) {
1250
Register DefR = UseI.getOperand(0).getReg();
1251
if (!DefR.isVirtual())
1252
return false;
1253
Pending.push_back(DefR);
1254
} else {
1255
if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
1256
return false;
1257
}
1258
}
1259
}
1260
Bits |= Used;
1261
return true;
1262
}
1263
1264
// Calculate the bits used by instruction MI in a register in operand OpN.
1265
// Return true/false if the calculation succeeds/fails. If it succeeds, set
1266
// used bits in Bits. This function does not reset any bits in Bits, so
1267
// subsequent calls over different instructions will result in the union
1268
// of the used bits in all these instructions.
1269
// The register in question may be used with a sub-register, whereas Bits
1270
// holds the bits for the entire register. To keep track of that, the
1271
// argument Begin indicates where in Bits is the lowest-significant bit
1272
// of the register used in operand OpN. For example, in instruction:
1273
// %1 = S2_lsr_i_r %2:isub_hi, 10
1274
// the operand 1 is a 32-bit register, which happens to be a subregister
1275
// of the 64-bit register %2, and that subregister starts at position 32.
1276
// In this case Begin=32, since Bits[32] would be the lowest-significant bit
1277
// of %2:isub_hi.
1278
bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
1279
unsigned OpN, BitVector &Bits, uint16_t Begin) {
1280
unsigned Opc = MI.getOpcode();
1281
BitVector T(Bits.size());
1282
bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
1283
// Even if we don't have bits yet, we could still provide some information
1284
// if the instruction is a lossy shift: the lost bits will be marked as
1285
// not used.
1286
unsigned LB, LE;
1287
if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
1288
assert(MI.getOperand(OpN).isReg());
1289
BitTracker::RegisterRef RR = MI.getOperand(OpN);
1290
const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
1291
uint16_t Width = HRI.getRegSizeInBits(*RC);
1292
1293
if (!GotBits)
1294
T.set(Begin, Begin+Width);
1295
assert(LB <= LE && LB < Width && LE <= Width);
1296
T.reset(Begin+LB, Begin+LE);
1297
GotBits = true;
1298
}
1299
if (GotBits)
1300
Bits |= T;
1301
return GotBits;
1302
}
1303
1304
// Calculates the used bits in RD ("defined register"), and checks if these
1305
// bits in RS ("used register") and RD are identical.
1306
bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
1307
BitTracker::RegisterRef RS) {
1308
const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1309
const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1310
1311
unsigned DB, DW;
1312
if (!HBS::getSubregMask(RD, DB, DW, MRI))
1313
return false;
1314
unsigned SB, SW;
1315
if (!HBS::getSubregMask(RS, SB, SW, MRI))
1316
return false;
1317
if (SW != DW)
1318
return false;
1319
1320
BitVector Used(DC.width());
1321
if (!computeUsedBits(RD.Reg, Used))
1322
return false;
1323
1324
for (unsigned i = 0; i != DW; ++i)
1325
if (Used[i+DB] && DC[DB+i] != SC[SB+i])
1326
return false;
1327
return true;
1328
}
1329
1330
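// Scan the block for single-def instructions whose result is bit-for-bit
// equal (on the bits that are actually used) to one of their register
// inputs, and replace each such result with a COPY of that input.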
bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
1331
const RegisterSet&) {
1332
if (!BT.reached(&B))
1333
return false;
1334
bool Changed = false;
1335
1336
for (auto I = B.begin(), E = B.end(); I != E; ++I) {
1337
MachineInstr *MI = &*I;
1338
1339
if (MI->getOpcode() == TargetOpcode::COPY)
1340
continue;
1341
if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
1342
continue;
1343
unsigned NumD = MI->getDesc().getNumDefs();
1344
if (NumD != 1)
1345
continue;
1346
1347
BitTracker::RegisterRef RD = MI->getOperand(0);
1348
if (!BT.has(RD.Reg))
1349
continue;
1350
const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1351
auto At = MachineBasicBlock::iterator(MI);
1352
1353
// Find a source operand that is equal to the result.
1354
for (auto &Op : MI->uses()) {
1355
if (!Op.isReg())
1356
continue;
1357
BitTracker::RegisterRef RS = Op;
1358
if (!BT.has(RS.Reg))
1359
continue;
1360
if (!HBS::isTransparentCopy(RD, RS, MRI))
1361
continue;
1362
1363
unsigned BN, BW;
1364
if (!HBS::getSubregMask(RS, BN, BW, MRI))
1365
continue;
1366
1367
const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1368
if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
1369
continue;
1370
1371
// If found, replace the instruction with a COPY.
1372
const DebugLoc &DL = MI->getDebugLoc();
1373
const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
1374
Register NewR = MRI.createVirtualRegister(FRC);
1375
MachineInstr *CopyI =
1376
BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1377
.addReg(RS.Reg, 0, RS.Sub);
1378
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
1379
// This pass can create copies between registers that don't have the
1380
// exact same values. Updating the tracker has to involve updating
1381
// all dependent cells. Example:
1382
// %1 = inst %2 ; %1 != %2, but used bits are equal
1383
//
1384
// %3 = copy %2 ; <- inserted
1385
// ... = %3 ; <- replaced from %2
1386
// Indirectly, we can create a "copy" between %1 and %2 even
1387
// though their exact values do not match.
1388
BT.visit(*CopyI);
1389
Changed = true;
1390
break;
1391
}
1392
}
1393
1394
return Changed;
1395
}
1396
1397
namespace {
1398
1399
// Recognize instructions that produce constant values known at compile-time.
1400
// Replace them with register definitions that load these constants directly.
1401
class ConstGeneration : public Transformation {
1402
public:
1403
ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1404
MachineRegisterInfo &mri)
1405
: Transformation(true), HII(hii), MRI(mri), BT(bt) {}
1406
1407
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1408
static bool isTfrConst(const MachineInstr &MI);
1409
1410
private:
1411
Register genTfrConst(const TargetRegisterClass *RC, int64_t C,
1412
MachineBasicBlock &B, MachineBasicBlock::iterator At,
1413
DebugLoc &DL);
1414
1415
const HexagonInstrInfo &HII;
1416
MachineRegisterInfo &MRI;
1417
BitTracker &BT;
1418
};
1419
1420
} // end anonymous namespace
1421
1422
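// Check if MI is one of the transfer-immediate/combine instructions that
// materialize a constant known at compile time.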
bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
1423
unsigned Opc = MI.getOpcode();
1424
switch (Opc) {
1425
case Hexagon::A2_combineii:
1426
case Hexagon::A4_combineii:
1427
case Hexagon::A2_tfrsi:
1428
case Hexagon::A2_tfrpi:
1429
case Hexagon::PS_true:
1430
case Hexagon::PS_false:
1431
case Hexagon::CONST32:
1432
case Hexagon::CONST64:
1433
return true;
1434
}
1435
return false;
1436
}
1437
1438
// Generate a transfer-immediate instruction that is appropriate for the
1439
// register class and the actual value being transferred.
1440
Register ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
1441
MachineBasicBlock &B,
1442
MachineBasicBlock::iterator At,
1443
DebugLoc &DL) {
1444
Register Reg = MRI.createVirtualRegister(RC);
1445
if (RC == &Hexagon::IntRegsRegClass) {
1446
BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
1447
.addImm(int32_t(C));
1448
return Reg;
1449
}
1450
1451
if (RC == &Hexagon::DoubleRegsRegClass) {
1452
if (isInt<8>(C)) {
1453
BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
1454
.addImm(C);
1455
return Reg;
1456
}
1457
1458
unsigned Lo = Lo_32(C), Hi = Hi_32(C);
1459
if (isInt<8>(Lo) || isInt<8>(Hi)) {
1460
unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
1461
: Hexagon::A4_combineii;
1462
BuildMI(B, At, DL, HII.get(Opc), Reg)
1463
.addImm(int32_t(Hi))
1464
.addImm(int32_t(Lo));
1465
return Reg;
1466
}
1467
MachineFunction *MF = B.getParent();
1468
auto &HST = MF->getSubtarget<HexagonSubtarget>();
1469
1470
// Disable CONST64 for tiny core since it takes a LD resource.
1471
if (!HST.isTinyCore() ||
1472
MF->getFunction().hasOptSize()) {
1473
BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)
1474
.addImm(C);
1475
return Reg;
1476
}
1477
}
1478
1479
if (RC == &Hexagon::PredRegsRegClass) {
1480
unsigned Opc;
1481
if (C == 0)
1482
Opc = Hexagon::PS_false;
1483
else if ((C & 0xFF) == 0xFF)
1484
Opc = Hexagon::PS_true;
1485
else
1486
return 0;
1487
BuildMI(B, At, DL, HII.get(Opc), Reg);
1488
return Reg;
1489
}
1490
1491
return 0;
1492
}
1493
1494
bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1495
if (!BT.reached(&B))
1496
return false;
1497
bool Changed = false;
1498
RegisterSet Defs;
1499
1500
for (auto I = B.begin(), E = B.end(); I != E; ++I) {
1501
if (isTfrConst(*I))
1502
continue;
1503
Defs.clear();
1504
HBS::getInstrDefs(*I, Defs);
1505
if (Defs.count() != 1)
1506
continue;
1507
Register DR = Defs.find_first();
1508
if (!DR.isVirtual())
1509
continue;
1510
uint64_t U;
1511
const BitTracker::RegisterCell &DRC = BT.lookup(DR);
1512
if (HBS::getConst(DRC, 0, DRC.width(), U)) {
1513
int64_t C = U;
1514
DebugLoc DL = I->getDebugLoc();
1515
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1516
Register ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
1517
if (ImmReg) {
1518
HBS::replaceReg(DR, ImmReg, MRI);
1519
BT.put(ImmReg, DRC);
1520
Changed = true;
1521
}
1522
}
1523
}
1524
return Changed;
1525
}
1526
1527
namespace {
1528
1529
// Identify pairs of available registers which hold identical values.
1530
// In such cases, only one of them needs to be calculated, the other one
1531
// will be defined as a copy of the first.
1532
class CopyGeneration : public Transformation {
1533
public:
1534
CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1535
const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1536
: Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
1537
1538
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1539
1540
private:
1541
bool findMatch(const BitTracker::RegisterRef &Inp,
1542
BitTracker::RegisterRef &Out, const RegisterSet &AVs);
1543
1544
const HexagonInstrInfo &HII;
1545
const HexagonRegisterInfo &HRI;
1546
MachineRegisterInfo &MRI;
1547
BitTracker &BT;
1548
RegisterSet Forbidden;
1549
};
1550
1551
// Eliminate register copies RD = RS, by replacing the uses of RD with
1552
// with uses of RS.
1553
class CopyPropagation : public Transformation {
1554
public:
1555
CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1556
: Transformation(false), HRI(hri), MRI(mri) {}
1557
1558
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1559
1560
static bool isCopyReg(unsigned Opc, bool NoConv);
1561
1562
private:
1563
bool propagateRegCopy(MachineInstr &MI);
1564
1565
const HexagonRegisterInfo &HRI;
1566
MachineRegisterInfo &MRI;
1567
};
1568
1569
} // end anonymous namespace
1570
1571
/// Check if there is a register in AVs that is identical to Inp. If so,
1572
/// set Out to the found register. The output may be a pair Reg:Sub.
1573
bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
1574
BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
1575
if (!BT.has(Inp.Reg))
1576
return false;
1577
const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
1578
auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
1579
unsigned B, W;
1580
if (!HBS::getSubregMask(Inp, B, W, MRI))
1581
return false;
1582
1583
for (Register R = AVs.find_first(); R; R = AVs.find_next(R)) {
1584
if (!BT.has(R) || Forbidden[R])
1585
continue;
1586
const BitTracker::RegisterCell &RC = BT.lookup(R);
1587
unsigned RW = RC.width();
1588
if (W == RW) {
1589
if (FRC != MRI.getRegClass(R))
1590
continue;
1591
if (!HBS::isTransparentCopy(R, Inp, MRI))
1592
continue;
1593
if (!HBS::isEqual(InpRC, B, RC, 0, W))
1594
continue;
1595
Out.Reg = R;
1596
Out.Sub = 0;
1597
return true;
1598
}
1599
// Check if there is a super-register whose part (with a subregister)
1600
// is equal to the input.
1601
// Only do double registers for now.
1602
if (W*2 != RW)
1603
continue;
1604
if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
1605
continue;
1606
1607
if (HBS::isEqual(InpRC, B, RC, 0, W))
1608
Out.Sub = Hexagon::isub_lo;
1609
else if (HBS::isEqual(InpRC, B, RC, W, W))
1610
Out.Sub = Hexagon::isub_hi;
1611
else
1612
continue;
1613
Out.Reg = R;
1614
if (HBS::isTransparentCopy(Out, Inp, MRI))
1615
return true;
1616
}
1617
return false;
1618
}
1619
1620
bool CopyGeneration::processBlock(MachineBasicBlock &B,
1621
const RegisterSet &AVs) {
1622
if (!BT.reached(&B))
1623
return false;
1624
RegisterSet AVB(AVs);
1625
bool Changed = false;
1626
RegisterSet Defs;
1627
1628
for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
1629
Defs.clear();
1630
HBS::getInstrDefs(*I, Defs);
1631
1632
unsigned Opc = I->getOpcode();
1633
if (CopyPropagation::isCopyReg(Opc, false) ||
1634
ConstGeneration::isTfrConst(*I))
1635
continue;
1636
1637
DebugLoc DL = I->getDebugLoc();
1638
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1639
1640
for (Register R = Defs.find_first(); R; R = Defs.find_next(R)) {
1641
BitTracker::RegisterRef MR;
1642
auto *FRC = HBS::getFinalVRegClass(R, MRI);
1643
1644
if (findMatch(R, MR, AVB)) {
1645
Register NewR = MRI.createVirtualRegister(FRC);
1646
BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1647
.addReg(MR.Reg, 0, MR.Sub);
1648
BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
1649
HBS::replaceReg(R, NewR, MRI);
1650
Forbidden.insert(R);
1651
continue;
1652
}
1653
1654
if (FRC == &Hexagon::DoubleRegsRegClass ||
1655
FRC == &Hexagon::HvxWRRegClass) {
1656
// Try to generate REG_SEQUENCE.
1657
unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo);
1658
unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi);
1659
BitTracker::RegisterRef TL = { R, SubLo };
1660
BitTracker::RegisterRef TH = { R, SubHi };
1661
BitTracker::RegisterRef ML, MH;
1662
if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
1663
auto *FRC = HBS::getFinalVRegClass(R, MRI);
1664
Register NewR = MRI.createVirtualRegister(FRC);
1665
BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
1666
.addReg(ML.Reg, 0, ML.Sub)
1667
.addImm(SubLo)
1668
.addReg(MH.Reg, 0, MH.Sub)
1669
.addImm(SubHi);
1670
BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
1671
HBS::replaceReg(R, NewR, MRI);
1672
Forbidden.insert(R);
1673
}
1674
}
1675
}
1676
}
1677
1678
return Changed;
1679
}
1680
1681
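// Check if Opc behaves like a register copy. COPY, REG_SEQUENCE and the
// immediate-combine forms always qualify; plain register transfers and
// combines (A2_tfr, A2_tfrp, A2_combinew, V6_vcombine) only when NoConv
// is true.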
bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
1682
switch (Opc) {
1683
case TargetOpcode::COPY:
1684
case TargetOpcode::REG_SEQUENCE:
1685
case Hexagon::A4_combineir:
1686
case Hexagon::A4_combineri:
1687
return true;
1688
case Hexagon::A2_tfr:
1689
case Hexagon::A2_tfrp:
1690
case Hexagon::A2_combinew:
1691
case Hexagon::V6_vcombine:
1692
return NoConv;
1693
default:
1694
break;
1695
}
1696
return false;
1697
}
1698
1699
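// Propagate the source operand(s) of a copy-like instruction into the uses
// of its destination, handling the low/high subregisters separately for
// combines and REG_SEQUENCE.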
bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
1700
bool Changed = false;
1701
unsigned Opc = MI.getOpcode();
1702
BitTracker::RegisterRef RD = MI.getOperand(0);
1703
assert(MI.getOperand(0).getSubReg() == 0);
1704
1705
switch (Opc) {
1706
case TargetOpcode::COPY:
1707
case Hexagon::A2_tfr:
1708
case Hexagon::A2_tfrp: {
1709
BitTracker::RegisterRef RS = MI.getOperand(1);
1710
if (!HBS::isTransparentCopy(RD, RS, MRI))
1711
break;
1712
if (RS.Sub != 0)
1713
Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
1714
else
1715
Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
1716
break;
1717
}
1718
case TargetOpcode::REG_SEQUENCE: {
1719
BitTracker::RegisterRef SL, SH;
1720
if (HBS::parseRegSequence(MI, SL, SH, MRI)) {
1721
const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
1722
unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
1723
unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
1724
Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI);
1725
Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI);
1726
}
1727
break;
1728
}
1729
case Hexagon::A2_combinew:
1730
case Hexagon::V6_vcombine: {
1731
const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
1732
unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
1733
unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
1734
BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
1735
Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI);
1736
Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI);
1737
break;
1738
}
1739
case Hexagon::A4_combineir:
1740
case Hexagon::A4_combineri: {
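// The register operand supplies the low word of the result for
// A4_combineir and the high word for A4_combineri.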
1741
unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
1742
unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
1743
: Hexagon::isub_hi;
1744
BitTracker::RegisterRef RS = MI.getOperand(SrcX);
1745
Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
1746
break;
1747
}
1748
}
1749
return Changed;
1750
}
1751
1752
bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1753
std::vector<MachineInstr*> Instrs;
1754
for (MachineInstr &MI : llvm::reverse(B))
1755
Instrs.push_back(&MI);
1756
1757
bool Changed = false;
1758
for (auto *I : Instrs) {
1759
unsigned Opc = I->getOpcode();
1760
if (!CopyPropagation::isCopyReg(Opc, true))
1761
continue;
1762
Changed |= propagateRegCopy(*I);
1763
}
1764
1765
return Changed;
1766
}
1767
1768
namespace {
1769
1770
// Recognize patterns that can be simplified and replace them with the
1771
// simpler forms.
1772
// This is by no means complete.
1773
class BitSimplification : public Transformation {
1774
public:
1775
BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
1776
const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
1777
MachineRegisterInfo &mri, MachineFunction &mf)
1778
: Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),
1779
MF(mf), BT(bt) {}
1780
1781
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1782
1783
private:
1784
struct RegHalf : public BitTracker::RegisterRef {
1785
bool Low; // Low/High halfword.
1786
};
1787
1788
bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
1789
unsigned B, RegHalf &RH);
1790
bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);
1791
1792
bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
1793
BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
1794
unsigned getCombineOpcode(bool HLow, bool LLow);
1795
1796
bool genStoreUpperHalf(MachineInstr *MI);
1797
bool genStoreImmediate(MachineInstr *MI);
1798
bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
1799
const BitTracker::RegisterCell &RC);
1800
bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1801
const BitTracker::RegisterCell &RC);
1802
bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1803
const BitTracker::RegisterCell &RC);
1804
bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1805
const BitTracker::RegisterCell &RC);
1806
bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
1807
const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
1808
bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
1809
const BitTracker::RegisterCell &RC);
1810
bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1811
const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
1812
bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);
1813
1814
// Cache of created instructions to avoid creating duplicates.
1815
// XXX Currently only used by genBitSplit.
1816
std::vector<MachineInstr*> NewMIs;
1817
1818
const MachineDominatorTree &MDT;
1819
const HexagonInstrInfo &HII;
1820
const HexagonRegisterInfo &HRI;
1821
MachineRegisterInfo &MRI;
1822
MachineFunction &MF;
1823
BitTracker &BT;
1824
};
1825
1826
} // end anonymous namespace
1827
1828
// Check if the bits [B..B+16) in register cell RC form a valid halfword,
1829
// i.e. [0..16), [16..32), etc. of some register. If so, return true and
1830
// set the information about the found register in RH.
1831
bool BitSimplification::matchHalf(unsigned SelfR,
1832
const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
1833
// XXX This could be searching in the set of available registers, in case
1834
// the match is not exact.
1835
1836
// Match 16-bit chunks, where the RC[B..B+15] references exactly one
1837
// register and all the bits B..B+15 match between RC and the register.
1838
// This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
1839
// and RC = { [0]:0 [1-15]:v1[1-15]... }.
1840
bool Low = false;
1841
unsigned I = B;
1842
while (I < B+16 && RC[I].num())
1843
I++;
1844
if (I == B+16)
1845
return false;
1846
1847
Register Reg = RC[I].RefI.Reg;
1848
unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.
1849
if (P < I-B)
1850
return false;
1851
unsigned Pos = P - (I-B);
1852
1853
if (Reg == 0 || Reg == SelfR) // Don't match "self".
1854
return false;
1855
if (!Reg.isVirtual())
1856
return false;
1857
if (!BT.has(Reg))
1858
return false;
1859
1860
const BitTracker::RegisterCell &SC = BT.lookup(Reg);
1861
if (Pos+16 > SC.width())
1862
return false;
1863
1864
for (unsigned i = 0; i < 16; ++i) {
1865
const BitTracker::BitValue &RV = RC[i+B];
1866
if (RV.Type == BitTracker::BitValue::Ref) {
1867
if (RV.RefI.Reg != Reg)
1868
return false;
1869
if (RV.RefI.Pos != i+Pos)
1870
return false;
1871
continue;
1872
}
1873
if (RC[i+B] != SC[i+Pos])
1874
return false;
1875
}
1876
1877
unsigned Sub = 0;
1878
switch (Pos) {
1879
case 0:
1880
Sub = Hexagon::isub_lo;
1881
Low = true;
1882
break;
1883
case 16:
1884
Sub = Hexagon::isub_lo;
1885
Low = false;
1886
break;
1887
case 32:
1888
Sub = Hexagon::isub_hi;
1889
Low = true;
1890
break;
1891
case 48:
1892
Sub = Hexagon::isub_hi;
1893
Low = false;
1894
break;
1895
default:
1896
return false;
1897
}
1898
1899
RH.Reg = Reg;
1900
RH.Sub = Sub;
1901
RH.Low = Low;
1902
// If the subregister is not valid with the register, set it to 0.
1903
if (!HBS::getFinalVRegClass(RH, MRI))
1904
RH.Sub = 0;
1905
1906
return true;
1907
}
1908
1909
bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
1910
unsigned OpNum) {
1911
auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
1912
auto *RRC = HBS::getFinalVRegClass(R, MRI);
1913
return OpRC->hasSubClassEq(RRC);
1914
}
1915
1916
// Check if RC matches the pattern of a S2_packhl. If so, return true and
1917
// set the inputs Rs and Rt.
1918
bool BitSimplification::matchPackhl(unsigned SelfR,
1919
const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
1920
BitTracker::RegisterRef &Rt) {
1921
RegHalf L1, H1, L2, H2;
1922
1923
if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
1924
return false;
1925
if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
1926
return false;
1927
1928
// Rs = H1.L1, Rt = H2.L2
1929
if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
1930
return false;
1931
if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
1932
return false;
1933
1934
Rs = H1;
1935
Rt = H2;
1936
return true;
1937
}
1938
1939
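// Map the two halves to an A2_combine_xy opcode: the first letter selects
// the .l/.h half of the operand that becomes the high halfword of the
// result, the second letter the half of the operand that becomes the low
// halfword.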
unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
1940
return HLow ? LLow ? Hexagon::A2_combine_ll
1941
: Hexagon::A2_combine_lh
1942
: LLow ? Hexagon::A2_combine_hl
1943
: Hexagon::A2_combine_hh;
1944
}
1945
1946
// If MI stores the upper halfword of a register (potentially obtained via
1947
// shifts or extracts), replace it with a storerf instruction. This could
1948
// cause the "extraction" code to become dead.
1949
bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
1950
unsigned Opc = MI->getOpcode();
1951
if (Opc != Hexagon::S2_storerh_io)
1952
return false;
1953
1954
MachineOperand &ValOp = MI->getOperand(2);
1955
BitTracker::RegisterRef RS = ValOp;
1956
if (!BT.has(RS.Reg))
1957
return false;
1958
const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
1959
RegHalf H;
1960
unsigned B = (RS.Sub == Hexagon::isub_hi) ? 32 : 0;
1961
if (!matchHalf(0, RC, B, H))
1962
return false;
1963
if (H.Low)
1964
return false;
1965
MI->setDesc(HII.get(Hexagon::S2_storerf_io));
1966
ValOp.setReg(H.Reg);
1967
ValOp.setSubReg(H.Sub);
1968
return true;
1969
}
1970
1971
// If MI stores a value known at compile-time, and the value is within a range
1972
// that avoids using constant-extenders, replace it with a store-immediate.
1973
bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
1974
unsigned Opc = MI->getOpcode();
1975
unsigned Align = 0;
1976
switch (Opc) {
1977
case Hexagon::S2_storeri_io:
1978
Align++;
1979
[[fallthrough]];
1980
case Hexagon::S2_storerh_io:
1981
Align++;
1982
[[fallthrough]];
1983
case Hexagon::S2_storerb_io:
1984
break;
1985
default:
1986
return false;
1987
}
1988
1989
// Avoid stores to frame-indices (due to an unknown offset).
1990
if (!MI->getOperand(0).isReg())
1991
return false;
1992
MachineOperand &OffOp = MI->getOperand(1);
1993
if (!OffOp.isImm())
1994
return false;
1995
1996
int64_t Off = OffOp.getImm();
1997
// Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
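// E.g. for S2_storeri_io (Align == 2) the offset must be a multiple of 4
// in the range [0, 252].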
1998
if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
1999
return false;
2000
// Source register:
2001
BitTracker::RegisterRef RS = MI->getOperand(2);
2002
if (!BT.has(RS.Reg))
2003
return false;
2004
const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
2005
uint64_t U;
2006
if (!HBS::getConst(RC, 0, RC.width(), U))
2007
return false;
2008
2009
// Only consider 8-bit values to avoid constant-extenders.
2010
int V;
2011
switch (Opc) {
2012
case Hexagon::S2_storerb_io:
2013
V = int8_t(U);
2014
break;
2015
case Hexagon::S2_storerh_io:
2016
V = int16_t(U);
2017
break;
2018
case Hexagon::S2_storeri_io:
2019
V = int32_t(U);
2020
break;
2021
default:
2022
// Opc is already checked above to be one of the three store instructions.
2023
// This silences a -Wuninitialized false positive on GCC 5.4.
2024
llvm_unreachable("Unexpected store opcode");
2025
}
2026
if (!isInt<8>(V))
2027
return false;
2028
2029
MI->removeOperand(2);
2030
switch (Opc) {
2031
case Hexagon::S2_storerb_io:
2032
MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
2033
break;
2034
case Hexagon::S2_storerh_io:
2035
MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
2036
break;
2037
case Hexagon::S2_storeri_io:
2038
MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
2039
break;
2040
}
2041
MI->addOperand(MachineOperand::CreateImm(V));
2042
return true;
2043
}
2044
2045
// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
2046
// last instruction in a sequence that results in something equivalent to
2047
// the pack-halfwords. The intent is to cause the entire sequence to become
2048
// dead.
2049
bool BitSimplification::genPackhl(MachineInstr *MI,
2050
BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2051
unsigned Opc = MI->getOpcode();
2052
if (Opc == Hexagon::S2_packhl)
2053
return false;
2054
BitTracker::RegisterRef Rs, Rt;
2055
if (!matchPackhl(RD.Reg, RC, Rs, Rt))
2056
return false;
2057
if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
2058
!validateReg(Rt, Hexagon::S2_packhl, 2))
2059
return false;
2060
2061
MachineBasicBlock &B = *MI->getParent();
2062
Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2063
DebugLoc DL = MI->getDebugLoc();
2064
auto At = MI->isPHI() ? B.getFirstNonPHI()
2065
: MachineBasicBlock::iterator(MI);
2066
BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
2067
.addReg(Rs.Reg, 0, Rs.Sub)
2068
.addReg(Rt.Reg, 0, Rt.Sub);
2069
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2070
BT.put(BitTracker::RegisterRef(NewR), RC);
2071
return true;
2072
}
2073
2074
// If MI produces a halfword of the input in the low half of the output,
2075
// replace it with zero-extend or extractu.
2076
bool BitSimplification::genExtractHalf(MachineInstr *MI,
2077
BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2078
RegHalf L;
2079
// Check for halfword in low 16 bits, zeros elsewhere.
2080
if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
2081
return false;
2082
2083
unsigned Opc = MI->getOpcode();
2084
MachineBasicBlock &B = *MI->getParent();
2085
DebugLoc DL = MI->getDebugLoc();
2086
2087
// Prefer zxth, since zxth can go in any slot, while extractu only in
2088
// slots 2 and 3.
2089
unsigned NewR = 0;
2090
auto At = MI->isPHI() ? B.getFirstNonPHI()
2091
: MachineBasicBlock::iterator(MI);
2092
if (L.Low && Opc != Hexagon::A2_zxth) {
2093
if (validateReg(L, Hexagon::A2_zxth, 1)) {
2094
NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2095
BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
2096
.addReg(L.Reg, 0, L.Sub);
2097
}
2098
} else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
2099
if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
2100
NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2101
BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
2102
.addReg(L.Reg, 0, L.Sub)
2103
.addImm(16);
2104
}
2105
}
2106
if (NewR == 0)
2107
return false;
2108
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2109
BT.put(BitTracker::RegisterRef(NewR), RC);
2110
return true;
2111
}
2112
2113
// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
2114
// combine.
2115
bool BitSimplification::genCombineHalf(MachineInstr *MI,
2116
BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2117
RegHalf L, H;
2118
// Check for combine h/l
2119
if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
2120
return false;
2121
// Do nothing if this is just a reg copy.
2122
if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
2123
return false;
2124
2125
unsigned Opc = MI->getOpcode();
2126
unsigned COpc = getCombineOpcode(H.Low, L.Low);
2127
if (COpc == Opc)
2128
return false;
2129
if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
2130
return false;
2131
2132
MachineBasicBlock &B = *MI->getParent();
2133
DebugLoc DL = MI->getDebugLoc();
2134
Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2135
auto At = MI->isPHI() ? B.getFirstNonPHI()
2136
: MachineBasicBlock::iterator(MI);
2137
BuildMI(B, At, DL, HII.get(COpc), NewR)
2138
.addReg(H.Reg, 0, H.Sub)
2139
.addReg(L.Reg, 0, L.Sub);
2140
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2141
BT.put(BitTracker::RegisterRef(NewR), RC);
2142
return true;
2143
}
2144
2145
// If MI resets high bits of a register and keeps the lower ones, replace it
2146
// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
2147
bool BitSimplification::genExtractLow(MachineInstr *MI,
2148
BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2149
unsigned Opc = MI->getOpcode();
2150
switch (Opc) {
2151
case Hexagon::A2_zxtb:
2152
case Hexagon::A2_zxth:
2153
case Hexagon::S2_extractu:
2154
return false;
2155
}
2156
if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
2157
int32_t Imm = MI->getOperand(2).getImm();
2158
if (isInt<10>(Imm))
2159
return false;
2160
}
2161
2162
if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
2163
return false;
2164
unsigned W = RC.width();
2165
while (W > 0 && RC[W-1].is(0))
2166
W--;
2167
if (W == 0 || W == RC.width())
2168
return false;
2169
unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb
2170
: (W == 16) ? Hexagon::A2_zxth
2171
: (W < 10) ? Hexagon::A2_andir
2172
: Hexagon::S2_extractu;
2173
MachineBasicBlock &B = *MI->getParent();
2174
DebugLoc DL = MI->getDebugLoc();
2175
2176
for (auto &Op : MI->uses()) {
2177
if (!Op.isReg())
2178
continue;
2179
BitTracker::RegisterRef RS = Op;
2180
if (!BT.has(RS.Reg))
2181
continue;
2182
const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2183
unsigned BN, BW;
2184
if (!HBS::getSubregMask(RS, BN, BW, MRI))
2185
continue;
2186
if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
2187
continue;
2188
if (!validateReg(RS, NewOpc, 1))
2189
continue;
2190
2191
Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2192
auto At = MI->isPHI() ? B.getFirstNonPHI()
2193
: MachineBasicBlock::iterator(MI);
2194
auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
2195
.addReg(RS.Reg, 0, RS.Sub);
2196
if (NewOpc == Hexagon::A2_andir)
2197
MIB.addImm((1 << W) - 1);
2198
else if (NewOpc == Hexagon::S2_extractu)
2199
MIB.addImm(W).addImm(0);
2200
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2201
BT.put(BitTracker::RegisterRef(NewR), RC);
2202
return true;
2203
}
2204
return false;
2205
}
2206
2207
bool BitSimplification::genBitSplit(MachineInstr *MI,
2208
BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
2209
const RegisterSet &AVs) {
2210
if (!GenBitSplit)
2211
return false;
2212
if (MaxBitSplit.getNumOccurrences()) {
2213
if (CountBitSplit >= MaxBitSplit)
2214
return false;
2215
}
2216
2217
unsigned Opc = MI->getOpcode();
2218
switch (Opc) {
2219
case Hexagon::A4_bitsplit:
2220
case Hexagon::A4_bitspliti:
2221
return false;
2222
}
2223
2224
unsigned W = RC.width();
2225
if (W != 32)
2226
return false;
2227
2228
auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
2229
unsigned Z = C.width();
2230
while (Z > 0 && C[Z-1].is(0))
2231
--Z;
2232
return C.width() - Z;
2233
};
2234
2235
// Count the number of leading zeros in the target RC.
2236
unsigned Z = ctlz(RC);
2237
if (Z == 0 || Z == W)
2238
return false;
2239
2240
// A simplistic analysis: assume the source register (the one being split)
2241
// is fully unknown, and that all its bits are self-references.
2242
const BitTracker::BitValue &B0 = RC[0];
2243
if (B0.Type != BitTracker::BitValue::Ref)
2244
return false;
2245
2246
unsigned SrcR = B0.RefI.Reg;
2247
unsigned SrcSR = 0;
2248
unsigned Pos = B0.RefI.Pos;
2249
2250
// All the non-zero bits should be consecutive bits from the same register.
2251
for (unsigned i = 1; i < W-Z; ++i) {
2252
const BitTracker::BitValue &V = RC[i];
2253
if (V.Type != BitTracker::BitValue::Ref)
2254
return false;
2255
if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
2256
return false;
2257
}
2258
2259
// Now, find the other bitfield among AVs.
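// E.g. if RC holds v1[17..0] with 14 leading zeros, look for an available
// register holding v1[31..18] in its low bits (18 leading zeros); both can
// then be produced by a single A4_bitspliti(v1, #18).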
2260
for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
2261
// The number of leading zeros here should be the number of trailing
2262
// non-zeros in RC.
2263
unsigned SRC = MRI.getRegClass(S)->getID();
2264
if (SRC != Hexagon::IntRegsRegClassID &&
2265
SRC != Hexagon::DoubleRegsRegClassID)
2266
continue;
2267
if (!BT.has(S))
2268
continue;
2269
const BitTracker::RegisterCell &SC = BT.lookup(S);
2270
if (SC.width() != W || ctlz(SC) != W-Z)
2271
continue;
2272
// The Z lower bits should now match SrcR.
2273
const BitTracker::BitValue &S0 = SC[0];
2274
if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
2275
continue;
2276
unsigned P = S0.RefI.Pos;
2277
2278
if (Pos <= P && (Pos + W-Z) != P)
2279
continue;
2280
if (P < Pos && (P + Z) != Pos)
2281
continue;
2282
// The starting bitfield position must be at a subregister boundary.
2283
if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
2284
continue;
2285
2286
unsigned I;
2287
for (I = 1; I < Z; ++I) {
2288
const BitTracker::BitValue &V = SC[I];
2289
if (V.Type != BitTracker::BitValue::Ref)
2290
break;
2291
if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
2292
break;
2293
}
2294
if (I != Z)
2295
continue;
2296
2297
// Generate bitsplit where S is defined.
2298
if (MaxBitSplit.getNumOccurrences())
2299
CountBitSplit++;
2300
MachineInstr *DefS = MRI.getVRegDef(S);
2301
assert(DefS != nullptr);
2302
DebugLoc DL = DefS->getDebugLoc();
2303
MachineBasicBlock &B = *DefS->getParent();
2304
auto At = DefS->isPHI() ? B.getFirstNonPHI()
2305
: MachineBasicBlock::iterator(DefS);
2306
if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
2307
SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
2308
if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
2309
continue;
2310
unsigned ImmOp = Pos <= P ? W-Z : Z;
2311
2312
// Find an existing bitsplit instruction if one already exists.
2313
unsigned NewR = 0;
2314
for (MachineInstr *In : NewMIs) {
2315
if (In->getOpcode() != Hexagon::A4_bitspliti)
2316
continue;
2317
MachineOperand &Op1 = In->getOperand(1);
2318
if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
2319
continue;
2320
if (In->getOperand(2).getImm() != ImmOp)
2321
continue;
2322
// Check if the target register is available here.
2323
MachineOperand &Op0 = In->getOperand(0);
2324
MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
2325
assert(DefI != nullptr);
2326
if (!MDT.dominates(DefI, &*At))
2327
continue;
2328
2329
// Found one that can be reused.
2330
assert(Op0.getSubReg() == 0);
2331
NewR = Op0.getReg();
2332
break;
2333
}
2334
if (!NewR) {
2335
NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2336
auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
2337
.addReg(SrcR, 0, SrcSR)
2338
.addImm(ImmOp);
2339
NewMIs.push_back(NewBS);
2340
}
2341
if (Pos <= P) {
2342
HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
2343
HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);
2344
} else {
2345
HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);
2346
HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
2347
}
2348
return true;
2349
}
2350
2351
return false;
2352
}
2353
2354
// Check for tstbit simplification opportunity, where the bit being checked
2355
// can be tracked back to another register. For example:
2356
// %2 = S2_lsr_i_r %1, 5
2357
// %3 = S2_tstbit_i %2, 0
2358
// =>
2359
// %3 = S2_tstbit_i %1, 5
2360
bool BitSimplification::simplifyTstbit(MachineInstr *MI,
2361
BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2362
unsigned Opc = MI->getOpcode();
2363
if (Opc != Hexagon::S2_tstbit_i)
2364
return false;
2365
2366
unsigned BN = MI->getOperand(2).getImm();
2367
BitTracker::RegisterRef RS = MI->getOperand(1);
2368
unsigned F, W;
2369
DebugLoc DL = MI->getDebugLoc();
2370
if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
2371
return false;
2372
MachineBasicBlock &B = *MI->getParent();
2373
auto At = MI->isPHI() ? B.getFirstNonPHI()
2374
: MachineBasicBlock::iterator(MI);
2375
2376
const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2377
const BitTracker::BitValue &V = SC[F+BN];
2378
if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
2379
const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
2380
// Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
2381
// a double register, need to use a subregister and adjust bit
2382
// number.
2383
unsigned P = std::numeric_limits<unsigned>::max();
2384
BitTracker::RegisterRef RR(V.RefI.Reg, 0);
2385
if (TC == &Hexagon::DoubleRegsRegClass) {
2386
P = V.RefI.Pos;
2387
RR.Sub = Hexagon::isub_lo;
2388
if (P >= 32) {
2389
P -= 32;
2390
RR.Sub = Hexagon::isub_hi;
2391
}
2392
} else if (TC == &Hexagon::IntRegsRegClass) {
2393
P = V.RefI.Pos;
2394
}
2395
if (P != std::numeric_limits<unsigned>::max()) {
2396
Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2397
BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
2398
.addReg(RR.Reg, 0, RR.Sub)
2399
.addImm(P);
2400
HBS::replaceReg(RD.Reg, NewR, MRI);
2401
BT.put(NewR, RC);
2402
return true;
2403
}
2404
} else if (V.is(0) || V.is(1)) {
2405
Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2406
unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
2407
BuildMI(B, At, DL, HII.get(NewOpc), NewR);
2408
HBS::replaceReg(RD.Reg, NewR, MRI);
2409
return true;
2410
}
2411
2412
return false;
2413
}
2414
2415
// Detect whether RD is a bitfield extract (sign- or zero-extended) of
2416
// some register from the AVs set. Create a new corresponding instruction
2417
// at the location of MI. The intent is to recognize situations where
2418
// a sequence of instructions performs an operation that is equivalent to
2419
// an extract operation, such as a shift left followed by a shift right.
2420
bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
2421
BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
2422
const RegisterSet &AVs) {
2423
if (!GenExtract)
2424
return false;
2425
if (MaxExtract.getNumOccurrences()) {
2426
if (CountExtract >= MaxExtract)
2427
return false;
2428
CountExtract++;
2429
}
2430
2431
unsigned W = RC.width();
2432
unsigned RW = W;
2433
unsigned Len;
2434
bool Signed;
2435
2436
// The code is mostly class-independent, except for the part that generates
2437
// the extract instruction, and establishes the source register (in case it
2438
// needs to use a subregister).
2439
const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2440
if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2441
return false;
2442
assert(RD.Sub == 0);
2443
2444
// Observation:
2445
// If the cell has a form of 00..0xx..x with k zeros and n remaining
2446
// bits, this could be an extractu of the n bits, but it could also be
2447
// an extractu of a longer field which happens to have 0s in the top
2448
// bit positions.
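// E.g. a cell with 24 leading zeros followed by v1[7..0] could come from
// zxtb(v1), but equally from extractu(v1,#10,#0) if v1[9:8] happen to be 0.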
2449
// The same logic applies to sign-extended fields.
2450
//
2451
// Do not check for the extended extracts, since it would expand the
2452
// search space quite a bit. The search may be expensive as it is.
2453
2454
const BitTracker::BitValue &TopV = RC[W-1];
2455
2456
// Eliminate candidates that have self-referential bits, since they
2457
// cannot be extracts from other registers. Also, skip registers that
2458
// have compile-time constant values.
2459
bool IsConst = true;
2460
for (unsigned I = 0; I != W; ++I) {
2461
const BitTracker::BitValue &V = RC[I];
2462
if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
2463
return false;
2464
IsConst = IsConst && (V.is(0) || V.is(1));
2465
}
2466
if (IsConst)
2467
return false;
2468
2469
if (TopV.is(0) || TopV.is(1)) {
2470
bool S = TopV.is(1);
2471
for (--W; W > 0 && RC[W-1].is(S); --W)
2472
;
2473
Len = W;
2474
Signed = S;
2475
// The sign bit must be a part of the field being extended.
2476
if (Signed)
2477
++Len;
2478
} else {
2479
// This could still be a sign-extended extract.
2480
assert(TopV.Type == BitTracker::BitValue::Ref);
2481
if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
2482
return false;
2483
for (--W; W > 0 && RC[W-1] == TopV; --W)
2484
;
2485
// The top bits of RC are copies of TopV. One occurrence of TopV will
2486
// be a part of the field.
2487
Len = W + 1;
2488
Signed = true;
2489
}
2490
2491
// This would be just a copy. It should be handled elsewhere.
2492
if (Len == RW)
2493
return false;
2494
2495
LLVM_DEBUG({
2496
dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)
2497
<< ", MI: " << *MI;
2498
dbgs() << "Cell: " << RC << '\n';
2499
dbgs() << "Expected bitfield size: " << Len << " bits, "
2500
<< (Signed ? "sign" : "zero") << "-extended\n";
2501
});
2502
2503
bool Changed = false;
2504
2505
for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
2506
if (!BT.has(R))
2507
continue;
2508
const BitTracker::RegisterCell &SC = BT.lookup(R);
2509
unsigned SW = SC.width();
2510
2511
// The source can be longer than the destination, as long as its size is
2512
// a multiple of the size of the destination. Also, we would need to be
2513
// able to refer to the subregister in the source that would be of the
2514
// same size as the destination, but only check the sizes here.
2515
if (SW < RW || (SW % RW) != 0)
2516
continue;
2517
2518
// The field can start at any offset in SC as long as it contains Len
2519
// bits and does not cross a subregister boundary (if the source register
2520
// is longer than the destination).
2521
unsigned Off = 0;
2522
while (Off <= SW-Len) {
2523
unsigned OE = (Off+Len)/RW;
2524
if (OE != Off/RW) {
2525
// The assumption here is that if the source (R) is longer than the
2526
// destination, then the source is a sequence of words of
2527
// size RW, and each such word in R can be accessed via a subregister.
2528
//
2529
// If the beginning and the end of the field cross the subregister
2530
// boundary, advance to the next subregister.
2531
Off = OE*RW;
2532
continue;
2533
}
2534
if (HBS::isEqual(RC, 0, SC, Off, Len))
2535
break;
2536
++Off;
2537
}
2538
2539
if (Off > SW-Len)
2540
continue;
2541
2542
// Found match.
2543
unsigned ExtOpc = 0;
2544
if (Off == 0) {
2545
if (Len == 8)
2546
ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
2547
else if (Len == 16)
2548
ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
2549
else if (Len < 10 && !Signed)
2550
ExtOpc = Hexagon::A2_andir;
2551
}
2552
if (ExtOpc == 0) {
2553
ExtOpc =
2554
Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)
2555
: (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
2556
}
2557
unsigned SR = 0;
2558
// This only recognizes isub_lo and isub_hi.
2559
if (RW != SW && RW*2 != SW)
2560
continue;
2561
if (RW != SW)
2562
SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
2563
Off = Off % RW;
2564
2565
if (!validateReg({R,SR}, ExtOpc, 1))
2566
continue;
2567
2568
// Don't generate the same instruction as the one being optimized.
2569
if (MI->getOpcode() == ExtOpc) {
2570
// All possible ExtOpc's have the source in operand(1).
2571
const MachineOperand &SrcOp = MI->getOperand(1);
2572
if (SrcOp.getReg() == R)
2573
continue;
2574
}
2575
2576
DebugLoc DL = MI->getDebugLoc();
2577
MachineBasicBlock &B = *MI->getParent();
2578
Register NewR = MRI.createVirtualRegister(FRC);
2579
auto At = MI->isPHI() ? B.getFirstNonPHI()
2580
: MachineBasicBlock::iterator(MI);
2581
auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
2582
.addReg(R, 0, SR);
2583
switch (ExtOpc) {
2584
case Hexagon::A2_sxtb:
2585
case Hexagon::A2_zxtb:
2586
case Hexagon::A2_sxth:
2587
case Hexagon::A2_zxth:
2588
break;
2589
case Hexagon::A2_andir:
2590
MIB.addImm((1u << Len) - 1);
2591
break;
2592
case Hexagon::S4_extract:
2593
case Hexagon::S2_extractu:
2594
case Hexagon::S4_extractp:
2595
case Hexagon::S2_extractup:
2596
MIB.addImm(Len)
2597
.addImm(Off);
2598
break;
2599
default:
2600
llvm_unreachable("Unexpected opcode");
2601
}
2602
2603
HBS::replaceReg(RD.Reg, NewR, MRI);
2604
BT.put(BitTracker::RegisterRef(NewR), RC);
2605
Changed = true;
2606
break;
2607
}
2608
2609
return Changed;
2610
}
2611
2612
bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
2613
BitTracker::RegisterRef RD) {
2614
unsigned Opc = MI->getOpcode();
2615
if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
2616
return false;
2617
MachineOperand &CmpOp = MI->getOperand(2);
2618
if (!CmpOp.isImm() || CmpOp.getImm() != 0)
2619
return false;
2620
2621
const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2622
if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2623
return false;
2624
assert(RD.Sub == 0);
2625
2626
MachineBasicBlock &B = *MI->getParent();
2627
const DebugLoc &DL = MI->getDebugLoc();
2628
auto At = MI->isPHI() ? B.getFirstNonPHI()
2629
: MachineBasicBlock::iterator(MI);
2630
bool KnownZ = true;
2631
bool KnownNZ = false;
2632
2633
BitTracker::RegisterRef SR = MI->getOperand(1);
2634
if (!BT.has(SR.Reg))
2635
return false;
2636
const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
2637
unsigned F, W;
2638
if (!HBS::getSubregMask(SR, F, W, MRI))
2639
return false;
2640
2641
for (uint16_t I = F; I != F+W; ++I) {
2642
const BitTracker::BitValue &V = SC[I];
2643
if (!V.is(0))
2644
KnownZ = false;
2645
if (V.is(1))
2646
KnownNZ = true;
2647
}
2648
2649
auto ReplaceWithConst = [&](int C) {
2650
Register NewR = MRI.createVirtualRegister(FRC);
2651
BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
2652
.addImm(C);
2653
HBS::replaceReg(RD.Reg, NewR, MRI);
2654
BitTracker::RegisterCell NewRC(W);
2655
for (uint16_t I = 0; I != W; ++I) {
2656
NewRC[I] = BitTracker::BitValue(C & 1);
2657
C = unsigned(C) >> 1;
2658
}
2659
BT.put(BitTracker::RegisterRef(NewR), NewRC);
2660
return true;
2661
};
2662
2663
auto IsNonZero = [] (const MachineOperand &Op) {
2664
if (Op.isGlobal() || Op.isBlockAddress())
2665
return true;
2666
if (Op.isImm())
2667
return Op.getImm() != 0;
2668
if (Op.isCImm())
2669
return !Op.getCImm()->isZero();
2670
if (Op.isFPImm())
2671
return !Op.getFPImm()->isZero();
2672
return false;
2673
};
2674
2675
auto IsZero = [] (const MachineOperand &Op) {
2676
if (Op.isGlobal() || Op.isBlockAddress())
2677
return false;
2678
if (Op.isImm())
2679
return Op.getImm() == 0;
2680
if (Op.isCImm())
2681
return Op.getCImm()->isZero();
2682
if (Op.isFPImm())
2683
return Op.getFPImm()->isZero();
2684
return false;
2685
};
2686
2687
// If the source register is known to be 0 or non-0, the comparison can
2688
// be folded to a load of a constant.
2689
if (KnownZ || KnownNZ) {
2690
assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
2691
return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
2692
}
2693
2694
// Special case: if the compare comes from a C2_muxii, then we know the
2695
// two possible constants that can be the source value.
2696
MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
2697
if (!InpDef)
2698
return false;
2699
if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
2700
MachineOperand &Src1 = InpDef->getOperand(2);
2701
MachineOperand &Src2 = InpDef->getOperand(3);
2702
// Check if both are non-zero.
2703
bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
2704
if (KnownNZ1 && KnownNZ2)
2705
return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
2706
// Check if both are zero.
2707
bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
2708
if (KnownZ1 && KnownZ2)
2709
return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);
2710
2711
// If for both operands we know that they are either 0 or non-0,
2712
// replace the comparison with a C2_muxii, using the same predicate
2713
// register, but with operands substituted with 0/1 accordingly.
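// E.g. %1 = C2_muxii %p, #0, #7; %2 = A4_rcmpeqi %1, #0 becomes
// %2 = C2_muxii %p, #1, #0.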
2714
if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
2715
Register NewR = MRI.createVirtualRegister(FRC);
2716
BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
2717
.addReg(InpDef->getOperand(1).getReg())
2718
.addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
2719
.addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
2720
HBS::replaceReg(RD.Reg, NewR, MRI);
2721
// Create a new cell with only the least significant bit unknown.
2722
BitTracker::RegisterCell NewRC(W);
2723
NewRC[0] = BitTracker::BitValue::self();
2724
NewRC.fill(1, W, BitTracker::BitValue::Zero);
2725
BT.put(BitTracker::RegisterRef(NewR), NewRC);
2726
return true;
2727
}
2728
}
2729
2730
return false;
2731
}
2732
2733
bool BitSimplification::processBlock(MachineBasicBlock &B,
2734
const RegisterSet &AVs) {
2735
if (!BT.reached(&B))
2736
return false;
2737
bool Changed = false;
2738
RegisterSet AVB = AVs;
2739
RegisterSet Defs;
2740
2741
for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
2742
MachineInstr *MI = &*I;
2743
Defs.clear();
2744
HBS::getInstrDefs(*MI, Defs);
2745
2746
unsigned Opc = MI->getOpcode();
2747
if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
2748
continue;
2749
2750
if (MI->mayStore()) {
2751
bool T = genStoreUpperHalf(MI);
2752
T = T || genStoreImmediate(MI);
2753
Changed |= T;
2754
continue;
2755
}
2756
2757
if (Defs.count() != 1)
2758
continue;
2759
const MachineOperand &Op0 = MI->getOperand(0);
2760
if (!Op0.isReg() || !Op0.isDef())
2761
continue;
2762
BitTracker::RegisterRef RD = Op0;
2763
if (!BT.has(RD.Reg))
2764
continue;
2765
const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2766
const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
2767
2768
if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
2769
bool T = genPackhl(MI, RD, RC);
2770
T = T || simplifyExtractLow(MI, RD, RC, AVB);
2771
Changed |= T;
2772
continue;
2773
}
2774
2775
if (FRC->getID() == Hexagon::IntRegsRegClassID) {
2776
bool T = genBitSplit(MI, RD, RC, AVB);
2777
T = T || simplifyExtractLow(MI, RD, RC, AVB);
2778
T = T || genExtractHalf(MI, RD, RC);
2779
T = T || genCombineHalf(MI, RD, RC);
2780
T = T || genExtractLow(MI, RD, RC);
2781
T = T || simplifyRCmp0(MI, RD);
2782
Changed |= T;
2783
continue;
2784
}
2785
2786
if (FRC->getID() == Hexagon::PredRegsRegClassID) {
2787
bool T = simplifyTstbit(MI, RD, RC);
2788
Changed |= T;
2789
continue;
2790
}
2791
}
2792
return Changed;
2793
}
2794
2795
bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
2796
if (skipFunction(MF.getFunction()))
2797
return false;
2798
2799
auto &HST = MF.getSubtarget<HexagonSubtarget>();
2800
auto &HRI = *HST.getRegisterInfo();
2801
auto &HII = *HST.getInstrInfo();
2802
2803
MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2804
MachineRegisterInfo &MRI = MF.getRegInfo();
2805
bool Changed;
2806
2807
Changed = DeadCodeElimination(MF, *MDT).run();
2808
2809
const HexagonEvaluator HE(HRI, MRI, HII, MF);
2810
BitTracker BT(HE, MF);
2811
LLVM_DEBUG(BT.trace(true));
2812
BT.run();
2813
2814
MachineBasicBlock &Entry = MF.front();
2815
2816
RegisterSet AIG; // Available registers for IG.
2817
ConstGeneration ImmG(BT, HII, MRI);
2818
Changed |= visitBlock(Entry, ImmG, AIG);
2819
2820
RegisterSet ARE; // Available registers for RIE.
2821
RedundantInstrElimination RIE(BT, HII, HRI, MRI);
2822
bool Ried = visitBlock(Entry, RIE, ARE);
2823
if (Ried) {
2824
Changed = true;
2825
BT.run();
2826
}
2827
2828
RegisterSet ACG; // Available registers for CG.
2829
CopyGeneration CopyG(BT, HII, HRI, MRI);
2830
Changed |= visitBlock(Entry, CopyG, ACG);
2831
2832
RegisterSet ACP; // Available registers for CP.
2833
CopyPropagation CopyP(HRI, MRI);
2834
Changed |= visitBlock(Entry, CopyP, ACP);
2835
2836
Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2837
2838
BT.run();
2839
RegisterSet ABS; // Available registers for BS.
2840
BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
2841
Changed |= visitBlock(Entry, BitS, ABS);
2842
2843
Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2844
2845
if (Changed) {
2846
for (auto &B : MF)
2847
for (auto &I : B)
2848
I.clearKillInfo();
2849
DeadCodeElimination(MF, *MDT).run();
2850
}
2851
return Changed;
2852
}
2853
2854
// Recognize loops where the code at the end of the loop matches the code
2855
// before the entry of the loop, and the matching code is such that it can
2856
// be simplified. This pass relies on the bit simplification above and only
2857
// prepares code in a way that can be handled by the bit simplification.
2858
//
2859
// This is the motivating testcase (and explanation):
2860
//
2861
// {
2862
// loop0(.LBB0_2, r1) // %for.body.preheader
2863
// r5:4 = memd(r0++#8)
2864
// }
2865
// {
2866
// r3 = lsr(r4, #16)
2867
// r7:6 = combine(r5, r5)
2868
// }
2869
// {
2870
// r3 = insert(r5, #16, #16)
2871
// r7:6 = vlsrw(r7:6, #16)
2872
// }
2873
// .LBB0_2:
2874
// {
2875
// memh(r2+#4) = r5
2876
// memh(r2+#6) = r6 # R6 is really R5.H
2877
// }
2878
// {
2879
// r2 = add(r2, #8)
2880
// memh(r2+#0) = r4
2881
// memh(r2+#2) = r3 # R3 is really R4.H
2882
// }
2883
// {
2884
// r5:4 = memd(r0++#8)
2885
// }
2886
// { # "Shuffling" code that sets up R3 and R6
2887
// r3 = lsr(r4, #16) # so that their halves can be stored in the
2888
// r7:6 = combine(r5, r5) # next iteration. This could be folded into
2889
// } # the stores if the code was at the beginning
2890
// { # of the loop iteration. Since the same code
2891
// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved
2892
// r7:6 = vlsrw(r7:6, #16) # there.
2893
// }:endloop0
2894
//
2895
//
2896
// The outcome:
2897
//
2898
// {
2899
// loop0(.LBB0_2, r1)
2900
// r5:4 = memd(r0++#8)
2901
// }
2902
// .LBB0_2:
2903
// {
2904
// memh(r2+#4) = r5
2905
// memh(r2+#6) = r5.h
2906
// }
2907
// {
2908
// r2 = add(r2, #8)
2909
// memh(r2+#0) = r4
2910
// memh(r2+#2) = r4.h
2911
// }
2912
// {
2913
// r5:4 = memd(r0++#8)
2914
// }:endloop0
2915
2916
namespace llvm {
2917
2918
FunctionPass *createHexagonLoopRescheduling();
2919
void initializeHexagonLoopReschedulingPass(PassRegistry&);
2920
2921
} // end namespace llvm
2922
2923
namespace {
2924
2925
class HexagonLoopRescheduling : public MachineFunctionPass {
2926
public:
2927
static char ID;
2928
2929
HexagonLoopRescheduling() : MachineFunctionPass(ID) {
2930
initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
2931
}
2932
2933
bool runOnMachineFunction(MachineFunction &MF) override;
2934
2935
private:
2936
const HexagonInstrInfo *HII = nullptr;
2937
const HexagonRegisterInfo *HRI = nullptr;
2938
MachineRegisterInfo *MRI = nullptr;
2939
BitTracker *BTP = nullptr;
2940
2941
struct LoopCand {
2942
LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
2943
MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
2944
2945
MachineBasicBlock *LB, *PB, *EB;
2946
};
2947
using InstrList = std::vector<MachineInstr *>;
2948
struct InstrGroup {
2949
BitTracker::RegisterRef Inp, Out;
2950
InstrList Ins;
2951
};
2952
struct PhiInfo {
2953
PhiInfo(MachineInstr &P, MachineBasicBlock &B);
2954
2955
unsigned DefR;
2956
BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register
2957
MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block
2958
};
2959
2960
static unsigned getDefReg(const MachineInstr *MI);
2961
bool isConst(unsigned Reg) const;
2962
bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
2963
bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
2964
bool isShuffleOf(unsigned OutR, unsigned InpR) const;
2965
bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
2966
unsigned &InpR2) const;
2967
void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
2968
MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
2969
bool processLoop(LoopCand &C);
2970
};
2971
2972
} // end anonymous namespace
2973
2974
char HexagonLoopRescheduling::ID = 0;
2975
2976
INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
2977
"Hexagon Loop Rescheduling", false, false)
2978
2979
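// For a PHI in the loop block B, record the defined register, the incoming
// value from the back edge (LR), and the incoming value and block from the
// preheader (PR, PB).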
HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
2980
MachineBasicBlock &B) {
2981
DefR = HexagonLoopRescheduling::getDefReg(&P);
2982
LB = &B;
2983
PB = nullptr;
2984
for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
2985
const MachineOperand &OpB = P.getOperand(i+1);
2986
if (OpB.getMBB() == &B) {
2987
LR = P.getOperand(i);
2988
continue;
2989
}
2990
PB = OpB.getMBB();
2991
PR = P.getOperand(i);
2992
}
2993
}
2994
2995
unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
2996
RegisterSet Defs;
2997
HBS::getInstrDefs(*MI, Defs);
2998
if (Defs.count() != 1)
2999
return 0;
3000
return Defs.find_first();
3001
}
3002
3003
bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
3004
if (!BTP->has(Reg))
3005
return false;
3006
const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
3007
for (unsigned i = 0, w = RC.width(); i < w; ++i) {
3008
const BitTracker::BitValue &V = RC[i];
3009
if (!V.is(0) && !V.is(1))
3010
return false;
3011
}
3012
return true;
3013
}
3014
3015
bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
3016
unsigned DefR) const {
3017
unsigned Opc = MI->getOpcode();
3018
switch (Opc) {
3019
case TargetOpcode::COPY:
3020
case Hexagon::S2_lsr_i_r:
3021
case Hexagon::S2_asr_i_r:
3022
case Hexagon::S2_asl_i_r:
3023
case Hexagon::S2_lsr_i_p:
3024
case Hexagon::S2_asr_i_p:
3025
case Hexagon::S2_asl_i_p:
3026
case Hexagon::S2_insert:
3027
case Hexagon::A2_or:
3028
case Hexagon::A2_orp:
3029
case Hexagon::A2_and:
3030
case Hexagon::A2_andp:
3031
case Hexagon::A2_combinew:
3032
case Hexagon::A4_combineri:
3033
case Hexagon::A4_combineir:
3034
case Hexagon::A2_combineii:
3035
case Hexagon::A4_combineii:
3036
case Hexagon::A2_combine_ll:
3037
case Hexagon::A2_combine_lh:
3038
case Hexagon::A2_combine_hl:
3039
case Hexagon::A2_combine_hh:
3040
return true;
3041
}
3042
return false;
3043
}
3044
3045
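// Check whether InpR is used by MI as the stored value, i.e. as the last
// operand of a store-type instruction.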
bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
3046
unsigned InpR) const {
3047
for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
3048
const MachineOperand &Op = MI->getOperand(i);
3049
if (!Op.isReg())
3050
continue;
3051
if (Op.getReg() == InpR)
3052
return i == n-1;
3053
}
3054
return false;
3055
}
3056
3057
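// Check whether every bit of OutR that refers to another register refers
// to InpR, i.e. OutR is composed only of constant bits and bits of InpR.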
bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
3058
if (!BTP->has(OutR) || !BTP->has(InpR))
3059
return false;
3060
const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
3061
for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
3062
const BitTracker::BitValue &V = OutC[i];
3063
if (V.Type != BitTracker::BitValue::Ref)
3064
continue;
3065
if (V.RefI.Reg != InpR)
3066
return false;
3067
}
3068
return true;
3069
}
3070
3071
bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
3072
unsigned OutR2, unsigned &InpR2) const {
3073
if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
3074
return false;
3075
const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
3076
const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
3077
unsigned W = OutC1.width();
3078
unsigned MatchR = 0;
3079
if (W != OutC2.width())
3080
return false;
3081
for (unsigned i = 0; i < W; ++i) {
3082
const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
3083
if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
3084
return false;
3085
if (V1.Type != BitTracker::BitValue::Ref)
3086
continue;
3087
if (V1.RefI.Pos != V2.RefI.Pos)
3088
return false;
3089
if (V1.RefI.Reg != InpR1)
3090
return false;
3091
if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
3092
return false;
3093
if (!MatchR)
3094
MatchR = V2.RefI.Reg;
3095
else if (V2.RefI.Reg != MatchR)
3096
return false;
3097
}
3098
InpR2 = MatchR;
3099
return true;
3100
}
3101
3102
void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
3103
MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
3104
unsigned NewPredR) {
3105
DenseMap<unsigned,unsigned> RegMap;
3106
3107
const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
3108
Register PhiR = MRI->createVirtualRegister(PhiRC);
3109
BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
3110
.addReg(NewPredR)
3111
.addMBB(&PB)
3112
.addReg(G.Inp.Reg)
3113
.addMBB(&LB);
3114
RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
3115
3116
for (const MachineInstr *SI : llvm::reverse(G.Ins)) {
3117
unsigned DR = getDefReg(SI);
3118
const TargetRegisterClass *RC = MRI->getRegClass(DR);
3119
Register NewDR = MRI->createVirtualRegister(RC);
3120
DebugLoc DL = SI->getDebugLoc();
3121
3122
auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
3123
for (const MachineOperand &Op : SI->operands()) {
3124
if (!Op.isReg()) {
3125
MIB.add(Op);
3126
continue;
3127
}
3128
if (!Op.isUse())
3129
continue;
3130
unsigned UseR = RegMap[Op.getReg()];
3131
MIB.addReg(UseR, 0, Op.getSubReg());
3132
}
3133
RegMap.insert(std::make_pair(DR, NewDR));
3134
}
3135
3136
HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
3137
}
3138
3139
bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
3140
LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)
3141
<< "\n");
3142
std::vector<PhiInfo> Phis;
3143
for (auto &I : *C.LB) {
3144
if (!I.isPHI())
3145
break;
3146
unsigned PR = getDefReg(&I);
3147
if (isConst(PR))
3148
continue;
3149
bool BadUse = false, GoodUse = false;
3150
for (const MachineOperand &MO : MRI->use_operands(PR)) {
3151
const MachineInstr *UseI = MO.getParent();
3152
if (UseI->getParent() != C.LB) {
3153
BadUse = true;
3154
break;
3155
}
3156
if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
3157
GoodUse = true;
3158
}
3159
if (BadUse || !GoodUse)
3160
continue;
3161
3162
Phis.push_back(PhiInfo(I, *C.LB));
3163
}
3164
3165
LLVM_DEBUG({
3166
dbgs() << "Phis: {";
3167
for (auto &I : Phis) {
3168
dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi("
3169
<< printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
3170
<< ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
3171
<< I.LB->getNumber() << ')';
3172
}
3173
dbgs() << " }\n";
3174
});
3175
3176
if (Phis.empty())
3177
return false;
3178
3179
bool Changed = false;
3180
InstrList ShufIns;
3181
3182
// Go backwards in the block: for each bit shuffling instruction, check
3183
// if that instruction could potentially be moved to the front of the loop:
3184
// the output of the loop cannot be used in a non-shuffling instruction
3185
// in this loop.
3186
for (MachineInstr &MI : llvm::reverse(*C.LB)) {
3187
if (MI.isTerminator())
3188
continue;
3189
if (MI.isPHI())
3190
break;
3191
3192
RegisterSet Defs;
3193
HBS::getInstrDefs(MI, Defs);
3194
if (Defs.count() != 1)
3195
continue;
3196
Register DefR = Defs.find_first();
3197
if (!DefR.isVirtual())
3198
continue;
3199
if (!isBitShuffle(&MI, DefR))
3200
continue;
3201
3202
bool BadUse = false;
3203
for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
3204
MachineInstr *UseI = UI->getParent();
3205
if (UseI->getParent() == C.LB) {
3206
if (UseI->isPHI()) {
3207
// If the use is in a phi node in this loop, then it should be
3208
// the value corresponding to the back edge.
3209
unsigned Idx = UI.getOperandNo();
3210
if (UseI->getOperand(Idx+1).getMBB() != C.LB)
3211
BadUse = true;
3212
} else {
3213
if (!llvm::is_contained(ShufIns, UseI))
3214
BadUse = true;
3215
}
3216
} else {
3217
// There is a use outside of the loop, but there is no epilog block
3218
// suitable for a copy-out.
3219
if (C.EB == nullptr)
3220
BadUse = true;
3221
}
3222
if (BadUse)
3223
break;
3224
}
3225
3226
if (BadUse)
3227
continue;
3228
ShufIns.push_back(&MI);
3229
}
3230
3231
// Partition the list of shuffling instructions into instruction groups,
3232
// where each group has to be moved as a whole (i.e. a group is a chain of
3233
// dependent instructions). A group produces a single live output register,
3234
// which is meant to be the input of the loop phi node (although this is
3235
// not checked here yet). It also uses a single register as its input,
3236
// which is some value produced in the loop body. After moving the group
3237
// to the beginning of the loop, that input register would need to be
3238
// the loop-carried register (through a phi node) instead of the (currently
3239
// loop-carried) output register.
3240
using InstrGroupList = std::vector<InstrGroup>;
3241
InstrGroupList Groups;
3242
3243
for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
3244
MachineInstr *SI = ShufIns[i];
3245
if (SI == nullptr)
3246
continue;
3247
3248
InstrGroup G;
3249
G.Ins.push_back(SI);
3250
G.Out.Reg = getDefReg(SI);
3251
RegisterSet Inputs;
3252
HBS::getInstrUses(*SI, Inputs);
3253
3254
for (unsigned j = i+1; j < n; ++j) {
3255
MachineInstr *MI = ShufIns[j];
3256
if (MI == nullptr)
3257
continue;
3258
RegisterSet Defs;
3259
HBS::getInstrDefs(*MI, Defs);
3260
// If this instruction does not define any pending inputs, skip it.
3261
if (!Defs.intersects(Inputs))
3262
continue;
3263
// Otherwise, add it to the current group and remove the inputs that
3264
// are defined by MI.
3265
G.Ins.push_back(MI);
3266
Inputs.remove(Defs);
3267
// Then add all registers used by MI.
3268
HBS::getInstrUses(*MI, Inputs);
3269
ShufIns[j] = nullptr;
3270
}
3271
3272
// Only add a group if it requires at most one register.
3273
if (Inputs.count() > 1)
3274
continue;
3275
auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
3276
return G.Out.Reg == P.LR.Reg;
3277
};
3278
if (llvm::none_of(Phis, LoopInpEq))
3279
continue;
3280
3281
G.Inp.Reg = Inputs.find_first();
3282
Groups.push_back(G);
3283
}
3284
3285
LLVM_DEBUG({
3286
for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
3287
InstrGroup &G = Groups[i];
3288
dbgs() << "Group[" << i << "] inp: "
3289
<< printReg(G.Inp.Reg, HRI, G.Inp.Sub)
3290
<< " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
3291
for (const MachineInstr *MI : G.Ins)
3292
dbgs() << " " << MI;
3293
}
3294
});
3295
3296
for (InstrGroup &G : Groups) {
3297
if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
3298
continue;
3299
auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
3300
return G.Out.Reg == P.LR.Reg;
3301
};
3302
auto F = llvm::find_if(Phis, LoopInpEq);
3303
if (F == Phis.end())
3304
continue;
3305
unsigned PrehR = 0;
3306
if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
3307
const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
3308
unsigned Opc = DefPrehR->getOpcode();
3309
if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
3310
continue;
3311
if (!DefPrehR->getOperand(1).isImm())
3312
continue;
3313
if (DefPrehR->getOperand(1).getImm() != 0)
3314
continue;
3315
const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
3316
if (RC != MRI->getRegClass(F->PR.Reg)) {
3317
PrehR = MRI->createVirtualRegister(RC);
3318
unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
3319
: Hexagon::A2_tfrpi;
3320
auto T = C.PB->getFirstTerminator();
3321
DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
3322
BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
3323
.addImm(0);
3324
} else {
3325
PrehR = F->PR.Reg;
3326
}
3327
}
3328
// isSameShuffle could match with PrehR being of a wider class than
3329
// G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
3330
// it would match for the input being a 32-bit register, and PrehR
3331
// being a 64-bit register (where the low 32 bits match). This could
3332
// be handled, but for now skip these cases.
3333
if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
3334
continue;
3335
moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
3336
Changed = true;
3337
}
3338
3339
return Changed;
3340
}
3341
3342
bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
3343
if (skipFunction(MF.getFunction()))
3344
return false;
3345
3346
auto &HST = MF.getSubtarget<HexagonSubtarget>();
3347
HII = HST.getInstrInfo();
3348
HRI = HST.getRegisterInfo();
3349
MRI = &MF.getRegInfo();
3350
const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
3351
BitTracker BT(HE, MF);
3352
LLVM_DEBUG(BT.trace(true));
3353
BT.run();
3354
BTP = &BT;
3355
3356
std::vector<LoopCand> Cand;
3357
3358
for (auto &B : MF) {
3359
if (B.pred_size() != 2 || B.succ_size() != 2)
3360
continue;
3361
MachineBasicBlock *PB = nullptr;
3362
bool IsLoop = false;
3363
for (MachineBasicBlock *Pred : B.predecessors()) {
3364
if (Pred != &B)
3365
PB = Pred;
3366
else
3367
IsLoop = true;
3368
}
3369
if (!IsLoop)
3370
continue;
3371
3372
MachineBasicBlock *EB = nullptr;
3373
for (MachineBasicBlock *Succ : B.successors()) {
3374
if (Succ == &B)
3375
continue;
3376
// Set EB to the epilog block, if it has only 1 predecessor (i.e. the
3377
// edge from B to EB is non-critical).
3378
if (Succ->pred_size() == 1)
3379
EB = Succ;
3380
break;
3381
}
3382
3383
Cand.push_back(LoopCand(&B, PB, EB));
3384
}
3385
3386
bool Changed = false;
3387
for (auto &C : Cand)
3388
Changed |= processLoop(C);
3389
3390
return Changed;
3391
}
3392
3393
//===----------------------------------------------------------------------===//
3394
// Public Constructor Functions
3395
//===----------------------------------------------------------------------===//
3396
3397
FunctionPass *llvm::createHexagonLoopRescheduling() {
3398
return new HexagonLoopRescheduling();
3399
}
3400
3401
FunctionPass *llvm::createHexagonBitSimplify() {
3402
return new HexagonBitSimplify();
3403
}
3404
3405