Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
35294 views
1
//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the pass that finds instructions that can be
10
// re-written as LEA instructions in order to reduce pipeline delays.
11
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "X86.h"
16
#include "X86InstrInfo.h"
17
#include "X86Subtarget.h"
18
#include "llvm/ADT/Statistic.h"
19
#include "llvm/Analysis/ProfileSummaryInfo.h"
20
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21
#include "llvm/CodeGen/MachineFunctionPass.h"
22
#include "llvm/CodeGen/MachineInstrBuilder.h"
23
#include "llvm/CodeGen/MachineSizeOpts.h"
24
#include "llvm/CodeGen/Passes.h"
25
#include "llvm/CodeGen/TargetSchedule.h"
26
#include "llvm/Support/Debug.h"
27
#include "llvm/Support/raw_ostream.h"
28
using namespace llvm;
29
30
#define FIXUPLEA_DESC "X86 LEA Fixup"
31
#define FIXUPLEA_NAME "x86-fixup-LEAs"
32
33
#define DEBUG_TYPE FIXUPLEA_NAME
34
35
STATISTIC(NumLEAs, "Number of LEA instructions created");
36
37
namespace {
38
class FixupLEAPass : public MachineFunctionPass {
39
enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40
41
/// Given a machine register, look for the instruction
42
/// which writes it in the current basic block. If found,
43
/// try to replace it with an equivalent LEA instruction.
44
/// If replacement succeeds, then also process the newly created
45
/// instruction.
46
void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
47
MachineBasicBlock &MBB);
48
49
/// Given a memory access or LEA instruction
50
/// whose address mode uses a base and/or index register, look for
51
/// an opportunity to replace the instruction which sets the base or index
52
/// register with an equivalent LEA instruction.
53
void processInstruction(MachineBasicBlock::iterator &I,
54
MachineBasicBlock &MBB);
55
56
/// Given a LEA instruction which is unprofitable
57
/// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58
void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
59
MachineBasicBlock &MBB);
60
61
/// Given a LEA instruction which is unprofitable
62
/// on SNB+ try to replace it with other instructions.
63
/// According to Intel's Optimization Reference Manual:
64
/// " For LEA instructions with three source operands and some specific
65
/// situations, instruction latency has increased to 3 cycles, and must
66
/// dispatch via port 1:
67
/// - LEA that has all three source operands: base, index, and offset
68
/// - LEA that uses base and index registers where the base is EBP, RBP,
69
/// or R13
70
/// - LEA that uses RIP relative addressing mode
71
/// - LEA that uses 16-bit addressing mode "
72
/// This function currently handles the first 2 cases only.
73
void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74
MachineBasicBlock &MBB, bool OptIncDec);
75
76
/// Look for LEAs that are really two address LEAs that we might be able to
77
/// turn into regular ADD instructions.
78
bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79
MachineBasicBlock &MBB, bool OptIncDec,
80
bool UseLEAForSP) const;
81
82
/// Look for and transform the sequence
83
/// lea (reg1, reg2), reg3
84
/// sub reg3, reg4
85
/// to
86
/// sub reg1, reg4
87
/// sub reg2, reg4
88
/// It can also optimize the sequence lea/add similarly.
89
bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90
91
/// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92
/// the dest register of LEA instruction I.
93
MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
94
MachineBasicBlock &MBB) const;
95
96
/// Check instructions between LeaI and AluI (exclusively).
97
/// Set BaseIndexDef to true if base or index register from LeaI is defined.
98
/// Set AluDestRef to true if the dest register of AluI is used or defined.
99
/// *KilledBase is set to the killed base register usage.
100
/// *KilledIndex is set to the killed index register usage.
101
void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102
MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103
bool &AluDestRef, MachineOperand **KilledBase,
104
MachineOperand **KilledIndex) const;
105
106
/// Determine if an instruction references a machine register
107
/// and, if so, whether it reads or writes the register.
108
RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109
110
/// Step backwards through a basic block, looking
111
/// for an instruction which writes a register within
112
/// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113
MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
114
MachineBasicBlock::iterator &I,
115
MachineBasicBlock &MBB);
116
117
/// if an instruction can be converted to an
118
/// equivalent LEA, insert the new instruction into the basic block
119
/// and return a pointer to it. Otherwise, return zero.
120
MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
121
MachineBasicBlock::iterator &MBBI) const;
122
123
public:
124
static char ID;
125
126
StringRef getPassName() const override { return FIXUPLEA_DESC; }
127
128
FixupLEAPass() : MachineFunctionPass(ID) { }
129
130
/// Loop over all of the basic blocks,
131
/// replacing instructions by equivalent LEA instructions
132
/// if needed and when possible.
133
bool runOnMachineFunction(MachineFunction &MF) override;
134
135
// This pass runs after regalloc and doesn't support VReg operands.
136
MachineFunctionProperties getRequiredProperties() const override {
137
return MachineFunctionProperties().set(
138
MachineFunctionProperties::Property::NoVRegs);
139
}
140
141
void getAnalysisUsage(AnalysisUsage &AU) const override {
142
AU.addRequired<ProfileSummaryInfoWrapperPass>();
143
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
144
MachineFunctionPass::getAnalysisUsage(AU);
145
}
146
147
private:
148
TargetSchedModel TSM;
149
const X86InstrInfo *TII = nullptr;
150
const X86RegisterInfo *TRI = nullptr;
151
};
152
}
153
154
char FixupLEAPass::ID = 0;
155
156
INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
157
158
/// Build an LEA equivalent to the instruction at \p MBBI, inserted in front
/// of it, and return the new instruction. Returns nullptr when the
/// instruction is not one of the forms handled here. The original
/// instruction is left in the block.
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  const unsigned Opc = MI.getOpcode();

  // Register-to-register moves become "lea (src), dest": base = src,
  // scale = 1, no index, zero displacement, no segment.
  if (Opc == X86::MOV32rr || Opc == X86::MOV64rr) {
    const MachineOperand &Dest = MI.getOperand(0);
    const MachineOperand &Src = MI.getOperand(1);
    const unsigned LEAOpc = Opc == X86::MOV32rr ? X86::LEA32r : X86::LEA64r;
    MachineInstr *LEA =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LEAOpc))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return LEA;
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (Opc) {
  case X86::ADD64ri32:
  case X86::ADD64ri32_DB:
  case X86::ADD32ri:
  case X86::ADD32ri_DB:
    // convertToThreeAddress will call getImm(), which requires isImm() to
    // be true, so reject non-immediate second operands up front.
    if (!MI.getOperand(2).isImm())
      return nullptr;
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // Always fine to convert.
    break;
  default:
    // Anything else has not been verified as safe to convert.
    return nullptr;
  }

  return TII->convertToThreeAddress(MI, nullptr, nullptr);
}
213
214
/// Factory for the LEA fixup pass; returns a newly allocated FixupLEAPass.
FunctionPass *llvm::createX86FixupLEAs() {
  return new FixupLEAPass();
}
215
216
static bool isLEA(unsigned Opcode) {
217
return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
218
Opcode == X86::LEA64_32r;
219
}
220
221
/// Pass entry point. For every basic block, first tries to remove or
/// cheapen existing LEAs, then (on subtargets where leaUsesAG() is set)
/// tries to create LEAs for address producers. Returns false only when the
/// function is skipped; otherwise it returns true unconditionally.
bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();

  // Subtarget tuning flags that select which rewrites are attempted.
  const bool IsSlowLEA = ST.slowLEA();
  const bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  const bool LEAUsesAG = ST.leaUsesAG();
  const bool UseLEAForSP = ST.useLeaForSP();

  // INC/DEC are allowed when they are not slow, or when optimising the
  // whole function for size.
  const bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();

  // Block frequency info is only requested when a profile summary exists.
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // A block may additionally opt in to INC/DEC when the profile says it
    // should be optimised for size.
    const bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);

    // First sweep: remove or optimise LEAs that are already present. The
    // helpers update the iterator when they replace the instruction.
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA) {
        processInstructionForSlowLEA(I, MBB);
      } else if (IsSlow3OpsLEA) {
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
      }
    }

    if (!LEAUsesAG)
      continue;

    // Second sweep: create LEAs. This may undo some of the rewrites made by
    // the first sweep.
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
      processInstruction(I, MBB);
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}
271
272
/// Classify how instruction \p I touches the register held by operand \p p:
/// RU_Write as soon as a defining operand for that register is seen,
/// RU_Read if only non-defining operands mention it, RU_NotUsed otherwise.
FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  bool SawRead = false;

  for (const MachineOperand &MO : I->operands()) {
    if (!MO.isReg() || MO.getReg() != p.getReg())
      continue;
    // A definition takes precedence over any read seen so far.
    if (MO.isDef())
      return RU_Write;
    SawRead = true;
  }

  return SawRead ? RU_Read : RU_NotUsed;
}
286
287
/// getPreviousInstr - Given a reference to an instruction in a basic
288
/// block, return a reference to the previous instruction in the block,
289
/// wrapping around to the last instruction of the block if the block
290
/// branches to itself.
291
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
292
MachineBasicBlock &MBB) {
293
if (I == MBB.begin()) {
294
if (MBB.isPredecessor(&MBB)) {
295
I = --MBB.end();
296
return true;
297
} else
298
return false;
299
}
300
--I;
301
return true;
302
}
303
304
/// Starting just before \p I, step backwards (wrapping in self-looping
/// blocks) until an instruction that writes the register of \p p is found.
/// The walk gives up at a call or inline asm, once the accumulated latency
/// exceeds INSTR_DISTANCE_THRESHOLD, or when it arrives back at \p I.
/// Returns the writer, or a default-constructed iterator if none was found.
MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  static const int INSTR_DISTANCE_THRESHOLD = 5;
  int InstrDistance = 1;

  MachineBasicBlock::iterator CurInst = I;
  for (bool Found = getPreviousInstr(CurInst, MBB); Found && I != CurInst;
       Found = getPreviousInstr(CurInst, MBB)) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    // Too far back for a rewrite to make a difference.
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break;
    if (usesRegister(p, CurInst) == RU_Write)
      return CurInst;
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
  }

  return MachineBasicBlock::iterator();
}
327
328
static inline bool isInefficientLEAReg(unsigned Reg) {
329
return Reg == X86::EBP || Reg == X86::RBP ||
330
Reg == X86::R13D || Reg == X86::R13;
331
}
332
333
/// Returns true if this LEA uses base and index registers, and the base
334
/// register is known to be inefficient for the subtarget.
335
// TODO: use a variant scheduling class to model the latency profile
336
// of LEA instructions, and implement this logic as a scheduling predicate.
337
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
338
const MachineOperand &Index) {
339
return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
340
Index.getReg() != X86::NoRegister;
341
}
342
343
static inline bool hasLEAOffset(const MachineOperand &Offset) {
344
return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal() ||
345
Offset.isBlockAddress();
346
}
347
348
static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
349
switch (LEAOpcode) {
350
default:
351
llvm_unreachable("Unexpected LEA instruction");
352
case X86::LEA32r:
353
case X86::LEA64_32r:
354
return X86::ADD32rr;
355
case X86::LEA64r:
356
return X86::ADD64rr;
357
}
358
}
359
360
static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
361
switch (LEAOpcode) {
362
default:
363
llvm_unreachable("Unexpected LEA instruction");
364
case X86::LEA32r:
365
case X86::LEA64_32r:
366
return X86::SUB32rr;
367
case X86::LEA64r:
368
return X86::SUB64rr;
369
}
370
}
371
372
static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
373
const MachineOperand &Offset) {
374
switch (LEAOpcode) {
375
default:
376
llvm_unreachable("Unexpected LEA instruction");
377
case X86::LEA32r:
378
case X86::LEA64_32r:
379
return X86::ADD32ri;
380
case X86::LEA64r:
381
return X86::ADD64ri32;
382
}
383
}
384
385
static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
386
switch (LEAOpcode) {
387
default:
388
llvm_unreachable("Unexpected LEA instruction");
389
case X86::LEA32r:
390
case X86::LEA64_32r:
391
return IsINC ? X86::INC32r : X86::DEC32r;
392
case X86::LEA64r:
393
return IsINC ? X86::INC64r : X86::DEC64r;
394
}
395
}
396
397
/// Step forwards from the LEA at \p I looking for the ADD/SUB (register-
/// register, same width as the LEA) that consumes the LEA's destination.
///
/// The search stops, returning a default-constructed iterator, when:
///  - a call or inline asm is reached, or more than InstrDistanceThreshold
///    instructions have been scanned, or the end of the block is hit;
///  - the LEA destination is redefined, or used without being killed;
///  - the user is not the matching ADD/SUB, the LEA destination is not one
///    of its two explicit source operands, or its other source operand is
///    not the same register as its destination;
///  - the user's EFLAGS definition is not dead;
///  - some operand merely overlaps (aliases) the LEA destination.
/// Otherwise the iterator of the matching ADD/SUB is returned.
MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                            MachineBasicBlock &MBB) const {
  const int InstrDistanceThreshold = 5;
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst = std::next(I);

  unsigned LEAOpcode = I->getOpcode();
  unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
  unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
  Register DestReg = I->getOperand(0).getReg();

  while (CurInst != MBB.end()) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > InstrDistanceThreshold)
      break;

    // Check if the lea dest register is used in an add/sub instruction only.
    // The index is deliberately not named 'I' so that it does not shadow the
    // iterator parameter.
    for (unsigned OpIdx = 0, E = CurInst->getNumOperands(); OpIdx != E;
         ++OpIdx) {
      MachineOperand &Opnd = CurInst->getOperand(OpIdx);
      if (!Opnd.isReg())
        continue;

      if (Opnd.getReg() == DestReg) {
        if (Opnd.isDef() || !Opnd.isKill())
          return MachineBasicBlock::iterator();

        unsigned AluOpcode = CurInst->getOpcode();
        if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
          return MachineBasicBlock::iterator();

        // The "other source" below is located as operand (3 - OpIdx), which
        // is only meaningful when the LEA result is explicit source operand
        // 1 or 2. For any other position (e.g. a trailing implicit use) the
        // unsigned subtraction would wrap and index out of range, so bail.
        if (OpIdx != 1 && OpIdx != 2)
          return MachineBasicBlock::iterator();

        const MachineOperand &Opnd2 = CurInst->getOperand(3 - OpIdx);
        const MachineOperand &AluDest = CurInst->getOperand(0);
        if (Opnd2.getReg() != AluDest.getReg())
          return MachineBasicBlock::iterator();

        // X - (Y + Z) may generate different flags than (X - Y) - Z when
        // there is overflow. So we can't change the alu instruction if the
        // flags register is live.
        if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
          return MachineBasicBlock::iterator();

        return CurInst;
      }

      // A sub- or super-register of the LEA destination is touched; give up.
      if (TRI->regsOverlap(DestReg, Opnd.getReg()))
        return MachineBasicBlock::iterator();
    }

    InstrDistance++;
    ++CurInst;
  }
  return MachineBasicBlock::iterator();
}
450
451
void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
452
MachineBasicBlock::iterator &AluI,
453
bool &BaseIndexDef, bool &AluDestRef,
454
MachineOperand **KilledBase,
455
MachineOperand **KilledIndex) const {
456
BaseIndexDef = AluDestRef = false;
457
*KilledBase = *KilledIndex = nullptr;
458
Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
459
Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
460
Register AluDestReg = AluI->getOperand(0).getReg();
461
462
for (MachineInstr &CurInst : llvm::make_range(std::next(LeaI), AluI)) {
463
for (MachineOperand &Opnd : CurInst.operands()) {
464
if (!Opnd.isReg())
465
continue;
466
Register Reg = Opnd.getReg();
467
if (TRI->regsOverlap(Reg, AluDestReg))
468
AluDestRef = true;
469
if (TRI->regsOverlap(Reg, BaseReg)) {
470
if (Opnd.isDef())
471
BaseIndexDef = true;
472
else if (Opnd.isKill())
473
*KilledBase = &Opnd;
474
}
475
if (TRI->regsOverlap(Reg, IndexReg)) {
476
if (Opnd.isDef())
477
BaseIndexDef = true;
478
else if (Opnd.isKill())
479
*KilledIndex = &Opnd;
480
}
481
}
482
}
483
}
484
485
bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
486
MachineBasicBlock &MBB) const {
487
// Look for an add/sub instruction which uses the result of lea.
488
MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
489
if (AluI == MachineBasicBlock::iterator())
490
return false;
491
492
// Check if there are any related register usage between lea and alu.
493
bool BaseIndexDef, AluDestRef;
494
MachineOperand *KilledBase, *KilledIndex;
495
checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
496
497
MachineBasicBlock::iterator InsertPos = AluI;
498
if (BaseIndexDef) {
499
if (AluDestRef)
500
return false;
501
InsertPos = I;
502
KilledBase = KilledIndex = nullptr;
503
}
504
505
// Check if there are same registers.
506
Register AluDestReg = AluI->getOperand(0).getReg();
507
Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
508
Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
509
if (I->getOpcode() == X86::LEA64_32r) {
510
BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
511
IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
512
}
513
if (AluDestReg == IndexReg) {
514
if (BaseReg == IndexReg)
515
return false;
516
std::swap(BaseReg, IndexReg);
517
std::swap(KilledBase, KilledIndex);
518
}
519
if (BaseReg == IndexReg)
520
KilledBase = nullptr;
521
522
// Now it's safe to change instructions.
523
MachineInstr *NewMI1, *NewMI2;
524
unsigned NewOpcode = AluI->getOpcode();
525
NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
526
AluDestReg)
527
.addReg(AluDestReg, RegState::Kill)
528
.addReg(BaseReg, KilledBase ? RegState::Kill : 0);
529
NewMI1->addRegisterDead(X86::EFLAGS, TRI);
530
NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
531
AluDestReg)
532
.addReg(AluDestReg, RegState::Kill)
533
.addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
534
NewMI2->addRegisterDead(X86::EFLAGS, TRI);
535
536
// Clear the old Kill flags.
537
if (KilledBase)
538
KilledBase->setIsKill(false);
539
if (KilledIndex)
540
KilledIndex->setIsKill(false);
541
542
MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
543
MBB.erase(I);
544
MBB.erase(AluI);
545
I = NewMI1;
546
return true;
547
}
548
549
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
550
MachineBasicBlock &MBB, bool OptIncDec,
551
bool UseLEAForSP) const {
552
MachineInstr &MI = *I;
553
554
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
555
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
556
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
557
const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
558
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
559
560
if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
561
MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
562
MachineBasicBlock::LQR_Dead)
563
return false;
564
565
Register DestReg = MI.getOperand(0).getReg();
566
Register BaseReg = Base.getReg();
567
Register IndexReg = Index.getReg();
568
569
// Don't change stack adjustment LEAs.
570
if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
571
return false;
572
573
// LEA64_32 has 64-bit operands but 32-bit result.
574
if (MI.getOpcode() == X86::LEA64_32r) {
575
if (BaseReg != 0)
576
BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
577
if (IndexReg != 0)
578
IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
579
}
580
581
MachineInstr *NewMI = nullptr;
582
583
// Case 1.
584
// Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
585
// which can be turned into add %reg2, %reg1
586
if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
587
(DestReg == BaseReg || DestReg == IndexReg)) {
588
unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
589
if (DestReg != BaseReg)
590
std::swap(BaseReg, IndexReg);
591
592
if (MI.getOpcode() == X86::LEA64_32r) {
593
// TODO: Do we need the super register implicit use?
594
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
595
.addReg(BaseReg).addReg(IndexReg)
596
.addReg(Base.getReg(), RegState::Implicit)
597
.addReg(Index.getReg(), RegState::Implicit);
598
} else {
599
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
600
.addReg(BaseReg).addReg(IndexReg);
601
}
602
} else if (DestReg == BaseReg && IndexReg == 0) {
603
// Case 2.
604
// This is an LEA with only a base register and a displacement,
605
// We can use ADDri or INC/DEC.
606
607
// Does this LEA have one these forms:
608
// lea %reg, 1(%reg)
609
// lea %reg, -1(%reg)
610
if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
611
bool IsINC = Disp.getImm() == 1;
612
unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
613
614
if (MI.getOpcode() == X86::LEA64_32r) {
615
// TODO: Do we need the super register implicit use?
616
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
617
.addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
618
} else {
619
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
620
.addReg(BaseReg);
621
}
622
} else {
623
unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
624
if (MI.getOpcode() == X86::LEA64_32r) {
625
// TODO: Do we need the super register implicit use?
626
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
627
.addReg(BaseReg).addImm(Disp.getImm())
628
.addReg(Base.getReg(), RegState::Implicit);
629
} else {
630
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
631
.addReg(BaseReg).addImm(Disp.getImm());
632
}
633
}
634
} else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
635
// Case 3.
636
// Look for and transform the sequence
637
// lea (reg1, reg2), reg3
638
// sub reg3, reg4
639
return optLEAALU(I, MBB);
640
} else
641
return false;
642
643
MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
644
MBB.erase(I);
645
I = NewMI;
646
return true;
647
}
648
649
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
650
MachineBasicBlock &MBB) {
651
// Process a load, store, or LEA instruction.
652
MachineInstr &MI = *I;
653
const MCInstrDesc &Desc = MI.getDesc();
654
int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
655
if (AddrOffset >= 0) {
656
AddrOffset += X86II::getOperandBias(Desc);
657
MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
658
if (p.isReg() && p.getReg() != X86::ESP) {
659
seekLEAFixup(p, I, MBB);
660
}
661
MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
662
if (q.isReg() && q.getReg() != X86::ESP) {
663
seekLEAFixup(q, I, MBB);
664
}
665
}
666
}
667
668
void FixupLEAPass::seekLEAFixup(MachineOperand &p,
669
MachineBasicBlock::iterator &I,
670
MachineBasicBlock &MBB) {
671
MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
672
if (MBI != MachineBasicBlock::iterator()) {
673
MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
674
if (NewMI) {
675
++NumLEAs;
676
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
677
// now to replace with an equivalent LEA...
678
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
679
MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
680
MBB.erase(MBI);
681
MachineBasicBlock::iterator J =
682
static_cast<MachineBasicBlock::iterator>(NewMI);
683
processInstruction(J, MBB);
684
}
685
}
686
}
687
688
void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
689
MachineBasicBlock &MBB) {
690
MachineInstr &MI = *I;
691
const unsigned Opcode = MI.getOpcode();
692
693
const MachineOperand &Dst = MI.getOperand(0);
694
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
695
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
696
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
697
const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
698
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
699
700
if (Segment.getReg() != 0 || !Offset.isImm() ||
701
MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
702
MachineBasicBlock::LQR_Dead)
703
return;
704
const Register DstR = Dst.getReg();
705
const Register SrcR1 = Base.getReg();
706
const Register SrcR2 = Index.getReg();
707
if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
708
return;
709
if (Scale.getImm() > 1)
710
return;
711
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
712
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
713
MachineInstr *NewMI = nullptr;
714
// Make ADD instruction for two registers writing to LEA's destination
715
if (SrcR1 != 0 && SrcR2 != 0) {
716
const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
717
const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
718
NewMI =
719
BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
720
LLVM_DEBUG(NewMI->dump(););
721
}
722
// Make ADD instruction for immediate
723
if (Offset.getImm() != 0) {
724
const MCInstrDesc &ADDri =
725
TII->get(getADDriFromLEA(Opcode, Offset));
726
const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
727
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
728
.add(SrcR)
729
.addImm(Offset.getImm());
730
LLVM_DEBUG(NewMI->dump(););
731
}
732
if (NewMI) {
733
MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
734
MBB.erase(I);
735
I = NewMI;
736
}
737
}
738
739
void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
740
MachineBasicBlock &MBB,
741
bool OptIncDec) {
742
MachineInstr &MI = *I;
743
const unsigned LEAOpcode = MI.getOpcode();
744
745
const MachineOperand &Dest = MI.getOperand(0);
746
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
747
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
748
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
749
const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
750
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
751
752
if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
753
MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
754
MachineBasicBlock::LQR_Dead ||
755
Segment.getReg() != X86::NoRegister)
756
return;
757
758
Register DestReg = Dest.getReg();
759
Register BaseReg = Base.getReg();
760
Register IndexReg = Index.getReg();
761
762
if (MI.getOpcode() == X86::LEA64_32r) {
763
if (BaseReg != 0)
764
BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
765
if (IndexReg != 0)
766
IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
767
}
768
769
bool IsScale1 = Scale.getImm() == 1;
770
bool IsInefficientBase = isInefficientLEAReg(BaseReg);
771
bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
772
773
// Skip these cases since it takes more than 2 instructions
774
// to replace the LEA instruction.
775
if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
776
return;
777
778
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
779
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
780
781
MachineInstr *NewMI = nullptr;
782
bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg;
783
// First try and remove the base while sticking with LEA iff base == index and
784
// scale == 1. We can handle:
785
// 1. lea D(%base,%index,1) -> lea D(,%index,2)
786
// 2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)
787
// Only do this if the LEA would otherwise be split into 2-instruction
788
// (either it has a an Offset or neither base nor index are dst)
789
if (IsScale1 && BaseReg == IndexReg &&
790
(hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) {
791
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
792
.add(Dest)
793
.addReg(0)
794
.addImm(2)
795
.add(Index)
796
.add(Offset)
797
.add(Segment);
798
LLVM_DEBUG(NewMI->dump(););
799
800
MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
801
MBB.erase(I);
802
I = NewMI;
803
return;
804
} else if (IsScale1 && BaseOrIndexIsDst) {
805
// Try to replace LEA with one or two (for the 3-op LEA case)
806
// add instructions:
807
// 1.lea (%base,%index,1), %base => add %index,%base
808
// 2.lea (%base,%index,1), %index => add %base,%index
809
810
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
811
if (DestReg != BaseReg)
812
std::swap(BaseReg, IndexReg);
813
814
if (MI.getOpcode() == X86::LEA64_32r) {
815
// TODO: Do we need the super register implicit use?
816
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
817
.addReg(BaseReg)
818
.addReg(IndexReg)
819
.addReg(Base.getReg(), RegState::Implicit)
820
.addReg(Index.getReg(), RegState::Implicit);
821
} else {
822
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
823
.addReg(BaseReg)
824
.addReg(IndexReg);
825
}
826
} else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
827
// If the base is inefficient try switching the index and base operands,
828
// otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
829
// lea offset(%base,%index,scale),%dst =>
830
// lea (%base,%index,scale); add offset,%dst
831
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
832
.add(Dest)
833
.add(IsInefficientBase ? Index : Base)
834
.add(Scale)
835
.add(IsInefficientBase ? Base : Index)
836
.addImm(0)
837
.add(Segment);
838
LLVM_DEBUG(NewMI->dump(););
839
}
840
841
// If either replacement succeeded above, add the offset if needed, then
842
// replace the instruction.
843
if (NewMI) {
844
// Create ADD instruction for the Offset in case of 3-Ops LEA.
845
if (hasLEAOffset(Offset)) {
846
if (OptIncDec && Offset.isImm() &&
847
(Offset.getImm() == 1 || Offset.getImm() == -1)) {
848
unsigned NewOpc =
849
getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
850
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
851
.addReg(DestReg);
852
LLVM_DEBUG(NewMI->dump(););
853
} else {
854
unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
855
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
856
.addReg(DestReg)
857
.add(Offset);
858
LLVM_DEBUG(NewMI->dump(););
859
}
860
}
861
862
MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
863
MBB.erase(I);
864
I = NewMI;
865
return;
866
}
867
868
// Handle the rest of the cases with inefficient base register:
869
assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
870
assert(IsInefficientBase && "efficient base should be handled already!");
871
872
// FIXME: Handle LEA64_32r.
873
if (LEAOpcode == X86::LEA64_32r)
874
return;
875
876
// lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
877
if (IsScale1 && !hasLEAOffset(Offset)) {
878
bool BIK = Base.isKill() && BaseReg != IndexReg;
879
TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
880
LLVM_DEBUG(MI.getPrevNode()->dump(););
881
882
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
883
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
884
.addReg(DestReg)
885
.add(Index);
886
LLVM_DEBUG(NewMI->dump(););
887
888
MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
889
MBB.erase(I);
890
I = NewMI;
891
return;
892
}
893
894
// lea offset(%base,%index,scale), %dst =>
895
// lea offset( ,%index,scale), %dst; add %base,%dst
896
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
897
.add(Dest)
898
.addReg(0)
899
.add(Scale)
900
.add(Index)
901
.add(Offset)
902
.add(Segment);
903
LLVM_DEBUG(NewMI->dump(););
904
905
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
906
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
907
.addReg(DestReg)
908
.add(Base);
909
LLVM_DEBUG(NewMI->dump(););
910
911
MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
912
MBB.erase(I);
913
I = NewMI;
914
}
915
916