Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
35266 views
1
//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains the PPC implementation of TargetFrameLowering class.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "PPCFrameLowering.h"
14
#include "MCTargetDesc/PPCPredicates.h"
15
#include "PPCInstrBuilder.h"
16
#include "PPCInstrInfo.h"
17
#include "PPCMachineFunctionInfo.h"
18
#include "PPCSubtarget.h"
19
#include "PPCTargetMachine.h"
20
#include "llvm/ADT/Statistic.h"
21
#include "llvm/CodeGen/LivePhysRegs.h"
22
#include "llvm/CodeGen/MachineFrameInfo.h"
23
#include "llvm/CodeGen/MachineFunction.h"
24
#include "llvm/CodeGen/MachineInstrBuilder.h"
25
#include "llvm/CodeGen/MachineModuleInfo.h"
26
#include "llvm/CodeGen/MachineRegisterInfo.h"
27
#include "llvm/CodeGen/RegisterScavenging.h"
28
#include "llvm/IR/Function.h"
29
#include "llvm/Target/TargetOptions.h"
30
31
using namespace llvm;
32
33
#define DEBUG_TYPE "framelowering"
34
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36
STATISTIC(NumPrologProbed, "Number of prologues probed");
37
38
// Hidden boolean command-line option, off by default. Per its description it
// enables prologue spills to vector registers.
static cl::opt<bool> EnablePEVectorSpills(
    "ppc-enable-pe-vector-spills", cl::Hidden, cl::init(false),
    cl::desc("Enable spills in prologue to vector registers."));
42
43
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44
if (STI.isAIXABI())
45
return STI.isPPC64() ? 16 : 8;
46
// SVR4 ABI:
47
return STI.isPPC64() ? 16 : 4;
48
}
49
50
static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51
if (STI.isAIXABI())
52
return STI.isPPC64() ? 40 : 20;
53
return STI.isELFv2ABI() ? 24 : 40;
54
}
55
56
static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57
// First slot in the general register save area.
58
return STI.isPPC64() ? -8U : -4U;
59
}
60
61
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62
if (STI.isAIXABI() || STI.isPPC64())
63
return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64
65
// 32-bit SVR4 ABI:
66
return 8;
67
}
68
69
static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70
// Third slot in the general purpose register save area.
71
if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72
return -12U;
73
74
// Second slot in the general purpose register save area.
75
return STI.isPPC64() ? -16U : -8U;
76
}
77
78
static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79
return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80
}
81
82
// Builds the frame lowering for the given subtarget: the stack grows down,
// uses the platform stack alignment, and has a local area offset of 0. All
// ABI-dependent save offsets and the linkage size are computed once here.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI),
      ReturnSaveOffset(computeReturnSaveOffset(STI)),
      TOCSaveOffset(computeTOCSaveOffset(STI)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(STI)),
      LinkageSize(computeLinkageSize(STI)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(STI)),
      CRSaveOffset(computeCRSaveOffset(STI)) {}
91
92
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
93
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// Floating-point register save area offsets (F31 down to F14, 8 bytes apart).
#define CALLEE_SAVED_FPRS                                                      \
  {PPC::F31, -8},   {PPC::F30, -16},  {PPC::F29, -24},  {PPC::F28, -32},       \
  {PPC::F27, -40},  {PPC::F26, -48},  {PPC::F25, -56},  {PPC::F24, -64},       \
  {PPC::F23, -72},  {PPC::F22, -80},  {PPC::F21, -88},  {PPC::F20, -96},       \
  {PPC::F19, -104}, {PPC::F18, -112}, {PPC::F17, -120}, {PPC::F16, -128},      \
  {PPC::F15, -136}, {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32                                                    \
  {PPC::R31, -4},  {PPC::R30, -8},  {PPC::R29, -12}, {PPC::R28, -16},          \
  {PPC::R27, -20}, {PPC::R26, -24}, {PPC::R25, -28}, {PPC::R24, -32},          \
  {PPC::R23, -36}, {PPC::R22, -40}, {PPC::R21, -44}, {PPC::R20, -48},          \
  {PPC::R19, -52}, {PPC::R18, -56}, {PPC::R17, -60}, {PPC::R16, -64},          \
  {PPC::R15, -68}, {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64                                                    \
  {PPC::X31, -8},   {PPC::X30, -16},  {PPC::X29, -24},  {PPC::X28, -32},       \
  {PPC::X27, -40},  {PPC::X26, -48},  {PPC::X25, -56},  {PPC::X24, -64},       \
  {PPC::X23, -72},  {PPC::X22, -80},  {PPC::X21, -88},  {PPC::X20, -96},       \
  {PPC::X19, -104}, {PPC::X18, -112}, {PPC::X17, -120}, {PPC::X16, -128},      \
  {PPC::X15, -136}, {PPC::X14, -144}

// Vector register save area offsets (V31 down to V20, 16 bytes apart).
#define CALLEE_SAVED_VRS                                                       \
  {PPC::V31, -16},  {PPC::V30, -32},  {PPC::V29, -48},  {PPC::V28, -64},       \
  {PPC::V27, -80},  {PPC::V26, -96},  {PPC::V25, -112}, {PPC::V24, -128},      \
  {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, -176}, {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  // 32-bit ELF table.
  static const SpillSlot ELF32Slots[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},   {PPC::S30, -16},  {PPC::S29, -24},  {PPC::S28, -32},
      {PPC::S27, -40},  {PPC::S26, -48},  {PPC::S25, -56},  {PPC::S24, -64},
      {PPC::S23, -72},  {PPC::S22, -80},  {PPC::S21, -88},  {PPC::S20, -96},
      {PPC::S19, -104}, {PPC::S18, -112}, {PPC::S17, -120}, {PPC::S16, -128},
      {PPC::S15, -136}, {PPC::S14, -144}};

  // 64-bit ELF table.
  static const SpillSlot ELF64Slots[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS};

  // 32-bit AIX table.
  static const SpillSlot AIX32Slots[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,
      // Add AIX's extra CSR.
      {PPC::R13, -76},
      CALLEE_SAVED_VRS};

  // 64-bit AIX table.
  static const SpillSlot AIX64Slots[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  // Pick the table for the current ABI; NumEntries receives its length.
  const SpillSlot *Table = nullptr;
  if (Subtarget.is64BitELFABI()) {
    Table = ELF64Slots;
    NumEntries = std::size(ELF64Slots);
  } else if (Subtarget.is32BitELFABI()) {
    Table = ELF32Slots;
    NumEntries = std::size(ELF32Slots);
  } else {
    // Anything that is not ELF must be AIX.
    assert(Subtarget.isAIXABI() && "Unexpected ABI.");

    if (Subtarget.isPPC64()) {
      Table = AIX64Slots;
      NumEntries = std::size(AIX64Slots);
    } else {
      Table = AIX32Slots;
      NumEntries = std::size(AIX32Slots);
    }
  }
  return Table;
}
251
252
static bool spillsCR(const MachineFunction &MF) {
253
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254
return FuncInfo->isCRSpilled();
255
}
256
257
static bool hasSpills(const MachineFunction &MF) {
258
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259
return FuncInfo->hasSpills();
260
}
261
262
static bool hasNonRISpills(const MachineFunction &MF) {
263
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264
return FuncInfo->hasNonRISpills();
265
}
266
267
/// MustSaveLR - Return true if this function requires that we save the LR
268
/// register onto the stack in the prolog and restore it in the epilog of the
269
/// function.
270
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271
const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272
273
// We need a save/restore of LR if there is any def of LR (which is
274
// defined by calls, including the PIC setup sequence), or if there is
275
// some use of the LR stack slot (e.g. for builtin_return_address).
276
// (LR comes in 32 and 64 bit versions.)
277
MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279
}
280
281
/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282
/// call frame size. Update the MachineFunction object with the stack size.
283
uint64_t
284
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285
bool UseEstimate) const {
286
unsigned NewMaxCallFrameSize = 0;
287
uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288
&NewMaxCallFrameSize);
289
MF.getFrameInfo().setStackSize(FrameSize);
290
MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291
return FrameSize;
292
}
293
294
/// determineFrameLayout - Determine the size of the frame and maximum call
295
/// frame size.
296
uint64_t
297
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298
bool UseEstimate,
299
unsigned *NewMaxCallFrameSize) const {
300
const MachineFrameInfo &MFI = MF.getFrameInfo();
301
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302
303
// Get the number of bytes to allocate from the FrameInfo
304
uint64_t FrameSize =
305
UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306
307
// Get stack alignments. The frame must be aligned to the greatest of these:
308
Align TargetAlign = getStackAlign(); // alignment required per the ABI
309
Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame
310
Align Alignment = std::max(TargetAlign, MaxAlign);
311
312
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313
314
unsigned LR = RegInfo->getRARegister();
315
bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316
bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317
!MFI.adjustsStack() && // No calls.
318
!MustSaveLR(MF, LR) && // No need to save LR.
319
!FI->mustSaveTOC() && // No need to save TOC.
320
!RegInfo->hasBasePointer(MF) && // No special alignment.
321
!MFI.isFrameAddressTaken();
322
323
// Note: for PPC32 SVR4ABI, we can still generate stackless
324
// code if all local vars are reg-allocated.
325
bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
326
327
// Check whether we can skip adjusting the stack pointer (by using red zone)
328
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
329
// No need for frame
330
return 0;
331
}
332
333
// Get the maximum call frame size of all the calls.
334
unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
335
336
// Maximum call frame needs to be at least big enough for linkage area.
337
unsigned minCallFrameSize = getLinkageSize();
338
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
339
340
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
341
// that allocations will be aligned.
342
if (MFI.hasVarSizedObjects())
343
maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
344
345
// Update the new max call frame size if the caller passes in a valid pointer.
346
if (NewMaxCallFrameSize)
347
*NewMaxCallFrameSize = maxCallFrameSize;
348
349
// Include call frame size in total.
350
FrameSize += maxCallFrameSize;
351
352
// Make sure the frame is aligned.
353
FrameSize = alignTo(FrameSize, Alignment);
354
355
return FrameSize;
356
}
357
358
// hasFP - Return true if the specified function actually has a dedicated frame
359
// pointer register.
360
bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
361
const MachineFrameInfo &MFI = MF.getFrameInfo();
362
// FIXME: This is pretty much broken by design: hasFP() might be called really
363
// early, before the stack layout was calculated and thus hasFP() might return
364
// true or false here depending on the time of call.
365
return (MFI.getStackSize()) && needsFP(MF);
366
}
367
368
// needsFP - Return true if the specified function should have a dedicated frame
369
// pointer register. This is true if the function has variable sized allocas or
370
// if frame pointer elimination is disabled.
371
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
372
const MachineFrameInfo &MFI = MF.getFrameInfo();
373
374
// Naked functions have no stack frame pushed, so we don't have a frame
375
// pointer.
376
if (MF.getFunction().hasFnAttribute(Attribute::Naked))
377
return false;
378
379
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
380
MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
381
MF.exposesReturnsTwice() ||
382
(MF.getTarget().Options.GuaranteedTailCallOpt &&
383
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
384
}
385
386
// Rewrites every register operand that names one of the placeholder
// registers PPC::FP, PPC::FP8, PPC::BP or PPC::BP8 to the concrete register
// chosen for this function.
void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
  // When there is dynamic alloca in this function, we can not use the frame
  // pointer X31/R31 for the frameaddress lowering. In this case, only X1/R1
  // always points to the backchain.
  const bool UseR31 = needsFP(MF) && !MF.getFrameInfo().hasVarSizedObjects();
  const unsigned FPReg = UseR31 ? PPC::R31 : PPC::R1;
  const unsigned FP8Reg = UseR31 ? PPC::X31 : PPC::X1;

  // Without a base pointer the BP placeholders fall back to the FP choice.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  unsigned BPReg = FPReg;
  unsigned BP8Reg = FP8Reg;
  if (RegInfo->hasBasePointer(MF)) {
    BPReg = static_cast<unsigned>(RegInfo->getBaseRegister(MF));
    BP8Reg = static_cast<unsigned>(PPC::X30);
  }

  // Visit the instructions of each block from last to first and patch their
  // operands in place.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : llvm::reverse(MBB)) {
      for (MachineOperand &MO : MI.operands()) {
        if (!MO.isReg())
          continue;

        switch (MO.getReg()) {
        case PPC::FP:
          MO.setReg(FPReg);
          break;
        case PPC::FP8:
          MO.setReg(FP8Reg);
          break;
        case PPC::BP:
          MO.setReg(BPReg);
          break;
        case PPC::BP8:
          MO.setReg(BP8Reg);
          break;
        default:
          // Any other register is left untouched.
          break;
        }
      }
    }
  }
}
424
425
/* This function will do the following:
426
- If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
427
respectively (defaults recommended by the ABI) and return true
428
- If MBB is not an entry or exit block, initialize the register scavenger and look
429
for available registers.
430
- If the defaults (R0/R12) are available, return true
431
- If TwoUniqueRegsRequired is set to true, it looks for two unique
432
registers. Otherwise, look for a single available register.
433
- If the required registers are found, set SR1 and SR2 and return true.
434
- If the required registers are not found, set SR2 or both SR1 and SR2 to
435
PPC::NoRegister and return false.
436
437
Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
438
is not set, this function will attempt to find two different registers, but
439
still return true if only one register is available (and set SR1 == SR2).
440
*/
441
bool
442
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
443
bool UseAtEnd,
444
bool TwoUniqueRegsRequired,
445
Register *SR1,
446
Register *SR2) const {
447
RegScavenger RS;
448
Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
449
Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
450
451
// Set the defaults for the two scratch registers.
452
if (SR1)
453
*SR1 = R0;
454
455
if (SR2) {
456
assert (SR1 && "Asking for the second scratch register but not the first?");
457
*SR2 = R12;
458
}
459
460
// If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
461
if ((UseAtEnd && MBB->isReturnBlock()) ||
462
(!UseAtEnd && (&MBB->getParent()->front() == MBB)))
463
return true;
464
465
if (UseAtEnd) {
466
// The scratch register will be used before the first terminator (or at the
467
// end of the block if there are no terminators).
468
MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
469
if (MBBI == MBB->begin()) {
470
RS.enterBasicBlock(*MBB);
471
} else {
472
RS.enterBasicBlockEnd(*MBB);
473
RS.backward(MBBI);
474
}
475
} else {
476
// The scratch register will be used at the start of the block.
477
RS.enterBasicBlock(*MBB);
478
}
479
480
// If the two registers are available, we're all good.
481
// Note that we only return here if both R0 and R12 are available because
482
// although the function may not require two unique registers, it may benefit
483
// from having two so we should try to provide them.
484
if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
485
return true;
486
487
// Get the list of callee-saved registers for the target.
488
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
489
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
490
491
// Get all the available registers in the block.
492
BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
493
&PPC::GPRCRegClass);
494
495
// We shouldn't use callee-saved registers as scratch registers as they may be
496
// available when looking for a candidate block for shrink wrapping but not
497
// available when the actual prologue/epilogue is being emitted because they
498
// were added as live-in to the prologue block by PrologueEpilogueInserter.
499
for (int i = 0; CSRegs[i]; ++i)
500
BV.reset(CSRegs[i]);
501
502
// Set the first scratch register to the first available one.
503
if (SR1) {
504
int FirstScratchReg = BV.find_first();
505
*SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
506
}
507
508
// If there is another one available, set the second scratch register to that.
509
// Otherwise, set it to either PPC::NoRegister if this function requires two
510
// or to whatever SR1 is set to if this function doesn't require two.
511
if (SR2) {
512
int SecondScratchReg = BV.find_next(*SR1);
513
if (SecondScratchReg != -1)
514
*SR2 = SecondScratchReg;
515
else
516
*SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
517
}
518
519
// Now that we've done our best to provide both registers, double check
520
// whether we were unable to provide enough.
521
if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
522
return false;
523
524
return true;
525
}
526
527
// We need a scratch register for spilling LR and for spilling CR. By default,
528
// we use two scratch registers to hide latency. However, if only one scratch
529
// register is available, we can adjust for that by not overlapping the spill
530
// code. However, if we need to realign the stack (i.e. have a base pointer)
531
// and the stack frame is large, we need two scratch registers.
532
// Also, stack probe requires two scratch registers, one for old sp, one for
533
// large frame and large probe size.
534
bool
535
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
536
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
537
MachineFunction &MF = *(MBB->getParent());
538
bool HasBP = RegInfo->hasBasePointer(MF);
539
unsigned FrameSize = determineFrameLayout(MF);
540
int NegFrameSize = -FrameSize;
541
bool IsLargeFrame = !isInt<16>(NegFrameSize);
542
MachineFrameInfo &MFI = MF.getFrameInfo();
543
Align MaxAlign = MFI.getMaxAlign();
544
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
545
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
546
547
return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
548
TLI.hasInlineStackProbe(MF);
549
}
550
551
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
552
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
553
554
return findScratchRegister(TmpMBB, false,
555
twoUniqueScratchRegsRequired(TmpMBB));
556
}
557
558
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
559
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
560
561
return findScratchRegister(TmpMBB, true);
562
}
563
564
// Returns true when the stack pointer update in the prologue may be moved
// down past the callee saves for this function.
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  //
  // The stack size is kept as a 64-bit value: narrowing it to 32 bits would
  // let a frame larger than 4GiB wrap to a small value and wrongly pass the
  // red zone check.
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const uint64_t FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult. Similar situation exists with setjmp.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}
607
608
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
609
MachineBasicBlock &MBB) const {
610
MachineBasicBlock::iterator MBBI = MBB.begin();
611
MachineFrameInfo &MFI = MF.getFrameInfo();
612
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
613
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
614
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
615
616
const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
617
DebugLoc dl;
618
// AIX assembler does not support cfi directives.
619
const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
620
621
const bool HasFastMFLR = Subtarget.hasFastMFLR();
622
623
// Get processor type.
624
bool isPPC64 = Subtarget.isPPC64();
625
// Get the ABI.
626
bool isSVR4ABI = Subtarget.isSVR4ABI();
627
bool isELFv2ABI = Subtarget.isELFv2ABI();
628
assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
629
630
// Work out frame sizes.
631
uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
632
int64_t NegFrameSize = -FrameSize;
633
if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
634
llvm_unreachable("Unhandled stack size!");
635
636
if (MFI.isFrameAddressTaken())
637
replaceFPWithRealFP(MF);
638
639
// Check if the link register (LR) must be saved.
640
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
641
bool MustSaveLR = FI->mustSaveLR();
642
bool MustSaveTOC = FI->mustSaveTOC();
643
const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
644
bool MustSaveCR = !MustSaveCRs.empty();
645
// Do we have a frame pointer and/or base pointer for this function?
646
bool HasFP = hasFP(MF);
647
bool HasBP = RegInfo->hasBasePointer(MF);
648
bool HasRedZone = isPPC64 || !isSVR4ABI;
649
bool HasROPProtect = Subtarget.hasROPProtect();
650
bool HasPrivileged = Subtarget.hasPrivileged();
651
652
Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
653
Register BPReg = RegInfo->getBaseRegister(MF);
654
Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
655
Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
656
Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
657
Register ScratchReg;
658
Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
659
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
660
const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
661
: PPC::MFLR );
662
const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
663
: PPC::STW );
664
const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
665
: PPC::STWU );
666
const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
667
: PPC::STWUX);
668
const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
669
: PPC::OR );
670
const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
671
: PPC::SUBFC);
672
const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
673
: PPC::SUBFIC);
674
const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
675
: PPC::MFCR);
676
const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
677
const MCInstrDesc &HashST =
678
TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
679
: (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
680
681
// Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
682
// LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
683
// Red Zone, an asynchronous event (a form of "callee") could claim a frame &
684
// overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
685
assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
686
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
687
688
// Using the same bool variable as below to suppress compiler warnings.
689
bool SingleScratchReg = findScratchRegister(
690
&MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
691
assert(SingleScratchReg &&
692
"Required number of registers not available in this block");
693
694
SingleScratchReg = ScratchReg == TempReg;
695
696
int64_t LROffset = getReturnSaveOffset();
697
698
int64_t FPOffset = 0;
699
if (HasFP) {
700
MachineFrameInfo &MFI = MF.getFrameInfo();
701
int FPIndex = FI->getFramePointerSaveIndex();
702
assert(FPIndex && "No Frame Pointer Save Slot!");
703
FPOffset = MFI.getObjectOffset(FPIndex);
704
}
705
706
int64_t BPOffset = 0;
707
if (HasBP) {
708
MachineFrameInfo &MFI = MF.getFrameInfo();
709
int BPIndex = FI->getBasePointerSaveIndex();
710
assert(BPIndex && "No Base Pointer Save Slot!");
711
BPOffset = MFI.getObjectOffset(BPIndex);
712
}
713
714
int64_t PBPOffset = 0;
715
if (FI->usesPICBase()) {
716
MachineFrameInfo &MFI = MF.getFrameInfo();
717
int PBPIndex = FI->getPICBasePointerSaveIndex();
718
assert(PBPIndex && "No PIC Base Pointer Save Slot!");
719
PBPOffset = MFI.getObjectOffset(PBPIndex);
720
}
721
722
// Get stack alignments.
723
Align MaxAlign = MFI.getMaxAlign();
724
if (HasBP && MaxAlign > 1)
725
assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
726
727
// Frames of 32KB & larger require special handling because they cannot be
728
// indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
729
bool isLargeFrame = !isInt<16>(NegFrameSize);
730
731
// Check if we can move the stack update instruction (stdu) down the prologue
732
// past the callee saves. Hopefully this will avoid the situation where the
733
// saves are waiting for the update on the store with update to complete.
734
MachineBasicBlock::iterator StackUpdateLoc = MBBI;
735
bool MovingStackUpdateDown = false;
736
737
// Check if we can move the stack update.
738
if (stackUpdateCanBeMoved(MF)) {
739
const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
740
for (CalleeSavedInfo CSI : Info) {
741
// If the callee saved register is spilled to a register instead of the
742
// stack then the spill no longer uses the stack pointer.
743
// This can lead to two consequences:
744
// 1) We no longer need to update the stack because the function does not
745
// spill any callee saved registers to stack.
746
// 2) We have a situation where we still have to update the stack pointer
747
// even though some registers are spilled to other registers. In
748
// this case the current code moves the stack update to an incorrect
749
// position.
750
// In either case we should abort moving the stack update operation.
751
if (CSI.isSpilledToReg()) {
752
StackUpdateLoc = MBBI;
753
MovingStackUpdateDown = false;
754
break;
755
}
756
757
int FrIdx = CSI.getFrameIdx();
758
// If the frame index is not negative the callee saved info belongs to a
759
// stack object that is not a fixed stack object. We ignore non-fixed
760
// stack objects because we won't move the stack update pointer past them.
761
if (FrIdx >= 0)
762
continue;
763
764
if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
765
StackUpdateLoc++;
766
MovingStackUpdateDown = true;
767
} else {
768
// We need all of the Frame Indices to meet these conditions.
769
// If they do not, abort the whole operation.
770
StackUpdateLoc = MBBI;
771
MovingStackUpdateDown = false;
772
break;
773
}
774
}
775
776
// If the operation was not aborted then update the object offset.
777
if (MovingStackUpdateDown) {
778
for (CalleeSavedInfo CSI : Info) {
779
int FrIdx = CSI.getFrameIdx();
780
if (FrIdx < 0)
781
MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
782
}
783
}
784
}
785
786
// Where in the prologue we move the CR fields depends on how many scratch
787
// registers we have, and if we need to save the link register or not. This
788
// lambda is to avoid duplicating the logic in 2 places.
789
auto BuildMoveFromCR = [&]() {
790
if (isELFv2ABI && MustSaveCRs.size() == 1) {
791
// In the ELFv2 ABI, we are not required to save all CR fields.
792
// If only one CR field is clobbered, it is more efficient to use
793
// mfocrf to selectively save just that field, because mfocrf has short
794
// latency compares to mfcr.
795
assert(isPPC64 && "V2 ABI is 64-bit only.");
796
MachineInstrBuilder MIB =
797
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
798
MIB.addReg(MustSaveCRs[0], RegState::Kill);
799
} else {
800
MachineInstrBuilder MIB =
801
BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
802
for (unsigned CRfield : MustSaveCRs)
803
MIB.addReg(CRfield, RegState::ImplicitKill);
804
}
805
};
806
807
// If we need to spill the CR and the LR but we don't have two separate
808
// registers available, we must spill them one at a time
809
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
810
BuildMoveFromCR();
811
BuildMI(MBB, MBBI, dl, StoreWordInst)
812
.addReg(TempReg, getKillRegState(true))
813
.addImm(CRSaveOffset)
814
.addReg(SPReg);
815
}
816
817
if (MustSaveLR)
818
BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
819
820
if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
821
BuildMoveFromCR();
822
823
if (HasRedZone) {
824
if (HasFP)
825
BuildMI(MBB, MBBI, dl, StoreInst)
826
.addReg(FPReg)
827
.addImm(FPOffset)
828
.addReg(SPReg);
829
if (FI->usesPICBase())
830
BuildMI(MBB, MBBI, dl, StoreInst)
831
.addReg(PPC::R30)
832
.addImm(PBPOffset)
833
.addReg(SPReg);
834
if (HasBP)
835
BuildMI(MBB, MBBI, dl, StoreInst)
836
.addReg(BPReg)
837
.addImm(BPOffset)
838
.addReg(SPReg);
839
}
840
841
// Generate the instruction to store the LR. In the case where ROP protection
842
// is required the register holding the LR should not be killed as it will be
843
// used by the hash store instruction.
844
auto SaveLR = [&](int64_t Offset) {
845
assert(MustSaveLR && "LR is not required to be saved!");
846
BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
847
.addReg(ScratchReg, getKillRegState(!HasROPProtect))
848
.addImm(Offset)
849
.addReg(SPReg);
850
851
// Add the ROP protection Hash Store instruction.
852
// NOTE: This is technically a violation of the ABI. The hash can be saved
853
// up to 512 bytes into the Protected Zone. This can be outside of the
854
// initial 288 byte volatile program storage region in the Protected Zone.
855
// However, this restriction will be removed in an upcoming revision of the
856
// ABI.
857
if (HasROPProtect) {
858
const int SaveIndex = FI->getROPProtectionHashSaveIndex();
859
const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
860
assert((ImmOffset <= -8 && ImmOffset >= -512) &&
861
"ROP hash save offset out of range.");
862
assert(((ImmOffset & 0x7) == 0) &&
863
"ROP hash save offset must be 8 byte aligned.");
864
BuildMI(MBB, StackUpdateLoc, dl, HashST)
865
.addReg(ScratchReg, getKillRegState(true))
866
.addImm(ImmOffset)
867
.addReg(SPReg);
868
}
869
};
870
871
if (MustSaveLR && HasFastMFLR)
872
SaveLR(LROffset);
873
874
if (MustSaveCR &&
875
!(SingleScratchReg && MustSaveLR)) {
876
assert(HasRedZone && "A red zone is always available on PPC64");
877
BuildMI(MBB, MBBI, dl, StoreWordInst)
878
.addReg(TempReg, getKillRegState(true))
879
.addImm(CRSaveOffset)
880
.addReg(SPReg);
881
}
882
883
// Skip the rest if this is a leaf function & all spills fit in the Red Zone.
884
if (!FrameSize) {
885
if (MustSaveLR && !HasFastMFLR)
886
SaveLR(LROffset);
887
return;
888
}
889
890
// Adjust stack pointer: r1 += NegFrameSize.
891
// If there is a preferred stack alignment, align R1 now
892
893
if (HasBP && HasRedZone) {
894
// Save a copy of r1 as the base pointer.
895
BuildMI(MBB, MBBI, dl, OrInst, BPReg)
896
.addReg(SPReg)
897
.addReg(SPReg);
898
}
899
900
// Have we generated a STUX instruction to claim stack frame? If so,
901
// the negated frame size will be placed in ScratchReg.
902
bool HasSTUX =
903
(TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
904
(HasBP && MaxAlign > 1) || isLargeFrame;
905
906
// If we use STUX to update the stack pointer, we need the two scratch
907
// registers TempReg and ScratchReg, we have to save LR here which is stored
908
// in ScratchReg.
909
// If the offset can not be encoded into the store instruction, we also have
910
// to save LR here.
911
if (MustSaveLR && !HasFastMFLR &&
912
(HasSTUX || !isInt<16>(FrameSize + LROffset)))
913
SaveLR(LROffset);
914
915
// If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
916
// pointer is always stored at SP, we will get a free probe due to an essential
917
// STU(X) instruction.
918
if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
919
// To be consistent with other targets, a pseudo instruction is emitted and
920
// will be later expanded in `inlineStackProbe`.
921
BuildMI(MBB, MBBI, dl,
922
TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
923
: PPC::PROBED_STACKALLOC_32))
924
.addDef(TempReg)
925
.addDef(ScratchReg) // ScratchReg stores the old sp.
926
.addImm(NegFrameSize);
927
// FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
928
// update the ScratchReg to meet the assumption that ScratchReg contains
929
// the NegFrameSize. This solution is rather tricky.
930
if (!HasRedZone) {
931
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
932
.addReg(ScratchReg)
933
.addReg(SPReg);
934
}
935
} else {
936
// This condition must be kept in sync with canUseAsPrologue.
937
if (HasBP && MaxAlign > 1) {
938
if (isPPC64)
939
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
940
.addReg(SPReg)
941
.addImm(0)
942
.addImm(64 - Log2(MaxAlign));
943
else // PPC32...
944
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
945
.addReg(SPReg)
946
.addImm(0)
947
.addImm(32 - Log2(MaxAlign))
948
.addImm(31);
949
if (!isLargeFrame) {
950
BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
951
.addReg(ScratchReg, RegState::Kill)
952
.addImm(NegFrameSize);
953
} else {
954
assert(!SingleScratchReg && "Only a single scratch reg available");
955
TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
956
BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
957
.addReg(ScratchReg, RegState::Kill)
958
.addReg(TempReg, RegState::Kill);
959
}
960
961
BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
962
.addReg(SPReg, RegState::Kill)
963
.addReg(SPReg)
964
.addReg(ScratchReg);
965
} else if (!isLargeFrame) {
966
BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
967
.addReg(SPReg)
968
.addImm(NegFrameSize)
969
.addReg(SPReg);
970
} else {
971
TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
972
BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
973
.addReg(SPReg, RegState::Kill)
974
.addReg(SPReg)
975
.addReg(ScratchReg);
976
}
977
}
978
979
// Save the TOC register after the stack pointer update if a prologue TOC
980
// save is required for the function.
981
if (MustSaveTOC) {
982
assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
983
BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
984
.addReg(TOCReg, getKillRegState(true))
985
.addImm(TOCSaveOffset)
986
.addReg(SPReg);
987
}
988
989
if (!HasRedZone) {
990
assert(!isPPC64 && "A red zone is always available on PPC64");
991
if (HasSTUX) {
992
// The negated frame size is in ScratchReg, and the SPReg has been
993
// decremented by the frame size: SPReg = old SPReg + ScratchReg.
994
// Since FPOffset, PBPOffset, etc. are relative to the beginning of
995
// the stack frame (i.e. the old SP), ideally, we would put the old
996
// SP into a register and use it as the base for the stores. The
997
// problem is that the only available register may be ScratchReg,
998
// which could be R0, and R0 cannot be used as a base address.
999
1000
// First, set ScratchReg to the old SP. This may need to be modified
1001
// later.
1002
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1003
.addReg(ScratchReg, RegState::Kill)
1004
.addReg(SPReg);
1005
1006
if (ScratchReg == PPC::R0) {
1007
// R0 cannot be used as a base register, but it can be used as an
1008
// index in a store-indexed.
1009
int LastOffset = 0;
1010
if (HasFP) {
1011
// R0 += (FPOffset-LastOffset).
1012
// Need addic, since addi treats R0 as 0.
1013
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1014
.addReg(ScratchReg)
1015
.addImm(FPOffset-LastOffset);
1016
LastOffset = FPOffset;
1017
// Store FP into *R0.
1018
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1019
.addReg(FPReg, RegState::Kill) // Save FP.
1020
.addReg(PPC::ZERO)
1021
.addReg(ScratchReg); // This will be the index (R0 is ok here).
1022
}
1023
if (FI->usesPICBase()) {
1024
// R0 += (PBPOffset-LastOffset).
1025
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1026
.addReg(ScratchReg)
1027
.addImm(PBPOffset-LastOffset);
1028
LastOffset = PBPOffset;
1029
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1030
.addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
1031
.addReg(PPC::ZERO)
1032
.addReg(ScratchReg); // This will be the index (R0 is ok here).
1033
}
1034
if (HasBP) {
1035
// R0 += (BPOffset-LastOffset).
1036
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1037
.addReg(ScratchReg)
1038
.addImm(BPOffset-LastOffset);
1039
LastOffset = BPOffset;
1040
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1041
.addReg(BPReg, RegState::Kill) // Save BP.
1042
.addReg(PPC::ZERO)
1043
.addReg(ScratchReg); // This will be the index (R0 is ok here).
1044
// BP = R0-LastOffset
1045
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1046
.addReg(ScratchReg, RegState::Kill)
1047
.addImm(-LastOffset);
1048
}
1049
} else {
1050
// ScratchReg is not R0, so use it as the base register. It is
1051
// already set to the old SP, so we can use the offsets directly.
1052
1053
// Now that the stack frame has been allocated, save all the necessary
1054
// registers using ScratchReg as the base address.
1055
if (HasFP)
1056
BuildMI(MBB, MBBI, dl, StoreInst)
1057
.addReg(FPReg)
1058
.addImm(FPOffset)
1059
.addReg(ScratchReg);
1060
if (FI->usesPICBase())
1061
BuildMI(MBB, MBBI, dl, StoreInst)
1062
.addReg(PPC::R30)
1063
.addImm(PBPOffset)
1064
.addReg(ScratchReg);
1065
if (HasBP) {
1066
BuildMI(MBB, MBBI, dl, StoreInst)
1067
.addReg(BPReg)
1068
.addImm(BPOffset)
1069
.addReg(ScratchReg);
1070
BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1071
.addReg(ScratchReg, RegState::Kill)
1072
.addReg(ScratchReg);
1073
}
1074
}
1075
} else {
1076
// The frame size is a known 16-bit constant (fitting in the immediate
1077
// field of STWU). To be here we have to be compiling for PPC32.
1078
// Since the SPReg has been decreased by FrameSize, add it back to each
1079
// offset.
1080
if (HasFP)
1081
BuildMI(MBB, MBBI, dl, StoreInst)
1082
.addReg(FPReg)
1083
.addImm(FrameSize + FPOffset)
1084
.addReg(SPReg);
1085
if (FI->usesPICBase())
1086
BuildMI(MBB, MBBI, dl, StoreInst)
1087
.addReg(PPC::R30)
1088
.addImm(FrameSize + PBPOffset)
1089
.addReg(SPReg);
1090
if (HasBP) {
1091
BuildMI(MBB, MBBI, dl, StoreInst)
1092
.addReg(BPReg)
1093
.addImm(FrameSize + BPOffset)
1094
.addReg(SPReg);
1095
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1096
.addReg(SPReg)
1097
.addImm(FrameSize);
1098
}
1099
}
1100
}
1101
1102
// Save the LR now.
1103
if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1104
SaveLR(LROffset + FrameSize);
1105
1106
// Add Call Frame Information for the instructions we generated above.
1107
if (needsCFI) {
1108
unsigned CFIIndex;
1109
1110
if (HasBP) {
1111
// Define CFA in terms of BP. Do this in preference to using FP/SP,
1112
// because if the stack needed aligning then CFA won't be at a fixed
1113
// offset from FP/SP.
1114
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1115
CFIIndex = MF.addFrameInst(
1116
MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1117
} else {
1118
// Adjust the definition of CFA to account for the change in SP.
1119
assert(NegFrameSize);
1120
CFIIndex = MF.addFrameInst(
1121
MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1122
}
1123
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1124
.addCFIIndex(CFIIndex);
1125
1126
if (HasFP) {
1127
// Describe where FP was saved, at a fixed offset from CFA.
1128
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1129
CFIIndex = MF.addFrameInst(
1130
MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1131
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1132
.addCFIIndex(CFIIndex);
1133
}
1134
1135
if (FI->usesPICBase()) {
1136
// Describe where the PIC base register (R30) was saved, at a fixed offset
// from CFA.
1137
unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1138
CFIIndex = MF.addFrameInst(
1139
MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1140
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1141
.addCFIIndex(CFIIndex);
1142
}
1143
1144
if (HasBP) {
1145
// Describe where BP was saved, at a fixed offset from CFA.
1146
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1147
CFIIndex = MF.addFrameInst(
1148
MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1149
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1150
.addCFIIndex(CFIIndex);
1151
}
1152
1153
if (MustSaveLR) {
1154
// Describe where LR was saved, at a fixed offset from CFA.
1155
unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1156
CFIIndex = MF.addFrameInst(
1157
MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1158
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1159
.addCFIIndex(CFIIndex);
1160
}
1161
}
1162
1163
// If there is a frame pointer, copy R1 into R31
1164
if (HasFP) {
1165
BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1166
.addReg(SPReg)
1167
.addReg(SPReg);
1168
1169
if (!HasBP && needsCFI) {
1170
// Change the definition of CFA from SP+offset to FP+offset, because SP
1171
// will change at every alloca.
1172
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1173
unsigned CFIIndex = MF.addFrameInst(
1174
MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1175
1176
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1177
.addCFIIndex(CFIIndex);
1178
}
1179
}
1180
1181
if (needsCFI) {
1182
// Describe where callee saved registers were saved, at fixed offsets from
1183
// CFA.
1184
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1185
for (const CalleeSavedInfo &I : CSI) {
1186
Register Reg = I.getReg();
1187
if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1188
1189
// This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1190
// subregisters of CR2. We just need to emit a move of CR2.
1191
if (PPC::CRBITRCRegClass.contains(Reg))
1192
continue;
1193
1194
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1195
continue;
1196
1197
// For 64-bit SVR4 when we have spilled CRs, the spill location
1198
// is SP+8, not a frame-relative slot.
1199
if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1200
// In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1201
// the whole CR word. In the ELFv2 ABI, every CR that was
1202
// actually saved gets its own CFI record.
1203
Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1204
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1205
nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1206
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1207
.addCFIIndex(CFIIndex);
1208
continue;
1209
}
1210
1211
if (I.isSpilledToReg()) {
1212
unsigned SpilledReg = I.getDstReg();
1213
unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1214
nullptr, MRI->getDwarfRegNum(Reg, true),
1215
MRI->getDwarfRegNum(SpilledReg, true)));
1216
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1217
.addCFIIndex(CFIRegister);
1218
} else {
1219
int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1220
// We have changed the object offset above but we do not want to change
1221
// the actual offsets in the CFI instruction so we have to undo the
1222
// offset change here.
1223
if (MovingStackUpdateDown)
1224
Offset -= NegFrameSize;
1225
1226
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1227
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1228
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1229
.addCFIIndex(CFIIndex);
1230
}
1231
}
1232
}
1233
}
1234
1235
/// Expand the PROBED_STACKALLOC_64 / PROBED_STACKALLOC_32 pseudo instruction
/// found in \p PrologMBB into store-with-update instructions that allocate the
/// frame one probe-sized piece at a time. If \p PrologMBB contains no such
/// pseudo, nothing is changed. Depending on the frame layout this either emits
/// straight-line code, a CTR-based loop, or (when the stack must be realigned)
/// a compare-and-branch loop; the latter two split \p PrologMBB and insert new
/// machine basic blocks after it. The pseudo itself is erased at the end.
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1236
MachineBasicBlock &PrologMBB) const {
1237
bool isPPC64 = Subtarget.isPPC64();
1238
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1239
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1240
MachineFrameInfo &MFI = MF.getFrameInfo();
1241
const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
1242
// AIX assembler does not support cfi directives.
1243
const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1244
// Locate the stack-allocation pseudo; bail out if this block has none.
auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1245
int Opc = MI.getOpcode();
1246
return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1247
});
1248
if (StackAllocMIPos == PrologMBB.end())
1249
return;
1250
// IR block used as the origin of every machine basic block created below.
const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1251
// Block that currently contains the pseudo; updated if the block is split.
MachineBasicBlock *CurrentMBB = &PrologMBB;
1252
DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1253
MachineInstr &MI = *StackAllocMIPos;
1254
// Operand 2 of the pseudo is the (negated) frame size immediate.
int64_t NegFrameSize = MI.getOperand(2).getImm();
1255
unsigned ProbeSize = TLI.getStackProbeSize(MF);
1256
int64_t NegProbeSize = -(int64_t)ProbeSize;
1257
assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1258
// Split the allocation into whole probe-sized blocks plus a residual. Both
// operands are expected to be negative, so NumBlocks is non-negative and
// NegResidualSize keeps the sign of NegFrameSize (C++ division truncates
// toward zero).
int64_t NumBlocks = NegFrameSize / NegProbeSize;
1259
int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1260
Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1261
// Operands 0 and 1 of the pseudo are the two registers it defines. Operand 1
// is named FPReg here; in the non-realigned path below it is loaded with the
// incoming SP and then stored as the back chain by every probing store.
Register ScratchReg = MI.getOperand(0).getReg();
1262
Register FPReg = MI.getOperand(1).getReg();
1263
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1264
bool HasBP = RegInfo->hasBasePointer(MF);
1265
Register BPReg = RegInfo->getBaseRegister(MF);
1266
Align MaxAlign = MFI.getMaxAlign();
1267
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1268
// Register-to-register copy is emitted as `or dst, src, src`.
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1269
// Subroutines to generate .cfi_* directives.
1270
// Emits a CFI instruction that redefines the CFA register as Reg.
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1271
MachineBasicBlock::iterator MBBI, Register Reg) {
1272
unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1273
unsigned CFIIndex = MF.addFrameInst(
1274
MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1275
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1276
.addCFIIndex(CFIIndex);
1277
};
1278
// Emits a CFI instruction that defines the CFA as Reg + Offset.
auto buildDefCFA = [&](MachineBasicBlock &MBB,
1279
MachineBasicBlock::iterator MBBI, Register Reg,
1280
int Offset) {
1281
unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1282
unsigned CFIIndex = MBB.getParent()->addFrameInst(
1283
MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1284
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1285
.addCFIIndex(CFIIndex);
1286
};
1287
// Subroutine to determine if we can use the Imm as part of d-form: it must be
// a signed 16-bit value and a multiple of 4.
1288
auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1289
// Subroutine to materialize the Imm into TempReg. A 16-bit value needs a
// single load-immediate; otherwise the upper half is loaded with LIS and the
// lower half is OR-ed in with ORI. TempReg is taken by reference but is only
// read here.
1290
auto MaterializeImm = [&](MachineBasicBlock &MBB,
1291
MachineBasicBlock::iterator MBBI, int64_t Imm,
1292
Register &TempReg) {
1293
assert(isInt<32>(Imm) && "Unhandled imm");
1294
if (isInt<16>(Imm))
1295
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1296
.addImm(Imm);
1297
else {
1298
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1299
.addImm(Imm >> 16);
1300
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1301
.addReg(TempReg)
1302
.addImm(Imm & 0xFFFF);
1303
}
1304
};
1305
// Subroutine to store frame pointer and decrease stack pointer by probe size.
// With UseDForm the displacement is the immediate NegSize and NegSizeReg is
// not used; otherwise the indexed form is emitted with NegSizeReg as the
// index and NegSize is not used. StoreReg is the value written to memory.
1306
auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1307
MachineBasicBlock::iterator MBBI, int64_t NegSize,
1308
Register NegSizeReg, bool UseDForm,
1309
Register StoreReg) {
1310
if (UseDForm)
1311
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1312
.addReg(StoreReg)
1313
.addImm(NegSize)
1314
.addReg(SPReg);
1315
else
1316
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1317
.addReg(StoreReg)
1318
.addReg(SPReg)
1319
.addReg(NegSizeReg);
1320
};
1321
// Used to probe stack when realignment is required.
1322
// Note that, according to the ABI's requirement, *sp must always equal the
1323
// value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
1324
// Following is pseudo code:
1325
// final_sp = (sp & align) + negframesize;
1326
// neg_gap = final_sp - sp;
1327
// while (neg_gap < negprobesize) {
1328
// stdu fp, negprobesize(sp);
1329
// neg_gap -= negprobesize;
1330
// }
1331
// stdux fp, sp, neg_gap
1332
//
1333
// When HasBP && HasRedZone, back-chain pointer is already saved in BPReg
1334
// before probe code, we don't need to save it, so we get one additional reg
1335
// that can be used to materialize the probesize if needed to use xform.
1336
// Otherwise, we can NOT materialize probesize, so we can only use Dform for
1337
// now.
1338
//
1339
// The allocations are:
1340
// if (HasBP && HasRedzone) {
1341
// r0: materialize the probesize if needed so that we can use xform.
1342
// r12: `neg_gap`
1343
// } else {
1344
// r0: back-chain pointer
1345
// r12: `neg_gap`.
1346
// }
1347
//
// On entry TempReg holds the final stack pointer value. The lambda splits MBB
// at MBBI, inserts a loop-body block and an exit block after MBB, and returns
// the exit block, which receives the instructions from MBBI onwards together
// with MBB's former successors.
auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1348
MachineBasicBlock::iterator MBBI,
1349
Register ScratchReg, Register TempReg) {
1350
assert(HasBP && "The function is supposed to have base pointer when its "
1351
"stack is realigned.");
1352
assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1353
1354
// FIXME: We can eliminate this limitation if we get more information about
1355
// which part of redzone are already used. Used redzone can be treated
1356
// probed. But there might be `holes' in redzone probed, this could
1357
// complicate the implementation.
1358
assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1359
"Probe size should be larger or equal to the size of red-zone so "
1360
"that red-zone is not clobbered by probing.");
1361
1362
Register &FinalStackPtr = TempReg;
1363
// FIXME: We only support NegProbeSize materializable by DForm currently.
1364
// When HasBP && HasRedzone, we can use xform if we have an additional idle
1365
// register.
// This clamps the captured NegProbeSize (shared with the enclosing function)
// so that it is never below -32768.
1366
NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1367
assert(isInt<16>(NegProbeSize) &&
1368
"NegProbeSize should be materializable by DForm");
1369
Register CRReg = PPC::CR0;
1370
// The layout of the output assembly is roughly:
1371
// bb.0:
1372
// ...
1373
// sub $scratchreg, $finalsp, r1
1374
// cmpdi $scratchreg, <negprobesize>
1375
// bge bb.2
1376
// bb.1:
1377
// stdu <backchain>, <negprobesize>(r1)
1378
// sub $scratchreg, $scratchreg, negprobesize
1379
// cmpdi $scratchreg, <negprobesize>
1380
// blt bb.1
1381
// bb.2:
1382
// stdux <backchain>, r1, $scratchreg
1383
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1384
MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1385
MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1386
MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1387
MF.insert(MBBInsertPoint, ProbeExitMBB);
1388
// bb.2
1389
{
1390
Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1391
// Final allocation uses the indexed form with the remaining gap held in
// ScratchReg; the immediate argument (0) is not used in that form.
allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1392
BackChainPointer);
1393
if (HasRedZone)
1394
// PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1395
// to TempReg to satisfy it.
1396
BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1397
.addReg(BPReg)
1398
.addReg(BPReg);
1399
// Move everything from MBBI to the end of MBB into the exit block and hand
// MBB's successors over to it.
ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1400
ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1401
}
1402
// bb.0
1403
{
1404
// ScratchReg = FinalStackPtr - SP, i.e. the (negative) gap still to allocate.
BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1405
.addReg(SPReg)
1406
.addReg(FinalStackPtr);
1407
// Without a red zone TempReg is reused to carry the back chain (current SP).
if (!HasRedZone)
1408
BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1409
BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1410
.addReg(ScratchReg)
1411
.addImm(NegProbeSize);
1412
// Skip the loop when the gap is already no larger than one probe.
BuildMI(&MBB, DL, TII.get(PPC::BCC))
1413
.addImm(PPC::PRED_GE)
1414
.addReg(CRReg)
1415
.addMBB(ProbeExitMBB);
1416
MBB.addSuccessor(ProbeLoopBodyMBB);
1417
MBB.addSuccessor(ProbeExitMBB);
1418
}
1419
// bb.1
1420
{
1421
Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1422
// D-form probe of one full probe size; the register argument (0) is not used.
allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1423
0, true /*UseDForm*/, BackChainPointer);
1424
// NOTE(review): if NegProbeSize was clamped to -32768 above, the immediate
// below is +32768, which is not isInt<16>; confirm the ADDI immediate field
// can represent it.
BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1425
ScratchReg)
1426
.addReg(ScratchReg)
1427
.addImm(-NegProbeSize);
1428
BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1429
CRReg)
1430
.addReg(ScratchReg)
1431
.addImm(NegProbeSize);
1432
// Keep looping while more than one probe size remains.
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1433
.addImm(PPC::PRED_LT)
1434
.addReg(CRReg)
1435
.addMBB(ProbeLoopBodyMBB);
1436
ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1437
ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1438
}
1439
// Update liveins.
1440
fullyRecomputeLiveIns({ProbeExitMBB, ProbeLoopBodyMBB});
1441
return ProbeExitMBB;
1442
};
1443
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1444
// SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
1445
// the offset subtracted from SP is determined by SP's runtime value.
1446
if (HasBP && MaxAlign > 1) {
1447
// Calculate final stack pointer.
// ScratchReg = low Log2(MaxAlign) bits of SP (i.e. SP % MaxAlign).
1448
if (isPPC64)
1449
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1450
.addReg(SPReg)
1451
.addImm(0)
1452
.addImm(64 - Log2(MaxAlign));
1453
else
1454
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1455
.addReg(SPReg)
1456
.addImm(0)
1457
.addImm(32 - Log2(MaxAlign))
1458
.addImm(31);
1459
// FPReg = SP - ScratchReg (SP rounded down to the alignment).
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1460
FPReg)
1461
.addReg(ScratchReg)
1462
.addReg(SPReg);
1463
// FPReg += NegFrameSize, giving the final stack pointer value.
MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1464
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1465
FPReg)
1466
.addReg(ScratchReg)
1467
.addReg(FPReg);
1468
// The pseudo now lives in the exit block returned by probeRealignedStack.
CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1469
if (needsCFI)
1470
buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1471
} else {
1472
// Initialize current frame pointer.
1473
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1474
// Use FPReg to calculate CFA.
1475
if (needsCFI)
1476
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1477
// Probe residual part.
1478
if (NegResidualSize) {
1479
bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1480
if (!ResidualUseDForm)
1481
MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1482
allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1483
ResidualUseDForm, FPReg);
1484
}
1485
bool UseDForm = CanUseDForm(NegProbeSize);
1486
// If number of blocks is small, just probe them directly.
1487
if (NumBlocks < 3) {
1488
if (!UseDForm)
1489
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1490
for (int i = 0; i < NumBlocks; ++i)
1491
allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1492
FPReg);
1493
if (needsCFI) {
1494
// Restore using SPReg to calculate CFA.
1495
buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1496
}
1497
} else {
1498
// Since CTR is a volatile register and current shrinkwrap implementation
1499
// won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1500
// CTR loop to probe.
1501
// Calculate trip count and store it in the CTR.
1502
MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1503
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1504
.addReg(ScratchReg, RegState::Kill);
1505
// ScratchReg is free again after the move to CTR; reuse it for the probe
// size when the indexed form is required.
if (!UseDForm)
1506
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1507
// Create MBBs of the loop.
1508
MachineFunction::iterator MBBInsertPoint =
1509
std::next(CurrentMBB->getIterator());
1510
MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1511
MF.insert(MBBInsertPoint, LoopMBB);
1512
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1513
MF.insert(MBBInsertPoint, ExitMBB);
1514
// Synthesize the loop body.
1515
allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1516
UseDForm, FPReg);
1517
BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1518
.addMBB(LoopMBB);
1519
LoopMBB->addSuccessor(ExitMBB);
1520
LoopMBB->addSuccessor(LoopMBB);
1521
// Synthesize the exit MBB.
// Only the instructions after the pseudo are moved; the pseudo itself stays
// in CurrentMBB.
1522
ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1523
std::next(MachineBasicBlock::iterator(MI)),
1524
CurrentMBB->end());
1525
ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1526
CurrentMBB->addSuccessor(LoopMBB);
1527
if (needsCFI) {
1528
// Restore using SPReg to calculate CFA.
1529
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1530
}
1531
// Update liveins.
1532
fullyRecomputeLiveIns({ExitMBB, LoopMBB});
1533
}
1534
}
1535
++NumPrologProbed;
1536
// The pseudo has been fully expanded; remove it.
MI.eraseFromParent();
1537
}
1538
1539
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1540
MachineBasicBlock &MBB) const {
1541
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1542
DebugLoc dl;
1543
1544
if (MBBI != MBB.end())
1545
dl = MBBI->getDebugLoc();
1546
1547
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1548
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1549
1550
// Get alignment info so we know how to restore the SP.
1551
const MachineFrameInfo &MFI = MF.getFrameInfo();
1552
1553
// Get the number of bytes allocated from the FrameInfo.
1554
int64_t FrameSize = MFI.getStackSize();
1555
1556
// Get processor type.
1557
bool isPPC64 = Subtarget.isPPC64();
1558
1559
// Check if the link register (LR) has been saved.
1560
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1561
bool MustSaveLR = FI->mustSaveLR();
1562
const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1563
bool MustSaveCR = !MustSaveCRs.empty();
1564
// Do we have a frame pointer and/or base pointer for this function?
1565
bool HasFP = hasFP(MF);
1566
bool HasBP = RegInfo->hasBasePointer(MF);
1567
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1568
bool HasROPProtect = Subtarget.hasROPProtect();
1569
bool HasPrivileged = Subtarget.hasPrivileged();
1570
1571
Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1572
Register BPReg = RegInfo->getBaseRegister(MF);
1573
Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
1574
Register ScratchReg;
1575
Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1576
const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1577
: PPC::MTLR );
1578
const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1579
: PPC::LWZ );
1580
const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1581
: PPC::LIS );
1582
const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1583
: PPC::OR );
1584
const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1585
: PPC::ORI );
1586
const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1587
: PPC::ADDI );
1588
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1589
: PPC::ADD4 );
1590
const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1591
: PPC::LWZ);
1592
const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1593
: PPC::MTOCRF);
1594
const MCInstrDesc &HashChk =
1595
TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1596
: (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1597
int64_t LROffset = getReturnSaveOffset();
1598
1599
int64_t FPOffset = 0;
1600
1601
// Using the same bool variable as below to suppress compiler warnings.
1602
bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1603
&TempReg);
1604
assert(SingleScratchReg &&
1605
"Could not find an available scratch register");
1606
1607
SingleScratchReg = ScratchReg == TempReg;
1608
1609
if (HasFP) {
1610
int FPIndex = FI->getFramePointerSaveIndex();
1611
assert(FPIndex && "No Frame Pointer Save Slot!");
1612
FPOffset = MFI.getObjectOffset(FPIndex);
1613
}
1614
1615
int64_t BPOffset = 0;
1616
if (HasBP) {
1617
int BPIndex = FI->getBasePointerSaveIndex();
1618
assert(BPIndex && "No Base Pointer Save Slot!");
1619
BPOffset = MFI.getObjectOffset(BPIndex);
1620
}
1621
1622
int64_t PBPOffset = 0;
1623
if (FI->usesPICBase()) {
1624
int PBPIndex = FI->getPICBasePointerSaveIndex();
1625
assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1626
PBPOffset = MFI.getObjectOffset(PBPIndex);
1627
}
1628
1629
bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1630
1631
if (IsReturnBlock) {
1632
unsigned RetOpcode = MBBI->getOpcode();
1633
bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
1634
RetOpcode == PPC::TCRETURNdi ||
1635
RetOpcode == PPC::TCRETURNai ||
1636
RetOpcode == PPC::TCRETURNri8 ||
1637
RetOpcode == PPC::TCRETURNdi8 ||
1638
RetOpcode == PPC::TCRETURNai8;
1639
1640
if (UsesTCRet) {
1641
int MaxTCRetDelta = FI->getTailCallSPDelta();
1642
MachineOperand &StackAdjust = MBBI->getOperand(1);
1643
assert(StackAdjust.isImm() && "Expecting immediate value.");
1644
// Adjust stack pointer.
1645
int StackAdj = StackAdjust.getImm();
1646
int Delta = StackAdj - MaxTCRetDelta;
1647
assert((Delta >= 0) && "Delta must be positive");
1648
if (MaxTCRetDelta>0)
1649
FrameSize += (StackAdj +Delta);
1650
else
1651
FrameSize += StackAdj;
1652
}
1653
}
1654
1655
// Frames of 32KB & larger require special handling because they cannot be
1656
// indexed into with a simple LD/LWZ immediate offset operand.
1657
bool isLargeFrame = !isInt<16>(FrameSize);
1658
1659
// On targets without red zone, the SP needs to be restored last, so that
1660
// all live contents of the stack frame are upwards of the SP. This means
1661
// that we cannot restore SP just now, since there may be more registers
1662
// to restore from the stack frame (e.g. R31). If the frame size is not
1663
// a simple immediate value, we will need a spare register to hold the
1664
// restored SP. If the frame size is known and small, we can simply adjust
1665
// the offsets of the registers to be restored, and still use SP to restore
1666
// them. In such case, the final update of SP will be to add the frame
1667
// size to it.
1668
// To simplify the code, set RBReg to the base register used to restore
1669
// values from the stack, and set SPAdd to the value that needs to be added
1670
// to the SP at the end. The default values are as if red zone was present.
1671
unsigned RBReg = SPReg;
1672
uint64_t SPAdd = 0;
1673
1674
// Check if we can move the stack update instruction up the epilogue
1675
// past the callee saves. This will allow the move to LR instruction
1676
// to be executed before the restores of the callee saves which means
1677
// that the callee saves can hide the latency from the MTLR instruction.
1678
MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1679
if (stackUpdateCanBeMoved(MF)) {
1680
const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1681
for (CalleeSavedInfo CSI : Info) {
1682
// If the callee saved register is spilled to another register abort the
1683
// stack update movement.
1684
if (CSI.isSpilledToReg()) {
1685
StackUpdateLoc = MBBI;
1686
break;
1687
}
1688
int FrIdx = CSI.getFrameIdx();
1689
// If the frame index is not negative the callee saved info belongs to a
1690
// stack object that is not a fixed stack object. We ignore non-fixed
1691
// stack objects because we won't move the update of the stack pointer
1692
// past them.
1693
if (FrIdx >= 0)
1694
continue;
1695
1696
if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1697
StackUpdateLoc--;
1698
else {
1699
// Abort the operation as we can't update all CSR restores.
1700
StackUpdateLoc = MBBI;
1701
break;
1702
}
1703
}
1704
}
1705
1706
if (FrameSize) {
1707
// In the prologue, the loaded (or persistent) stack pointer value is
1708
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1709
// zone add this offset back now.
1710
1711
// If the function has a base pointer, the stack pointer has been copied
1712
// to it so we can restore it by copying in the other direction.
1713
if (HasRedZone && HasBP) {
1714
BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1715
addReg(BPReg).
1716
addReg(BPReg);
1717
}
1718
// If this function contained a fastcc call and GuaranteedTailCallOpt is
1719
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1720
// call which invalidates the stack pointer value in SP(0). So we use the
1721
// value of R31 in this case. Similar situation exists with setjmp.
1722
else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1723
assert(HasFP && "Expecting a valid frame pointer.");
1724
if (!HasRedZone)
1725
RBReg = FPReg;
1726
if (!isLargeFrame) {
1727
BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1728
.addReg(FPReg).addImm(FrameSize);
1729
} else {
1730
TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1731
BuildMI(MBB, MBBI, dl, AddInst)
1732
.addReg(RBReg)
1733
.addReg(FPReg)
1734
.addReg(ScratchReg);
1735
}
1736
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1737
if (HasRedZone) {
1738
BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1739
.addReg(SPReg)
1740
.addImm(FrameSize);
1741
} else {
1742
// Make sure that adding FrameSize will not overflow the max offset
1743
// size.
1744
assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1745
"Local offsets should be negative");
1746
SPAdd = FrameSize;
1747
FPOffset += FrameSize;
1748
BPOffset += FrameSize;
1749
PBPOffset += FrameSize;
1750
}
1751
} else {
1752
// We don't want to use ScratchReg as a base register, because it
1753
// could happen to be R0. Use FP instead, but make sure to preserve it.
1754
if (!HasRedZone) {
1755
// If FP is not saved, copy it to ScratchReg.
1756
if (!HasFP)
1757
BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1758
.addReg(FPReg)
1759
.addReg(FPReg);
1760
RBReg = FPReg;
1761
}
1762
BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1763
.addImm(0)
1764
.addReg(SPReg);
1765
}
1766
}
1767
assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1768
// If there is no red zone, ScratchReg may be needed for holding a useful
1769
// value (although not the base register). Make sure it is not overwritten
1770
// too early.
1771
1772
// If we need to restore both the LR and the CR and we only have one
1773
// available scratch register, we must do them one at a time.
1774
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1775
// Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1776
// is live here.
1777
assert(HasRedZone && "Expecting red zone");
1778
BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1779
.addImm(CRSaveOffset)
1780
.addReg(SPReg);
1781
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1782
BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1783
.addReg(TempReg, getKillRegState(i == e-1));
1784
}
1785
1786
// Delay restoring of the LR if ScratchReg is needed. This is ok, since
1787
// LR is stored in the caller's stack frame. ScratchReg will be needed
1788
// if RBReg is anything other than SP. We shouldn't use ScratchReg as
1789
// a base register anyway, because it may happen to be R0.
1790
bool LoadedLR = false;
1791
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1792
BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1793
.addImm(LROffset+SPAdd)
1794
.addReg(RBReg);
1795
LoadedLR = true;
1796
}
1797
1798
if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1799
assert(RBReg == SPReg && "Should be using SP as a base register");
1800
BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1801
.addImm(CRSaveOffset)
1802
.addReg(RBReg);
1803
}
1804
1805
if (HasFP) {
1806
// If there is red zone, restore FP directly, since SP has already been
1807
// restored. Otherwise, restore the value of FP into ScratchReg.
1808
if (HasRedZone || RBReg == SPReg)
1809
BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1810
.addImm(FPOffset)
1811
.addReg(SPReg);
1812
else
1813
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1814
.addImm(FPOffset)
1815
.addReg(RBReg);
1816
}
1817
1818
if (FI->usesPICBase())
1819
BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1820
.addImm(PBPOffset)
1821
.addReg(RBReg);
1822
1823
if (HasBP)
1824
BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1825
.addImm(BPOffset)
1826
.addReg(RBReg);
1827
1828
// There is nothing more to be loaded from the stack, so now we can
1829
// restore SP: SP = RBReg + SPAdd.
1830
if (RBReg != SPReg || SPAdd != 0) {
1831
assert(!HasRedZone && "This should not happen with red zone");
1832
// If SPAdd is 0, generate a copy.
1833
if (SPAdd == 0)
1834
BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1835
.addReg(RBReg)
1836
.addReg(RBReg);
1837
else
1838
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1839
.addReg(RBReg)
1840
.addImm(SPAdd);
1841
1842
assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1843
if (RBReg == FPReg)
1844
BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1845
.addReg(ScratchReg)
1846
.addReg(ScratchReg);
1847
1848
// Now load the LR from the caller's stack frame.
1849
if (MustSaveLR && !LoadedLR)
1850
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1851
.addImm(LROffset)
1852
.addReg(SPReg);
1853
}
1854
1855
if (MustSaveCR &&
1856
!(SingleScratchReg && MustSaveLR))
1857
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1858
BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1859
.addReg(TempReg, getKillRegState(i == e-1));
1860
1861
if (MustSaveLR) {
1862
// If ROP protection is required, an extra instruction is added to compute a
1863
// hash and then compare it to the hash stored in the prologue.
1864
if (HasROPProtect) {
1865
const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1866
const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1867
assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1868
"ROP hash check location offset out of range.");
1869
assert(((ImmOffset & 0x7) == 0) &&
1870
"ROP hash check location offset must be 8 byte aligned.");
1871
BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1872
.addReg(ScratchReg)
1873
.addImm(ImmOffset)
1874
.addReg(SPReg);
1875
}
1876
BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1877
}
1878
1879
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
1880
// call optimization
1881
if (IsReturnBlock) {
1882
unsigned RetOpcode = MBBI->getOpcode();
1883
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1884
(RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1885
MF.getFunction().getCallingConv() == CallingConv::Fast) {
1886
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1887
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1888
1889
if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1890
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1891
.addReg(SPReg).addImm(CallerAllocatedAmt);
1892
} else {
1893
BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1894
.addImm(CallerAllocatedAmt >> 16);
1895
BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1896
.addReg(ScratchReg, RegState::Kill)
1897
.addImm(CallerAllocatedAmt & 0xFFFF);
1898
BuildMI(MBB, MBBI, dl, AddInst)
1899
.addReg(SPReg)
1900
.addReg(FPReg)
1901
.addReg(ScratchReg);
1902
}
1903
} else {
1904
createTailCallBranchInstr(MBB);
1905
}
1906
}
1907
}
1908
1909
/// Emit the real tail-call branch for the TCRETURN* pseudo that ends \p MBB.
///
/// The pseudo's opcode is taken from the first terminator of the block; the
/// branch itself is inserted in front of the block's last non-debug
/// instruction. A block whose first terminator is not one of the TCRETURN*
/// pseudos is left unchanged.
void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator Term = MBB.getFirstTerminator();

  // Reaching this point without a terminator indicates a caller bug.
  assert(Term != MBB.end() && "Failed to find the first terminator.");

  DebugLoc Loc = Term->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  const unsigned RetOpc = Term->getOpcode();
  switch (RetOpc) {
  case PPC::TCRETURNdi:
  case PPC::TCRETURNdi8: {
    // Direct tail calls. The callee is either an MO_GlobalAddress or, with
    // PC-relative addressing, an MO_ExternalSymbol: external functions can be
    // tail called with PC-Rel because differing TOC pointers are not a
    // concern. Some externals show up as globals, others (memcpy, for
    // example) as external symbols.
    const unsigned BranchOpc =
        RetOpc == PPC::TCRETURNdi8 ? PPC::TAILB8 : PPC::TAILB;
    MachineBasicBlock::iterator InsertPt = MBB.getLastNonDebugInstr();
    MachineOperand &Callee = InsertPt->getOperand(0);
    if (Callee.isGlobal())
      BuildMI(MBB, InsertPt, Loc, TII.get(BranchOpc))
          .addGlobalAddress(Callee.getGlobal(), Callee.getOffset());
    else if (Callee.isSymbol())
      BuildMI(MBB, InsertPt, Loc, TII.get(BranchOpc))
          .addExternalSymbol(Callee.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
    break;
  }

  case PPC::TCRETURNri:
  case PPC::TCRETURNri8: {
    // Register-indirect tail calls branch through CTR.
    const unsigned BranchOpc =
        RetOpc == PPC::TCRETURNri8 ? PPC::TAILBCTR8 : PPC::TAILBCTR;
    MachineBasicBlock::iterator InsertPt = MBB.getLastNonDebugInstr();
    assert(InsertPt->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, InsertPt, Loc, TII.get(BranchOpc));
    break;
  }

  case PPC::TCRETURNai:
  case PPC::TCRETURNai8: {
    // Tail calls whose target is carried as an immediate operand.
    const unsigned BranchOpc =
        RetOpc == PPC::TCRETURNai8 ? PPC::TAILBA8 : PPC::TAILBA;
    MachineBasicBlock::iterator InsertPt = MBB.getLastNonDebugInstr();
    MachineOperand &Callee = InsertPt->getOperand(0);
    BuildMI(MBB, InsertPt, Loc, TII.get(BranchOpc)).addImm(Callee.getImm());
    break;
  }

  default:
    // Not a tail-call return pseudo; nothing to emit.
    break;
  }
}
1966
1967
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1968
BitVector &SavedRegs,
1969
RegScavenger *RS) const {
1970
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1971
if (Subtarget.isAIXABI())
1972
updateCalleeSaves(MF, SavedRegs);
1973
1974
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1975
1976
// Do not explicitly save the callee saved VSRp registers.
1977
// The individual VSR subregisters will be saved instead.
1978
SavedRegs.reset(PPC::VSRp26);
1979
SavedRegs.reset(PPC::VSRp27);
1980
SavedRegs.reset(PPC::VSRp28);
1981
SavedRegs.reset(PPC::VSRp29);
1982
SavedRegs.reset(PPC::VSRp30);
1983
SavedRegs.reset(PPC::VSRp31);
1984
1985
// Save and clear the LR state.
1986
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1987
unsigned LR = RegInfo->getRARegister();
1988
FI->setMustSaveLR(MustSaveLR(MF, LR));
1989
SavedRegs.reset(LR);
1990
1991
// Save R31 if necessary
1992
int FPSI = FI->getFramePointerSaveIndex();
1993
const bool isPPC64 = Subtarget.isPPC64();
1994
MachineFrameInfo &MFI = MF.getFrameInfo();
1995
1996
// If the frame pointer save index hasn't been defined yet.
1997
if (!FPSI && needsFP(MF)) {
1998
// Find out what the fix offset of the frame pointer save area.
1999
int FPOffset = getFramePointerSaveOffset();
2000
// Allocate the frame index for frame pointer save area.
2001
FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2002
// Save the result.
2003
FI->setFramePointerSaveIndex(FPSI);
2004
}
2005
2006
int BPSI = FI->getBasePointerSaveIndex();
2007
if (!BPSI && RegInfo->hasBasePointer(MF)) {
2008
int BPOffset = getBasePointerSaveOffset();
2009
// Allocate the frame index for the base pointer save area.
2010
BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2011
// Save the result.
2012
FI->setBasePointerSaveIndex(BPSI);
2013
}
2014
2015
// Reserve stack space for the PIC Base register (R30).
2016
// Only used in SVR4 32-bit.
2017
if (FI->usesPICBase()) {
2018
int PBPSI = MFI.CreateFixedObject(4, -8, true);
2019
FI->setPICBasePointerSaveIndex(PBPSI);
2020
}
2021
2022
// Make sure we don't explicitly spill r31, because, for example, we have
2023
// some inline asm which explicitly clobbers it, when we otherwise have a
2024
// frame pointer and are using r31's spill slot for the prologue/epilogue
2025
// code. Same goes for the base pointer and the PIC base register.
2026
if (needsFP(MF))
2027
SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2028
if (RegInfo->hasBasePointer(MF)) {
2029
SavedRegs.reset(RegInfo->getBaseRegister(MF));
2030
// On AIX, when BaseRegister(R30) is used, need to spill r31 too to match
2031
// AIX trackback table requirement.
2032
if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) &&
2033
Subtarget.isAIXABI()) {
2034
assert(
2035
(RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) &&
2036
"Invalid base register on AIX!");
2037
SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31);
2038
}
2039
}
2040
if (FI->usesPICBase())
2041
SavedRegs.reset(PPC::R30);
2042
2043
// Reserve stack space to move the linkage area to in case of a tail call.
2044
int TCSPDelta = 0;
2045
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2046
(TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2047
MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2048
}
2049
2050
// Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2051
// For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2052
// object at the offset of the CR-save slot in the linkage area. The actual
2053
// save and restore of the condition register will be created as part of the
2054
// prologue and epilogue insertion, but the FixedStack object is needed to
2055
// keep the CalleSavedInfo valid.
2056
if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2057
SavedRegs.test(PPC::CR4))) {
2058
const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2059
const int64_t SpillOffset =
2060
Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2061
int FrameIdx =
2062
MFI.CreateFixedObject(SpillSize, SpillOffset,
2063
/* IsImmutable */ true, /* IsAliased */ false);
2064
FI->setCRSpillFrameIndex(FrameIdx);
2065
}
2066
}
2067
2068
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2069
RegScavenger *RS) const {
2070
// Get callee saved register information.
2071
MachineFrameInfo &MFI = MF.getFrameInfo();
2072
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2073
2074
// If the function is shrink-wrapped, and if the function has a tail call, the
2075
// tail call might not be in the new RestoreBlock, so real branch instruction
2076
// won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2077
// RestoreBlock. So we handle this case here.
2078
if (MFI.getSavePoint() && MFI.hasTailCall()) {
2079
MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2080
for (MachineBasicBlock &MBB : MF) {
2081
if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2082
createTailCallBranchInstr(MBB);
2083
}
2084
}
2085
2086
// Early exit if no callee saved registers are modified!
2087
if (CSI.empty() && !needsFP(MF)) {
2088
addScavengingSpillSlot(MF, RS);
2089
return;
2090
}
2091
2092
unsigned MinGPR = PPC::R31;
2093
unsigned MinG8R = PPC::X31;
2094
unsigned MinFPR = PPC::F31;
2095
unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2096
2097
bool HasGPSaveArea = false;
2098
bool HasG8SaveArea = false;
2099
bool HasFPSaveArea = false;
2100
bool HasVRSaveArea = false;
2101
2102
SmallVector<CalleeSavedInfo, 18> GPRegs;
2103
SmallVector<CalleeSavedInfo, 18> G8Regs;
2104
SmallVector<CalleeSavedInfo, 18> FPRegs;
2105
SmallVector<CalleeSavedInfo, 18> VRegs;
2106
2107
for (const CalleeSavedInfo &I : CSI) {
2108
Register Reg = I.getReg();
2109
assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2110
(Reg != PPC::X2 && Reg != PPC::R2)) &&
2111
"Not expecting to try to spill R2 in a function that must save TOC");
2112
if (PPC::GPRCRegClass.contains(Reg)) {
2113
HasGPSaveArea = true;
2114
2115
GPRegs.push_back(I);
2116
2117
if (Reg < MinGPR) {
2118
MinGPR = Reg;
2119
}
2120
} else if (PPC::G8RCRegClass.contains(Reg)) {
2121
HasG8SaveArea = true;
2122
2123
G8Regs.push_back(I);
2124
2125
if (Reg < MinG8R) {
2126
MinG8R = Reg;
2127
}
2128
} else if (PPC::F8RCRegClass.contains(Reg)) {
2129
HasFPSaveArea = true;
2130
2131
FPRegs.push_back(I);
2132
2133
if (Reg < MinFPR) {
2134
MinFPR = Reg;
2135
}
2136
} else if (PPC::CRBITRCRegClass.contains(Reg) ||
2137
PPC::CRRCRegClass.contains(Reg)) {
2138
; // do nothing, as we already know whether CRs are spilled
2139
} else if (PPC::VRRCRegClass.contains(Reg) ||
2140
PPC::SPERCRegClass.contains(Reg)) {
2141
// Altivec and SPE are mutually exclusive, but have the same stack
2142
// alignment requirements, so overload the save area for both cases.
2143
HasVRSaveArea = true;
2144
2145
VRegs.push_back(I);
2146
2147
if (Reg < MinVR) {
2148
MinVR = Reg;
2149
}
2150
} else {
2151
llvm_unreachable("Unknown RegisterClass!");
2152
}
2153
}
2154
2155
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2156
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2157
2158
int64_t LowerBound = 0;
2159
2160
// Take into account stack space reserved for tail calls.
2161
int TCSPDelta = 0;
2162
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2163
(TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2164
LowerBound = TCSPDelta;
2165
}
2166
2167
// The Floating-point register save area is right below the back chain word
2168
// of the previous stack frame.
2169
if (HasFPSaveArea) {
2170
for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2171
int FI = FPRegs[i].getFrameIdx();
2172
2173
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2174
}
2175
2176
LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2177
}
2178
2179
// Check whether the frame pointer register is allocated. If so, make sure it
2180
// is spilled to the correct offset.
2181
if (needsFP(MF)) {
2182
int FI = PFI->getFramePointerSaveIndex();
2183
assert(FI && "No Frame Pointer Save Slot!");
2184
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2185
// FP is R31/X31, so no need to update MinGPR/MinG8R.
2186
HasGPSaveArea = true;
2187
}
2188
2189
if (PFI->usesPICBase()) {
2190
int FI = PFI->getPICBasePointerSaveIndex();
2191
assert(FI && "No PIC Base Pointer Save Slot!");
2192
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2193
2194
MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2195
HasGPSaveArea = true;
2196
}
2197
2198
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2199
if (RegInfo->hasBasePointer(MF)) {
2200
int FI = PFI->getBasePointerSaveIndex();
2201
assert(FI && "No Base Pointer Save Slot!");
2202
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2203
2204
Register BP = RegInfo->getBaseRegister(MF);
2205
if (PPC::G8RCRegClass.contains(BP)) {
2206
MinG8R = std::min<unsigned>(MinG8R, BP);
2207
HasG8SaveArea = true;
2208
} else if (PPC::GPRCRegClass.contains(BP)) {
2209
MinGPR = std::min<unsigned>(MinGPR, BP);
2210
HasGPSaveArea = true;
2211
}
2212
}
2213
2214
// General register save area starts right below the Floating-point
2215
// register save area.
2216
if (HasGPSaveArea || HasG8SaveArea) {
2217
// Move general register save area spill slots down, taking into account
2218
// the size of the Floating-point register save area.
2219
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2220
if (!GPRegs[i].isSpilledToReg()) {
2221
int FI = GPRegs[i].getFrameIdx();
2222
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2223
}
2224
}
2225
2226
// Move general register save area spill slots down, taking into account
2227
// the size of the Floating-point register save area.
2228
for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2229
if (!G8Regs[i].isSpilledToReg()) {
2230
int FI = G8Regs[i].getFrameIdx();
2231
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2232
}
2233
}
2234
2235
unsigned MinReg =
2236
std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2237
TRI->getEncodingValue(MinG8R));
2238
2239
const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2240
LowerBound -= (31 - MinReg + 1) * GPRegSize;
2241
}
2242
2243
// For 32-bit only, the CR save area is below the general register
2244
// save area. For 64-bit SVR4, the CR save area is addressed relative
2245
// to the stack pointer and hence does not need an adjustment here.
2246
// Only CR2 (the first nonvolatile spilled) has an associated frame
2247
// index so that we have a single uniform save area.
2248
if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2249
// Adjust the frame index of the CR spill slot.
2250
for (const auto &CSInfo : CSI) {
2251
if (CSInfo.getReg() == PPC::CR2) {
2252
int FI = CSInfo.getFrameIdx();
2253
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2254
break;
2255
}
2256
}
2257
2258
LowerBound -= 4; // The CR save area is always 4 bytes long.
2259
}
2260
2261
// Both Altivec and SPE have the same alignment and padding requirements
2262
// within the stack frame.
2263
if (HasVRSaveArea) {
2264
// Insert alignment padding, we need 16-byte alignment. Note: for positive
2265
// number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2266
// we are using negative number here (the stack grows downward). We should
2267
// use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2268
// is the alignment size ( n = 16 here) and y is the size after aligning.
2269
assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2270
LowerBound &= ~(15);
2271
2272
for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2273
int FI = VRegs[i].getFrameIdx();
2274
2275
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2276
}
2277
}
2278
2279
addScavengingSpillSlot(MF, RS);
2280
}
2281
2282
void
2283
PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2284
RegScavenger *RS) const {
2285
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2286
// a large stack, which will require scavenging a register to materialize a
2287
// large offset.
2288
2289
// We need to have a scavenger spill slot for spills if the frame size is
2290
// large. In case there is no free register for large-offset addressing,
2291
// this slot is used for the necessary emergency spill. Also, we need the
2292
// slot for dynamic stack allocations.
2293
2294
// The scavenger might be invoked if the frame offset does not fit into
2295
// the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
2296
// We don't know the complete frame size here because we've not yet computed
2297
// callee-saved register spills or the needed alignment padding.
2298
unsigned StackSize = determineFrameLayout(MF, true);
2299
MachineFrameInfo &MFI = MF.getFrameInfo();
2300
bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2301
2302
if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2303
(hasSpills(MF) && NeedSpills)) {
2304
const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2305
const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2306
const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2307
const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2308
unsigned Size = TRI.getSpillSize(RC);
2309
Align Alignment = TRI.getSpillAlign(RC);
2310
RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2311
2312
// Might we have over-aligned allocas?
2313
bool HasAlVars =
2314
MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2315
2316
// These kinds of spills might need two registers.
2317
if (spillsCR(MF) || HasAlVars)
2318
RS->addScavengingFrameIndex(
2319
MFI.CreateStackObject(Size, Alignment, false));
2320
}
2321
}
2322
2323
// This function checks if a callee saved gpr can be spilled to a volatile
2324
// vector register. This occurs for leaf functions when the option
2325
// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2326
// which were not spilled to vectors, return false so the target independent
2327
// code can handle them by assigning a FrameIdx to a stack slot.
2328
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2329
MachineFunction &MF, const TargetRegisterInfo *TRI,
2330
std::vector<CalleeSavedInfo> &CSI) const {
2331
2332
if (CSI.empty())
2333
return true; // Early exit if no callee saved registers are modified!
2334
2335
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2336
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2337
const MachineRegisterInfo &MRI = MF.getRegInfo();
2338
2339
if (Subtarget.hasSPE()) {
2340
// In case of SPE we only have SuperRegs and CRs
2341
// in our CalleSaveInfo vector.
2342
2343
for (auto &CalleeSaveReg : CSI) {
2344
MCPhysReg Reg = CalleeSaveReg.getReg();
2345
MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2346
MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2347
2348
if ( // Check only for SuperRegs.
2349
Lower &&
2350
// Replace Reg if only lower-32 bits modified
2351
!MRI.isPhysRegModified(Higher))
2352
CalleeSaveReg = CalleeSavedInfo(Lower);
2353
}
2354
}
2355
2356
// Early exit if cannot spill gprs to volatile vector registers.
2357
MachineFrameInfo &MFI = MF.getFrameInfo();
2358
if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2359
return false;
2360
2361
// Build a BitVector of VSRs that can be used for spilling GPRs.
2362
BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2363
BitVector BVCalleeSaved(TRI->getNumRegs());
2364
for (unsigned i = 0; CSRegs[i]; ++i)
2365
BVCalleeSaved.set(CSRegs[i]);
2366
2367
for (unsigned Reg : BVAllocatable.set_bits()) {
2368
// Set to 0 if the register is not a volatile VSX register, or if it is
2369
// used in the function.
2370
if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2371
MRI.isPhysRegUsed(Reg))
2372
BVAllocatable.reset(Reg);
2373
}
2374
2375
bool AllSpilledToReg = true;
2376
unsigned LastVSRUsedForSpill = 0;
2377
for (auto &CS : CSI) {
2378
if (BVAllocatable.none())
2379
return false;
2380
2381
Register Reg = CS.getReg();
2382
2383
if (!PPC::G8RCRegClass.contains(Reg)) {
2384
AllSpilledToReg = false;
2385
continue;
2386
}
2387
2388
// For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2389
// into one VSR using the mtvsrdd instruction.
2390
if (LastVSRUsedForSpill != 0) {
2391
CS.setDstReg(LastVSRUsedForSpill);
2392
BVAllocatable.reset(LastVSRUsedForSpill);
2393
LastVSRUsedForSpill = 0;
2394
continue;
2395
}
2396
2397
unsigned VolatileVFReg = BVAllocatable.find_first();
2398
if (VolatileVFReg < BVAllocatable.size()) {
2399
CS.setDstReg(VolatileVFReg);
2400
LastVSRUsedForSpill = VolatileVFReg;
2401
} else {
2402
AllSpilledToReg = false;
2403
}
2404
}
2405
return AllSpilledToReg;
2406
}
2407
2408
bool PPCFrameLowering::spillCalleeSavedRegisters(
2409
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2410
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2411
2412
MachineFunction *MF = MBB.getParent();
2413
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2414
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2415
bool MustSaveTOC = FI->mustSaveTOC();
2416
DebugLoc DL;
2417
bool CRSpilled = false;
2418
MachineInstrBuilder CRMIB;
2419
BitVector Spilled(TRI->getNumRegs());
2420
2421
VSRContainingGPRs.clear();
2422
2423
// Map each VSR to the GPRs that will be spilled into it. A single VSR can hold
// one or two GPRs, so we need a table to record this for the later save/restore.
2425
for (const CalleeSavedInfo &Info : CSI) {
2426
if (Info.isSpilledToReg()) {
2427
auto &SpilledVSR =
2428
VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2429
assert(SpilledVSR.second == 0 &&
2430
"Can't spill more than two GPRs into VSR!");
2431
if (SpilledVSR.first == 0)
2432
SpilledVSR.first = Info.getReg();
2433
else
2434
SpilledVSR.second = Info.getReg();
2435
}
2436
}
2437
2438
for (const CalleeSavedInfo &I : CSI) {
2439
Register Reg = I.getReg();
2440
2441
// CR2 through CR4 are the nonvolatile CR fields.
2442
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2443
2444
// Add the callee-saved register as live-in; it's killed at the spill.
2445
// Do not do this for callee-saved registers that are live-in to the
2446
// function because they will already be marked live-in and this will be
2447
// adding it for a second time. It is an error to add the same register
2448
// to the set more than once.
2449
const MachineRegisterInfo &MRI = MF->getRegInfo();
2450
bool IsLiveIn = MRI.isLiveIn(Reg);
2451
if (!IsLiveIn)
2452
MBB.addLiveIn(Reg);
2453
2454
if (CRSpilled && IsCRField) {
2455
CRMIB.addReg(Reg, RegState::ImplicitKill);
2456
continue;
2457
}
2458
2459
// The actual spill will happen in the prologue.
2460
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2461
continue;
2462
2463
// Insert the spill to the stack frame.
2464
if (IsCRField) {
2465
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2466
if (!Subtarget.is32BitELFABI()) {
2467
// The actual spill will happen at the start of the prologue.
2468
FuncInfo->addMustSaveCR(Reg);
2469
} else {
2470
CRSpilled = true;
2471
FuncInfo->setSpillsCR();
2472
2473
// 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
2474
// the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2475
CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2476
.addReg(Reg, RegState::ImplicitKill);
2477
2478
MBB.insert(MI, CRMIB);
2479
MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2480
.addReg(PPC::R12,
2481
getKillRegState(true)),
2482
I.getFrameIdx()));
2483
}
2484
} else {
2485
if (I.isSpilledToReg()) {
2486
unsigned Dst = I.getDstReg();
2487
2488
if (Spilled[Dst])
2489
continue;
2490
2491
if (VSRContainingGPRs[Dst].second != 0) {
2492
assert(Subtarget.hasP9Vector() &&
2493
"mtvsrdd is unavailable on pre-P9 targets.");
2494
2495
NumPESpillVSR += 2;
2496
BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2497
.addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2498
.addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2499
} else if (VSRContainingGPRs[Dst].second == 0) {
2500
assert(Subtarget.hasP8Vector() &&
2501
"Can't move GPR to VSR on pre-P8 targets.");
2502
2503
++NumPESpillVSR;
2504
BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2505
TRI->getSubReg(Dst, PPC::sub_64))
2506
.addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2507
} else {
2508
llvm_unreachable("More than two GPRs spilled to a VSR!");
2509
}
2510
Spilled.set(Dst);
2511
} else {
2512
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2513
// Use !IsLiveIn for the kill flag.
2514
// We do not want to kill registers that are live in this function
2515
// before their use because they will become undefined registers.
2516
// Functions without NoUnwind need to preserve the order of elements in
2517
// saved vector registers.
2518
if (Subtarget.needsSwapsForVSXMemOps() &&
2519
!MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2520
TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2521
I.getFrameIdx(), RC, TRI);
2522
else
2523
TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2524
TRI, Register());
2525
}
2526
}
2527
}
2528
return true;
2529
}
2530
2531
static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2532
bool CR4Spilled, MachineBasicBlock &MBB,
2533
MachineBasicBlock::iterator MI,
2534
ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2535
2536
MachineFunction *MF = MBB.getParent();
2537
const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2538
DebugLoc DL;
2539
unsigned MoveReg = PPC::R12;
2540
2541
// 32-bit: FP-relative
2542
MBB.insert(MI,
2543
addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2544
CSI[CSIIndex].getFrameIdx()));
2545
2546
unsigned RestoreOp = PPC::MTOCRF;
2547
if (CR2Spilled)
2548
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2549
.addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2550
2551
if (CR3Spilled)
2552
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2553
.addReg(MoveReg, getKillRegState(!CR4Spilled)));
2554
2555
if (CR4Spilled)
2556
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2557
.addReg(MoveReg, getKillRegState(true)));
2558
}
2559
2560
/// Remove a call-frame setup/destroy pseudo instruction from \p MBB.
///
/// When guaranteed tail-call optimization is enabled and the pseudo is
/// ADJCALLSTACKUP with a non-zero callee-popped amount (operand 1), code is
/// emitted before the pseudo that adds the negated amount to the stack
/// pointer. In every case the pseudo itself is erased.
///
/// \returns the iterator produced by erasing \p I.
MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const bool IsTailCallStackUp =
      MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP;

  int CalleeAmt = 0;
  if (IsTailCallStackUp)
    CalleeAmt = I->getOperand(1).getImm();

  if (CalleeAmt != 0) {
    const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
    const DebugLoc &dl = I->getDebugLoc();
    const bool is64Bit = Subtarget.isPPC64();
    const unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
    const unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;

    // Add (actually subtract) back the amount the callee popped on return.
    const int Adjust = -CalleeAmt;

    if (isInt<16>(Adjust)) {
      // The adjustment fits in a signed 16-bit immediate: one add-immediate.
      BuildMI(MBB, I, dl, TII.get(is64Bit ? PPC::ADDI8 : PPC::ADDI), StackReg)
          .addReg(StackReg, RegState::Kill)
          .addImm(Adjust);
    } else {
      // Otherwise build the value in the temporary register from its high
      // and low 16-bit halves, then add it to the stack pointer.
      BuildMI(MBB, I, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), TmpReg)
          .addImm(Adjust >> 16);
      BuildMI(MBB, I, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), TmpReg)
          .addReg(TmpReg, RegState::Kill)
          .addImm(Adjust & 0xFFFF);
      BuildMI(MBB, I, dl, TII.get(is64Bit ? PPC::ADD8 : PPC::ADD4), StackReg)
          .addReg(StackReg, RegState::Kill)
          .addReg(TmpReg);
    }
  }

  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}
2598
2599
static bool isCalleeSavedCR(unsigned Reg) {
2600
return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2601
}
2602
2603
bool PPCFrameLowering::restoreCalleeSavedRegisters(
2604
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2605
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2606
MachineFunction *MF = MBB.getParent();
2607
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2608
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2609
bool MustSaveTOC = FI->mustSaveTOC();
2610
bool CR2Spilled = false;
2611
bool CR3Spilled = false;
2612
bool CR4Spilled = false;
2613
unsigned CSIIndex = 0;
2614
BitVector Restored(TRI->getNumRegs());
2615
2616
// Initialize insertion-point logic; we will be restoring in reverse
2617
// order of spill.
2618
MachineBasicBlock::iterator I = MI, BeforeI = I;
2619
bool AtStart = I == MBB.begin();
2620
2621
if (!AtStart)
2622
--BeforeI;
2623
2624
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2625
Register Reg = CSI[i].getReg();
2626
2627
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2628
continue;
2629
2630
// Restore of callee saved condition register field is handled during
2631
// epilogue insertion.
2632
if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2633
continue;
2634
2635
if (Reg == PPC::CR2) {
2636
CR2Spilled = true;
2637
// The spill slot is associated only with CR2, which is the
2638
// first nonvolatile spilled. Save it here.
2639
CSIIndex = i;
2640
continue;
2641
} else if (Reg == PPC::CR3) {
2642
CR3Spilled = true;
2643
continue;
2644
} else if (Reg == PPC::CR4) {
2645
CR4Spilled = true;
2646
continue;
2647
} else {
2648
// On 32-bit ELF when we first encounter a non-CR register after seeing at
2649
// least one CR register, restore all spilled CRs together.
2650
if (CR2Spilled || CR3Spilled || CR4Spilled) {
2651
bool is31 = needsFP(*MF);
2652
restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2653
CSIIndex);
2654
CR2Spilled = CR3Spilled = CR4Spilled = false;
2655
}
2656
2657
if (CSI[i].isSpilledToReg()) {
2658
DebugLoc DL;
2659
unsigned Dst = CSI[i].getDstReg();
2660
2661
if (Restored[Dst])
2662
continue;
2663
2664
if (VSRContainingGPRs[Dst].second != 0) {
2665
assert(Subtarget.hasP9Vector());
2666
NumPEReloadVSR += 2;
2667
BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2668
VSRContainingGPRs[Dst].second)
2669
.addReg(Dst);
2670
BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2671
VSRContainingGPRs[Dst].first)
2672
.addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2673
} else if (VSRContainingGPRs[Dst].second == 0) {
2674
assert(Subtarget.hasP8Vector());
2675
++NumPEReloadVSR;
2676
BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2677
VSRContainingGPRs[Dst].first)
2678
.addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2679
} else {
2680
llvm_unreachable("More than two GPRs spilled to a VSR!");
2681
}
2682
2683
Restored.set(Dst);
2684
2685
} else {
2686
// Default behavior for non-CR saves.
2687
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2688
2689
// Functions without NoUnwind need to preserve the order of elements in
2690
// saved vector registers.
2691
if (Subtarget.needsSwapsForVSXMemOps() &&
2692
!MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2693
TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2694
TRI);
2695
else
2696
TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2697
Register());
2698
2699
assert(I != MBB.begin() &&
2700
"loadRegFromStackSlot didn't insert any code!");
2701
}
2702
}
2703
2704
// Insert in reverse order.
2705
if (AtStart)
2706
I = MBB.begin();
2707
else {
2708
I = BeforeI;
2709
++I;
2710
}
2711
}
2712
2713
// If we haven't yet spilled the CRs, do so now.
2714
if (CR2Spilled || CR3Spilled || CR4Spilled) {
2715
assert(Subtarget.is32BitELFABI() &&
2716
"Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2717
bool is31 = needsFP(*MF);
2718
restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2719
}
2720
2721
return true;
2722
}
2723
2724
uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2725
return TOCSaveOffset;
2726
}
2727
2728
uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2729
return FramePointerSaveOffset;
2730
}
2731
2732
uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2733
return BasePointerSaveOffset;
2734
}
2735
2736
bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2737
if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2738
return false;
2739
return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2740
}
2741
2742
void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF,
2743
BitVector &SavedRegs) const {
2744
// The AIX ABI uses traceback tables for EH which require that if callee-saved
2745
// register N is used, all registers N-31 must be saved/restored.
2746
// NOTE: The check for AIX is not actually what is relevant. Traceback tables
2747
// on Linux have the same requirements. It is just that AIX is the only ABI
2748
// for which we actually use traceback tables. If another ABI needs to be
2749
// supported that also uses them, we can add a check such as
2750
// Subtarget.usesTraceBackTables().
2751
assert(Subtarget.isAIXABI() &&
2752
"Function updateCalleeSaves should only be called for AIX.");
2753
2754
// If there are no callee saves then there is nothing to do.
2755
if (SavedRegs.none())
2756
return;
2757
2758
const MCPhysReg *CSRegs =
2759
Subtarget.getRegisterInfo()->getCalleeSavedRegs(&MF);
2760
MCPhysReg LowestGPR = PPC::R31;
2761
MCPhysReg LowestG8R = PPC::X31;
2762
MCPhysReg LowestFPR = PPC::F31;
2763
MCPhysReg LowestVR = PPC::V31;
2764
2765
// Traverse the CSRs twice so as not to rely on ascending ordering of
2766
// registers in the array. The first pass finds the lowest numbered
2767
// register and the second pass marks all higher numbered registers
2768
// for spilling.
2769
for (int i = 0; CSRegs[i]; i++) {
2770
// Get the lowest numbered register for each class that actually needs
2771
// to be saved.
2772
MCPhysReg Cand = CSRegs[i];
2773
if (!SavedRegs.test(Cand))
2774
continue;
2775
if (PPC::GPRCRegClass.contains(Cand) && Cand < LowestGPR)
2776
LowestGPR = Cand;
2777
else if (PPC::G8RCRegClass.contains(Cand) && Cand < LowestG8R)
2778
LowestG8R = Cand;
2779
else if ((PPC::F4RCRegClass.contains(Cand) ||
2780
PPC::F8RCRegClass.contains(Cand)) &&
2781
Cand < LowestFPR)
2782
LowestFPR = Cand;
2783
else if (PPC::VRRCRegClass.contains(Cand) && Cand < LowestVR)
2784
LowestVR = Cand;
2785
}
2786
2787
for (int i = 0; CSRegs[i]; i++) {
2788
MCPhysReg Cand = CSRegs[i];
2789
if ((PPC::GPRCRegClass.contains(Cand) && Cand > LowestGPR) ||
2790
(PPC::G8RCRegClass.contains(Cand) && Cand > LowestG8R) ||
2791
((PPC::F4RCRegClass.contains(Cand) ||
2792
PPC::F8RCRegClass.contains(Cand)) &&
2793
Cand > LowestFPR) ||
2794
(PPC::VRRCRegClass.contains(Cand) && Cand > LowestVR))
2795
SavedRegs.set(Cand);
2796
}
2797
}
2798
2799
uint64_t PPCFrameLowering::getStackThreshold() const {
2800
// On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
2801
// use `add r1, r1, <scratch_reg>` to release the stack frame.
2802
// Scratch register contains a signed 64-bit number, which is negative
2803
// when extending the stack and is positive when releasing the stack frame.
2804
// To make `stux` and `add` paired, the absolute value of the number contained
2805
// in the scratch register should be the same. Thus the maximum stack size
2806
// is (2^63)-1, i.e., LONG_MAX.
2807
if (Subtarget.isPPC64())
2808
return LONG_MAX;
2809
2810
return TargetFrameLowering::getStackThreshold();
2811
}
2812
2813