GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
1
//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This file implements the lowering of LLVM calls to DAG nodes.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "X86.h"
15
#include "X86CallingConv.h"
16
#include "X86FrameLowering.h"
17
#include "X86ISelLowering.h"
18
#include "X86InstrBuilder.h"
19
#include "X86MachineFunctionInfo.h"
20
#include "X86TargetMachine.h"
21
#include "X86TargetObjectFile.h"
22
#include "llvm/ADT/Statistic.h"
23
#include "llvm/Analysis/ObjCARCUtil.h"
24
#include "llvm/CodeGen/MachineJumpTableInfo.h"
25
#include "llvm/CodeGen/MachineModuleInfo.h"
26
#include "llvm/CodeGen/WinEHFuncInfo.h"
27
#include "llvm/IR/DiagnosticInfo.h"
28
#include "llvm/IR/IRBuilder.h"
29
#include "llvm/IR/Module.h"
30
31
#define DEBUG_TYPE "x86-isel"
32
33
using namespace llvm;
34
35
STATISTIC(NumTailCalls, "Number of tail calls");
36
37
/// Call this when the user attempts to do something unsupported, like
38
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
39
/// report_fatal_error, so calling code should attempt to recover without
40
/// crashing.
41
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
42
const char *Msg) {
43
MachineFunction &MF = DAG.getMachineFunction();
44
DAG.getContext()->diagnose(
45
DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
46
}
47
48
/// Returns true if a CC can dynamically exclude a register from the list of
49
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
50
/// the return registers.
51
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
52
switch (CC) {
53
default:
54
return false;
55
case CallingConv::X86_RegCall:
56
case CallingConv::PreserveMost:
57
case CallingConv::PreserveAll:
58
return true;
59
}
60
}
61
62
/// Returns true if a CC can dynamically exclude a register from the list of
63
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
64
/// the parameters.
65
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
66
return CC == CallingConv::X86_RegCall;
67
}
68
69
static std::pair<MVT, unsigned>
70
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
71
const X86Subtarget &Subtarget) {
72
// v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
73
// convention is one that uses k registers.
74
if (NumElts == 2)
75
return {MVT::v2i64, 1};
76
if (NumElts == 4)
77
return {MVT::v4i32, 1};
78
if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
79
CC != CallingConv::Intel_OCL_BI)
80
return {MVT::v8i16, 1};
81
if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
82
CC != CallingConv::Intel_OCL_BI)
83
return {MVT::v16i8, 1};
84
// v32i1 passes in ymm unless we have BWI and the calling convention is
85
// regcall.
86
if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
87
return {MVT::v32i8, 1};
88
// Split v64i1 vectors if we don't have v64i8 available.
89
if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
90
if (Subtarget.useAVX512Regs())
91
return {MVT::v64i8, 1};
92
return {MVT::v32i8, 2};
93
}
94
95
// Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
96
if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
97
NumElts > 64)
98
return {MVT::i8, NumElts};
99
100
return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
101
}
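// A rough, illustrative summary of the mapping above, assuming the default C
// calling convention on an AVX-512 subtarget:
//   v2i1  -> 1 x v2i64 (xmm)    v4i1  -> 1 x v4i32 (xmm)
//   v8i1  -> 1 x v8i16 (xmm)    v16i1 -> 1 x v16i8 (xmm)
//   v32i1 -> 1 x v32i8 (ymm)    v64i1 -> 1 x v64i8 (zmm, BWI + 512-bit regs)
//                               v64i1 -> 2 x v32i8 (BWI, 256-bit preference)
// Odd-sized or over-wide vXi1 types become NumElts scalar i8 pieces, while
// regcall / Intel OCL BI keep the 8/16/32/64-element masks out of this table
// (INVALID is returned) so the default legalization can leave them in k
// registers.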
102
103
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
104
CallingConv::ID CC,
105
EVT VT) const {
106
if (VT.isVector()) {
107
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
108
unsigned NumElts = VT.getVectorNumElements();
109
110
MVT RegisterVT;
111
unsigned NumRegisters;
112
std::tie(RegisterVT, NumRegisters) =
113
handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
114
if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
115
return RegisterVT;
116
}
117
118
if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
119
return MVT::v8f16;
120
}
121
122
// We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
123
if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
124
!Subtarget.hasX87())
125
return MVT::i32;
126
127
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
128
return getRegisterTypeForCallingConv(Context, CC,
129
VT.changeVectorElementType(MVT::f16));
130
131
if (VT == MVT::bf16)
132
return MVT::f16;
133
134
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
135
}
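// For example, with the x87 unit disabled on a 32-bit target a returned
// double is given the i32 register type here (and split into two registers by
// getNumRegistersForCallingConv below; three for x86_fp80), and __bf16 values
// are simply handed the f16 register type so they follow the f16 ABI.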
136
137
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
138
CallingConv::ID CC,
139
EVT VT) const {
140
if (VT.isVector()) {
141
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
142
unsigned NumElts = VT.getVectorNumElements();
143
144
MVT RegisterVT;
145
unsigned NumRegisters;
146
std::tie(RegisterVT, NumRegisters) =
147
handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
148
if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
149
return NumRegisters;
150
}
151
152
if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
153
return 1;
154
}
155
156
// We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
157
// x87 is disabled.
158
if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
159
if (VT == MVT::f64)
160
return 2;
161
if (VT == MVT::f80)
162
return 3;
163
}
164
165
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
166
return getNumRegistersForCallingConv(Context, CC,
167
VT.changeVectorElementType(MVT::f16));
168
169
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
170
}
171
172
unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
173
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
174
unsigned &NumIntermediates, MVT &RegisterVT) const {
175
// Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
176
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
177
Subtarget.hasAVX512() &&
178
(!isPowerOf2_32(VT.getVectorNumElements()) ||
179
(VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
180
VT.getVectorNumElements() > 64)) {
181
RegisterVT = MVT::i8;
182
IntermediateVT = MVT::i1;
183
NumIntermediates = VT.getVectorNumElements();
184
return NumIntermediates;
185
}
186
187
// Split v64i1 vectors if we don't have v64i8 available.
188
if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
189
CC != CallingConv::X86_RegCall) {
190
RegisterVT = MVT::v32i8;
191
IntermediateVT = MVT::v32i1;
192
NumIntermediates = 2;
193
return 2;
194
}
195
196
// Split vNbf16 vectors according to vNf16.
197
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
198
VT = VT.changeVectorElementType(MVT::f16);
199
200
return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
201
NumIntermediates, RegisterVT);
202
}
203
204
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
205
LLVMContext& Context,
206
EVT VT) const {
207
if (!VT.isVector())
208
return MVT::i8;
209
210
if (Subtarget.hasAVX512()) {
211
// Figure out what this type will be legalized to.
212
EVT LegalVT = VT;
213
while (getTypeAction(Context, LegalVT) != TypeLegal)
214
LegalVT = getTypeToTransformTo(Context, LegalVT);
215
216
// If we got a 512-bit vector then we'll definitely have a vXi1 compare.
217
if (LegalVT.getSimpleVT().is512BitVector())
218
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
219
220
if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
221
// If we legalized to less than a 512-bit vector, then we will use a vXi1
222
// compare for vXi32/vXi64 for sure. If we have BWI we will also support
223
// vXi16/vXi8.
224
MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
225
if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
226
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
227
}
228
}
229
230
return VT.changeVectorElementTypeToInteger();
231
}
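// As an illustration: with AVX512VL a v8i32 compare yields a v8i1 mask, and
// with BWI as well even a v32i8 compare does; without VLX only types that
// legalize to 512-bit vectors get a vXi1 result, and everything else keeps
// the pre-AVX-512 behaviour of an integer vector as wide as the operands
// (e.g. a v4i32 setcc produces v4i32).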
232
233
/// Helper for getByValTypeAlignment to determine
234
/// the desired ByVal argument alignment.
235
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
236
if (MaxAlign == 16)
237
return;
238
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
239
if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
240
MaxAlign = Align(16);
241
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
242
Align EltAlign;
243
getMaxByValAlign(ATy->getElementType(), EltAlign);
244
if (EltAlign > MaxAlign)
245
MaxAlign = EltAlign;
246
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
247
for (auto *EltTy : STy->elements()) {
248
Align EltAlign;
249
getMaxByValAlign(EltTy, EltAlign);
250
if (EltAlign > MaxAlign)
251
MaxAlign = EltAlign;
252
if (MaxAlign == 16)
253
break;
254
}
255
}
256
}
257
258
/// Return the desired alignment for ByVal aggregate
259
/// function arguments in the caller parameter area. For X86, aggregates
260
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
261
/// are at 4-byte boundaries.
262
uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
263
const DataLayout &DL) const {
264
if (Subtarget.is64Bit()) {
265
// Max of 8 and alignment of type.
266
Align TyAlign = DL.getABITypeAlign(Ty);
267
if (TyAlign > 8)
268
return TyAlign.value();
269
return 8;
270
}
271
272
Align Alignment(4);
273
if (Subtarget.hasSSE1())
274
getMaxByValAlign(Ty, Alignment);
275
return Alignment.value();
276
}
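// Concretely: on x86-64 a byval aggregate gets max(8, its ABI alignment); on
// 32-bit targets with SSE a struct that (transitively) contains a 128-bit
// vector member is bumped to a 16-byte boundary, and everything else stays at
// 4 bytes. For instance, struct { int a; __m128 v; } is passed 16-aligned,
// while struct { int a; double d; } is only 4-aligned.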
277
278
/// It returns EVT::Other if the type should be determined using generic
279
/// target-independent logic.
280
/// For vector ops we check that the overall size isn't larger than our
281
/// preferred vector width.
282
EVT X86TargetLowering::getOptimalMemOpType(
283
const MemOp &Op, const AttributeList &FuncAttributes) const {
284
if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
285
if (Op.size() >= 16 &&
286
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
287
// FIXME: Check if unaligned 64-byte accesses are slow.
288
if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
289
(Subtarget.getPreferVectorWidth() >= 512)) {
290
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
291
}
292
// FIXME: Check if unaligned 32-byte accesses are slow.
293
if (Op.size() >= 32 && Subtarget.hasAVX() &&
294
Subtarget.useLight256BitInstructions()) {
295
// Although this isn't a well-supported type for AVX1, we'll let
296
// legalization and shuffle lowering produce the optimal codegen. If we
297
// choose an optimal type with a vector element larger than a byte,
298
// getMemsetStores() may create an intermediate splat (using an integer
299
// multiply) before we splat as a vector.
300
return MVT::v32i8;
301
}
302
if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
303
return MVT::v16i8;
304
// TODO: Can SSE1 handle a byte vector?
305
// If we have SSE1 registers we should be able to use them.
306
if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
307
(Subtarget.getPreferVectorWidth() >= 128))
308
return MVT::v4f32;
309
} else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
310
Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
311
// Do not use f64 to lower memcpy if source is string constant. It's
312
// better to use i32 to avoid the loads.
313
// Also, do not use f64 to lower memset unless this is a memset of zeros.
314
// The gymnastics of splatting a byte value into an XMM register and then
315
// only using 8-byte stores (because this is a CPU with slow unaligned
316
// 16-byte accesses) makes that a loser.
317
return MVT::f64;
318
}
319
}
320
// This is a compromise. If we reach here, unaligned accesses may be slow on
321
// this target. However, creating smaller, aligned accesses could be even
322
// slower and would certainly be a lot more code.
323
if (Subtarget.is64Bit() && Op.size() >= 8)
324
return MVT::i64;
325
return MVT::i32;
326
}
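// Roughly, the selection above works out to (NoImplicitFloat not set, and the
// wide 16-byte-or-more path taken):
//   >= 64 bytes, AVX-512 with 512-bit preference   -> v64i8 (v16i32 w/o BWI)
//   >= 32 bytes, AVX with cheap 256-bit ops        -> v32i8
//   >= 16 bytes, SSE2                              -> v16i8 (v4f32 for SSE1)
//   >= 8 bytes, 32-bit SSE2, zero-memset or a
//     memcpy whose source is not a string constant -> f64
//   otherwise                                      -> i64 (64-bit) or i32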
327
328
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
329
if (VT == MVT::f32)
330
return Subtarget.hasSSE1();
331
if (VT == MVT::f64)
332
return Subtarget.hasSSE2();
333
return true;
334
}
335
336
static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
337
return (8 * Alignment.value()) % SizeInBits == 0;
338
}
339
340
bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
341
if (isBitAligned(Alignment, VT.getSizeInBits()))
342
return true;
343
switch (VT.getSizeInBits()) {
344
default:
345
// 8-byte and under are always assumed to be fast.
346
return true;
347
case 128:
348
return !Subtarget.isUnalignedMem16Slow();
349
case 256:
350
return !Subtarget.isUnalignedMem32Slow();
351
// TODO: What about AVX-512 (512-bit) accesses?
352
}
353
}
354
355
bool X86TargetLowering::allowsMisalignedMemoryAccesses(
356
EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
357
unsigned *Fast) const {
358
if (Fast)
359
*Fast = isMemoryAccessFast(VT, Alignment);
360
// NonTemporal vector memory ops must be aligned.
361
if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
362
// NT loads can only be vector aligned, so if its less aligned than the
363
// minimum vector size (which we can split the vector down to), we might as
364
// well use a regular unaligned vector load.
365
// We don't have any NT loads pre-SSE41.
366
if (!!(Flags & MachineMemOperand::MOLoad))
367
return (Alignment < 16 || !Subtarget.hasSSE41());
368
return false;
369
}
370
// Misaligned accesses of any size are always allowed.
371
return true;
372
}
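// Put differently: ordinary accesses of any alignment are always allowed and
// *Fast only reports whether they are expected to be cheap. The exception is
// non-temporal vector ops, which must stay aligned: NT vector stores are
// never reported as misaligned-OK, and NT loads are only folded back into a
// regular unaligned load when they are under-aligned (< 16 bytes) or the
// target lacks SSE4.1's MOVNTDQA.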
373
374
bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
375
const DataLayout &DL, EVT VT,
376
unsigned AddrSpace, Align Alignment,
377
MachineMemOperand::Flags Flags,
378
unsigned *Fast) const {
379
if (Fast)
380
*Fast = isMemoryAccessFast(VT, Alignment);
381
if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
382
if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
383
/*Fast=*/nullptr))
384
return true;
385
// NonTemporal vector memory ops are special, and must be aligned.
386
if (!isBitAligned(Alignment, VT.getSizeInBits()))
387
return false;
388
switch (VT.getSizeInBits()) {
389
case 128:
390
if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
391
return true;
392
if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
393
return true;
394
return false;
395
case 256:
396
if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
397
return true;
398
if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
399
return true;
400
return false;
401
case 512:
402
if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
403
return true;
404
return false;
405
default:
406
return false; // Don't have NonTemporal vector memory ops of this size.
407
}
408
}
409
return true;
410
}
411
412
/// Return the entry encoding for a jump table in the
413
/// current function. The returned value is a member of the
414
/// MachineJumpTableInfo::JTEntryKind enum.
415
unsigned X86TargetLowering::getJumpTableEncoding() const {
416
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
417
// symbol.
418
if (isPositionIndependent() && Subtarget.isPICStyleGOT())
419
return MachineJumpTableInfo::EK_Custom32;
420
if (isPositionIndependent() &&
421
getTargetMachine().getCodeModel() == CodeModel::Large &&
422
!Subtarget.isTargetCOFF())
423
return MachineJumpTableInfo::EK_LabelDifference64;
424
425
// Otherwise, use the normal jump table encoding heuristics.
426
return TargetLowering::getJumpTableEncoding();
427
}
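// For example, a 32-bit -fPIC ELF build (GOT PIC style) emits each jump-table
// entry as a label@GOTOFF word via LowerCustomJumpTableEntry below, while a
// position-independent large-code-model x86-64 (non-COFF) build uses 64-bit
// label differences; everything else falls back to the generic encoding.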
428
429
bool X86TargetLowering::useSoftFloat() const {
430
return Subtarget.useSoftFloat();
431
}
432
433
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
434
ArgListTy &Args) const {
435
436
// Only relabel X86-32 for C / Stdcall CCs.
437
if (Subtarget.is64Bit())
438
return;
439
if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
440
return;
441
unsigned ParamRegs = 0;
442
if (auto *M = MF->getFunction().getParent())
443
ParamRegs = M->getNumberRegisterParameters();
444
445
// Mark the first N integer or pointer arguments as being passed in registers.
446
for (auto &Arg : Args) {
447
Type *T = Arg.Ty;
448
if (T->isIntOrPtrTy())
449
if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
450
unsigned numRegs = 1;
451
if (MF->getDataLayout().getTypeAllocSize(T) > 4)
452
numRegs = 2;
453
if (ParamRegs < numRegs)
454
return;
455
ParamRegs -= numRegs;
456
Arg.IsInReg = true;
457
}
458
}
459
}
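// Sketch of the effect: if a 32-bit module was built with a register-parameter
// ABI (e.g. Clang's -mregparm=N sets the NumRegisterParameters module flag),
// the loop above marks up to N integer/pointer libcall arguments as 'inreg',
// with a 64-bit argument consuming two of the N slots, so compiler-generated
// calls such as memcpy follow the same ABI as user code.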
460
461
const MCExpr *
462
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
463
const MachineBasicBlock *MBB,
464
unsigned uid,MCContext &Ctx) const{
465
assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
466
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
467
// entries.
468
return MCSymbolRefExpr::create(MBB->getSymbol(),
469
MCSymbolRefExpr::VK_GOTOFF, Ctx);
470
}
471
472
/// Returns relocation base for the given PIC jumptable.
473
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
474
SelectionDAG &DAG) const {
475
if (!Subtarget.is64Bit())
476
// This doesn't have SDLoc associated with it, but is not really the
477
// same as a Register.
478
return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
479
getPointerTy(DAG.getDataLayout()));
480
return Table;
481
}
482
483
/// This returns the relocation base for the given PIC jumptable,
484
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
485
const MCExpr *X86TargetLowering::
486
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
487
MCContext &Ctx) const {
488
// X86-64 uses RIP relative addressing based on the jump table label.
489
if (Subtarget.isPICStyleRIPRel() ||
490
(Subtarget.is64Bit() &&
491
getTargetMachine().getCodeModel() == CodeModel::Large))
492
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
493
494
// Otherwise, the reference is relative to the PIC base.
495
return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
496
}
497
498
std::pair<const TargetRegisterClass *, uint8_t>
499
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
500
MVT VT) const {
501
const TargetRegisterClass *RRC = nullptr;
502
uint8_t Cost = 1;
503
switch (VT.SimpleTy) {
504
default:
505
return TargetLowering::findRepresentativeClass(TRI, VT);
506
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
507
RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
508
break;
509
case MVT::x86mmx:
510
RRC = &X86::VR64RegClass;
511
break;
512
case MVT::f32: case MVT::f64:
513
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
514
case MVT::v4f32: case MVT::v2f64:
515
case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
516
case MVT::v8f32: case MVT::v4f64:
517
case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
518
case MVT::v16f32: case MVT::v8f64:
519
RRC = &X86::VR128XRegClass;
520
break;
521
}
522
return std::make_pair(RRC, Cost);
523
}
524
525
unsigned X86TargetLowering::getAddressSpace() const {
526
if (Subtarget.is64Bit())
527
return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
528
return 256;
529
}
530
531
static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
532
return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
533
(TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
534
}
535
536
static Constant* SegmentOffset(IRBuilderBase &IRB,
537
int Offset, unsigned AddressSpace) {
538
return ConstantExpr::getIntToPtr(
539
ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
540
IRB.getPtrTy(AddressSpace));
541
}
542
543
Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
544
// glibc, bionic, and Fuchsia have a special slot for the stack guard in
545
// tcbhead_t; use it instead of the usual global variable (see
546
// sysdeps/{i386,x86_64}/nptl/tls.h)
547
if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
548
unsigned AddressSpace = getAddressSpace();
549
550
// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
551
if (Subtarget.isTargetFuchsia())
552
return SegmentOffset(IRB, 0x10, AddressSpace);
553
554
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
555
// In particular, some users may customize the base register and offset.
556
int Offset = M->getStackProtectorGuardOffset();
557
// If we don't set -stack-protector-guard-offset value:
558
// %fs:0x28, unless we're using a Kernel code model, in which case
559
// it's %gs:0x28. On i386 it is %gs:0x14.
560
if (Offset == INT_MAX)
561
Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
562
563
StringRef GuardReg = M->getStackProtectorGuardReg();
564
if (GuardReg == "fs")
565
AddressSpace = X86AS::FS;
566
else if (GuardReg == "gs")
567
AddressSpace = X86AS::GS;
568
569
// Use the guard symbol if the user specified one.
570
StringRef GuardSymb = M->getStackProtectorGuardSymbol();
571
if (!GuardSymb.empty()) {
572
GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
573
if (!GV) {
574
Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
575
: Type::getInt32Ty(M->getContext());
576
GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
577
nullptr, GuardSymb, nullptr,
578
GlobalValue::NotThreadLocal, AddressSpace);
579
if (!Subtarget.isTargetDarwin())
580
GV->setDSOLocal(M->getDirectAccessExternalData());
581
}
582
return GV;
583
}
584
585
return SegmentOffset(IRB, Offset, AddressSpace);
586
}
587
return TargetLowering::getIRStackGuard(IRB);
588
}
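// As a concrete example of the TLS slot used here: on x86-64 glibc with no
// -stack-protector-guard* overrides the value returned is the constant
//   inttoptr (i32 40 to ptr addrspace(257))          ; i.e. %fs:0x28
// from which the stack-protector code later loads the canary; on i386 it is
// %gs:0x14 (address space 256), and Fuchsia always uses offset 0x10.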
589
590
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
591
// MSVC CRT provides functionalities for stack protection.
592
if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
593
Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
594
// MSVC CRT has a global variable holding security cookie.
595
M.getOrInsertGlobal("__security_cookie",
596
PointerType::getUnqual(M.getContext()));
597
598
// MSVC CRT has a function to validate security cookie.
599
FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
600
"__security_check_cookie", Type::getVoidTy(M.getContext()),
601
PointerType::getUnqual(M.getContext()));
602
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
603
F->setCallingConv(CallingConv::X86_FastCall);
604
F->addParamAttr(0, Attribute::AttrKind::InReg);
605
}
606
return;
607
}
608
609
StringRef GuardMode = M.getStackProtectorGuard();
610
611
// glibc, bionic, and Fuchsia have a special slot for the stack guard.
612
if ((GuardMode == "tls" || GuardMode.empty()) &&
613
hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
614
return;
615
TargetLowering::insertSSPDeclarations(M);
616
}
617
618
Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
619
// MSVC CRT has a global variable holding security cookie.
620
if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
621
Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
622
return M.getGlobalVariable("__security_cookie");
623
}
624
return TargetLowering::getSDagStackGuard(M);
625
}
626
627
Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
628
// MSVC CRT has a function to validate security cookie.
629
if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
630
Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
631
return M.getFunction("__security_check_cookie");
632
}
633
return TargetLowering::getSSPStackGuardCheck(M);
634
}
635
636
Value *
637
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
638
// Android provides a fixed TLS slot for the SafeStack pointer. See the
639
// definition of TLS_SLOT_SAFESTACK in
640
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
641
if (Subtarget.isTargetAndroid()) {
642
// %fs:0x48, unless we're using a Kernel code model, in which case it's
643
// %gs:0x48. On i386 it is %gs:0x24.
644
int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
645
return SegmentOffset(IRB, Offset, getAddressSpace());
646
}
647
648
// Fuchsia is similar.
649
if (Subtarget.isTargetFuchsia()) {
650
// <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
651
return SegmentOffset(IRB, 0x18, getAddressSpace());
652
}
653
654
return TargetLowering::getSafeStackPointerLocation(IRB);
655
}
656
657
//===----------------------------------------------------------------------===//
658
// Return Value Calling Convention Implementation
659
//===----------------------------------------------------------------------===//
660
661
bool X86TargetLowering::CanLowerReturn(
662
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
663
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
664
SmallVector<CCValAssign, 16> RVLocs;
665
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
666
return CCInfo.CheckReturn(Outs, RetCC_X86);
667
}
668
669
const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
670
static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
671
return ScratchRegs;
672
}
673
674
ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
675
static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
676
return RCRegs;
677
}
678
679
/// Lowers mask values (v*i1) to the local register values.
680
/// \returns DAG node after lowering to register type
681
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
682
const SDLoc &DL, SelectionDAG &DAG) {
683
EVT ValVT = ValArg.getValueType();
684
685
if (ValVT == MVT::v1i1)
686
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
687
DAG.getIntPtrConstant(0, DL));
688
689
if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
690
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
691
// Two stage lowering might be required
692
// bitcast: v8i1 -> i8 / v16i1 -> i16
693
// anyextend: i8 -> i32 / i16 -> i32
694
EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
695
SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
696
if (ValLoc == MVT::i32)
697
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
698
return ValToCopy;
699
}
700
701
if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
702
(ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
703
// One stage lowering is required
704
// bitcast: v32i1 -> i32 / v64i1 -> i64
705
return DAG.getBitcast(ValLoc, ValArg);
706
}
707
708
return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
709
}
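// Example: returning a v16i1 mask in an i32 location becomes
//   bitcast v16i1 -> i16, then any_extend i16 -> i32
// whereas v32i1 -> i32 (or v64i1 -> i64) is a single bitcast, a v1i1 value is
// extracted to a scalar, and anything else is simply any-extended.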
710
711
/// Breaks v64i1 value into two registers and adds the new node to the DAG
712
static void Passv64i1ArgInRegs(
713
const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
714
SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
715
CCValAssign &NextVA, const X86Subtarget &Subtarget) {
716
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
717
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
718
assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
719
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
720
"The value should reside in two registers");
721
722
// Before splitting the value we cast it to i64
723
Arg = DAG.getBitcast(MVT::i64, Arg);
724
725
// Splitting the value into two i32 types
726
SDValue Lo, Hi;
727
std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
728
729
// Attach the two i32 types into corresponding registers
730
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
731
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
732
}
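// For instance, a v64i1 mask handled here on a 32-bit AVX512BW target is
// bitcast to i64, split into its low and high i32 halves with SplitScalar,
// and the halves are attached to the two consecutive GPR32 locations that the
// calling convention assigned (VA and NextVA).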
733
734
SDValue
735
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
736
bool isVarArg,
737
const SmallVectorImpl<ISD::OutputArg> &Outs,
738
const SmallVectorImpl<SDValue> &OutVals,
739
const SDLoc &dl, SelectionDAG &DAG) const {
740
MachineFunction &MF = DAG.getMachineFunction();
741
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
742
743
// In some cases we need to disable registers from the default CSR list.
744
// For example, when they are used as return registers (preserve_* and X86's
745
// regcall) or for argument passing (X86's regcall).
746
bool ShouldDisableCalleeSavedRegister =
747
shouldDisableRetRegFromCSR(CallConv) ||
748
MF.getFunction().hasFnAttribute("no_caller_saved_registers");
749
750
if (CallConv == CallingConv::X86_INTR && !Outs.empty())
751
report_fatal_error("X86 interrupts may not return any value");
752
753
SmallVector<CCValAssign, 16> RVLocs;
754
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
755
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
756
757
SmallVector<std::pair<Register, SDValue>, 4> RetVals;
758
for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
759
++I, ++OutsIndex) {
760
CCValAssign &VA = RVLocs[I];
761
assert(VA.isRegLoc() && "Can only return in registers!");
762
763
// Add the register to the CalleeSaveDisableRegs list.
764
if (ShouldDisableCalleeSavedRegister)
765
MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
766
767
SDValue ValToCopy = OutVals[OutsIndex];
768
EVT ValVT = ValToCopy.getValueType();
769
770
// Promote values to the appropriate types.
771
if (VA.getLocInfo() == CCValAssign::SExt)
772
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
773
else if (VA.getLocInfo() == CCValAssign::ZExt)
774
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
775
else if (VA.getLocInfo() == CCValAssign::AExt) {
776
if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
777
ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
778
else
779
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
780
}
781
else if (VA.getLocInfo() == CCValAssign::BCvt)
782
ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
783
784
assert(VA.getLocInfo() != CCValAssign::FPExt &&
785
"Unexpected FP-extend for return value.");
786
787
// Report an error if we have attempted to return a value via an XMM
788
// register and SSE was disabled.
789
if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
790
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
791
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
792
} else if (!Subtarget.hasSSE2() &&
793
X86::FR64XRegClass.contains(VA.getLocReg()) &&
794
ValVT == MVT::f64) {
795
// When returning a double via an XMM register, report an error if SSE2 is
796
// not enabled.
797
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
798
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
799
}
800
801
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
802
// the RET instruction and handled by the FP Stackifier.
803
if (VA.getLocReg() == X86::FP0 ||
804
VA.getLocReg() == X86::FP1) {
805
// If this is a copy from an xmm register to ST(0), use an FPExtend to
806
// change the value to the FP stack register class.
807
if (isScalarFPTypeInSSEReg(VA.getValVT()))
808
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
809
RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
810
// Don't emit a copytoreg.
811
continue;
812
}
813
814
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
815
// which is returned in RAX / RDX.
816
if (Subtarget.is64Bit()) {
817
if (ValVT == MVT::x86mmx) {
818
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
819
ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
820
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
821
ValToCopy);
822
// If we don't have SSE2 available, convert to v4f32 so the generated
823
// register is legal.
824
if (!Subtarget.hasSSE2())
825
ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
826
}
827
}
828
}
829
830
if (VA.needsCustom()) {
831
assert(VA.getValVT() == MVT::v64i1 &&
832
"Currently the only custom case is when we split v64i1 to 2 regs");
833
834
Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
835
Subtarget);
836
837
// Add the second register to the CalleeSaveDisableRegs list.
838
if (ShouldDisableCalleeSavedRegister)
839
MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
840
} else {
841
RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
842
}
843
}
844
845
SDValue Glue;
846
SmallVector<SDValue, 6> RetOps;
847
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
848
// Operand #1 = Bytes To Pop
849
RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
850
MVT::i32));
851
852
// Copy the result values into the output registers.
853
for (auto &RetVal : RetVals) {
854
if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
855
RetOps.push_back(RetVal.second);
856
continue; // Don't emit a copytoreg.
857
}
858
859
Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
860
Glue = Chain.getValue(1);
861
RetOps.push_back(
862
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
863
}
864
865
// The Swift calling convention does not require us to copy the sret argument
866
// into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
867
868
// All x86 ABIs require that for returning structs by value we copy
869
// the sret argument into %rax/%eax (depending on ABI) for the return.
870
// We saved the argument into a virtual register in the entry block,
871
// so now we copy the value out and into %rax/%eax.
872
//
873
// Checking Function.hasStructRetAttr() here is insufficient because the IR
874
// may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
875
// false, then an sret argument may be implicitly inserted in the SelDAG. In
876
// either case FuncInfo->setSRetReturnReg() will have been called.
877
if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
878
// When we have both sret and another return value, we should use the
879
// original Chain stored in RetOps[0], instead of the current Chain updated
880
// in the above loop. If we only have sret, RetOps[0] equals to Chain.
881
882
// For the case of sret and another return value, we have
883
// Chain_0 at the function entry
884
// Chain_1 = getCopyToReg(Chain_0) in the above loop
885
// If we use Chain_1 in getCopyFromReg, we will have
886
// Val = getCopyFromReg(Chain_1)
887
// Chain_2 = getCopyToReg(Chain_1, Val) from below
888
889
// getCopyToReg(Chain_0) will be glued together with
890
// getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
891
// in Unit B, and we will have cyclic dependency between Unit A and Unit B:
892
// Data dependency from Unit B to Unit A due to usage of Val in
893
// getCopyToReg(Chain_1, Val)
894
// Chain dependency from Unit A to Unit B
895
896
// So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
897
SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
898
getPointerTy(MF.getDataLayout()));
899
900
Register RetValReg
901
= (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
902
X86::RAX : X86::EAX;
903
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
904
Glue = Chain.getValue(1);
905
906
// RAX/EAX now acts like a return value.
907
RetOps.push_back(
908
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
909
910
// Add the returned register to the CalleeSaveDisableRegs list. Don't do
911
// this however for preserve_most/preserve_all to minimize the number of
912
// callee-saved registers for these CCs.
913
if (ShouldDisableCalleeSavedRegister &&
914
CallConv != CallingConv::PreserveAll &&
915
CallConv != CallingConv::PreserveMost)
916
MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
917
}
918
919
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
920
const MCPhysReg *I =
921
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
922
if (I) {
923
for (; *I; ++I) {
924
if (X86::GR64RegClass.contains(*I))
925
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
926
else
927
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
928
}
929
}
930
931
RetOps[0] = Chain; // Update chain.
932
933
// Add the glue if we have it.
934
if (Glue.getNode())
935
RetOps.push_back(Glue);
936
937
X86ISD::NodeType opcode = X86ISD::RET_GLUE;
938
if (CallConv == CallingConv::X86_INTR)
939
opcode = X86ISD::IRET;
940
return DAG.getNode(opcode, dl, MVT::Other, RetOps);
941
}
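// To sketch the node built above for a simple case: returning an i32 from a
// plain C function yields
//   RetOps = { Chain, TargetConstant:0 /*bytes to pop*/, Register:EAX, Glue }
// glued to a CopyToReg of the value into EAX, all wrapped in an
// X86ISD::RET_GLUE node (X86ISD::IRET for x86_interrupt functions). FP
// returns in ST(0)/ST(1) skip the CopyToReg and ride along as plain operands
// for the FP stackifier, and sret functions additionally copy the saved sret
// pointer back into RAX/EAX.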
942
943
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
944
if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
945
return false;
946
947
SDValue TCChain = Chain;
948
SDNode *Copy = *N->use_begin();
949
if (Copy->getOpcode() == ISD::CopyToReg) {
950
// If the copy has a glue operand, we conservatively assume it isn't safe to
951
// perform a tail call.
952
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
953
return false;
954
TCChain = Copy->getOperand(0);
955
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
956
return false;
957
958
bool HasRet = false;
959
for (const SDNode *U : Copy->uses()) {
960
if (U->getOpcode() != X86ISD::RET_GLUE)
961
return false;
962
// If we are returning more than one value, we can definitely
963
// not make a tail call; see PR19530.
964
if (U->getNumOperands() > 4)
965
return false;
966
if (U->getNumOperands() == 4 &&
967
U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
968
return false;
969
HasRet = true;
970
}
971
972
if (!HasRet)
973
return false;
974
975
Chain = TCChain;
976
return true;
977
}
978
979
EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
980
ISD::NodeType ExtendKind) const {
981
MVT ReturnMVT = MVT::i32;
982
983
bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
984
if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
985
// The ABI does not require i1, i8 or i16 to be extended.
986
//
987
// On Darwin, there is code in the wild relying on Clang's old behaviour of
988
// always extending i8/i16 return values, so keep doing that for now.
989
// (PR26665).
990
ReturnMVT = MVT::i8;
991
}
992
993
EVT MinVT = getRegisterType(Context, ReturnMVT);
994
return VT.bitsLT(MinVT) ? MinVT : VT;
995
}
996
997
/// Reads two 32 bit registers and creates a 64 bit mask value.
998
/// \param VA The current 32 bit value that needs to be assigned.
999
/// \param NextVA The next 32 bit value that needs to be assigned.
1000
/// \param Root The parent DAG node.
1001
/// \param [in,out] InGlue Represents the SDValue in the parent DAG node for
1002
/// glue purposes. In case the DAG is already using a
1003
/// physical register instead of a virtual one, we should glue
1004
/// our new SDValue to the InGlue SDValue.
1005
/// \returns a new SDValue of 64-bit size.
1006
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1007
SDValue &Root, SelectionDAG &DAG,
1008
const SDLoc &DL, const X86Subtarget &Subtarget,
1009
SDValue *InGlue = nullptr) {
1010
assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1011
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1012
assert(VA.getValVT() == MVT::v64i1 &&
1013
"Expecting first location of 64 bit width type");
1014
assert(NextVA.getValVT() == VA.getValVT() &&
1015
"The locations should have the same type");
1016
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1017
"The values should reside in two registers");
1018
1019
SDValue Lo, Hi;
1020
SDValue ArgValueLo, ArgValueHi;
1021
1022
MachineFunction &MF = DAG.getMachineFunction();
1023
const TargetRegisterClass *RC = &X86::GR32RegClass;
1024
1025
// Read a 32 bit value from the registers.
1026
if (nullptr == InGlue) {
1027
// When no physical register is present,
1028
// create an intermediate virtual register.
1029
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1030
ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1031
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1032
ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1033
} else {
1034
// When a physical register is available read the value from it and glue
1035
// the reads together.
1036
ArgValueLo =
1037
DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1038
*InGlue = ArgValueLo.getValue(2);
1039
ArgValueHi =
1040
DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1041
*InGlue = ArgValueHi.getValue(2);
1042
}
1043
1044
// Convert the i32 type into v32i1 type.
1045
Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1046
1047
// Convert the i32 type into v32i1 type.
1048
Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1049
1050
// Concatenate the two values together.
1051
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1052
}
1053
1054
/// The function will lower a register of various sizes (8/16/32/64)
1055
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1056
/// \returns a DAG node contains the operand after lowering to mask type.
1057
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1058
const EVT &ValLoc, const SDLoc &DL,
1059
SelectionDAG &DAG) {
1060
SDValue ValReturned = ValArg;
1061
1062
if (ValVT == MVT::v1i1)
1063
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1064
1065
if (ValVT == MVT::v64i1) {
1066
// On a 32-bit machine, this case is handled by getv64i1Argument.
1067
assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1068
// On a 64-bit machine there is no need to truncate the value, only bitcast it.
1069
} else {
1070
MVT MaskLenVT;
1071
switch (ValVT.getSimpleVT().SimpleTy) {
1072
case MVT::v8i1:
1073
MaskLenVT = MVT::i8;
1074
break;
1075
case MVT::v16i1:
1076
MaskLenVT = MVT::i16;
1077
break;
1078
case MVT::v32i1:
1079
MaskLenVT = MVT::i32;
1080
break;
1081
default:
1082
llvm_unreachable("Expecting a vector of i1 types");
1083
}
1084
1085
ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1086
}
1087
return DAG.getBitcast(ValVT, ValReturned);
1088
}
1089
1090
/// Lower the result values of a call into the
1091
/// appropriate copies out of appropriate physical registers.
1092
///
1093
SDValue X86TargetLowering::LowerCallResult(
1094
SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1095
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1096
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1097
uint32_t *RegMask) const {
1098
1099
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1100
// Assign locations to each value returned by this call.
1101
SmallVector<CCValAssign, 16> RVLocs;
1102
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1103
*DAG.getContext());
1104
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1105
1106
// Copy all of the result registers out of their specified physreg.
1107
for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1108
++I, ++InsIndex) {
1109
CCValAssign &VA = RVLocs[I];
1110
EVT CopyVT = VA.getLocVT();
1111
1112
// In some calling conventions we need to remove the used registers
1113
// from the register mask.
1114
if (RegMask) {
1115
for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1116
RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1117
}
1118
1119
// Report an error if there was an attempt to return FP values via XMM
1120
// registers.
1121
if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1122
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1123
if (VA.getLocReg() == X86::XMM1)
1124
VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1125
else
1126
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1127
} else if (!Subtarget.hasSSE2() &&
1128
X86::FR64XRegClass.contains(VA.getLocReg()) &&
1129
CopyVT == MVT::f64) {
1130
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1131
if (VA.getLocReg() == X86::XMM1)
1132
VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1133
else
1134
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1135
}
1136
1137
// If we prefer to use the value in xmm registers, copy it out as f80 and
1138
// use a truncate to move it from fp stack reg to xmm reg.
1139
bool RoundAfterCopy = false;
1140
if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1141
isScalarFPTypeInSSEReg(VA.getValVT())) {
1142
if (!Subtarget.hasX87())
1143
report_fatal_error("X87 register return with X87 disabled");
1144
CopyVT = MVT::f80;
1145
RoundAfterCopy = (CopyVT != VA.getLocVT());
1146
}
1147
1148
SDValue Val;
1149
if (VA.needsCustom()) {
1150
assert(VA.getValVT() == MVT::v64i1 &&
1151
"Currently the only custom case is when we split v64i1 to 2 regs");
1152
Val =
1153
getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1154
} else {
1155
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1156
.getValue(1);
1157
Val = Chain.getValue(0);
1158
InGlue = Chain.getValue(2);
1159
}
1160
1161
if (RoundAfterCopy)
1162
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1163
// This truncation won't change the value.
1164
DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1165
1166
if (VA.isExtInLoc()) {
1167
if (VA.getValVT().isVector() &&
1168
VA.getValVT().getScalarType() == MVT::i1 &&
1169
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1170
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1171
// promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1172
Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1173
} else
1174
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1175
}
1176
1177
if (VA.getLocInfo() == CCValAssign::BCvt)
1178
Val = DAG.getBitcast(VA.getValVT(), Val);
1179
1180
InVals.push_back(Val);
1181
}
1182
1183
return Chain;
1184
}
1185
1186
//===----------------------------------------------------------------------===//
1187
// C & StdCall & Fast Calling Convention implementation
1188
//===----------------------------------------------------------------------===//
1189
// The StdCall calling convention is the standard for many Windows API
1190
// routines. It differs from the C calling convention only slightly: the
1191
// callee cleans up the stack instead of the caller, and symbols are
1192
// decorated (name-mangled). It doesn't support any vector arguments.
1193
// For info on fast calling convention see Fast Calling Convention (tail call)
1194
// implementation LowerX86_32FastCCCallTo.
1195
1196
/// Determines whether Args, either a set of outgoing arguments to a call, or a
1197
/// set of incoming args of a call, contains an sret pointer that the callee
1198
/// pops.
1199
template <typename T>
1200
static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1201
const X86Subtarget &Subtarget) {
1202
// Not C++20 (yet), so no concepts available.
1203
static_assert(std::is_same_v<T, ISD::OutputArg> ||
1204
std::is_same_v<T, ISD::InputArg>,
1205
"requires ISD::OutputArg or ISD::InputArg");
1206
1207
// Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1208
// for most compilations.
1209
if (!Subtarget.is32Bit())
1210
return false;
1211
1212
if (Args.empty())
1213
return false;
1214
1215
// Most calls do not have an sret argument, check the arg next.
1216
const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1217
if (!Flags.isSRet() || Flags.isInReg())
1218
return false;
1219
1220
// The MSVC ABI does not pop the sret.
1221
if (Subtarget.getTargetTriple().isOSMSVCRT())
1222
return false;
1223
1224
// MCUs don't pop the sret
1225
if (Subtarget.isTargetMCU())
1226
return false;
1227
1228
// Callee pops argument
1229
return true;
1230
}
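// Example of what this predicate captures: a 32-bit ELF cdecl function that
// returns a large struct receives a hidden sret pointer on the stack and pops
// it itself (the callee returns with 'ret $4'), so call lowering must account
// for those 4 bytes. MSVCRT-based environments and MCU targets leave the
// pointer for the caller, and 64-bit targets pass sret in a register anyway.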
1231
1232
/// Make a copy of an aggregate at address specified by "Src" to address
1233
/// "Dst" with size and alignment information specified by the specific
1234
/// parameter attribute. The copy will be passed as a byval function parameter.
1235
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1236
SDValue Chain, ISD::ArgFlagsTy Flags,
1237
SelectionDAG &DAG, const SDLoc &dl) {
1238
SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1239
1240
return DAG.getMemcpy(
1241
Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1242
/*isVolatile*/ false, /*AlwaysInline=*/true,
1243
/*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1244
}
1245
1246
/// Return true if the calling convention is one that we can guarantee TCO for.
1247
static bool canGuaranteeTCO(CallingConv::ID CC) {
1248
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1249
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1250
CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1251
}
1252
1253
/// Return true if we might ever do TCO for calls with this calling convention.
1254
static bool mayTailCallThisCC(CallingConv::ID CC) {
1255
switch (CC) {
1256
// C calling conventions:
1257
case CallingConv::C:
1258
case CallingConv::Win64:
1259
case CallingConv::X86_64_SysV:
1260
case CallingConv::PreserveNone:
1261
// Callee pop conventions:
1262
case CallingConv::X86_ThisCall:
1263
case CallingConv::X86_StdCall:
1264
case CallingConv::X86_VectorCall:
1265
case CallingConv::X86_FastCall:
1266
// Swift:
1267
case CallingConv::Swift:
1268
return true;
1269
default:
1270
return canGuaranteeTCO(CC);
1271
}
1272
}
1273
1274
/// Return true if the function is being made into a tailcall target by
1275
/// changing its ABI.
1276
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1277
return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1278
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1279
}
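// Taken together, the three predicates above encode the tail-call policy:
// fastcc/GHC/HiPE/RegCall calls get guaranteed tail calls only under
// -tailcallopt (GuaranteedTailCallOpt), tailcc and swifttailcc always do, and
// the ordinary C-family, callee-pop and Swift conventions may still be
// tail-called opportunistically (sibling calls) when the later eligibility
// checks prove it safe.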
1280
1281
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1282
if (!CI->isTailCall())
1283
return false;
1284
1285
CallingConv::ID CalleeCC = CI->getCallingConv();
1286
if (!mayTailCallThisCC(CalleeCC))
1287
return false;
1288
1289
return true;
1290
}
1291
1292
SDValue
1293
X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1294
const SmallVectorImpl<ISD::InputArg> &Ins,
1295
const SDLoc &dl, SelectionDAG &DAG,
1296
const CCValAssign &VA,
1297
MachineFrameInfo &MFI, unsigned i) const {
1298
// Create the nodes corresponding to a load from this parameter slot.
1299
ISD::ArgFlagsTy Flags = Ins[i].Flags;
1300
bool AlwaysUseMutable = shouldGuaranteeTCO(
1301
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1302
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1303
EVT ValVT;
1304
MVT PtrVT = getPointerTy(DAG.getDataLayout());
1305
1306
// If value is passed by pointer we have address passed instead of the value
1307
// itself. No need to extend if the mask value and location share the same
1308
// absolute size.
1309
bool ExtendedInMem =
1310
VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1311
VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1312
1313
if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1314
ValVT = VA.getLocVT();
1315
else
1316
ValVT = VA.getValVT();
1317
1318
// FIXME: For now, all byval parameter objects are marked mutable. This can be
1319
// changed with more analysis.
1320
// In case of tail call optimization mark all arguments mutable. Since they
1321
// could be overwritten by lowering of arguments in case of a tail call.
1322
if (Flags.isByVal()) {
1323
unsigned Bytes = Flags.getByValSize();
1324
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1325
1326
// FIXME: For now, all byval parameter objects are marked as aliasing. This
1327
// can be improved with deeper analysis.
1328
int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1329
/*isAliased=*/true);
1330
return DAG.getFrameIndex(FI, PtrVT);
1331
}
1332
1333
EVT ArgVT = Ins[i].ArgVT;
1334
1335
// If this is a vector that has been split into multiple parts, don't elide
1336
// the copy. The layout on the stack may not match the packed in-memory
1337
// layout.
1338
bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1339
1340
// This is an argument in memory. We might be able to perform copy elision.
1341
// If the argument is passed directly in memory without any extension, then we
1342
// can perform copy elision. Large vector types, for example, may be passed
1343
// indirectly by pointer.
1344
if (Flags.isCopyElisionCandidate() &&
1345
VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1346
!ScalarizedVector) {
1347
SDValue PartAddr;
1348
if (Ins[i].PartOffset == 0) {
1349
// If this is a one-part value or the first part of a multi-part value,
1350
// create a stack object for the entire argument value type and return a
1351
// load from our portion of it. This assumes that if the first part of an
1352
// argument is in memory, the rest will also be in memory.
1353
int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1354
/*IsImmutable=*/false);
1355
PartAddr = DAG.getFrameIndex(FI, PtrVT);
1356
return DAG.getLoad(
1357
ValVT, dl, Chain, PartAddr,
1358
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
1359
}
1360
1361
// This is not the first piece of an argument in memory. See if there is
1362
// already a fixed stack object including this offset. If so, assume it
1363
// was created by the PartOffset == 0 branch above and create a load from
1364
// the appropriate offset into it.
1365
int64_t PartBegin = VA.getLocMemOffset();
1366
int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1367
int FI = MFI.getObjectIndexBegin();
1368
for (; MFI.isFixedObjectIndex(FI); ++FI) {
1369
int64_t ObjBegin = MFI.getObjectOffset(FI);
1370
int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1371
if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1372
break;
1373
}
1374
if (MFI.isFixedObjectIndex(FI)) {
1375
SDValue Addr =
1376
DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1377
DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1378
return DAG.getLoad(ValVT, dl, Chain, Addr,
1379
MachinePointerInfo::getFixedStack(
1380
DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1381
}
1382
}
1383
1384
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1385
VA.getLocMemOffset(), isImmutable);
1386
1387
// Set SExt or ZExt flag.
1388
if (VA.getLocInfo() == CCValAssign::ZExt) {
1389
MFI.setObjectZExt(FI, true);
1390
} else if (VA.getLocInfo() == CCValAssign::SExt) {
1391
MFI.setObjectSExt(FI, true);
1392
}
1393
1394
MaybeAlign Alignment;
1395
if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1396
ValVT != MVT::f80)
1397
Alignment = MaybeAlign(4);
1398
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1399
SDValue Val = DAG.getLoad(
1400
ValVT, dl, Chain, FIN,
1401
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
1402
Alignment);
1403
return ExtendedInMem
1404
? (VA.getValVT().isVector()
1405
? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1406
: DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1407
: Val;
1408
}
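// Rough shape of what the code above does for an argument in memory:
//   * byval           -> hand back the frame index of the caller's copy
//   * copy-elidable   -> find or create a fixed stack object that overlays the
//                        caller's store and load the value straight from it
//   * everything else -> create a fixed object at the argument's stack offset
//                        and emit a plain load, truncating or re-vectorizing
//                        it when the value was extended in memory.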
1409
1410
// FIXME: Get this from tablegen.
1411
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1412
const X86Subtarget &Subtarget) {
1413
assert(Subtarget.is64Bit());
1414
1415
if (Subtarget.isCallingConvWin64(CallConv)) {
1416
static const MCPhysReg GPR64ArgRegsWin64[] = {
1417
X86::RCX, X86::RDX, X86::R8, X86::R9
1418
};
1419
return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1420
}
1421
1422
static const MCPhysReg GPR64ArgRegs64Bit[] = {
1423
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1424
};
1425
return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1426
}
1427
1428
// FIXME: Get this from tablegen.
1429
static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1430
CallingConv::ID CallConv,
1431
const X86Subtarget &Subtarget) {
1432
assert(Subtarget.is64Bit());
1433
if (Subtarget.isCallingConvWin64(CallConv)) {
1434
// The XMM registers which might contain var arg parameters are shadowed
1435
// in their paired GPR. So we only need to save the GPR to their home
1436
// slots.
1437
// TODO: __vectorcall will change this.
1438
return std::nullopt;
1439
}
1440
1441
bool isSoftFloat = Subtarget.useSoftFloat();
1442
if (isSoftFloat || !Subtarget.hasSSE1())
1443
// Kernel mode asks for SSE to be disabled, so there are no XMM argument
1444
// registers.
1445
return std::nullopt;
1446
1447
static const MCPhysReg XMMArgRegs64Bit[] = {
1448
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1449
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1450
};
1451
return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1452
}
1453
1454
#ifndef NDEBUG
static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
return llvm::is_sorted(
ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
return A.getValNo() < B.getValNo();
});
}
#endif
1462
1463
namespace {
1464
/// This is a helper class for lowering variable argument parameters.
1465
class VarArgsLoweringHelper {
1466
public:
1467
VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1468
SelectionDAG &DAG, const X86Subtarget &Subtarget,
1469
CallingConv::ID CallConv, CCState &CCInfo)
1470
: FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1471
TheMachineFunction(DAG.getMachineFunction()),
1472
TheFunction(TheMachineFunction.getFunction()),
1473
FrameInfo(TheMachineFunction.getFrameInfo()),
1474
FrameLowering(*Subtarget.getFrameLowering()),
1475
TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1476
CCInfo(CCInfo) {}
1477
1478
// Lower variable argument parameters.
1479
void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1480
1481
private:
1482
void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1483
1484
void forwardMustTailParameters(SDValue &Chain);
1485
1486
bool is64Bit() const { return Subtarget.is64Bit(); }
1487
bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1488
1489
X86MachineFunctionInfo *FuncInfo;
1490
const SDLoc &DL;
1491
SelectionDAG &DAG;
1492
const X86Subtarget &Subtarget;
1493
MachineFunction &TheMachineFunction;
1494
const Function &TheFunction;
1495
MachineFrameInfo &FrameInfo;
1496
const TargetFrameLowering &FrameLowering;
1497
const TargetLowering &TargLowering;
1498
CallingConv::ID CallConv;
1499
CCState &CCInfo;
1500
};
1501
} // namespace
1502
1503
void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1504
SDValue &Chain, unsigned StackSize) {
1505
// If the function takes a variable number of arguments, make a frame index
// for the start of the first vararg value... for expansion of llvm.va_start.
// We can skip this if there are no va_start calls.
1508
if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1509
CallConv != CallingConv::X86_ThisCall)) {
1510
FuncInfo->setVarArgsFrameIndex(
1511
FrameInfo.CreateFixedObject(1, StackSize, true));
1512
}
1513
1514
// 64-bit calling conventions support varargs and register parameters, so we
1515
// have to do extra work to spill them in the prologue.
1516
if (is64Bit()) {
1517
// Find the first unallocated argument registers.
1518
ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1519
ArrayRef<MCPhysReg> ArgXMMs =
1520
get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1521
unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1522
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1523
1524
assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1525
"SSE register cannot be used when SSE is disabled!");
1526
1527
if (isWin64()) {
1528
// Get to the caller-allocated home save location. Add 8 to account
1529
// for the return address.
1530
int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1531
FuncInfo->setRegSaveFrameIndex(
1532
FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1533
// Fixup to set vararg frame on shadow area (4 x i64).
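// When fewer than four integer registers are consumed by fixed arguments, the first vararg lands in the caller-allocated shadow area, so the varargs frame index can simply reuse the register save area.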
1534
if (NumIntRegs < 4)
1535
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1536
} else {
1537
// For X86-64, if there are vararg parameters that are passed via
1538
// registers, then we must store them to their spots on the stack so
1539
// they may be loaded by dereferencing the result of va_next.
1540
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1541
FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1542
FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1543
ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1544
}
1545
1546
SmallVector<SDValue, 6>
LiveGPRs; // List of SDValues for the GPR registers holding live input values.
SmallVector<SDValue, 8> LiveXMMRegs; // List of SDValues for the XMM registers
// holding live input values.
SDValue ALVal; // If applicable, holds the SDValue for the %al register.
1551
1552
// Gather all the live in physical registers.
1553
for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1554
Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1555
LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1556
}
1557
const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1558
if (!AvailableXmms.empty()) {
1559
Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1560
ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1561
for (MCPhysReg Reg : AvailableXmms) {
1562
// FastRegisterAllocator spills virtual registers at basic
// block boundaries. That leads to uses of XMM registers
// outside of the check for %al. Pass physical registers to
// VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1566
TheMachineFunction.getRegInfo().addLiveIn(Reg);
1567
LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1568
}
1569
}
1570
1571
// Store the integer parameter registers.
1572
SmallVector<SDValue, 8> MemOps;
1573
SDValue RSFIN =
1574
DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1575
TargLowering.getPointerTy(DAG.getDataLayout()));
1576
unsigned Offset = FuncInfo->getVarArgsGPOffset();
1577
for (SDValue Val : LiveGPRs) {
1578
SDValue FIN = DAG.getNode(ISD::ADD, DL,
1579
TargLowering.getPointerTy(DAG.getDataLayout()),
1580
RSFIN, DAG.getIntPtrConstant(Offset, DL));
1581
SDValue Store =
1582
DAG.getStore(Val.getValue(1), DL, Val, FIN,
1583
MachinePointerInfo::getFixedStack(
1584
DAG.getMachineFunction(),
1585
FuncInfo->getRegSaveFrameIndex(), Offset));
1586
MemOps.push_back(Store);
1587
Offset += 8;
1588
}
1589
1590
// Now store the XMM (fp + vector) parameter registers.
1591
if (!LiveXMMRegs.empty()) {
1592
SmallVector<SDValue, 12> SaveXMMOps;
1593
SaveXMMOps.push_back(Chain);
1594
SaveXMMOps.push_back(ALVal);
1595
SaveXMMOps.push_back(RSFIN);
1596
SaveXMMOps.push_back(
1597
DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1598
llvm::append_range(SaveXMMOps, LiveXMMRegs);
1599
MachineMemOperand *StoreMMO =
1600
DAG.getMachineFunction().getMachineMemOperand(
1601
MachinePointerInfo::getFixedStack(
1602
DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1603
Offset),
1604
MachineMemOperand::MOStore, 128, Align(16));
1605
MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1606
DL, DAG.getVTList(MVT::Other),
1607
SaveXMMOps, MVT::i8, StoreMMO));
1608
}
1609
1610
if (!MemOps.empty())
1611
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1612
}
1613
}
1614
1615
void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1616
// Find the largest legal vector type.
1617
MVT VecVT = MVT::Other;
1618
// FIXME: Only some x86_32 calling conventions support AVX512.
1619
if (Subtarget.useAVX512Regs() &&
1620
(is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1621
CallConv == CallingConv::Intel_OCL_BI)))
1622
VecVT = MVT::v16f32;
1623
else if (Subtarget.hasAVX())
1624
VecVT = MVT::v8f32;
1625
else if (Subtarget.hasSSE2())
1626
VecVT = MVT::v4f32;
1627
1628
// We forward some GPRs and some vector types.
1629
SmallVector<MVT, 2> RegParmTypes;
1630
MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1631
RegParmTypes.push_back(IntVT);
1632
if (VecVT != MVT::Other)
1633
RegParmTypes.push_back(VecVT);
1634
1635
// Compute the set of forwarded registers. The rest are scratch.
1636
SmallVectorImpl<ForwardedRegister> &Forwards =
1637
FuncInfo->getForwardedMustTailRegParms();
1638
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1639
1640
// Forward AL for SysV x86_64 targets, since it is used for varargs.
1641
if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1642
Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1643
Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1644
}
1645
1646
// Copy all forwards from physical to virtual registers.
1647
for (ForwardedRegister &FR : Forwards) {
1648
// FIXME: Can we use a less constrained schedule?
1649
SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1650
FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1651
TargLowering.getRegClassFor(FR.VT));
1652
Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1653
}
1654
}
1655
1656
void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1657
unsigned StackSize) {
1658
// Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
// If necessary, it will be set to the correct value later.
1660
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1661
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1662
1663
if (FrameInfo.hasVAStart())
1664
createVarArgAreaAndStoreRegisters(Chain, StackSize);
1665
1666
if (FrameInfo.hasMustTailInVarArgFunc())
1667
forwardMustTailParameters(Chain);
1668
}
1669
1670
SDValue X86TargetLowering::LowerFormalArguments(
1671
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1672
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1673
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1674
MachineFunction &MF = DAG.getMachineFunction();
1675
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1676
1677
const Function &F = MF.getFunction();
1678
if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1679
F.getName() == "main")
1680
FuncInfo->setForceFramePointer(true);
1681
1682
MachineFrameInfo &MFI = MF.getFrameInfo();
1683
bool Is64Bit = Subtarget.is64Bit();
1684
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1685
1686
assert(
1687
!(IsVarArg && canGuaranteeTCO(CallConv)) &&
1688
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1689
1690
// Assign locations to all of the incoming arguments.
1691
SmallVector<CCValAssign, 16> ArgLocs;
1692
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1693
1694
// Allocate shadow area for Win64.
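// The Win64 ABI reserves 32 bytes of home space (four 8-byte slots) ahead of any stack arguments.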
1695
if (IsWin64)
1696
CCInfo.AllocateStack(32, Align(8));
1697
1698
CCInfo.AnalyzeArguments(Ins, CC_X86);
1699
1700
// In vectorcall calling convention a second pass is required for the HVA
1701
// types.
1702
if (CallingConv::X86_VectorCall == CallConv) {
1703
CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1704
}
1705
1706
// The next loop assumes that the locations are in the same order as the
// input arguments.
1708
assert(isSortedByValueNo(ArgLocs) &&
1709
"Argument Location list must be sorted before lowering");
1710
1711
SDValue ArgValue;
1712
for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1713
++I, ++InsIndex) {
1714
assert(InsIndex < Ins.size() && "Invalid Ins index");
1715
CCValAssign &VA = ArgLocs[I];
1716
1717
if (VA.isRegLoc()) {
1718
EVT RegVT = VA.getLocVT();
1719
if (VA.needsCustom()) {
1720
assert(
1721
VA.getValVT() == MVT::v64i1 &&
1722
"Currently the only custom case is when we split v64i1 to 2 regs");
1723
1724
// In the regcall calling convention on a 32-bit target, v64i1
// values are split up into two registers.
1726
ArgValue =
1727
getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1728
} else {
1729
const TargetRegisterClass *RC;
1730
if (RegVT == MVT::i8)
1731
RC = &X86::GR8RegClass;
1732
else if (RegVT == MVT::i16)
1733
RC = &X86::GR16RegClass;
1734
else if (RegVT == MVT::i32)
1735
RC = &X86::GR32RegClass;
1736
else if (Is64Bit && RegVT == MVT::i64)
1737
RC = &X86::GR64RegClass;
1738
else if (RegVT == MVT::f16)
1739
RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1740
else if (RegVT == MVT::f32)
1741
RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1742
else if (RegVT == MVT::f64)
1743
RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1744
else if (RegVT == MVT::f80)
1745
RC = &X86::RFP80RegClass;
1746
else if (RegVT == MVT::f128)
1747
RC = &X86::VR128RegClass;
1748
else if (RegVT.is512BitVector())
1749
RC = &X86::VR512RegClass;
1750
else if (RegVT.is256BitVector())
1751
RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1752
else if (RegVT.is128BitVector())
1753
RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1754
else if (RegVT == MVT::x86mmx)
1755
RC = &X86::VR64RegClass;
1756
else if (RegVT == MVT::v1i1)
1757
RC = &X86::VK1RegClass;
1758
else if (RegVT == MVT::v8i1)
1759
RC = &X86::VK8RegClass;
1760
else if (RegVT == MVT::v16i1)
1761
RC = &X86::VK16RegClass;
1762
else if (RegVT == MVT::v32i1)
1763
RC = &X86::VK32RegClass;
1764
else if (RegVT == MVT::v64i1)
1765
RC = &X86::VK64RegClass;
1766
else
1767
llvm_unreachable("Unknown argument type!");
1768
1769
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1770
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1771
}
1772
1773
// If this is an 8 or 16-bit value, it is really passed promoted to 32
1774
// bits. Insert an assert[sz]ext to capture this, then truncate to the
1775
// right size.
1776
if (VA.getLocInfo() == CCValAssign::SExt)
1777
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1778
DAG.getValueType(VA.getValVT()));
1779
else if (VA.getLocInfo() == CCValAssign::ZExt)
1780
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1781
DAG.getValueType(VA.getValVT()));
1782
else if (VA.getLocInfo() == CCValAssign::BCvt)
1783
ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1784
1785
if (VA.isExtInLoc()) {
1786
// Handle MMX values passed in XMM regs.
1787
if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1788
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1789
else if (VA.getValVT().isVector() &&
1790
VA.getValVT().getScalarType() == MVT::i1 &&
1791
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1792
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1793
// Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1794
ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1795
} else
1796
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1797
}
1798
} else {
1799
assert(VA.isMemLoc());
1800
ArgValue =
1801
LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1802
}
1803
1804
// If the value is passed via a pointer, do a load.
1805
if (VA.getLocInfo() == CCValAssign::Indirect &&
1806
!(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1807
ArgValue =
1808
DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1809
}
1810
1811
InVals.push_back(ArgValue);
1812
}
1813
1814
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1815
if (Ins[I].Flags.isSwiftAsync()) {
1816
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1817
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1818
X86FI->setHasSwiftAsyncContext(true);
1819
else {
1820
int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1821
int FI =
1822
MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1823
X86FI->setSwiftAsyncContextFrameIdx(FI);
1824
SDValue St = DAG.getStore(
1825
DAG.getEntryNode(), dl, InVals[I],
1826
DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1827
MachinePointerInfo::getFixedStack(MF, FI));
1828
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1829
}
1830
}
1831
1832
// Swift calling convention does not require we copy the sret argument
1833
// into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1834
if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1835
continue;
1836
1837
// All x86 ABIs require that for returning structs by value we copy the
1838
// sret argument into %rax/%eax (depending on ABI) for the return. Save
1839
// the argument into a virtual register so that we can access it from the
1840
// return points.
1841
if (Ins[I].Flags.isSRet()) {
1842
assert(!FuncInfo->getSRetReturnReg() &&
1843
"SRet return has already been set");
1844
MVT PtrTy = getPointerTy(DAG.getDataLayout());
1845
Register Reg =
1846
MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1847
FuncInfo->setSRetReturnReg(Reg);
1848
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1849
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1850
break;
1851
}
1852
}
1853
1854
unsigned StackSize = CCInfo.getStackSize();
1855
// Align stack specially for tail calls.
1856
if (shouldGuaranteeTCO(CallConv,
1857
MF.getTarget().Options.GuaranteedTailCallOpt))
1858
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1859
1860
if (IsVarArg)
1861
VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1862
.lowerVarArgsParameters(Chain, StackSize);
1863
1864
// Some CCs need callee pop.
1865
if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1866
MF.getTarget().Options.GuaranteedTailCallOpt)) {
1867
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1868
} else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1869
// X86 interrupts must pop the error code (and the alignment padding) if
1870
// present.
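// (On 64-bit, this is the 8-byte error code plus 8 bytes of alignment padding; on 32-bit, the 4-byte error code.)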
1871
FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1872
} else {
1873
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1874
// If this is an sret function, the return should pop the hidden pointer.
1875
if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1876
FuncInfo->setBytesToPopOnReturn(4);
1877
}
1878
1879
if (!Is64Bit) {
1880
// RegSaveFrameIndex is X86-64 only.
1881
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1882
}
1883
1884
FuncInfo->setArgumentStackSize(StackSize);
1885
1886
if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1887
EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1888
if (Personality == EHPersonality::CoreCLR) {
1889
assert(Is64Bit);
1890
// TODO: Add a mechanism to frame lowering that will allow us to indicate
1891
// that we'd prefer this slot be allocated towards the bottom of the frame
1892
// (i.e. near the stack pointer after allocating the frame). Every
1893
// funclet needs a copy of this slot in its (mostly empty) frame, and the
1894
// offset from the bottom of this and each funclet's frame must be the
1895
// same, so the size of funclets' (mostly empty) frames is dictated by
1896
// how far this slot is from the bottom (since they allocate just enough
1897
// space to accommodate holding this slot at the correct offset).
1898
int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1899
EHInfo->PSPSymFrameIdx = PSPSymFI;
1900
}
1901
}
1902
1903
if (shouldDisableArgRegFromCSR(CallConv) ||
1904
F.hasFnAttribute("no_caller_saved_registers")) {
1905
MachineRegisterInfo &MRI = MF.getRegInfo();
1906
for (std::pair<Register, Register> Pair : MRI.liveins())
1907
MRI.disableCalleeSavedRegister(Pair.first);
1908
}
1909
1910
if (CallingConv::PreserveNone == CallConv)
1911
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1912
if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1913
Ins[I].Flags.isSwiftError()) {
1914
errorUnsupported(DAG, dl,
1915
"Swift attributes can't be used with preserve_none");
1916
break;
1917
}
1918
}
1919
1920
return Chain;
1921
}
1922
1923
SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1924
SDValue Arg, const SDLoc &dl,
1925
SelectionDAG &DAG,
1926
const CCValAssign &VA,
1927
ISD::ArgFlagsTy Flags,
1928
bool isByVal) const {
1929
unsigned LocMemOffset = VA.getLocMemOffset();
1930
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1931
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1932
StackPtr, PtrOff);
1933
if (isByVal)
1934
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1935
1936
MaybeAlign Alignment;
1937
if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1938
Arg.getSimpleValueType() != MVT::f80)
1939
Alignment = MaybeAlign(4);
1940
return DAG.getStore(
1941
Chain, dl, Arg, PtrOff,
1942
MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1943
Alignment);
1944
}
1945
1946
/// Emit a load of return address if tail call
1947
/// optimization is performed and it is required.
1948
SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1949
SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1950
bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1951
// Adjust the Return address stack slot.
1952
EVT VT = getPointerTy(DAG.getDataLayout());
1953
OutRetAddr = getReturnAddressFrameIndex(DAG);
1954
1955
// Load the "old" Return address.
1956
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1957
return SDValue(OutRetAddr.getNode(), 1);
1958
}
1959
1960
/// Emit a store of the return address if tail call
1961
/// optimization is performed and it is required (FPDiff!=0).
1962
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1963
SDValue Chain, SDValue RetAddrFrIdx,
1964
EVT PtrVT, unsigned SlotSize,
1965
int FPDiff, const SDLoc &dl) {
1966
// Store the return address to the appropriate stack slot.
1967
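// If the return-address slot is not being moved (FPDiff == 0), there is nothing to store.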
if (!FPDiff) return Chain;
1968
// Calculate the new stack slot for the return address.
1969
int NewReturnAddrFI =
1970
MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1971
false);
1972
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1973
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1974
MachinePointerInfo::getFixedStack(
1975
DAG.getMachineFunction(), NewReturnAddrFI));
1976
return Chain;
1977
}
1978
1979
/// Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of the specified width.
1981
SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1982
SDValue V1, SDValue V2) const {
1983
unsigned NumElems = VT.getVectorNumElements();
1984
SmallVector<int, 8> Mask;
1985
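// Index NumElems selects element 0 of V2; indices 1..NumElems-1 keep the remaining elements of V1, giving the movl/movs "insert low element" pattern.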
Mask.push_back(NumElems);
1986
for (unsigned i = 1; i != NumElems; ++i)
1987
Mask.push_back(i);
1988
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
1989
}
1990
1991
SDValue
1992
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1993
SmallVectorImpl<SDValue> &InVals) const {
1994
SelectionDAG &DAG = CLI.DAG;
1995
SDLoc &dl = CLI.DL;
1996
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1997
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1998
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1999
SDValue Chain = CLI.Chain;
2000
SDValue Callee = CLI.Callee;
2001
CallingConv::ID CallConv = CLI.CallConv;
2002
bool &isTailCall = CLI.IsTailCall;
2003
bool isVarArg = CLI.IsVarArg;
2004
const auto *CB = CLI.CB;
2005
2006
MachineFunction &MF = DAG.getMachineFunction();
2007
bool Is64Bit = Subtarget.is64Bit();
2008
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2009
bool IsSibcall = false;
2010
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2011
CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2012
bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2013
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2014
bool HasNCSR = (CB && isa<CallInst>(CB) &&
2015
CB->hasFnAttr("no_caller_saved_registers"));
2016
bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2017
bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2018
bool IsCFICall = IsIndirectCall && CLI.CFIType;
2019
const Module *M = MF.getFunction().getParent();
2020
Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2021
2022
MachineFunction::CallSiteInfo CSInfo;
2023
if (CallConv == CallingConv::X86_INTR)
2024
report_fatal_error("X86 interrupts may not be called directly");
2025
2026
// Analyze operands of the call, assigning locations to each operand.
2027
SmallVector<CCValAssign, 16> ArgLocs;
2028
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2029
2030
// Allocate shadow area for Win64.
2031
if (IsWin64)
2032
CCInfo.AllocateStack(32, Align(8));
2033
2034
CCInfo.AnalyzeArguments(Outs, CC_X86);
2035
2036
// In vectorcall calling convention a second pass is required for the HVA
2037
// types.
2038
if (CallingConv::X86_VectorCall == CallConv) {
2039
CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2040
}
2041
2042
bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2043
if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2044
// If we are using a GOT, disable tail calls to external symbols with
2045
// default visibility. Tail calling such a symbol requires using a GOT
2046
// relocation, which forces early binding of the symbol. This breaks code
// that requires lazy function symbol resolution. Using musttail or
2048
// GuaranteedTailCallOpt will override this.
2049
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2050
if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2051
G->getGlobal()->hasDefaultVisibility()))
2052
isTailCall = false;
2053
}
2054
2055
if (isTailCall && !IsMustTail) {
2056
// Check if it's really possible to do a tail call.
2057
isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2058
IsCalleePopSRet);
2059
2060
// Sibcalls are automatically detected tailcalls which do not require
2061
// ABI changes.
2062
if (!IsGuaranteeTCO && isTailCall)
2063
IsSibcall = true;
2064
2065
if (isTailCall)
2066
++NumTailCalls;
2067
}
2068
2069
if (IsMustTail && !isTailCall)
2070
report_fatal_error("failed to perform tail call elimination on a call "
2071
"site marked musttail");
2072
2073
assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2074
"Var args not supported with calling convention fastcc, ghc or hipe");
2075
2076
// Get a count of how many bytes are to be pushed on the stack.
2077
unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2078
if (IsSibcall)
2079
// This is a sibcall. The memory operands are already available in the
// caller's own caller's stack.
2081
NumBytes = 0;
2082
else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2083
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2084
2085
int FPDiff = 0;
2086
if (isTailCall &&
2087
shouldGuaranteeTCO(CallConv,
2088
MF.getTarget().Options.GuaranteedTailCallOpt)) {
2089
// Lower arguments at fp - stackoffset + fpdiff.
2090
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2091
2092
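// A negative FPDiff means the callee needs more argument stack space than the caller's incoming area provides, so the return address has to be moved (see EmitTailCallStoreRetAddr).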
FPDiff = NumBytesCallerPushed - NumBytes;
2093
2094
// Set the delta of movement of the returnaddr stackslot.
2095
// But only set if delta is greater than previous delta.
2096
if (FPDiff < X86Info->getTCReturnAddrDelta())
2097
X86Info->setTCReturnAddrDelta(FPDiff);
2098
}
2099
2100
unsigned NumBytesToPush = NumBytes;
2101
unsigned NumBytesToPop = NumBytes;
2102
2103
// If we have an inalloca argument, all stack space has already been allocated
// for us and is right at the top of the stack. We don't support multiple
2105
// arguments passed in memory when using inalloca.
2106
if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2107
NumBytesToPush = 0;
2108
if (!ArgLocs.back().isMemLoc())
2109
report_fatal_error("cannot use inalloca attribute on a register "
2110
"parameter");
2111
if (ArgLocs.back().getLocMemOffset() != 0)
2112
report_fatal_error("any parameter with the inalloca attribute must be "
2113
"the only memory argument");
2114
} else if (CLI.IsPreallocated) {
2115
assert(ArgLocs.back().isMemLoc() &&
2116
"cannot use preallocated attribute on a register "
2117
"parameter");
2118
SmallVector<size_t, 4> PreallocatedOffsets;
2119
for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2120
if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2121
PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2122
}
2123
}
2124
auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2125
size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2126
MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2127
MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2128
NumBytesToPush = 0;
2129
}
2130
2131
if (!IsSibcall && !IsMustTail)
2132
Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2133
NumBytes - NumBytesToPush, dl);
2134
2135
SDValue RetAddrFrIdx;
2136
// Load return address for tail calls.
2137
if (isTailCall && FPDiff)
2138
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2139
Is64Bit, FPDiff, dl);
2140
2141
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2142
SmallVector<SDValue, 8> MemOpChains;
2143
SDValue StackPtr;
2144
2145
// The next loop assumes that the locations are in the same order as the
// input arguments.
2147
assert(isSortedByValueNo(ArgLocs) &&
2148
"Argument Location list must be sorted before lowering");
2149
2150
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
2152
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2153
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2154
++I, ++OutIndex) {
2155
assert(OutIndex < Outs.size() && "Invalid Out index");
2156
// Skip inalloca/preallocated arguments, they have already been written.
2157
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2158
if (Flags.isInAlloca() || Flags.isPreallocated())
2159
continue;
2160
2161
CCValAssign &VA = ArgLocs[I];
2162
EVT RegVT = VA.getLocVT();
2163
SDValue Arg = OutVals[OutIndex];
2164
bool isByVal = Flags.isByVal();
2165
2166
// Promote the value if needed.
2167
switch (VA.getLocInfo()) {
2168
default: llvm_unreachable("Unknown loc info!");
2169
case CCValAssign::Full: break;
2170
case CCValAssign::SExt:
2171
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2172
break;
2173
case CCValAssign::ZExt:
2174
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2175
break;
2176
case CCValAssign::AExt:
2177
if (Arg.getValueType().isVector() &&
2178
Arg.getValueType().getVectorElementType() == MVT::i1)
2179
Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2180
else if (RegVT.is128BitVector()) {
2181
// Special case: passing MMX values in XMM registers.
2182
Arg = DAG.getBitcast(MVT::i64, Arg);
2183
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2184
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2185
} else
2186
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2187
break;
2188
case CCValAssign::BCvt:
2189
Arg = DAG.getBitcast(RegVT, Arg);
2190
break;
2191
case CCValAssign::Indirect: {
2192
if (isByVal) {
2193
// Memcpy the argument to a temporary stack slot to prevent
2194
// the caller from seeing any modifications the callee may make
2195
// as guaranteed by the `byval` attribute.
2196
int FrameIdx = MF.getFrameInfo().CreateStackObject(
2197
Flags.getByValSize(),
2198
std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2199
SDValue StackSlot =
2200
DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2201
Chain =
2202
CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2203
// From now on treat this as a regular pointer
2204
Arg = StackSlot;
2205
isByVal = false;
2206
} else {
2207
// Store the argument.
2208
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2209
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2210
Chain = DAG.getStore(
2211
Chain, dl, Arg, SpillSlot,
2212
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2213
Arg = SpillSlot;
2214
}
2215
break;
2216
}
2217
}
2218
2219
if (VA.needsCustom()) {
2220
assert(VA.getValVT() == MVT::v64i1 &&
2221
"Currently the only custom case is when we split v64i1 to 2 regs");
2222
// Split v64i1 value into two registers
2223
Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2224
} else if (VA.isRegLoc()) {
2225
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2226
const TargetOptions &Options = DAG.getTarget().Options;
2227
if (Options.EmitCallSiteInfo)
2228
CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2229
if (isVarArg && IsWin64) {
2230
// Win64 ABI requires argument XMM reg to be copied to the corresponding
2231
// shadow reg if callee is a varargs function.
2232
Register ShadowReg;
2233
switch (VA.getLocReg()) {
2234
case X86::XMM0: ShadowReg = X86::RCX; break;
2235
case X86::XMM1: ShadowReg = X86::RDX; break;
2236
case X86::XMM2: ShadowReg = X86::R8; break;
2237
case X86::XMM3: ShadowReg = X86::R9; break;
2238
}
2239
if (ShadowReg)
2240
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2241
}
2242
} else if (!IsSibcall && (!isTailCall || isByVal)) {
2243
assert(VA.isMemLoc());
2244
if (!StackPtr.getNode())
2245
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2246
getPointerTy(DAG.getDataLayout()));
2247
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2248
dl, DAG, VA, Flags, isByVal));
2249
}
2250
}
2251
2252
if (!MemOpChains.empty())
2253
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2254
2255
if (Subtarget.isPICStyleGOT()) {
2256
// ELF / PIC requires the GOT pointer to be in the EBX register before
// function calls via the PLT (except for regcall).
if (!isTailCall) {
// An indirect call with the RegCall calling convention may use up all the
// general registers, so it is not suitable to bind the EBX register for
// the GOT address; just let the register allocator handle it.
2262
if (CallConv != CallingConv::X86_RegCall)
2263
RegsToPass.push_back(std::make_pair(
2264
Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2265
getPointerTy(DAG.getDataLayout()))));
2266
} else {
2267
// If we are tail calling and generating PIC/GOT style code load the
2268
// address of the callee into ECX. The value in ecx is used as target of
2269
// the tail jump. This is done to circumvent the ebx/callee-saved problem
2270
// for tail calls on PIC/GOT architectures. Normally we would just put the
2271
// address of GOT into ebx and then call target@PLT. But for tail calls
2272
// ebx would be restored (since ebx is callee saved) before jumping to the
2273
// target@PLT.
2274
2275
// Note: The actual moving to ECX is done further down.
2276
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2277
if (G && !G->getGlobal()->hasLocalLinkage() &&
2278
G->getGlobal()->hasDefaultVisibility())
2279
Callee = LowerGlobalAddress(Callee, DAG);
2280
else if (isa<ExternalSymbolSDNode>(Callee))
2281
Callee = LowerExternalSymbol(Callee, DAG);
2282
}
2283
}
2284
2285
if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2286
(Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2287
// From AMD64 ABI document:
2288
// For calls that may call functions that use varargs or stdargs
2289
// (prototype-less calls or calls to functions containing ellipsis (...) in
2290
// the declaration) %al is used as a hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an upper bound on the number of SSE
// registers used and must be in the range 0 - 8 inclusive.
2294
2295
// Count the number of XMM registers allocated.
2296
static const MCPhysReg XMMArgRegs[] = {
2297
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2298
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2299
};
2300
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2301
assert((Subtarget.hasSSE1() || !NumXMMRegs)
2302
&& "SSE registers cannot be used when SSE is disabled");
2303
RegsToPass.push_back(std::make_pair(Register(X86::AL),
2304
DAG.getConstant(NumXMMRegs, dl,
2305
MVT::i8)));
2306
}
2307
2308
if (isVarArg && IsMustTail) {
2309
const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2310
for (const auto &F : Forwards) {
2311
SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2312
RegsToPass.push_back(std::make_pair(F.PReg, Val));
2313
}
2314
}
2315
2316
// For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2317
// don't need this because the eligibility check rejects calls that require
2318
// shuffling arguments passed in memory.
2319
if (!IsSibcall && isTailCall) {
2320
// Force all the incoming stack arguments to be loaded from the stack
2321
// before any new outgoing arguments are stored to the stack, because the
2322
// outgoing stack slots may alias the incoming argument stack slots, and
2323
// the alias isn't otherwise explicit. This is slightly more conservative
2324
// than necessary, because it means that each store effectively depends
2325
// on every argument instead of just those arguments it would clobber.
2326
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2327
2328
SmallVector<SDValue, 8> MemOpChains2;
2329
SDValue FIN;
2330
int FI = 0;
2331
for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2332
++I, ++OutsIndex) {
2333
CCValAssign &VA = ArgLocs[I];
2334
2335
if (VA.isRegLoc()) {
2336
if (VA.needsCustom()) {
2337
assert((CallConv == CallingConv::X86_RegCall) &&
2338
"Expecting custom case only in regcall calling convention");
2339
// This means that we are in the special case where one argument was
// passed through two register locations - skip the next location.
2341
++I;
2342
}
2343
2344
continue;
2345
}
2346
2347
assert(VA.isMemLoc());
2348
SDValue Arg = OutVals[OutsIndex];
2349
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2350
// Skip inalloca/preallocated arguments. They don't require any work.
2351
if (Flags.isInAlloca() || Flags.isPreallocated())
2352
continue;
2353
// Create frame index.
2354
int32_t Offset = VA.getLocMemOffset()+FPDiff;
2355
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2356
FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2357
FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2358
2359
if (Flags.isByVal()) {
2360
// Copy relative to framepointer.
2361
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2362
if (!StackPtr.getNode())
2363
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2364
getPointerTy(DAG.getDataLayout()));
2365
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2366
StackPtr, Source);
2367
2368
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2369
ArgChain,
2370
Flags, DAG, dl));
2371
} else {
2372
// Store relative to framepointer.
2373
MemOpChains2.push_back(DAG.getStore(
2374
ArgChain, dl, Arg, FIN,
2375
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2376
}
2377
}
2378
2379
if (!MemOpChains2.empty())
2380
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2381
2382
// Store the return address to the appropriate stack slot.
2383
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2384
getPointerTy(DAG.getDataLayout()),
2385
RegInfo->getSlotSize(), FPDiff, dl);
2386
}
2387
2388
// Build a sequence of copy-to-reg nodes chained together with token chain
2389
// and glue operands which copy the outgoing args into registers.
2390
SDValue InGlue;
2391
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2392
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2393
RegsToPass[i].second, InGlue);
2394
InGlue = Chain.getValue(1);
2395
}
2396
2397
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2398
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2399
// In the 64-bit large code model, we have to make all calls
2400
// through a register, since the call instruction's 32-bit
2401
// pc-relative offset may not be large enough to hold the whole
2402
// address.
2403
} else if (Callee->getOpcode() == ISD::GlobalAddress ||
2404
Callee->getOpcode() == ISD::ExternalSymbol) {
2405
// Lower direct calls to global addresses and external symbols. Setting
2406
// ForCall to true here has the effect of removing WrapperRIP when possible
2407
// to allow direct calls to be selected without first materializing the
2408
// address into a register.
2409
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2410
} else if (Subtarget.isTarget64BitILP32() &&
2411
Callee.getValueType() == MVT::i32) {
2412
// Zero-extend the 32-bit Callee address into a 64-bit one according to the x32 ABI.
2413
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2414
}
2415
2416
// Returns a chain & a glue for retval copy to use.
2417
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2418
SmallVector<SDValue, 8> Ops;
2419
2420
if (!IsSibcall && isTailCall && !IsMustTail) {
2421
Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2422
InGlue = Chain.getValue(1);
2423
}
2424
2425
Ops.push_back(Chain);
2426
Ops.push_back(Callee);
2427
2428
if (isTailCall)
2429
Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
2430
2431
// Add argument registers to the end of the list so that they are known live
2432
// into the call.
2433
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2434
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2435
RegsToPass[i].second.getValueType()));
2436
2437
// Add a register mask operand representing the call-preserved registers.
2438
const uint32_t *Mask = [&]() {
2439
auto AdaptedCC = CallConv;
2440
// If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2441
// use X86_INTR calling convention because it has the same CSR mask
2442
// (same preserved registers).
2443
if (HasNCSR)
2444
AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2445
// If NoCalleeSavedRegisters is requested, then use GHC since it happens
// to use the CSR_NoRegs_RegMask.
2447
if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2448
AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2449
return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2450
}();
2451
assert(Mask && "Missing call preserved mask for calling convention");
2452
2453
// If this is an invoke in a 32-bit function using a funclet-based
2454
// personality, assume the function clobbers all registers. If an exception
2455
// is thrown, the runtime will not restore CSRs.
2456
// FIXME: Model this more precisely so that we can register allocate across
2457
// the normal edge and spill and fill across the exceptional edge.
2458
if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2459
const Function &CallerFn = MF.getFunction();
2460
EHPersonality Pers =
2461
CallerFn.hasPersonalityFn()
2462
? classifyEHPersonality(CallerFn.getPersonalityFn())
2463
: EHPersonality::Unknown;
2464
if (isFuncletEHPersonality(Pers))
2465
Mask = RegInfo->getNoPreservedMask();
2466
}
2467
2468
// Define a new register mask from the existing mask.
2469
uint32_t *RegMask = nullptr;
2470
2471
// In some calling conventions we need to remove the used physical registers
2472
// from the reg mask. Create a new RegMask for such calling conventions.
2473
// RegMask for calling conventions that disable only return registers (e.g.
2474
// preserve_most) will be modified later in LowerCallResult.
2475
bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2476
if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2477
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2478
2479
// Allocate a new Reg Mask and copy Mask.
2480
RegMask = MF.allocateRegMask();
2481
unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2482
memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2483
2484
// Make sure all sub registers of the argument registers are reset
2485
// in the RegMask.
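// A cleared bit in a register mask means the register is clobbered (not preserved) across the call.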
2486
if (ShouldDisableArgRegs) {
2487
for (auto const &RegPair : RegsToPass)
2488
for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2489
RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2490
}
2491
2492
// Create the RegMask Operand according to our updated mask.
2493
Ops.push_back(DAG.getRegisterMask(RegMask));
2494
} else {
2495
// Create the RegMask Operand according to the static mask.
2496
Ops.push_back(DAG.getRegisterMask(Mask));
2497
}
2498
2499
if (InGlue.getNode())
2500
Ops.push_back(InGlue);
2501
2502
if (isTailCall) {
2503
// We used to do:
2504
//// If this is the first return lowered for this function, add the regs
2505
//// to the liveout set for the function.
2506
// This isn't right, although it's probably harmless on x86; liveouts
2507
// should be computed from returns not tail calls. Consider a void
2508
// function making a tail call to a function returning int.
2509
MF.getFrameInfo().setHasTailCall();
2510
SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
2511
2512
if (IsCFICall)
2513
Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2514
2515
DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2516
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2517
return Ret;
2518
}
2519
2520
if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2521
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2522
} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2523
// Calls with a "clang.arc.attachedcall" bundle are special. They should be
2524
// expanded to the call, directly followed by a special marker sequence and
2525
// a call to an ObjC library function. Use the CALL_RVMARKER to do that.
2526
assert(!isTailCall &&
2527
"tail calls cannot be marked with clang.arc.attachedcall");
2528
assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2529
2530
// Add a target global address for the retainRV/claimRV runtime function
2531
// just before the call target.
2532
Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2533
auto PtrVT = getPointerTy(DAG.getDataLayout());
2534
auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2535
Ops.insert(Ops.begin() + 1, GA);
2536
Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2537
} else {
2538
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2539
}
2540
2541
if (IsCFICall)
2542
Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2543
2544
InGlue = Chain.getValue(1);
2545
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2546
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2547
2548
// Save heapallocsite metadata.
2549
if (CLI.CB)
2550
if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2551
DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2552
2553
// Create the CALLSEQ_END node.
2554
unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2555
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2556
DAG.getTarget().Options.GuaranteedTailCallOpt))
2557
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2558
else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2559
// If this call passes a struct-return pointer, the callee
2560
// pops that struct pointer.
2561
NumBytesForCalleeToPop = 4;
2562
2563
// Returns a glue for retval copy to use.
2564
if (!IsSibcall) {
2565
Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2566
InGlue, dl);
2567
InGlue = Chain.getValue(1);
2568
}
2569
2570
if (CallingConv::PreserveNone == CallConv)
2571
for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2572
if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2573
Outs[I].Flags.isSwiftError()) {
2574
errorUnsupported(DAG, dl,
2575
"Swift attributes can't be used with preserve_none");
2576
break;
2577
}
2578
}
2579
2580
// Handle result values, copying them out of physregs into vregs that we
2581
// return.
2582
return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2583
InVals, RegMask);
2584
}
2585
2586
//===----------------------------------------------------------------------===//
2587
// Fast Calling Convention (tail call) implementation
2588
//===----------------------------------------------------------------------===//
2589
2590
// Like stdcall, the callee cleans up the arguments, except that ECX is
// reserved for storing the tail-called function's address. Only 2 registers
// are free for argument passing (inreg). Tail call optimization is performed
// provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On the X86_64 architecture with GOT-style position independent code, only
// local (within module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - Darwin's
// dyld for example.)
// If a tail-called callee has more arguments than the caller, the caller
// needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after
// the original RETADDR, but before the saved frame pointer or the spilled
// registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
2606
// stack layout:
2607
// arg1
2608
// arg2
2609
// RETADDR
2610
// [ new RETADDR
2611
// move area ]
2612
// (possible EBP)
2613
// ESI
2614
// EDI
2615
// local1 ..
2616
2617
/// Make the stack size aligned, e.g. to 16n + 12 bytes for a 16-byte
/// alignment requirement.
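/// For example, on 32-bit (4-byte slot) with 16-byte stack alignment, a
/// StackSize of 16 becomes alignTo(16 + 4, 16) - 4 = 28, i.e. 16n + 12,
/// leaving room for the 4-byte return-address slot.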
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
SelectionDAG &DAG) const {
const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
assert(StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize");
return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
}
2628
2629
/// Return true if the given stack call argument is already available in the
2630
/// same position (relatively) of the caller's incoming argument stack.
2631
static
2632
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2633
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2634
const X86InstrInfo *TII, const CCValAssign &VA) {
2635
unsigned Bytes = Arg.getValueSizeInBits() / 8;
2636
2637
for (;;) {
2638
// Look through nodes that don't alter the bits of the incoming value.
2639
unsigned Op = Arg.getOpcode();
2640
if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2641
Op == ISD::AssertZext) {
2642
Arg = Arg.getOperand(0);
2643
continue;
2644
}
2645
if (Op == ISD::TRUNCATE) {
2646
const SDValue &TruncInput = Arg.getOperand(0);
2647
if (TruncInput.getOpcode() == ISD::AssertZext &&
2648
cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2649
Arg.getValueType()) {
2650
Arg = TruncInput.getOperand(0);
2651
continue;
2652
}
2653
}
2654
break;
2655
}
2656
2657
int FI = INT_MAX;
2658
if (Arg.getOpcode() == ISD::CopyFromReg) {
2659
Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2660
if (!VR.isVirtual())
2661
return false;
2662
MachineInstr *Def = MRI->getVRegDef(VR);
2663
if (!Def)
2664
return false;
2665
if (!Flags.isByVal()) {
2666
if (!TII->isLoadFromStackSlot(*Def, FI))
2667
return false;
2668
} else {
2669
unsigned Opcode = Def->getOpcode();
2670
if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2671
Opcode == X86::LEA64_32r) &&
2672
Def->getOperand(1).isFI()) {
2673
FI = Def->getOperand(1).getIndex();
2674
Bytes = Flags.getByValSize();
2675
} else
2676
return false;
2677
}
2678
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2679
if (Flags.isByVal())
2680
// ByVal argument is passed in as a pointer but it's now being
2681
// dereferenced. e.g.
2682
// define @foo(%struct.X* %A) {
2683
// tail call @bar(%struct.X* byval %A)
2684
// }
2685
return false;
2686
SDValue Ptr = Ld->getBasePtr();
2687
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2688
if (!FINode)
2689
return false;
2690
FI = FINode->getIndex();
2691
} else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2692
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2693
FI = FINode->getIndex();
2694
Bytes = Flags.getByValSize();
2695
} else
2696
return false;
2697
2698
assert(FI != INT_MAX);
2699
if (!MFI.isFixedObjectIndex(FI))
2700
return false;
2701
2702
if (Offset != MFI.getObjectOffset(FI))
2703
return false;
2704
2705
// If this is not byval, check that the argument stack object is immutable.
2706
// inalloca and argument copy elision can create mutable argument stack
2707
// objects. Byval objects can be mutated, but a byval call intends to pass the
2708
// mutated memory.
2709
if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2710
return false;
2711
2712
if (VA.getLocVT().getFixedSizeInBits() >
2713
Arg.getValueSizeInBits().getFixedValue()) {
2714
// If the argument location is wider than the argument type, check that any
2715
// extension flags match.
2716
if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2717
Flags.isSExt() != MFI.isObjectSExt(FI)) {
2718
return false;
2719
}
2720
}
2721
2722
return Bytes == MFI.getObjectSize(FI);
2723
}
2724
2725
/// Check whether the call is eligible for tail call optimization. Targets
2726
/// that want to do tail call optimization should implement this function.
2727
/// Note that the x86 backend does not check musttail calls for eligibility! The
2728
/// rest of x86 tail call lowering must be prepared to forward arguments of any
2729
/// type.
2730
bool X86TargetLowering::IsEligibleForTailCallOptimization(
2731
TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2732
SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2733
SelectionDAG &DAG = CLI.DAG;
2734
const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2735
const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2736
const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2737
SDValue Callee = CLI.Callee;
2738
CallingConv::ID CalleeCC = CLI.CallConv;
2739
bool isVarArg = CLI.IsVarArg;
2740
2741
if (!mayTailCallThisCC(CalleeCC))
2742
return false;
2743
2744
// If -tailcallopt is specified, make fastcc functions tail-callable.
2745
MachineFunction &MF = DAG.getMachineFunction();
2746
const Function &CallerF = MF.getFunction();
2747
2748
// If the function return type is x86_fp80 and the callee return type is not,
2749
// then the FP_EXTEND of the call result is not a nop. It's not safe to
2750
// perform a tailcall optimization here.
2751
if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2752
return false;
2753
2754
CallingConv::ID CallerCC = CallerF.getCallingConv();
2755
bool CCMatch = CallerCC == CalleeCC;
2756
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2757
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2758
bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2759
CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2760
2761
// Win64 functions have extra shadow space for argument homing. Don't do the
2762
// sibcall if the caller and callee have mismatched expectations for this
2763
// space.
2764
if (IsCalleeWin64 != IsCallerWin64)
2765
return false;
2766
2767
if (IsGuaranteeTCO) {
2768
if (canGuaranteeTCO(CalleeCC) && CCMatch)
2769
return true;
2770
return false;
2771
}
2772
2773
// Look for obvious safe cases to perform tail call optimization that do not
2774
// require ABI changes. This is what gcc calls sibcall.
2775
2776
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2777
// emit a special epilogue.
2778
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2779
if (RegInfo->hasStackRealignment(MF))
2780
return false;
2781
2782
// Also avoid sibcall optimization if we're an sret return fn and the callee
2783
// is incompatible. See comment in LowerReturn about why hasStructRetAttr is
2784
// insufficient.
2785
if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
2786
// For a compatible tail call the callee must return our sret pointer. So it
2787
// needs to be (a) an sret function itself and (b) we pass our sret as its
2788
// sret. Condition #b is harder to determine.
2789
return false;
2790
} else if (IsCalleePopSRet)
2791
// The callee pops an sret, so we cannot tail-call, as our caller doesn't
2792
// expect that.
2793
return false;
2794
2795
// Do not sibcall optimize vararg calls unless all arguments are passed via
2796
// registers.
2797
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    for (const auto &VA : ArgLocs)
      if (!VA.isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if it's not used by the call it is not safe to optimize
  // this into a sibcall.
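  // For example, if the caller ignores the result of a callee returning in
  // ST0, the caller would normally emit an FSTP after the call to pop the x87
  // stack; once we have jumped to the callee there is no place to do that.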
  bool Unused = false;
  for (const auto &In : Ins) {
    if (!In.Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
    RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (const auto &VA : RVLocs) {
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  unsigned StackArgsSize = CCInfo.getStackSize();

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    if (StackArgsSize > 0) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
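      // For example, a function that merely forwards its own incoming stack
      // arguments, unmodified and at the same offsets, can reuse the caller's
      // slots; this is the only stack layout a sibcall can cope with, since no
      // new outgoing argument area is created.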
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
        const CCValAssign &VA = ArgLocs[I];
        SDValue Arg = OutVals[I];
        ISD::ArgFlagsTy Flags = Outs[I].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments so watch out for those.
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
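      // For example, in PIC mode an indirect tail call with 'inreg' arguments
      // in both EAX and EDX leaves only one free register of {EAX, ECX, EDX}
      // where two are needed (address computation plus call target), so it is
      // rejected below.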
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
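    // For example, if the caller was entered as a 32-bit stdcall taking 12
    // bytes of arguments, the tail-called function ends up executing the
    // return on the caller's behalf and must therefore pop exactly those 12
    // bytes itself.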
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
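/// For example, a 32-bit stdcall function taking two i32 arguments pops them
/// itself with 'ret 8', while in 64-bit mode these conventions are all
/// caller-pop.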
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}