Path: contrib/llvm-project/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the lowering of LLVM calls to DAG nodes.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "x86-isel"

using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
                             const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
  switch (CC) {
  default:
    return false;
  case CallingConv::X86_RegCall:
  case CallingConv::PreserveMost:
  case CallingConv::PreserveAll:
    return true;
  }
}

/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
  return CC == CallingConv::X86_RegCall;
}

static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
                                 const X86Subtarget &Subtarget) {
  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
  // convention is one that uses k registers.
  if (NumElts == 2)
    return {MVT::v2i64, 1};
  if (NumElts == 4)
    return {MVT::v4i32, 1};
  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v8i16, 1};
  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v16i8, 1};
  // v32i1 passes in ymm unless we have BWI and the calling convention is
  // regcall.
  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
    return {MVT::v32i8, 1};
  // Split v64i1 vectors if we don't have v64i8 available.
  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
    if (Subtarget.useAVX512Regs())
      return {MVT::v64i8, 1};
    return {MVT::v32i8, 2};
  }

  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};

  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}

MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return RegisterVT;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return MVT::v8f16;
  }

  // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
  if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
      !Subtarget.hasX87())
    return MVT::i32;

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  if (VT == MVT::bf16)
    return MVT::f16;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  if (VT.isVector()) {
    if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
      unsigned NumElts = VT.getVectorNumElements();

      MVT RegisterVT;
      unsigned NumRegisters;
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
      if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
        return NumRegisters;
    }

    if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
      return 1;
  }

  // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
  // x87 is disabled.
  if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
    if (VT == MVT::f64)
      return 2;
    if (VT == MVT::f80)
      return 3;
  }

  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512() &&
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
       VT.getVectorNumElements() > 64)) {
    RegisterVT = MVT::i8;
    IntermediateVT = MVT::i1;
    NumIntermediates = VT.getVectorNumElements();
    return NumIntermediates;
  }

  // Split v64i1 vectors if we don't have v64i8 available.
  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      CC != CallingConv::X86_RegCall) {
    RegisterVT = MVT::v32i8;
    IntermediateVT = MVT::v32i1;
    NumIntermediates = 2;
    return 2;
  }

  // Split vNbf16 vectors according to vNf16.
  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
    VT = VT.changeVectorElementType(MVT::f16);

  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}

EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
                                          LLVMContext &Context,
                                          EVT VT) const {
  if (!VT.isVector())
    return MVT::i8;

  if (Subtarget.hasAVX512()) {
    // Figure out what this type will be legalized to.
    EVT LegalVT = VT;
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);

    // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
    if (LegalVT.getSimpleVT().is512BitVector())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());

    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
      // If we legalized to less than a 512-bit vector, then we will use a vXi1
      // compare for vXi32/vXi64 for sure. If we have BWI we will also support
      // vXi16/vXi8.
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
    }
  }

  return VT.changeVectorElementTypeToInteger();
}

/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
}

/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  if (Subtarget.is64Bit()) {
    // Max of 8 and alignment of type.
    Align TyAlign = DL.getABITypeAlign(Ty);
    if (TyAlign > 8)
      return TyAlign.value();
    return 8;
  }

  Align Alignment(4);
  if (Subtarget.hasSSE1())
    getMaxByValAlign(Ty, Alignment);
  return Alignment.value();
}

/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // FIXME: Check if unaligned 64-byte accesses are slow.
      if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // FIXME: Check if unaligned 32-byte accesses are slow.
      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          Subtarget.useLight256BitInstructions()) {
        // Although this isn't a well-supported type for AVX1, we'll let
        // legalization and shuffle lowering produce the optimal codegen. If we
        // choose an optimal type with a vector element larger than a byte,
        // getMemsetStores() may create an intermediate splat (using an integer
        // multiply) before we splat as a vector.
        return MVT::v32i8;
      }
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v16i8;
      // TODO: Can SSE1 handle a byte vector?
      // If we have SSE1 registers we should be able to use them.
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
          (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v4f32;
    } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
               Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // Do not use f64 to lower memcpy if source is string constant. It's
      // better to use i32 to avoid the loads.
      // Also, do not use f64 to lower memset unless this is a memset of zeros.
      // The gymnastics of splatting a byte value into an XMM register and then
      // only using 8-byte stores (because this is a CPU with slow unaligned
      // 16-byte accesses) makes that a loser.
      return MVT::f64;
    }
  }
  // This is a compromise. If we reach here, unaligned accesses may be slow on
  // this target. However, creating smaller, aligned accesses could be even
  // slower and would certainly be a lot more code.
  if (Subtarget.is64Bit() && Op.size() >= 8)
    return MVT::i64;
  return MVT::i32;
}

bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
  if (VT == MVT::f32)
    return Subtarget.hasSSE1();
  if (VT == MVT::f64)
    return Subtarget.hasSSE2();
  return true;
}

static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
  return (8 * Alignment.value()) % SizeInBits == 0;
}

bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
  if (isBitAligned(Alignment, VT.getSizeInBits()))
    return true;
  switch (VT.getSizeInBits()) {
  default:
    // 8-byte and under are always assumed to be fast.
    return true;
  case 128:
    return !Subtarget.isUnalignedMem16Slow();
  case 256:
    return !Subtarget.isUnalignedMem32Slow();
    // TODO: What about AVX-512 (512-bit) accesses?
  }
}

bool X86TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  // NonTemporal vector memory ops must be aligned.
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
    if (!!(Flags & MachineMemOperand::MOLoad))
      return (Alignment < 16 || !Subtarget.hasSSE41());
    return false;
  }
  // Misaligned accesses of any size are always allowed.
  return true;
}

bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
                                           const DataLayout &DL, EVT VT,
                                           unsigned AddrSpace, Align Alignment,
                                           MachineMemOperand::Flags Flags,
                                           unsigned *Fast) const {
  if (Fast)
    *Fast = isMemoryAccessFast(VT, Alignment);
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
                                       /*Fast=*/nullptr))
      return true;
    // NonTemporal vector memory ops are special, and must be aligned.
    if (!isBitAligned(Alignment, VT.getSizeInBits()))
      return false;
    switch (VT.getSizeInBits()) {
    case 128:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
        return true;
      return false;
    case 256:
      if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
        return true;
      if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
        return true;
      return false;
    case 512:
      if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
        return true;
      return false;
    default:
      return false; // Don't have NonTemporal vector memory ops of this size.
    }
  }
  return true;
}

/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
  // symbol.
  if (isPositionIndependent() && Subtarget.isPICStyleGOT())
    return MachineJumpTableInfo::EK_Custom32;
  if (isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Large &&
      !Subtarget.isTargetCOFF())
    return MachineJumpTableInfo::EK_LabelDifference64;

  // Otherwise, use the normal jump table encoding heuristics.
  return TargetLowering::getJumpTableEncoding();
}

bool X86TargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                              ArgListTy &Args) const {

  // Only relabel X86-32 for C / Stdcall CCs.
  if (Subtarget.is64Bit())
    return;
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
    return;
  unsigned ParamRegs = 0;
  if (auto *M = MF->getFunction().getParent())
    ParamRegs = M->getNumberRegisterParameters();

  // Mark the first N int arguments as having reg
  for (auto &Arg : Args) {
    Type *T = Arg.Ty;
    if (T->isIntOrPtrTy())
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
          numRegs = 2;
        if (ParamRegs < numRegs)
          return;
        ParamRegs -= numRegs;
        Arg.IsInReg = true;
      }
  }
}

const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid, MCContext &Ctx) const {
  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
  // entries.
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
}

/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.is64Bit())
    // This doesn't have SDLoc associated with it, but is not really the
    // same as a Register.
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  return Table;
}

/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                             MCContext &Ctx) const {
  // X86-64 uses RIP relative addressing based on the jump table label.
  if (Subtarget.isPICStyleRIPRel() ||
      (Subtarget.is64Bit() &&
       getTargetMachine().getCodeModel() == CodeModel::Large))
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  // Otherwise, the reference is relative to the PIC base.
  return
MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);495}496497std::pair<const TargetRegisterClass *, uint8_t>498X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,499MVT VT) const {500const TargetRegisterClass *RRC = nullptr;501uint8_t Cost = 1;502switch (VT.SimpleTy) {503default:504return TargetLowering::findRepresentativeClass(TRI, VT);505case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:506RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;507break;508case MVT::x86mmx:509RRC = &X86::VR64RegClass;510break;511case MVT::f32: case MVT::f64:512case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:513case MVT::v4f32: case MVT::v2f64:514case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:515case MVT::v8f32: case MVT::v4f64:516case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:517case MVT::v16f32: case MVT::v8f64:518RRC = &X86::VR128XRegClass;519break;520}521return std::make_pair(RRC, Cost);522}523524unsigned X86TargetLowering::getAddressSpace() const {525if (Subtarget.is64Bit())526return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;527return 256;528}529530static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {531return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||532(TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));533}534535static Constant* SegmentOffset(IRBuilderBase &IRB,536int Offset, unsigned AddressSpace) {537return ConstantExpr::getIntToPtr(538ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),539IRB.getPtrTy(AddressSpace));540}541542Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {543// glibc, bionic, and Fuchsia have a special slot for the stack guard in544// tcbhead_t; use it instead of the usual global variable (see545// sysdeps/{i386,x86_64}/nptl/tls.h)546if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {547unsigned AddressSpace = getAddressSpace();548549// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.550if (Subtarget.isTargetFuchsia())551return SegmentOffset(IRB, 0x10, AddressSpace);552553Module *M = IRB.GetInsertBlock()->getParent()->getParent();554// Specially, some users may customize the base reg and offset.555int Offset = M->getStackProtectorGuardOffset();556// If we don't set -stack-protector-guard-offset value:557// %fs:0x28, unless we're using a Kernel code model, in which case558// it's %gs:0x28. gs:0x14 on i386.559if (Offset == INT_MAX)560Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;561562StringRef GuardReg = M->getStackProtectorGuardReg();563if (GuardReg == "fs")564AddressSpace = X86AS::FS;565else if (GuardReg == "gs")566AddressSpace = X86AS::GS;567568// Use symbol guard if user specify.569StringRef GuardSymb = M->getStackProtectorGuardSymbol();570if (!GuardSymb.empty()) {571GlobalVariable *GV = M->getGlobalVariable(GuardSymb);572if (!GV) {573Type *Ty = Subtarget.is64Bit() ? 
Type::getInt64Ty(M->getContext())574: Type::getInt32Ty(M->getContext());575GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,576nullptr, GuardSymb, nullptr,577GlobalValue::NotThreadLocal, AddressSpace);578if (!Subtarget.isTargetDarwin())579GV->setDSOLocal(M->getDirectAccessExternalData());580}581return GV;582}583584return SegmentOffset(IRB, Offset, AddressSpace);585}586return TargetLowering::getIRStackGuard(IRB);587}588589void X86TargetLowering::insertSSPDeclarations(Module &M) const {590// MSVC CRT provides functionalities for stack protection.591if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||592Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {593// MSVC CRT has a global variable holding security cookie.594M.getOrInsertGlobal("__security_cookie",595PointerType::getUnqual(M.getContext()));596597// MSVC CRT has a function to validate security cookie.598FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(599"__security_check_cookie", Type::getVoidTy(M.getContext()),600PointerType::getUnqual(M.getContext()));601if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {602F->setCallingConv(CallingConv::X86_FastCall);603F->addParamAttr(0, Attribute::AttrKind::InReg);604}605return;606}607608StringRef GuardMode = M.getStackProtectorGuard();609610// glibc, bionic, and Fuchsia have a special slot for the stack guard.611if ((GuardMode == "tls" || GuardMode.empty()) &&612hasStackGuardSlotTLS(Subtarget.getTargetTriple()))613return;614TargetLowering::insertSSPDeclarations(M);615}616617Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {618// MSVC CRT has a global variable holding security cookie.619if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||620Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {621return M.getGlobalVariable("__security_cookie");622}623return TargetLowering::getSDagStackGuard(M);624}625626Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {627// MSVC CRT has a function to validate security cookie.628if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||629Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {630return M.getFunction("__security_check_cookie");631}632return TargetLowering::getSSPStackGuardCheck(M);633}634635Value *636X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {637// Android provides a fixed TLS slot for the SafeStack pointer. See the638// definition of TLS_SLOT_SAFESTACK in639// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h640if (Subtarget.isTargetAndroid()) {641// %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:642// %gs:0x24 on i386643int Offset = (Subtarget.is64Bit()) ? 
0x48 : 0x24;644return SegmentOffset(IRB, Offset, getAddressSpace());645}646647// Fuchsia is similar.648if (Subtarget.isTargetFuchsia()) {649// <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.650return SegmentOffset(IRB, 0x18, getAddressSpace());651}652653return TargetLowering::getSafeStackPointerLocation(IRB);654}655656//===----------------------------------------------------------------------===//657// Return Value Calling Convention Implementation658//===----------------------------------------------------------------------===//659660bool X86TargetLowering::CanLowerReturn(661CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,662const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {663SmallVector<CCValAssign, 16> RVLocs;664CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);665return CCInfo.CheckReturn(Outs, RetCC_X86);666}667668const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {669static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };670return ScratchRegs;671}672673ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {674static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};675return RCRegs;676}677678/// Lowers masks values (v*i1) to the local register values679/// \returns DAG node after lowering to register type680static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,681const SDLoc &DL, SelectionDAG &DAG) {682EVT ValVT = ValArg.getValueType();683684if (ValVT == MVT::v1i1)685return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,686DAG.getIntPtrConstant(0, DL));687688if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||689(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {690// Two stage lowering might be required691// bitcast: v8i1 -> i8 / v16i1 -> i16692// anyextend: i8 -> i32 / i16 -> i32693EVT TempValLoc = ValVT == MVT::v8i1 ? 
MVT::i8 : MVT::i16;694SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);695if (ValLoc == MVT::i32)696ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);697return ValToCopy;698}699700if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||701(ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {702// One stage lowering is required703// bitcast: v32i1 -> i32 / v64i1 -> i64704return DAG.getBitcast(ValLoc, ValArg);705}706707return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);708}709710/// Breaks v64i1 value into two registers and adds the new node to the DAG711static void Passv64i1ArgInRegs(712const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,713SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,714CCValAssign &NextVA, const X86Subtarget &Subtarget) {715assert(Subtarget.hasBWI() && "Expected AVX512BW target!");716assert(Subtarget.is32Bit() && "Expecting 32 bit target");717assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");718assert(VA.isRegLoc() && NextVA.isRegLoc() &&719"The value should reside in two registers");720721// Before splitting the value we cast it to i64722Arg = DAG.getBitcast(MVT::i64, Arg);723724// Splitting the value into two i32 types725SDValue Lo, Hi;726std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);727728// Attach the two i32 types into corresponding registers729RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));730RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));731}732733SDValue734X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,735bool isVarArg,736const SmallVectorImpl<ISD::OutputArg> &Outs,737const SmallVectorImpl<SDValue> &OutVals,738const SDLoc &dl, SelectionDAG &DAG) const {739MachineFunction &MF = DAG.getMachineFunction();740X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();741742// In some cases we need to disable registers from the default CSR list.743// For example, when they are used as return registers (preserve_* and X86's744// regcall) or for argument passing (X86's regcall).745bool ShouldDisableCalleeSavedRegister =746shouldDisableRetRegFromCSR(CallConv) ||747MF.getFunction().hasFnAttribute("no_caller_saved_registers");748749if (CallConv == CallingConv::X86_INTR && !Outs.empty())750report_fatal_error("X86 interrupts may not return any value");751752SmallVector<CCValAssign, 16> RVLocs;753CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());754CCInfo.AnalyzeReturn(Outs, RetCC_X86);755756SmallVector<std::pair<Register, SDValue>, 4> RetVals;757for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;758++I, ++OutsIndex) {759CCValAssign &VA = RVLocs[I];760assert(VA.isRegLoc() && "Can only return in registers!");761762// Add the register to the CalleeSaveDisableRegs list.763if (ShouldDisableCalleeSavedRegister)764MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());765766SDValue ValToCopy = OutVals[OutsIndex];767EVT ValVT = ValToCopy.getValueType();768769// Promote values to the appropriate types.770if (VA.getLocInfo() == CCValAssign::SExt)771ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);772else if (VA.getLocInfo() == CCValAssign::ZExt)773ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);774else if (VA.getLocInfo() == CCValAssign::AExt) {775if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)776ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);777else778ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);779}780else if 
(VA.getLocInfo() == CCValAssign::BCvt)781ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);782783assert(VA.getLocInfo() != CCValAssign::FPExt &&784"Unexpected FP-extend for return value.");785786// Report an error if we have attempted to return a value via an XMM787// register and SSE was disabled.788if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {789errorUnsupported(DAG, dl, "SSE register return with SSE disabled");790VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.791} else if (!Subtarget.hasSSE2() &&792X86::FR64XRegClass.contains(VA.getLocReg()) &&793ValVT == MVT::f64) {794// When returning a double via an XMM register, report an error if SSE2 is795// not enabled.796errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");797VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.798}799800// Returns in ST0/ST1 are handled specially: these are pushed as operands to801// the RET instruction and handled by the FP Stackifier.802if (VA.getLocReg() == X86::FP0 ||803VA.getLocReg() == X86::FP1) {804// If this is a copy from an xmm register to ST(0), use an FPExtend to805// change the value to the FP stack register class.806if (isScalarFPTypeInSSEReg(VA.getValVT()))807ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);808RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));809// Don't emit a copytoreg.810continue;811}812813// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64814// which is returned in RAX / RDX.815if (Subtarget.is64Bit()) {816if (ValVT == MVT::x86mmx) {817if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {818ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);819ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,820ValToCopy);821// If we don't have SSE2 available, convert to v4f32 so the generated822// register is legal.823if (!Subtarget.hasSSE2())824ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);825}826}827}828829if (VA.needsCustom()) {830assert(VA.getValVT() == MVT::v64i1 &&831"Currently the only custom case is when we split v64i1 to 2 regs");832833Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],834Subtarget);835836// Add the second register to the CalleeSaveDisableRegs list.837if (ShouldDisableCalleeSavedRegister)838MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());839} else {840RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));841}842}843844SDValue Glue;845SmallVector<SDValue, 6> RetOps;846RetOps.push_back(Chain); // Operand #0 = Chain (updated below)847// Operand #1 = Bytes To Pop848RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,849MVT::i32));850851// Copy the result values into the output registers.852for (auto &RetVal : RetVals) {853if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {854RetOps.push_back(RetVal.second);855continue; // Don't emit a copytoreg.856}857858Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);859Glue = Chain.getValue(1);860RetOps.push_back(861DAG.getRegister(RetVal.first, RetVal.second.getValueType()));862}863864// Swift calling convention does not require we copy the sret argument865// into %rax/%eax for the return, and SRetReturnReg is not set for Swift.866867// All x86 ABIs require that for returning structs by value we copy868// the sret argument into %rax/%eax (depending on ABI) for the return.869// We saved the argument into a virtual register in the entry block,870// so now we copy the value out and into 
%rax/%eax.871//872// Checking Function.hasStructRetAttr() here is insufficient because the IR873// may not have an explicit sret argument. If FuncInfo.CanLowerReturn is874// false, then an sret argument may be implicitly inserted in the SelDAG. In875// either case FuncInfo->setSRetReturnReg() will have been called.876if (Register SRetReg = FuncInfo->getSRetReturnReg()) {877// When we have both sret and another return value, we should use the878// original Chain stored in RetOps[0], instead of the current Chain updated879// in the above loop. If we only have sret, RetOps[0] equals to Chain.880881// For the case of sret and another return value, we have882// Chain_0 at the function entry883// Chain_1 = getCopyToReg(Chain_0) in the above loop884// If we use Chain_1 in getCopyFromReg, we will have885// Val = getCopyFromReg(Chain_1)886// Chain_2 = getCopyToReg(Chain_1, Val) from below887888// getCopyToReg(Chain_0) will be glued together with889// getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be890// in Unit B, and we will have cyclic dependency between Unit A and Unit B:891// Data dependency from Unit B to Unit A due to usage of Val in892// getCopyToReg(Chain_1, Val)893// Chain dependency from Unit A to Unit B894895// So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.896SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,897getPointerTy(MF.getDataLayout()));898899Register RetValReg900= (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?901X86::RAX : X86::EAX;902Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);903Glue = Chain.getValue(1);904905// RAX/EAX now acts like a return value.906RetOps.push_back(907DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));908909// Add the returned register to the CalleeSaveDisableRegs list. 
Don't do910// this however for preserve_most/preserve_all to minimize the number of911// callee-saved registers for these CCs.912if (ShouldDisableCalleeSavedRegister &&913CallConv != CallingConv::PreserveAll &&914CallConv != CallingConv::PreserveMost)915MF.getRegInfo().disableCalleeSavedRegister(RetValReg);916}917918const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();919const MCPhysReg *I =920TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());921if (I) {922for (; *I; ++I) {923if (X86::GR64RegClass.contains(*I))924RetOps.push_back(DAG.getRegister(*I, MVT::i64));925else926llvm_unreachable("Unexpected register class in CSRsViaCopy!");927}928}929930RetOps[0] = Chain; // Update chain.931932// Add the glue if we have it.933if (Glue.getNode())934RetOps.push_back(Glue);935936X86ISD::NodeType opcode = X86ISD::RET_GLUE;937if (CallConv == CallingConv::X86_INTR)938opcode = X86ISD::IRET;939return DAG.getNode(opcode, dl, MVT::Other, RetOps);940}941942bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {943if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))944return false;945946SDValue TCChain = Chain;947SDNode *Copy = *N->use_begin();948if (Copy->getOpcode() == ISD::CopyToReg) {949// If the copy has a glue operand, we conservatively assume it isn't safe to950// perform a tail call.951if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)952return false;953TCChain = Copy->getOperand(0);954} else if (Copy->getOpcode() != ISD::FP_EXTEND)955return false;956957bool HasRet = false;958for (const SDNode *U : Copy->uses()) {959if (U->getOpcode() != X86ISD::RET_GLUE)960return false;961// If we are returning more than one value, we can definitely962// not make a tail call see PR19530963if (U->getNumOperands() > 4)964return false;965if (U->getNumOperands() == 4 &&966U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)967return false;968HasRet = true;969}970971if (!HasRet)972return false;973974Chain = TCChain;975return true;976}977978EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,979ISD::NodeType ExtendKind) const {980MVT ReturnMVT = MVT::i32;981982bool Darwin = Subtarget.getTargetTriple().isOSDarwin();983if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {984// The ABI does not require i1, i8 or i16 to be extended.985//986// On Darwin, there is code in the wild relying on Clang's old behaviour of987// always extending i8/i16 return values, so keep doing that for now.988// (PR26665).989ReturnMVT = MVT::i8;990}991992EVT MinVT = getRegisterType(Context, ReturnMVT);993return VT.bitsLT(MinVT) ? MinVT : VT;994}995996/// Reads two 32 bit registers and creates a 64 bit mask value.997/// \param VA The current 32 bit value that need to be assigned.998/// \param NextVA The next 32 bit value that need to be assigned.999/// \param Root The parent DAG node.1000/// \param [in,out] InGlue Represents SDvalue in the parent DAG node for1001/// glue purposes. 
In the case the DAG is already using1002/// physical register instead of virtual, we should glue1003/// our new SDValue to InGlue SDvalue.1004/// \return a new SDvalue of size 64bit.1005static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,1006SDValue &Root, SelectionDAG &DAG,1007const SDLoc &DL, const X86Subtarget &Subtarget,1008SDValue *InGlue = nullptr) {1009assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");1010assert(Subtarget.is32Bit() && "Expecting 32 bit target");1011assert(VA.getValVT() == MVT::v64i1 &&1012"Expecting first location of 64 bit width type");1013assert(NextVA.getValVT() == VA.getValVT() &&1014"The locations should have the same type");1015assert(VA.isRegLoc() && NextVA.isRegLoc() &&1016"The values should reside in two registers");10171018SDValue Lo, Hi;1019SDValue ArgValueLo, ArgValueHi;10201021MachineFunction &MF = DAG.getMachineFunction();1022const TargetRegisterClass *RC = &X86::GR32RegClass;10231024// Read a 32 bit value from the registers.1025if (nullptr == InGlue) {1026// When no physical register is present,1027// create an intermediate virtual register.1028Register Reg = MF.addLiveIn(VA.getLocReg(), RC);1029ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);1030Reg = MF.addLiveIn(NextVA.getLocReg(), RC);1031ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);1032} else {1033// When a physical register is available read the value from it and glue1034// the reads together.1035ArgValueLo =1036DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);1037*InGlue = ArgValueLo.getValue(2);1038ArgValueHi =1039DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);1040*InGlue = ArgValueHi.getValue(2);1041}10421043// Convert the i32 type into v32i1 type.1044Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);10451046// Convert the i32 type into v32i1 type.1047Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);10481049// Concatenate the two values together.1050return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);1051}10521053/// The function will lower a register of various sizes (8/16/32/64)1054/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)1055/// \returns a DAG node contains the operand after lowering to mask type.1056static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,1057const EVT &ValLoc, const SDLoc &DL,1058SelectionDAG &DAG) {1059SDValue ValReturned = ValArg;10601061if (ValVT == MVT::v1i1)1062return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);10631064if (ValVT == MVT::v64i1) {1065// In 32 bit machine, this case is handled by getv64i1Argument1066assert(ValLoc == MVT::i64 && "Expecting only i64 locations");1067// In 64 bit machine, There is no need to truncate the value only bitcast1068} else {1069MVT MaskLenVT;1070switch (ValVT.getSimpleVT().SimpleTy) {1071case MVT::v8i1:1072MaskLenVT = MVT::i8;1073break;1074case MVT::v16i1:1075MaskLenVT = MVT::i16;1076break;1077case MVT::v32i1:1078MaskLenVT = MVT::i32;1079break;1080default:1081llvm_unreachable("Expecting a vector of i1 types");1082}10831084ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);1085}1086return DAG.getBitcast(ValVT, ValReturned);1087}10881089/// Lower the result values of a call into the1090/// appropriate copies out of appropriate physical registers.1091///1092SDValue X86TargetLowering::LowerCallResult(1093SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,1094const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,1095SelectionDAG &DAG, 
SmallVectorImpl<SDValue> &InVals,1096uint32_t *RegMask) const {10971098const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();1099// Assign locations to each value returned by this call.1100SmallVector<CCValAssign, 16> RVLocs;1101CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,1102*DAG.getContext());1103CCInfo.AnalyzeCallResult(Ins, RetCC_X86);11041105// Copy all of the result registers out of their specified physreg.1106for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;1107++I, ++InsIndex) {1108CCValAssign &VA = RVLocs[I];1109EVT CopyVT = VA.getLocVT();11101111// In some calling conventions we need to remove the used registers1112// from the register mask.1113if (RegMask) {1114for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))1115RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));1116}11171118// Report an error if there was an attempt to return FP values via XMM1119// registers.1120if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {1121errorUnsupported(DAG, dl, "SSE register return with SSE disabled");1122if (VA.getLocReg() == X86::XMM1)1123VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.1124else1125VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.1126} else if (!Subtarget.hasSSE2() &&1127X86::FR64XRegClass.contains(VA.getLocReg()) &&1128CopyVT == MVT::f64) {1129errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");1130if (VA.getLocReg() == X86::XMM1)1131VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.1132else1133VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.1134}11351136// If we prefer to use the value in xmm registers, copy it out as f80 and1137// use a truncate to move it from fp stack reg to xmm reg.1138bool RoundAfterCopy = false;1139if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&1140isScalarFPTypeInSSEReg(VA.getValVT())) {1141if (!Subtarget.hasX87())1142report_fatal_error("X87 register return with X87 disabled");1143CopyVT = MVT::f80;1144RoundAfterCopy = (CopyVT != VA.getLocVT());1145}11461147SDValue Val;1148if (VA.needsCustom()) {1149assert(VA.getValVT() == MVT::v64i1 &&1150"Currently the only custom case is when we split v64i1 to 2 regs");1151Val =1152getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);1153} else {1154Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)1155.getValue(1);1156Val = Chain.getValue(0);1157InGlue = Chain.getValue(2);1158}11591160if (RoundAfterCopy)1161Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,1162// This truncation won't change the value.1163DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));11641165if (VA.isExtInLoc()) {1166if (VA.getValVT().isVector() &&1167VA.getValVT().getScalarType() == MVT::i1 &&1168((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||1169(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {1170// promoting a mask type (v*i1) into a register of type i64/i32/i16/i81171Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);1172} else1173Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);1174}11751176if (VA.getLocInfo() == CCValAssign::BCvt)1177Val = DAG.getBitcast(VA.getValVT(), Val);11781179InVals.push_back(Val);1180}11811182return Chain;1183}11841185//===----------------------------------------------------------------------===//1186// C & StdCall & Fast Calling Convention 
implementation1187//===----------------------------------------------------------------------===//1188// StdCall calling convention seems to be standard for many Windows' API1189// routines and around. It differs from C calling convention just a little:1190// callee should clean up the stack, not caller. Symbols should be also1191// decorated in some fancy way :) It doesn't support any vector arguments.1192// For info on fast calling convention see Fast Calling Convention (tail call)1193// implementation LowerX86_32FastCCCallTo.11941195/// Determines whether Args, either a set of outgoing arguments to a call, or a1196/// set of incoming args of a call, contains an sret pointer that the callee1197/// pops1198template <typename T>1199static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,1200const X86Subtarget &Subtarget) {1201// Not C++20 (yet), so no concepts available.1202static_assert(std::is_same_v<T, ISD::OutputArg> ||1203std::is_same_v<T, ISD::InputArg>,1204"requires ISD::OutputArg or ISD::InputArg");12051206// Only 32-bit pops the sret. It's a 64-bit world these days, so early-out1207// for most compilations.1208if (!Subtarget.is32Bit())1209return false;12101211if (Args.empty())1212return false;12131214// Most calls do not have an sret argument, check the arg next.1215const ISD::ArgFlagsTy &Flags = Args[0].Flags;1216if (!Flags.isSRet() || Flags.isInReg())1217return false;12181219// The MSVCabi does not pop the sret.1220if (Subtarget.getTargetTriple().isOSMSVCRT())1221return false;12221223// MCUs don't pop the sret1224if (Subtarget.isTargetMCU())1225return false;12261227// Callee pops argument1228return true;1229}12301231/// Make a copy of an aggregate at address specified by "Src" to address1232/// "Dst" with size and alignment information specified by the specific1233/// parameter attribute. 
The copy will be passed as a byval function parameter.1234static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,1235SDValue Chain, ISD::ArgFlagsTy Flags,1236SelectionDAG &DAG, const SDLoc &dl) {1237SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);12381239return DAG.getMemcpy(1240Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),1241/*isVolatile*/ false, /*AlwaysInline=*/true,1242/*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());1243}12441245/// Return true if the calling convention is one that we can guarantee TCO for.1246static bool canGuaranteeTCO(CallingConv::ID CC) {1247return (CC == CallingConv::Fast || CC == CallingConv::GHC ||1248CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||1249CC == CallingConv::Tail || CC == CallingConv::SwiftTail);1250}12511252/// Return true if we might ever do TCO for calls with this calling convention.1253static bool mayTailCallThisCC(CallingConv::ID CC) {1254switch (CC) {1255// C calling conventions:1256case CallingConv::C:1257case CallingConv::Win64:1258case CallingConv::X86_64_SysV:1259case CallingConv::PreserveNone:1260// Callee pop conventions:1261case CallingConv::X86_ThisCall:1262case CallingConv::X86_StdCall:1263case CallingConv::X86_VectorCall:1264case CallingConv::X86_FastCall:1265// Swift:1266case CallingConv::Swift:1267return true;1268default:1269return canGuaranteeTCO(CC);1270}1271}12721273/// Return true if the function is being made into a tailcall target by1274/// changing its ABI.1275static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {1276return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||1277CC == CallingConv::Tail || CC == CallingConv::SwiftTail;1278}12791280bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {1281if (!CI->isTailCall())1282return false;12831284CallingConv::ID CalleeCC = CI->getCallingConv();1285if (!mayTailCallThisCC(CalleeCC))1286return false;12871288return true;1289}12901291SDValue1292X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,1293const SmallVectorImpl<ISD::InputArg> &Ins,1294const SDLoc &dl, SelectionDAG &DAG,1295const CCValAssign &VA,1296MachineFrameInfo &MFI, unsigned i) const {1297// Create the nodes corresponding to a load from this parameter slot.1298ISD::ArgFlagsTy Flags = Ins[i].Flags;1299bool AlwaysUseMutable = shouldGuaranteeTCO(1300CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);1301bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();1302EVT ValVT;1303MVT PtrVT = getPointerTy(DAG.getDataLayout());13041305// If value is passed by pointer we have address passed instead of the value1306// itself. No need to extend if the mask value and location share the same1307// absolute size.1308bool ExtendedInMem =1309VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&1310VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();13111312if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)1313ValVT = VA.getLocVT();1314else1315ValVT = VA.getValVT();13161317// FIXME: For now, all byval parameter objects are marked mutable. This can be1318// changed with more analysis.1319// In case of tail call optimization mark all arguments mutable. 
Since they1320// could be overwritten by lowering of arguments in case of a tail call.1321if (Flags.isByVal()) {1322unsigned Bytes = Flags.getByValSize();1323if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.13241325// FIXME: For now, all byval parameter objects are marked as aliasing. This1326// can be improved with deeper analysis.1327int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,1328/*isAliased=*/true);1329return DAG.getFrameIndex(FI, PtrVT);1330}13311332EVT ArgVT = Ins[i].ArgVT;13331334// If this is a vector that has been split into multiple parts, don't elide1335// the copy. The layout on the stack may not match the packed in-memory1336// layout.1337bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();13381339// This is an argument in memory. We might be able to perform copy elision.1340// If the argument is passed directly in memory without any extension, then we1341// can perform copy elision. Large vector types, for example, may be passed1342// indirectly by pointer.1343if (Flags.isCopyElisionCandidate() &&1344VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&1345!ScalarizedVector) {1346SDValue PartAddr;1347if (Ins[i].PartOffset == 0) {1348// If this is a one-part value or the first part of a multi-part value,1349// create a stack object for the entire argument value type and return a1350// load from our portion of it. This assumes that if the first part of an1351// argument is in memory, the rest will also be in memory.1352int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),1353/*IsImmutable=*/false);1354PartAddr = DAG.getFrameIndex(FI, PtrVT);1355return DAG.getLoad(1356ValVT, dl, Chain, PartAddr,1357MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));1358}13591360// This is not the first piece of an argument in memory. See if there is1361// already a fixed stack object including this offset. If so, assume it1362// was created by the PartOffset == 0 branch above and create a load from1363// the appropriate offset into it.1364int64_t PartBegin = VA.getLocMemOffset();1365int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;1366int FI = MFI.getObjectIndexBegin();1367for (; MFI.isFixedObjectIndex(FI); ++FI) {1368int64_t ObjBegin = MFI.getObjectOffset(FI);1369int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);1370if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)1371break;1372}1373if (MFI.isFixedObjectIndex(FI)) {1374SDValue Addr =1375DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),1376DAG.getIntPtrConstant(Ins[i].PartOffset, dl));1377return DAG.getLoad(ValVT, dl, Chain, Addr,1378MachinePointerInfo::getFixedStack(1379DAG.getMachineFunction(), FI, Ins[i].PartOffset));1380}1381}13821383int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,1384VA.getLocMemOffset(), isImmutable);13851386// Set SExt or ZExt flag.1387if (VA.getLocInfo() == CCValAssign::ZExt) {1388MFI.setObjectZExt(FI, true);1389} else if (VA.getLocInfo() == CCValAssign::SExt) {1390MFI.setObjectSExt(FI, true);1391}13921393MaybeAlign Alignment;1394if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&1395ValVT != MVT::f80)1396Alignment = MaybeAlign(4);1397SDValue FIN = DAG.getFrameIndex(FI, PtrVT);1398SDValue Val = DAG.getLoad(1399ValVT, dl, Chain, FIN,1400MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),1401Alignment);1402return ExtendedInMem1403? (VA.getValVT().isVector()1404? 
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)1405: DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))1406: Val;1407}14081409// FIXME: Get this from tablegen.1410static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,1411const X86Subtarget &Subtarget) {1412assert(Subtarget.is64Bit());14131414if (Subtarget.isCallingConvWin64(CallConv)) {1415static const MCPhysReg GPR64ArgRegsWin64[] = {1416X86::RCX, X86::RDX, X86::R8, X86::R91417};1418return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));1419}14201421static const MCPhysReg GPR64ArgRegs64Bit[] = {1422X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R91423};1424return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));1425}14261427// FIXME: Get this from tablegen.1428static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,1429CallingConv::ID CallConv,1430const X86Subtarget &Subtarget) {1431assert(Subtarget.is64Bit());1432if (Subtarget.isCallingConvWin64(CallConv)) {1433// The XMM registers which might contain var arg parameters are shadowed1434// in their paired GPR. So we only need to save the GPR to their home1435// slots.1436// TODO: __vectorcall will change this.1437return std::nullopt;1438}14391440bool isSoftFloat = Subtarget.useSoftFloat();1441if (isSoftFloat || !Subtarget.hasSSE1())1442// Kernel mode asks for SSE to be disabled, so there are no XMM argument1443// registers.1444return std::nullopt;14451446static const MCPhysReg XMMArgRegs64Bit[] = {1447X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,1448X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM71449};1450return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));1451}14521453#ifndef NDEBUG1454static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {1455return llvm::is_sorted(1456ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {1457return A.getValNo() < B.getValNo();1458});1459}1460#endif14611462namespace {1463/// This is a helper class for lowering variable arguments parameters.1464class VarArgsLoweringHelper {1465public:1466VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,1467SelectionDAG &DAG, const X86Subtarget &Subtarget,1468CallingConv::ID CallConv, CCState &CCInfo)1469: FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),1470TheMachineFunction(DAG.getMachineFunction()),1471TheFunction(TheMachineFunction.getFunction()),1472FrameInfo(TheMachineFunction.getFrameInfo()),1473FrameLowering(*Subtarget.getFrameLowering()),1474TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),1475CCInfo(CCInfo) {}14761477// Lower variable arguments parameters.1478void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);14791480private:1481void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);14821483void forwardMustTailParameters(SDValue &Chain);14841485bool is64Bit() const { return Subtarget.is64Bit(); }1486bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }14871488X86MachineFunctionInfo *FuncInfo;1489const SDLoc &DL;1490SelectionDAG &DAG;1491const X86Subtarget &Subtarget;1492MachineFunction &TheMachineFunction;1493const Function &TheFunction;1494MachineFrameInfo &FrameInfo;1495const TargetFrameLowering &FrameLowering;1496const TargetLowering &TargLowering;1497CallingConv::ID CallConv;1498CCState &CCInfo;1499};1500} // namespace15011502void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(1503SDValue &Chain, unsigned StackSize) {1504// If the function takes variable number of arguments, make 
a frame index for1505// the start of the first vararg value... for expansion of llvm.va_start. We1506// can skip this if there are no va_start calls.1507if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&1508CallConv != CallingConv::X86_ThisCall)) {1509FuncInfo->setVarArgsFrameIndex(1510FrameInfo.CreateFixedObject(1, StackSize, true));1511}15121513// 64-bit calling conventions support varargs and register parameters, so we1514// have to do extra work to spill them in the prologue.1515if (is64Bit()) {1516// Find the first unallocated argument registers.1517ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);1518ArrayRef<MCPhysReg> ArgXMMs =1519get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);1520unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);1521unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);15221523assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&1524"SSE register cannot be used when SSE is disabled!");15251526if (isWin64()) {1527// Get to the caller-allocated home save location. Add 8 to account1528// for the return address.1529int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;1530FuncInfo->setRegSaveFrameIndex(1531FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));1532// Fixup to set vararg frame on shadow area (4 x i64).1533if (NumIntRegs < 4)1534FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());1535} else {1536// For X86-64, if there are vararg parameters that are passed via1537// registers, then we must store them to their spots on the stack so1538// they may be loaded by dereferencing the result of va_next.1539FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);1540FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);1541FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(1542ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));1543}15441545SmallVector<SDValue, 6>1546LiveGPRs; // list of SDValue for GPR registers keeping live input value1547SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers1548// keeping live input value1549SDValue ALVal; // if applicable keeps SDValue for %al register15501551// Gather all the live in physical registers.1552for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {1553Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);1554LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));1555}1556const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);1557if (!AvailableXmms.empty()) {1558Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);1559ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);1560for (MCPhysReg Reg : AvailableXmms) {1561// FastRegisterAllocator spills virtual registers at basic1562// block boundary. That leads to usages of xmm registers1563// outside of check for %al. 
// Pass physical registers to
        // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
        TheMachineFunction.getRegInfo().addLiveIn(Reg);
        LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
      }
    }

    // Store the integer parameter registers.
    SmallVector<SDValue, 8> MemOps;
    SDValue RSFIN =
        DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
                          TargLowering.getPointerTy(DAG.getDataLayout()));
    unsigned Offset = FuncInfo->getVarArgsGPOffset();
    for (SDValue Val : LiveGPRs) {
      SDValue FIN = DAG.getNode(ISD::ADD, DL,
                                TargLowering.getPointerTy(DAG.getDataLayout()),
                                RSFIN, DAG.getIntPtrConstant(Offset, DL));
      SDValue Store =
          DAG.getStore(Val.getValue(1), DL, Val, FIN,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(),
                           FuncInfo->getRegSaveFrameIndex(), Offset));
      MemOps.push_back(Store);
      Offset += 8;
    }

    // Now store the XMM (fp + vector) parameter registers.
    if (!LiveXMMRegs.empty()) {
      SmallVector<SDValue, 12> SaveXMMOps;
      SaveXMMOps.push_back(Chain);
      SaveXMMOps.push_back(ALVal);
      SaveXMMOps.push_back(RSFIN);
      SaveXMMOps.push_back(
          DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
      llvm::append_range(SaveXMMOps, LiveXMMRegs);
      MachineMemOperand *StoreMMO =
          DAG.getMachineFunction().getMachineMemOperand(
              MachinePointerInfo::getFixedStack(
                  DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
                  Offset),
              MachineMemOperand::MOStore, 128, Align(16));
      MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
                                               DL, DAG.getVTList(MVT::Other),
                                               SaveXMMOps, MVT::i8, StoreMMO));
    }

    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
  }
}

void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
  // Find the largest legal vector type.
  MVT VecVT = MVT::Other;
  // FIXME: Only some x86_32 calling conventions support AVX512.
  if (Subtarget.useAVX512Regs() &&
      (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
                     CallConv == CallingConv::Intel_OCL_BI)))
    VecVT = MVT::v16f32;
  else if (Subtarget.hasAVX())
    VecVT = MVT::v8f32;
  else if (Subtarget.hasSSE2())
    VecVT = MVT::v4f32;

  // We forward some GPRs and some vector types.
  SmallVector<MVT, 2> RegParmTypes;
  MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
  RegParmTypes.push_back(IntVT);
  if (VecVT != MVT::Other)
    RegParmTypes.push_back(VecVT);

  // Compute the set of forwarded registers.
The rest are scratch.1635SmallVectorImpl<ForwardedRegister> &Forwards =1636FuncInfo->getForwardedMustTailRegParms();1637CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);16381639// Forward AL for SysV x86_64 targets, since it is used for varargs.1640if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {1641Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);1642Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));1643}16441645// Copy all forwards from physical to virtual registers.1646for (ForwardedRegister &FR : Forwards) {1647// FIXME: Can we use a less constrained schedule?1648SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);1649FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(1650TargLowering.getRegClassFor(FR.VT));1651Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);1652}1653}16541655void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,1656unsigned StackSize) {1657// Set FrameIndex to the 0xAAAAAAA value to mark unset state.1658// If necessary, it would be set into the correct value later.1659FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);1660FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);16611662if (FrameInfo.hasVAStart())1663createVarArgAreaAndStoreRegisters(Chain, StackSize);16641665if (FrameInfo.hasMustTailInVarArgFunc())1666forwardMustTailParameters(Chain);1667}16681669SDValue X86TargetLowering::LowerFormalArguments(1670SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,1671const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,1672SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {1673MachineFunction &MF = DAG.getMachineFunction();1674X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();16751676const Function &F = MF.getFunction();1677if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&1678F.getName() == "main")1679FuncInfo->setForceFramePointer(true);16801681MachineFrameInfo &MFI = MF.getFrameInfo();1682bool Is64Bit = Subtarget.is64Bit();1683bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);16841685assert(1686!(IsVarArg && canGuaranteeTCO(CallConv)) &&1687"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");16881689// Assign locations to all of the incoming arguments.1690SmallVector<CCValAssign, 16> ArgLocs;1691CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());16921693// Allocate shadow area for Win64.1694if (IsWin64)1695CCInfo.AllocateStack(32, Align(8));16961697CCInfo.AnalyzeArguments(Ins, CC_X86);16981699// In vectorcall calling convention a second pass is required for the HVA1700// types.1701if (CallingConv::X86_VectorCall == CallConv) {1702CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);1703}17041705// The next loop assumes that the locations are in the same order of the1706// input arguments.1707assert(isSortedByValueNo(ArgLocs) &&1708"Argument Location list must be sorted before lowering");17091710SDValue ArgValue;1711for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;1712++I, ++InsIndex) {1713assert(InsIndex < Ins.size() && "Invalid Ins index");1714CCValAssign &VA = ArgLocs[I];17151716if (VA.isRegLoc()) {1717EVT RegVT = VA.getLocVT();1718if (VA.needsCustom()) {1719assert(1720VA.getValVT() == MVT::v64i1 &&1721"Currently the only custom case is when we split v64i1 to 2 regs");17221723// v64i1 values, in regcall calling convention, that are1724// compiled to 32 bit arch, are split up into two registers.1725ArgValue =1726getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, 
Subtarget);1727} else {1728const TargetRegisterClass *RC;1729if (RegVT == MVT::i8)1730RC = &X86::GR8RegClass;1731else if (RegVT == MVT::i16)1732RC = &X86::GR16RegClass;1733else if (RegVT == MVT::i32)1734RC = &X86::GR32RegClass;1735else if (Is64Bit && RegVT == MVT::i64)1736RC = &X86::GR64RegClass;1737else if (RegVT == MVT::f16)1738RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;1739else if (RegVT == MVT::f32)1740RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;1741else if (RegVT == MVT::f64)1742RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;1743else if (RegVT == MVT::f80)1744RC = &X86::RFP80RegClass;1745else if (RegVT == MVT::f128)1746RC = &X86::VR128RegClass;1747else if (RegVT.is512BitVector())1748RC = &X86::VR512RegClass;1749else if (RegVT.is256BitVector())1750RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;1751else if (RegVT.is128BitVector())1752RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;1753else if (RegVT == MVT::x86mmx)1754RC = &X86::VR64RegClass;1755else if (RegVT == MVT::v1i1)1756RC = &X86::VK1RegClass;1757else if (RegVT == MVT::v8i1)1758RC = &X86::VK8RegClass;1759else if (RegVT == MVT::v16i1)1760RC = &X86::VK16RegClass;1761else if (RegVT == MVT::v32i1)1762RC = &X86::VK32RegClass;1763else if (RegVT == MVT::v64i1)1764RC = &X86::VK64RegClass;1765else1766llvm_unreachable("Unknown argument type!");17671768Register Reg = MF.addLiveIn(VA.getLocReg(), RC);1769ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);1770}17711772// If this is an 8 or 16-bit value, it is really passed promoted to 321773// bits. Insert an assert[sz]ext to capture this, then truncate to the1774// right size.1775if (VA.getLocInfo() == CCValAssign::SExt)1776ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,1777DAG.getValueType(VA.getValVT()));1778else if (VA.getLocInfo() == CCValAssign::ZExt)1779ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,1780DAG.getValueType(VA.getValVT()));1781else if (VA.getLocInfo() == CCValAssign::BCvt)1782ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);17831784if (VA.isExtInLoc()) {1785// Handle MMX values passed in XMM regs.1786if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)1787ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);1788else if (VA.getValVT().isVector() &&1789VA.getValVT().getScalarType() == MVT::i1 &&1790((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||1791(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {1792// Promoting a mask type (v*i1) into a register of type i64/i32/i16/i81793ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);1794} else1795ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);1796}1797} else {1798assert(VA.isMemLoc());1799ArgValue =1800LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);1801}18021803// If value is passed via pointer - do a load.1804if (VA.getLocInfo() == CCValAssign::Indirect &&1805!(Ins[I].Flags.isByVal() && VA.isRegLoc())) {1806ArgValue =1807DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());1808}18091810InVals.push_back(ArgValue);1811}18121813for (unsigned I = 0, E = Ins.size(); I != E; ++I) {1814if (Ins[I].Flags.isSwiftAsync()) {1815auto X86FI = MF.getInfo<X86MachineFunctionInfo>();1816if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))1817X86FI->setHasSwiftAsyncContext(true);1818else {1819int PtrSize = Subtarget.is64Bit() ? 
8 : 4;1820int FI =1821MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);1822X86FI->setSwiftAsyncContextFrameIdx(FI);1823SDValue St = DAG.getStore(1824DAG.getEntryNode(), dl, InVals[I],1825DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),1826MachinePointerInfo::getFixedStack(MF, FI));1827Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);1828}1829}18301831// Swift calling convention does not require we copy the sret argument1832// into %rax/%eax for the return. We don't set SRetReturnReg for Swift.1833if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)1834continue;18351836// All x86 ABIs require that for returning structs by value we copy the1837// sret argument into %rax/%eax (depending on ABI) for the return. Save1838// the argument into a virtual register so that we can access it from the1839// return points.1840if (Ins[I].Flags.isSRet()) {1841assert(!FuncInfo->getSRetReturnReg() &&1842"SRet return has already been set");1843MVT PtrTy = getPointerTy(DAG.getDataLayout());1844Register Reg =1845MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));1846FuncInfo->setSRetReturnReg(Reg);1847SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);1848Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);1849break;1850}1851}18521853unsigned StackSize = CCInfo.getStackSize();1854// Align stack specially for tail calls.1855if (shouldGuaranteeTCO(CallConv,1856MF.getTarget().Options.GuaranteedTailCallOpt))1857StackSize = GetAlignedArgumentStackSize(StackSize, DAG);18581859if (IsVarArg)1860VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)1861.lowerVarArgsParameters(Chain, StackSize);18621863// Some CCs need callee pop.1864if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,1865MF.getTarget().Options.GuaranteedTailCallOpt)) {1866FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.1867} else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {1868// X86 interrupts must pop the error code (and the alignment padding) if1869// present.1870FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);1871} else {1872FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.1873// If this is an sret function, the return should pop the hidden pointer.1874if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))1875FuncInfo->setBytesToPopOnReturn(4);1876}18771878if (!Is64Bit) {1879// RegSaveFrameIndex is X86-64 only.1880FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);1881}18821883FuncInfo->setArgumentStackSize(StackSize);18841885if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {1886EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());1887if (Personality == EHPersonality::CoreCLR) {1888assert(Is64Bit);1889// TODO: Add a mechanism to frame lowering that will allow us to indicate1890// that we'd prefer this slot be allocated towards the bottom of the frame1891// (i.e. near the stack pointer after allocating the frame). 
Every1892// funclet needs a copy of this slot in its (mostly empty) frame, and the1893// offset from the bottom of this and each funclet's frame must be the1894// same, so the size of funclets' (mostly empty) frames is dictated by1895// how far this slot is from the bottom (since they allocate just enough1896// space to accommodate holding this slot at the correct offset).1897int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);1898EHInfo->PSPSymFrameIdx = PSPSymFI;1899}1900}19011902if (shouldDisableArgRegFromCSR(CallConv) ||1903F.hasFnAttribute("no_caller_saved_registers")) {1904MachineRegisterInfo &MRI = MF.getRegInfo();1905for (std::pair<Register, Register> Pair : MRI.liveins())1906MRI.disableCalleeSavedRegister(Pair.first);1907}19081909if (CallingConv::PreserveNone == CallConv)1910for (unsigned I = 0, E = Ins.size(); I != E; ++I) {1911if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||1912Ins[I].Flags.isSwiftError()) {1913errorUnsupported(DAG, dl,1914"Swift attributes can't be used with preserve_none");1915break;1916}1917}19181919return Chain;1920}19211922SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,1923SDValue Arg, const SDLoc &dl,1924SelectionDAG &DAG,1925const CCValAssign &VA,1926ISD::ArgFlagsTy Flags,1927bool isByVal) const {1928unsigned LocMemOffset = VA.getLocMemOffset();1929SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);1930PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),1931StackPtr, PtrOff);1932if (isByVal)1933return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);19341935MaybeAlign Alignment;1936if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&1937Arg.getSimpleValueType() != MVT::f80)1938Alignment = MaybeAlign(4);1939return DAG.getStore(1940Chain, dl, Arg, PtrOff,1941MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),1942Alignment);1943}19441945/// Emit a load of return address if tail call1946/// optimization is performed and it is required.1947SDValue X86TargetLowering::EmitTailCallLoadRetAddr(1948SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,1949bool Is64Bit, int FPDiff, const SDLoc &dl) const {1950// Adjust the Return address stack slot.1951EVT VT = getPointerTy(DAG.getDataLayout());1952OutRetAddr = getReturnAddressFrameIndex(DAG);19531954// Load the "old" Return address.1955OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());1956return SDValue(OutRetAddr.getNode(), 1);1957}19581959/// Emit a store of the return address if tail call1960/// optimization is performed and it is required (FPDiff!=0).1961static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,1962SDValue Chain, SDValue RetAddrFrIdx,1963EVT PtrVT, unsigned SlotSize,1964int FPDiff, const SDLoc &dl) {1965// Store the return address to the appropriate stack slot.1966if (!FPDiff) return Chain;1967// Calculate the new stack slot for the return address.1968int NewReturnAddrFI =1969MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,1970false);1971SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);1972Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,1973MachinePointerInfo::getFixedStack(1974DAG.getMachineFunction(), NewReturnAddrFI));1975return Chain;1976}19771978/// Returns a vector_shuffle mask for an movs{s|d}, movd1979/// operation of specified width.1980SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,1981SDValue V1, SDValue V2) 
const {1982unsigned NumElems = VT.getVectorNumElements();1983SmallVector<int, 8> Mask;1984Mask.push_back(NumElems);1985for (unsigned i = 1; i != NumElems; ++i)1986Mask.push_back(i);1987return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);1988}19891990SDValue1991X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,1992SmallVectorImpl<SDValue> &InVals) const {1993SelectionDAG &DAG = CLI.DAG;1994SDLoc &dl = CLI.DL;1995SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;1996SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;1997SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;1998SDValue Chain = CLI.Chain;1999SDValue Callee = CLI.Callee;2000CallingConv::ID CallConv = CLI.CallConv;2001bool &isTailCall = CLI.IsTailCall;2002bool isVarArg = CLI.IsVarArg;2003const auto *CB = CLI.CB;20042005MachineFunction &MF = DAG.getMachineFunction();2006bool Is64Bit = Subtarget.is64Bit();2007bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);2008bool IsSibcall = false;2009bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||2010CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;2011bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);2012X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();2013bool HasNCSR = (CB && isa<CallInst>(CB) &&2014CB->hasFnAttr("no_caller_saved_registers"));2015bool HasNoCfCheck = (CB && CB->doesNoCfCheck());2016bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());2017bool IsCFICall = IsIndirectCall && CLI.CFIType;2018const Module *M = MF.getFunction().getParent();2019Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");20202021MachineFunction::CallSiteInfo CSInfo;2022if (CallConv == CallingConv::X86_INTR)2023report_fatal_error("X86 interrupts may not be called directly");20242025// Analyze operands of the call, assigning locations to each operand.2026SmallVector<CCValAssign, 16> ArgLocs;2027CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());20282029// Allocate shadow area for Win64.2030if (IsWin64)2031CCInfo.AllocateStack(32, Align(8));20322033CCInfo.AnalyzeArguments(Outs, CC_X86);20342035// In vectorcall calling convention a second pass is required for the HVA2036// types.2037if (CallingConv::X86_VectorCall == CallConv) {2038CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);2039}20402041bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();2042if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {2043// If we are using a GOT, disable tail calls to external symbols with2044// default visibility. Tail calling such a symbol requires using a GOT2045// relocation, which forces early binding of the symbol. This breaks code2046// that require lazy function symbol resolution. 
Using musttail or2047// GuaranteedTailCallOpt will override this.2048GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);2049if (!G || (!G->getGlobal()->hasLocalLinkage() &&2050G->getGlobal()->hasDefaultVisibility()))2051isTailCall = false;2052}20532054if (isTailCall && !IsMustTail) {2055// Check if it's really possible to do a tail call.2056isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,2057IsCalleePopSRet);20582059// Sibcalls are automatically detected tailcalls which do not require2060// ABI changes.2061if (!IsGuaranteeTCO && isTailCall)2062IsSibcall = true;20632064if (isTailCall)2065++NumTailCalls;2066}20672068if (IsMustTail && !isTailCall)2069report_fatal_error("failed to perform tail call elimination on a call "2070"site marked musttail");20712072assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&2073"Var args not supported with calling convention fastcc, ghc or hipe");20742075// Get a count of how many bytes are to be pushed on the stack.2076unsigned NumBytes = CCInfo.getAlignedCallFrameSize();2077if (IsSibcall)2078// This is a sibcall. The memory operands are available in caller's2079// own caller's stack.2080NumBytes = 0;2081else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))2082NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);20832084int FPDiff = 0;2085if (isTailCall &&2086shouldGuaranteeTCO(CallConv,2087MF.getTarget().Options.GuaranteedTailCallOpt)) {2088// Lower arguments at fp - stackoffset + fpdiff.2089unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();20902091FPDiff = NumBytesCallerPushed - NumBytes;20922093// Set the delta of movement of the returnaddr stackslot.2094// But only set if delta is greater than previous delta.2095if (FPDiff < X86Info->getTCReturnAddrDelta())2096X86Info->setTCReturnAddrDelta(FPDiff);2097}20982099unsigned NumBytesToPush = NumBytes;2100unsigned NumBytesToPop = NumBytes;21012102// If we have an inalloca argument, all stack space has already been allocated2103// for us and be right at the top of the stack. 
We don't support multiple2104// arguments passed in memory when using inalloca.2105if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {2106NumBytesToPush = 0;2107if (!ArgLocs.back().isMemLoc())2108report_fatal_error("cannot use inalloca attribute on a register "2109"parameter");2110if (ArgLocs.back().getLocMemOffset() != 0)2111report_fatal_error("any parameter with the inalloca attribute must be "2112"the only memory argument");2113} else if (CLI.IsPreallocated) {2114assert(ArgLocs.back().isMemLoc() &&2115"cannot use preallocated attribute on a register "2116"parameter");2117SmallVector<size_t, 4> PreallocatedOffsets;2118for (size_t i = 0; i < CLI.OutVals.size(); ++i) {2119if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {2120PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());2121}2122}2123auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();2124size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);2125MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);2126MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);2127NumBytesToPush = 0;2128}21292130if (!IsSibcall && !IsMustTail)2131Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,2132NumBytes - NumBytesToPush, dl);21332134SDValue RetAddrFrIdx;2135// Load return address for tail calls.2136if (isTailCall && FPDiff)2137Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,2138Is64Bit, FPDiff, dl);21392140SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;2141SmallVector<SDValue, 8> MemOpChains;2142SDValue StackPtr;21432144// The next loop assumes that the locations are in the same order of the2145// input arguments.2146assert(isSortedByValueNo(ArgLocs) &&2147"Argument Location list must be sorted before lowering");21482149// Walk the register/memloc assignments, inserting copies/loads. 
In the case2150// of tail call optimization arguments are handle later.2151const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();2152for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;2153++I, ++OutIndex) {2154assert(OutIndex < Outs.size() && "Invalid Out index");2155// Skip inalloca/preallocated arguments, they have already been written.2156ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;2157if (Flags.isInAlloca() || Flags.isPreallocated())2158continue;21592160CCValAssign &VA = ArgLocs[I];2161EVT RegVT = VA.getLocVT();2162SDValue Arg = OutVals[OutIndex];2163bool isByVal = Flags.isByVal();21642165// Promote the value if needed.2166switch (VA.getLocInfo()) {2167default: llvm_unreachable("Unknown loc info!");2168case CCValAssign::Full: break;2169case CCValAssign::SExt:2170Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);2171break;2172case CCValAssign::ZExt:2173Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);2174break;2175case CCValAssign::AExt:2176if (Arg.getValueType().isVector() &&2177Arg.getValueType().getVectorElementType() == MVT::i1)2178Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);2179else if (RegVT.is128BitVector()) {2180// Special case: passing MMX values in XMM registers.2181Arg = DAG.getBitcast(MVT::i64, Arg);2182Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);2183Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);2184} else2185Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);2186break;2187case CCValAssign::BCvt:2188Arg = DAG.getBitcast(RegVT, Arg);2189break;2190case CCValAssign::Indirect: {2191if (isByVal) {2192// Memcpy the argument to a temporary stack slot to prevent2193// the caller from seeing any modifications the callee may make2194// as guaranteed by the `byval` attribute.2195int FrameIdx = MF.getFrameInfo().CreateStackObject(2196Flags.getByValSize(),2197std::max(Align(16), Flags.getNonZeroByValAlign()), false);2198SDValue StackSlot =2199DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));2200Chain =2201CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);2202// From now on treat this as a regular pointer2203Arg = StackSlot;2204isByVal = false;2205} else {2206// Store the argument.2207SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());2208int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();2209Chain = DAG.getStore(2210Chain, dl, Arg, SpillSlot,2211MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));2212Arg = SpillSlot;2213}2214break;2215}2216}22172218if (VA.needsCustom()) {2219assert(VA.getValVT() == MVT::v64i1 &&2220"Currently the only custom case is when we split v64i1 to 2 regs");2221// Split v64i1 value into two registers2222Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);2223} else if (VA.isRegLoc()) {2224RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));2225const TargetOptions &Options = DAG.getTarget().Options;2226if (Options.EmitCallSiteInfo)2227CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);2228if (isVarArg && IsWin64) {2229// Win64 ABI requires argument XMM reg to be copied to the corresponding2230// shadow reg if callee is a varargs function.2231Register ShadowReg;2232switch (VA.getLocReg()) {2233case X86::XMM0: ShadowReg = X86::RCX; break;2234case X86::XMM1: ShadowReg = X86::RDX; break;2235case X86::XMM2: ShadowReg = X86::R8; break;2236case X86::XMM3: ShadowReg = X86::R9; break;2237}2238if (ShadowReg)2239RegsToPass.push_back(std::make_pair(ShadowReg, Arg));2240}2241} else if (!IsSibcall && (!isTailCall || isByVal)) 
{
      assert(VA.isMemLoc());
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
                                      getPointerTy(DAG.getDataLayout()));
      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags, isByVal));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  if (Subtarget.isPICStyleGOT()) {
    // ELF / PIC requires GOT in the EBX register before function calls via PLT
    // GOT pointer (except regcall).
    if (!isTailCall) {
      // An indirect call with the RegCall calling convention may use up all
      // the general registers, so it is not suitable to bind the EBX register
      // for the GOT address; just let the register allocator handle it.
      if (CallConv != CallingConv::X86_RegCall)
        RegsToPass.push_back(std::make_pair(
            Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                                            getPointerTy(DAG.getDataLayout()))));
    } else {
      // If we are tail calling and generating PIC/GOT style code, load the
      // address of the callee into ECX. The value in ecx is used as target of
      // the tail jump. This is done to circumvent the ebx/callee-saved problem
      // for tail calls on PIC/GOT architectures. Normally we would just put the
      // address of GOT into ebx and then call target@PLT. But for tail calls
      // ebx would be restored (since ebx is callee saved) before jumping to the
      // target@PLT.

      // Note: The actual moving to ECX is done further down.
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (G && !G->getGlobal()->hasLocalLinkage() &&
          G->getGlobal()->hasDefaultVisibility())
        Callee = LowerGlobalAddress(Callee, DAG);
      else if (isa<ExternalSymbolSDNode>(Callee))
        Callee = LowerExternalSymbol(Callee, DAG);
    }
  }

  if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
      (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
    static const MCPhysReg XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
    assert((Subtarget.hasSSE1() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");
    RegsToPass.push_back(std::make_pair(Register(X86::AL),
                                        DAG.getConstant(NumXMMRegs, dl,
                                                        MVT::i8)));
  }

  if (isVarArg && IsMustTail) {
    const auto &Forwards = X86Info->getForwardedMustTailRegParms();
    for (const auto &F : Forwards) {
      SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
      RegsToPass.push_back(std::make_pair(F.PReg, Val));
    }
  }

  // For tail calls lower the arguments to the 'real' stack slots.
Sibcalls2316// don't need this because the eligibility check rejects calls that require2317// shuffling arguments passed in memory.2318if (!IsSibcall && isTailCall) {2319// Force all the incoming stack arguments to be loaded from the stack2320// before any new outgoing arguments are stored to the stack, because the2321// outgoing stack slots may alias the incoming argument stack slots, and2322// the alias isn't otherwise explicit. This is slightly more conservative2323// than necessary, because it means that each store effectively depends2324// on every argument instead of just those arguments it would clobber.2325SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);23262327SmallVector<SDValue, 8> MemOpChains2;2328SDValue FIN;2329int FI = 0;2330for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;2331++I, ++OutsIndex) {2332CCValAssign &VA = ArgLocs[I];23332334if (VA.isRegLoc()) {2335if (VA.needsCustom()) {2336assert((CallConv == CallingConv::X86_RegCall) &&2337"Expecting custom case only in regcall calling convention");2338// This means that we are in special case where one argument was2339// passed through two register locations - Skip the next location2340++I;2341}23422343continue;2344}23452346assert(VA.isMemLoc());2347SDValue Arg = OutVals[OutsIndex];2348ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;2349// Skip inalloca/preallocated arguments. They don't require any work.2350if (Flags.isInAlloca() || Flags.isPreallocated())2351continue;2352// Create frame index.2353int32_t Offset = VA.getLocMemOffset()+FPDiff;2354uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;2355FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);2356FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));23572358if (Flags.isByVal()) {2359// Copy relative to framepointer.2360SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);2361if (!StackPtr.getNode())2362StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),2363getPointerTy(DAG.getDataLayout()));2364Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),2365StackPtr, Source);23662367MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,2368ArgChain,2369Flags, DAG, dl));2370} else {2371// Store relative to framepointer.2372MemOpChains2.push_back(DAG.getStore(2373ArgChain, dl, Arg, FIN,2374MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));2375}2376}23772378if (!MemOpChains2.empty())2379Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);23802381// Store the return address to the appropriate stack slot.2382Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,2383getPointerTy(DAG.getDataLayout()),2384RegInfo->getSlotSize(), FPDiff, dl);2385}23862387// Build a sequence of copy-to-reg nodes chained together with token chain2388// and glue operands which copy the outgoing args into registers.2389SDValue InGlue;2390for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {2391Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,2392RegsToPass[i].second, InGlue);2393InGlue = Chain.getValue(1);2394}23952396if (DAG.getTarget().getCodeModel() == CodeModel::Large) {2397assert(Is64Bit && "Large code model is only legal in 64-bit mode.");2398// In the 64-bit large code model, we have to make all calls2399// through a register, since the call instruction's 32-bit2400// pc-relative offset may not be large enough to hold the whole2401// address.2402} else if (Callee->getOpcode() == ISD::GlobalAddress ||2403Callee->getOpcode() == ISD::ExternalSymbol) 
{
    // Lower direct calls to global addresses and external symbols. Setting
    // ForCall to true here has the effect of removing WrapperRIP when possible
    // to allow direct calls to be selected without first materializing the
    // address into a register.
    Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
  } else if (Subtarget.isTarget64BitILP32() &&
             Callee.getValueType() == MVT::i32) {
    // Zero-extend the 32-bit Callee address to 64 bits according to the x32
    // ABI.
    Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
  }

  // Returns a chain & a glue for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;

  if (!IsSibcall && isTailCall && !IsMustTail) {
    Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
    InGlue = Chain.getValue(1);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (isTailCall)
    Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask = [&]() {
    auto AdaptedCC = CallConv;
    // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
    // use the X86_INTR calling convention because it has the same CSR mask
    // (same preserved registers).
    if (HasNCSR)
      AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
    // If NoCalleeSavedRegisters is requested, then use GHC since it happens
    // to use the CSR_NoRegs_RegMask.
    if (CB && CB->hasFnAttr("no_callee_saved_registers"))
      AdaptedCC = (CallingConv::ID)CallingConv::GHC;
    return RegInfo->getCallPreservedMask(MF, AdaptedCC);
  }();
  assert(Mask && "Missing call preserved mask for calling convention");

  // If this is an invoke in a 32-bit function using a funclet-based
  // personality, assume the function clobbers all registers. If an exception
  // is thrown, the runtime will not restore CSRs.
  // FIXME: Model this more precisely so that we can register allocate across
  // the normal edge and spill and fill across the exceptional edge.
  if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
    const Function &CallerFn = MF.getFunction();
    EHPersonality Pers =
        CallerFn.hasPersonalityFn()
            ? classifyEHPersonality(CallerFn.getPersonalityFn())
            : EHPersonality::Unknown;
    if (isFuncletEHPersonality(Pers))
      Mask = RegInfo->getNoPreservedMask();
  }

  // Define a new register mask from the existing mask.
  uint32_t *RegMask = nullptr;

  // In some calling conventions we need to remove the used physical registers
  // from the reg mask.
Create a new RegMask for such calling conventions.2472// RegMask for calling conventions that disable only return registers (e.g.2473// preserve_most) will be modified later in LowerCallResult.2474bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;2475if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {2476const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();24772478// Allocate a new Reg Mask and copy Mask.2479RegMask = MF.allocateRegMask();2480unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());2481memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);24822483// Make sure all sub registers of the argument registers are reset2484// in the RegMask.2485if (ShouldDisableArgRegs) {2486for (auto const &RegPair : RegsToPass)2487for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))2488RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));2489}24902491// Create the RegMask Operand according to our updated mask.2492Ops.push_back(DAG.getRegisterMask(RegMask));2493} else {2494// Create the RegMask Operand according to the static mask.2495Ops.push_back(DAG.getRegisterMask(Mask));2496}24972498if (InGlue.getNode())2499Ops.push_back(InGlue);25002501if (isTailCall) {2502// We used to do:2503//// If this is the first return lowered for this function, add the regs2504//// to the liveout set for the function.2505// This isn't right, although it's probably harmless on x86; liveouts2506// should be computed from returns not tail calls. Consider a void2507// function making a tail call to a function returning int.2508MF.getFrameInfo().setHasTailCall();2509SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);25102511if (IsCFICall)2512Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());25132514DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);2515DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));2516return Ret;2517}25182519if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {2520Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);2521} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {2522// Calls with a "clang.arc.attachedcall" bundle are special. They should be2523// expanded to the call, directly followed by a special marker sequence and2524// a call to a ObjC library function. 
Use the CALL_RVMARKER to do that.2525assert(!isTailCall &&2526"tail calls cannot be marked with clang.arc.attachedcall");2527assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");25282529// Add a target global address for the retainRV/claimRV runtime function2530// just before the call target.2531Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);2532auto PtrVT = getPointerTy(DAG.getDataLayout());2533auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);2534Ops.insert(Ops.begin() + 1, GA);2535Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);2536} else {2537Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);2538}25392540if (IsCFICall)2541Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());25422543InGlue = Chain.getValue(1);2544DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);2545DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));25462547// Save heapallocsite metadata.2548if (CLI.CB)2549if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))2550DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);25512552// Create the CALLSEQ_END node.2553unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.2554if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,2555DAG.getTarget().Options.GuaranteedTailCallOpt))2556NumBytesForCalleeToPop = NumBytes; // Callee pops everything2557else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)2558// If this call passes a struct-return pointer, the callee2559// pops that struct pointer.2560NumBytesForCalleeToPop = 4;25612562// Returns a glue for retval copy to use.2563if (!IsSibcall) {2564Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,2565InGlue, dl);2566InGlue = Chain.getValue(1);2567}25682569if (CallingConv::PreserveNone == CallConv)2570for (unsigned I = 0, E = Outs.size(); I != E; ++I) {2571if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||2572Outs[I].Flags.isSwiftError()) {2573errorUnsupported(DAG, dl,2574"Swift attributes can't be used with preserve_none");2575break;2576}2577}25782579// Handle result values, copying them out of physregs into vregs that we2580// return.2581return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,2582InVals, RegMask);2583}25842585//===----------------------------------------------------------------------===//2586// Fast Calling Convention (tail call) implementation2587//===----------------------------------------------------------------------===//25882589// Like std call, callee cleans arguments, convention except that ECX is2590// reserved for storing the tail called function address. Only 2 registers are2591// free for argument passing (inreg). Tail call optimization is performed2592// provided:2593// * tailcallopt is enabled2594// * caller/callee are fastcc2595// On X86_64 architecture with GOT-style position independent code only local2596// (within module) calls are supported at the moment.2597// To keep the stack aligned according to platform abi the function2598// GetAlignedArgumentStackSize ensures that argument delta is always multiples2599// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)2600// If a tail called function callee has more arguments than the caller the2601// caller needs to make sure that there is room to move the RETADDR to. This is2602// achieved by reserving an area the size of the argument delta right after the2603// original RETADDR, but before the saved framepointer or the spilled registers2604// e.g. 
caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)2605// stack layout:2606// arg12607// arg22608// RETADDR2609// [ new RETADDR2610// move area ]2611// (possible EBP)2612// ESI2613// EDI2614// local1 ..26152616/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align2617/// requirement.2618unsigned2619X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,2620SelectionDAG &DAG) const {2621const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();2622const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();2623assert(StackSize % SlotSize == 0 &&2624"StackSize must be a multiple of SlotSize");2625return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;2626}26272628/// Return true if the given stack call argument is already available in the2629/// same position (relatively) of the caller's incoming argument stack.2630static2631bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,2632MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,2633const X86InstrInfo *TII, const CCValAssign &VA) {2634unsigned Bytes = Arg.getValueSizeInBits() / 8;26352636for (;;) {2637// Look through nodes that don't alter the bits of the incoming value.2638unsigned Op = Arg.getOpcode();2639if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||2640Op == ISD::AssertZext) {2641Arg = Arg.getOperand(0);2642continue;2643}2644if (Op == ISD::TRUNCATE) {2645const SDValue &TruncInput = Arg.getOperand(0);2646if (TruncInput.getOpcode() == ISD::AssertZext &&2647cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==2648Arg.getValueType()) {2649Arg = TruncInput.getOperand(0);2650continue;2651}2652}2653break;2654}26552656int FI = INT_MAX;2657if (Arg.getOpcode() == ISD::CopyFromReg) {2658Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();2659if (!VR.isVirtual())2660return false;2661MachineInstr *Def = MRI->getVRegDef(VR);2662if (!Def)2663return false;2664if (!Flags.isByVal()) {2665if (!TII->isLoadFromStackSlot(*Def, FI))2666return false;2667} else {2668unsigned Opcode = Def->getOpcode();2669if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||2670Opcode == X86::LEA64_32r) &&2671Def->getOperand(1).isFI()) {2672FI = Def->getOperand(1).getIndex();2673Bytes = Flags.getByValSize();2674} else2675return false;2676}2677} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {2678if (Flags.isByVal())2679// ByVal argument is passed in as a pointer but it's now being2680// dereferenced. e.g.2681// define @foo(%struct.X* %A) {2682// tail call @bar(%struct.X* byval %A)2683// }2684return false;2685SDValue Ptr = Ld->getBasePtr();2686FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);2687if (!FINode)2688return false;2689FI = FINode->getIndex();2690} else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {2691FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);2692FI = FINode->getIndex();2693Bytes = Flags.getByValSize();2694} else2695return false;26962697assert(FI != INT_MAX);2698if (!MFI.isFixedObjectIndex(FI))2699return false;27002701if (Offset != MFI.getObjectOffset(FI))2702return false;27032704// If this is not byval, check that the argument stack object is immutable.2705// inalloca and argument copy elision can create mutable argument stack2706// objects. 
Byval objects can be mutated, but a byval call intends to pass the2707// mutated memory.2708if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))2709return false;27102711if (VA.getLocVT().getFixedSizeInBits() >2712Arg.getValueSizeInBits().getFixedValue()) {2713// If the argument location is wider than the argument type, check that any2714// extension flags match.2715if (Flags.isZExt() != MFI.isObjectZExt(FI) ||2716Flags.isSExt() != MFI.isObjectSExt(FI)) {2717return false;2718}2719}27202721return Bytes == MFI.getObjectSize(FI);2722}27232724/// Check whether the call is eligible for tail call optimization. Targets2725/// that want to do tail call optimization should implement this function.2726/// Note that the x86 backend does not check musttail calls for eligibility! The2727/// rest of x86 tail call lowering must be prepared to forward arguments of any2728/// type.2729bool X86TargetLowering::IsEligibleForTailCallOptimization(2730TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,2731SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {2732SelectionDAG &DAG = CLI.DAG;2733const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;2734const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;2735const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;2736SDValue Callee = CLI.Callee;2737CallingConv::ID CalleeCC = CLI.CallConv;2738bool isVarArg = CLI.IsVarArg;27392740if (!mayTailCallThisCC(CalleeCC))2741return false;27422743// If -tailcallopt is specified, make fastcc functions tail-callable.2744MachineFunction &MF = DAG.getMachineFunction();2745const Function &CallerF = MF.getFunction();27462747// If the function return type is x86_fp80 and the callee return type is not,2748// then the FP_EXTEND of the call result is not a nop. It's not safe to2749// perform a tailcall optimization here.2750if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())2751return false;27522753CallingConv::ID CallerCC = CallerF.getCallingConv();2754bool CCMatch = CallerCC == CalleeCC;2755bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);2756bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);2757bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||2758CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;27592760// Win64 functions have extra shadow space for argument homing. Don't do the2761// sibcall if the caller and callee have mismatched expectations for this2762// space.2763if (IsCalleeWin64 != IsCallerWin64)2764return false;27652766if (IsGuaranteeTCO) {2767if (canGuaranteeTCO(CalleeCC) && CCMatch)2768return true;2769return false;2770}27712772// Look for obvious safe cases to perform tail call optimization that do not2773// require ABI changes. This is what gcc calls sibcall.27742775// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to2776// emit a special epilogue.2777const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();2778if (RegInfo->hasStackRealignment(MF))2779return false;27802781// Also avoid sibcall optimization if we're an sret return fn and the callee2782// is incompatible. See comment in LowerReturn about why hasStructRetAttr is2783// insufficient.2784if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {2785// For a compatible tail call the callee must return our sret pointer. So it2786// needs to be (a) an sret function itself and (b) we pass our sret as its2787// sret. 
Condition #b is harder to determine.2788return false;2789} else if (IsCalleePopSRet)2790// The callee pops an sret, so we cannot tail-call, as our caller doesn't2791// expect that.2792return false;27932794// Do not sibcall optimize vararg calls unless all arguments are passed via2795// registers.2796LLVMContext &C = *DAG.getContext();2797if (isVarArg && !Outs.empty()) {2798// Optimizing for varargs on Win64 is unlikely to be safe without2799// additional testing.2800if (IsCalleeWin64 || IsCallerWin64)2801return false;28022803for (const auto &VA : ArgLocs)2804if (!VA.isRegLoc())2805return false;2806}28072808// If the call result is in ST0 / ST1, it needs to be popped off the x872809// stack. Therefore, if it's not used by the call it is not safe to optimize2810// this into a sibcall.2811bool Unused = false;2812for (const auto &In : Ins) {2813if (!In.Used) {2814Unused = true;2815break;2816}2817}2818if (Unused) {2819SmallVector<CCValAssign, 16> RVLocs;2820CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);2821RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);2822for (const auto &VA : RVLocs) {2823if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)2824return false;2825}2826}28272828// Check that the call results are passed in the same way.2829if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,2830RetCC_X86, RetCC_X86))2831return false;2832// The callee has to preserve all registers the caller needs to preserve.2833const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();2834const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);2835if (!CCMatch) {2836const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);2837if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))2838return false;2839}28402841unsigned StackArgsSize = CCInfo.getStackSize();28422843// If the callee takes no arguments then go on to check the results of the2844// call.2845if (!Outs.empty()) {2846if (StackArgsSize > 0) {2847// Check if the arguments are already laid out in the right way as2848// the caller's fixed stack objects.2849MachineFrameInfo &MFI = MF.getFrameInfo();2850const MachineRegisterInfo *MRI = &MF.getRegInfo();2851const X86InstrInfo *TII = Subtarget.getInstrInfo();2852for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {2853const CCValAssign &VA = ArgLocs[I];2854SDValue Arg = OutVals[I];2855ISD::ArgFlagsTy Flags = Outs[I].Flags;2856if (VA.getLocInfo() == CCValAssign::Indirect)2857return false;2858if (!VA.isRegLoc()) {2859if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,2860TII, VA))2861return false;2862}2863}2864}28652866bool PositionIndependent = isPositionIndependent();2867// If the tailcall address may be in a register, then make sure it's2868// possible to register allocate for it. In 32-bit, the call address can2869// only target EAX, EDX, or ECX since the tail call must be scheduled after2870// callee-saved registers are restored. These happen to be the same2871// registers used to pass 'inreg' arguments so watch out for those.2872if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&2873!isa<ExternalSymbolSDNode>(Callee)) ||2874PositionIndependent)) {2875unsigned NumInRegs = 0;2876// In PIC we need an extra register to formulate the address computation2877// for the callee.2878unsigned MaxInRegs = PositionIndependent ? 
2 : 3;

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}
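// Illustrative sketch, not part of the lowering itself: on 32-bit targets the
// callee-pop query above is what ultimately selects between a plain `ret` and
// a `ret imm16` that also pops the argument area. Assuming a hypothetical
// 32-bit stdcall callee with 12 bytes of stack arguments, no varargs, and
// GuaranteedTailCallOpt disabled:
//
//   bool Pops = X86::isCalleePop(CallingConv::X86_StdCall, /*is64Bit=*/false,
//                                /*IsVarArg=*/false, /*GuaranteeTCO=*/false);
//   // Pops == true: the callee returns with `ret 12` and the caller leaves
//   // ESP alone. The same query for CallingConv::C returns false, so the
//   // caller pops the 12 bytes after the call instead.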