Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
35269 views
//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains the implementation of custom routines for the X869// Calling Convention that aren't done by tablegen.10//11//===----------------------------------------------------------------------===//1213#include "X86CallingConv.h"14#include "X86Subtarget.h"15#include "llvm/ADT/SmallVector.h"16#include "llvm/CodeGen/CallingConvLower.h"17#include "llvm/IR/CallingConv.h"18#include "llvm/IR/Module.h"1920using namespace llvm;2122/// When regcall calling convention compiled to 32 bit arch, special treatment23/// is required for 64 bit masks.24/// The value should be assigned to two GPRs.25/// \return true if registers were allocated and false otherwise.26static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,27MVT &LocVT,28CCValAssign::LocInfo &LocInfo,29ISD::ArgFlagsTy &ArgFlags,30CCState &State) {31// List of GPR registers that are available to store values in regcall32// calling convention.33static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,34X86::ESI};3536// The vector will save all the available registers for allocation.37SmallVector<unsigned, 5> AvailableRegs;3839// searching for the available registers.40for (auto Reg : RegList) {41if (!State.isAllocated(Reg))42AvailableRegs.push_back(Reg);43}4445const size_t RequiredGprsUponSplit = 2;46if (AvailableRegs.size() < RequiredGprsUponSplit)47return false; // Not enough free registers - continue the search.4849// Allocating the available registers.50for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {5152// Marking the register as located.53unsigned Reg = State.AllocateReg(AvailableRegs[I]);5455// Since we previously made sure that 2 registers are available56// we expect that a real register number will be returned.57assert(Reg && "Expecting a register will be available");5859// Assign the value to the allocated register60State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));61}6263// Successful in allocating registers - stop scanning next rules.64return true;65}6667static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {68if (ValVT.is512BitVector()) {69static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,70X86::ZMM3, X86::ZMM4, X86::ZMM5};71return ArrayRef(std::begin(RegListZMM), std::end(RegListZMM));72}7374if (ValVT.is256BitVector()) {75static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,76X86::YMM3, X86::YMM4, X86::YMM5};77return ArrayRef(std::begin(RegListYMM), std::end(RegListYMM));78}7980static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,81X86::XMM3, X86::XMM4, X86::XMM5};82return ArrayRef(std::begin(RegListXMM), std::end(RegListXMM));83}8485static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {86static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};87return ArrayRef(std::begin(RegListGPR), std::end(RegListGPR));88}8990static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,91MVT &LocVT,92CCValAssign::LocInfo &LocInfo,93ISD::ArgFlagsTy &ArgFlags,94CCState &State) {9596ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);97bool Is64bit = static_cast<const X86Subtarget &>(98State.getMachineFunction().getSubtarget())99.is64Bit();100101for (auto Reg : RegList) {102// If the register is not marked as allocated - assign to it.103if (!State.isAllocated(Reg)) {104unsigned AssigedReg = State.AllocateReg(Reg);105assert(AssigedReg == Reg && "Expecting a valid register allocation");106State.addLoc(107CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo));108return true;109}110// If the register is marked as shadow allocated - assign to it.111if (Is64bit && State.IsShadowAllocatedReg(Reg)) {112State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));113return true;114}115}116117llvm_unreachable("Clang should ensure that hva marked vectors will have "118"an available register.");119return false;120}121122/// Vectorcall calling convention has special handling for vector types or123/// HVA for 64 bit arch.124/// For HVAs shadow registers might be allocated on the first pass125/// and actual XMM registers are allocated on the second pass.126/// For vector types, actual XMM registers are allocated on the first pass.127/// \return true if registers were allocated and false otherwise.128static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,129CCValAssign::LocInfo &LocInfo,130ISD::ArgFlagsTy &ArgFlags, CCState &State) {131// On the second pass, go through the HVAs only.132if (ArgFlags.isSecArgPass()) {133if (ArgFlags.isHva())134return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,135ArgFlags, State);136return true;137}138139// Process only vector types as defined by vectorcall spec:140// "A vector type is either a floating-point type, for example,141// a float or double, or an SIMD vector type, for example, __m128 or __m256".142if (!(ValVT.isFloatingPoint() ||143(ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {144// If R9 was already assigned it means that we are after the fourth element145// and because this is not an HVA / Vector type, we need to allocate146// shadow XMM register.147if (State.isAllocated(X86::R9)) {148// Assign shadow XMM register.149(void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));150}151152return false;153}154155if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {156// Assign shadow GPR register.157(void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());158159// Assign XMM register - (shadow for HVA and non-shadow for non HVA).160if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {161// In Vectorcall Calling convention, additional shadow stack can be162// created on top of the basic 32 bytes of win64.163// It can happen if the fifth or sixth argument is vector type or HVA.164// At that case for each argument a shadow stack of 8 bytes is allocated.165const TargetRegisterInfo *TRI =166State.getMachineFunction().getSubtarget().getRegisterInfo();167if (TRI->regsOverlap(Reg, X86::XMM4) ||168TRI->regsOverlap(Reg, X86::XMM5))169State.AllocateStack(8, Align(8));170171if (!ArgFlags.isHva()) {172State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));173return true; // Allocated a register - Stop the search.174}175}176}177178// If this is an HVA - Stop the search,179// otherwise continue the search.180return ArgFlags.isHva();181}182183/// Vectorcall calling convention has special handling for vector types or184/// HVA for 32 bit arch.185/// For HVAs actual XMM registers are allocated on the second pass.186/// For vector types, actual XMM registers are allocated on the first pass.187/// \return true if registers were allocated and false otherwise.188static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,189CCValAssign::LocInfo &LocInfo,190ISD::ArgFlagsTy &ArgFlags, CCState &State) {191// On the second pass, go through the HVAs only.192if (ArgFlags.isSecArgPass()) {193if (ArgFlags.isHva())194return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,195ArgFlags, State);196return true;197}198199// Process only vector types as defined by vectorcall spec:200// "A vector type is either a floating point type, for example,201// a float or double, or an SIMD vector type, for example, __m128 or __m256".202if (!(ValVT.isFloatingPoint() ||203(ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {204return false;205}206207if (ArgFlags.isHva())208return true; // If this is an HVA - Stop the search.209210// Assign XMM register.211if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {212State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));213return true;214}215216// In case we did not find an available XMM register for a vector -217// pass it indirectly.218// It is similar to CCPassIndirect, with the addition of inreg.219if (!ValVT.isFloatingPoint()) {220LocVT = MVT::i32;221LocInfo = CCValAssign::Indirect;222ArgFlags.setInReg();223}224225return false; // No register was assigned - Continue the search.226}227228static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,229CCValAssign::LocInfo &, ISD::ArgFlagsTy &,230CCState &) {231llvm_unreachable("The AnyReg calling convention is only supported by the "232"stackmap and patchpoint intrinsics.");233// gracefully fallback to X86 C calling convention on Release builds.234return false;235}236237static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,238CCValAssign::LocInfo &LocInfo,239ISD::ArgFlagsTy &ArgFlags, CCState &State) {240// This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure241// not to split i64 and double between a register and stack242static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};243static const unsigned NumRegs = std::size(RegList);244245SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();246247// If this is the first part of an double/i64/i128, or if we're already248// in the middle of a split, add to the pending list. If this is not249// the end of the split, return, otherwise go on to process the pending250// list251if (ArgFlags.isSplit() || !PendingMembers.empty()) {252PendingMembers.push_back(253CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));254if (!ArgFlags.isSplitEnd())255return true;256}257258// If there are no pending members, we are not in the middle of a split,259// so do the usual inreg stuff.260if (PendingMembers.empty()) {261if (unsigned Reg = State.AllocateReg(RegList)) {262State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));263return true;264}265return false;266}267268assert(ArgFlags.isSplitEnd());269270// We now have the entire original argument in PendingMembers, so decide271// whether to use registers or the stack.272// Per the MCU ABI:273// a) To use registers, we need to have enough of them free to contain274// the entire argument.275// b) We never want to use more than 2 registers for a single argument.276277unsigned FirstFree = State.getFirstUnallocated(RegList);278bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);279280for (auto &It : PendingMembers) {281if (UseRegs)282It.convertToReg(State.AllocateReg(RegList[FirstFree++]));283else284It.convertToMem(State.AllocateStack(4, Align(4)));285State.addLoc(It);286}287288PendingMembers.clear();289290return true;291}292293/// X86 interrupt handlers can only take one or two stack arguments, but if294/// there are two arguments, they are in the opposite order from the standard295/// convention. Therefore, we have to look at the argument count up front before296/// allocating stack for each argument.297static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,298CCValAssign::LocInfo &LocInfo,299ISD::ArgFlagsTy &ArgFlags, CCState &State) {300const MachineFunction &MF = State.getMachineFunction();301size_t ArgCount = State.getMachineFunction().getFunction().arg_size();302bool Is64Bit = MF.getSubtarget<X86Subtarget>().is64Bit();303unsigned SlotSize = Is64Bit ? 8 : 4;304unsigned Offset;305if (ArgCount == 1 && ValNo == 0) {306// If we have one argument, the argument is five stack slots big, at fixed307// offset zero.308Offset = State.AllocateStack(5 * SlotSize, Align(4));309} else if (ArgCount == 2 && ValNo == 0) {310// If we have two arguments, the stack slot is *after* the error code311// argument. Pretend it doesn't consume stack space, and account for it when312// we assign the second argument.313Offset = SlotSize;314} else if (ArgCount == 2 && ValNo == 1) {315// If this is the second of two arguments, it must be the error code. It316// appears first on the stack, and is then followed by the five slot317// interrupt struct.318Offset = 0;319(void)State.AllocateStack(6 * SlotSize, Align(4));320} else {321report_fatal_error("unsupported x86 interrupt prototype");322}323324// FIXME: This should be accounted for in325// X86FrameLowering::getFrameIndexReference, not here.326if (Is64Bit && ArgCount == 2)327Offset += SlotSize;328329State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));330return true;331}332333static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,334CCValAssign::LocInfo &LocInfo,335ISD::ArgFlagsTy &ArgFlags, CCState &State) {336if (LocVT != MVT::i64) {337LocVT = MVT::i64;338LocInfo = CCValAssign::ZExt;339}340return false;341}342343// Provides entry points of CC_X86 and RetCC_X86.344#include "X86GenCallingConv.inc"345346347