Path: blob/main/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp
35269 views
//=== ARMCallingConv.cpp - ARM Custom CC Routines ---------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains the custom routines for the ARM Calling Convention that9// aren't done by tablegen, and includes the table generated implementations.10//11//===----------------------------------------------------------------------===//1213#include "ARM.h"14#include "ARMCallingConv.h"15#include "ARMSubtarget.h"16#include "ARMRegisterInfo.h"17using namespace llvm;1819// APCS f64 is in register pairs, possibly split to stack20static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,21CCValAssign::LocInfo LocInfo,22CCState &State, bool CanFail) {23static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };2425// Try to get the first register.26if (unsigned Reg = State.AllocateReg(RegList))27State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));28else {29// For the 2nd half of a v2f64, do not fail.30if (CanFail)31return false;3233// Put the whole thing on the stack.34State.addLoc(CCValAssign::getCustomMem(35ValNo, ValVT, State.AllocateStack(8, Align(4)), LocVT, LocInfo));36return true;37}3839// Try to get the second register.40if (unsigned Reg = State.AllocateReg(RegList))41State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));42else43State.addLoc(CCValAssign::getCustomMem(44ValNo, ValVT, State.AllocateStack(4, Align(4)), LocVT, LocInfo));45return true;46}4748static bool CC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,49CCValAssign::LocInfo LocInfo,50ISD::ArgFlagsTy ArgFlags,51CCState &State) {52if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))53return false;54if (LocVT == MVT::v2f64 &&55!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))56return false;57return true; // we handled it58}5960// AAPCS f64 is in aligned register pairs61static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,62CCValAssign::LocInfo LocInfo,63CCState &State, bool CanFail) {64static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };65static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };66static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };67static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };6869unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);70if (Reg == 0) {7172// If we had R3 unallocated only, now we still must to waste it.73Reg = State.AllocateReg(GPRArgRegs);74assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");7576// For the 2nd half of a v2f64, do not just fail.77if (CanFail)78return false;7980// Put the whole thing on the stack.81State.addLoc(CCValAssign::getCustomMem(82ValNo, ValVT, State.AllocateStack(8, Align(8)), LocVT, LocInfo));83return true;84}8586unsigned i;87for (i = 0; i < 2; ++i)88if (HiRegList[i] == Reg)89break;9091unsigned T = State.AllocateReg(LoRegList[i]);92(void)T;93assert(T == LoRegList[i] && "Could not allocate register");9495State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));96State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],97LocVT, LocInfo));98return true;99}100101static bool CC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,102CCValAssign::LocInfo LocInfo,103ISD::ArgFlagsTy ArgFlags,104CCState &State) {105if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))106return false;107if (LocVT == MVT::v2f64 &&108!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))109return false;110return true; // we handled it111}112113static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT,114CCValAssign::LocInfo LocInfo, CCState &State) {115static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };116static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };117118unsigned Reg = State.AllocateReg(HiRegList, LoRegList);119if (Reg == 0)120return false; // we didn't handle it121122unsigned i;123for (i = 0; i < 2; ++i)124if (HiRegList[i] == Reg)125break;126127State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));128State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],129LocVT, LocInfo));130return true;131}132133static bool RetCC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,134CCValAssign::LocInfo LocInfo,135ISD::ArgFlagsTy ArgFlags,136CCState &State) {137if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))138return false;139if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))140return false;141return true; // we handled it142}143144static bool RetCC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,145CCValAssign::LocInfo LocInfo,146ISD::ArgFlagsTy ArgFlags,147CCState &State) {148return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,149State);150}151152static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };153154static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,155ARM::S4, ARM::S5, ARM::S6, ARM::S7,156ARM::S8, ARM::S9, ARM::S10, ARM::S11,157ARM::S12, ARM::S13, ARM::S14, ARM::S15 };158static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,159ARM::D4, ARM::D5, ARM::D6, ARM::D7 };160static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };161162163// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA164// has InConsecutiveRegs set, and that the last member also has165// InConsecutiveRegsLast set. We must process all members of the HA before166// we can allocate it, as we need to know the total number of registers that167// will be needed in order to (attempt to) allocate a contiguous block.168static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,169MVT LocVT,170CCValAssign::LocInfo LocInfo,171ISD::ArgFlagsTy ArgFlags,172CCState &State) {173SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();174175// AAPCS HFAs must have 1-4 elements, all of the same type176if (PendingMembers.size() > 0)177assert(PendingMembers[0].getLocVT() == LocVT);178179// Add the argument to the list to be allocated once we know the size of the180// aggregate. Store the type's required alignment as extra info for later: in181// the [N x i64] case all trace has been removed by the time we actually get182// to do allocation.183PendingMembers.push_back(CCValAssign::getPending(184ValNo, ValVT, LocVT, LocInfo, ArgFlags.getNonZeroOrigAlign().value()));185186if (!ArgFlags.isInConsecutiveRegsLast())187return true;188189// Try to allocate a contiguous block of registers, each of the correct190// size to hold one member.191auto &DL = State.getMachineFunction().getDataLayout();192const Align StackAlign = DL.getStackAlignment();193const Align FirstMemberAlign(PendingMembers[0].getExtraInfo());194Align Alignment = std::min(FirstMemberAlign, StackAlign);195196ArrayRef<MCPhysReg> RegList;197switch (LocVT.SimpleTy) {198case MVT::i32: {199RegList = RRegList;200unsigned RegIdx = State.getFirstUnallocated(RegList);201202// First consume all registers that would give an unaligned object. Whether203// we go on stack or in regs, no-one will be using them in future.204unsigned RegAlign = alignTo(Alignment.value(), 4) / 4;205while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())206State.AllocateReg(RegList[RegIdx++]);207208break;209}210case MVT::f16:211case MVT::bf16:212case MVT::f32:213RegList = SRegList;214break;215case MVT::v4f16:216case MVT::v4bf16:217case MVT::f64:218RegList = DRegList;219break;220case MVT::v8f16:221case MVT::v8bf16:222case MVT::v2f64:223RegList = QRegList;224break;225default:226llvm_unreachable("Unexpected member type for block aggregate");227break;228}229230unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());231if (RegResult) {232for (CCValAssign &PendingMember : PendingMembers) {233PendingMember.convertToReg(RegResult);234State.addLoc(PendingMember);235++RegResult;236}237PendingMembers.clear();238return true;239}240241// Register allocation failed, we'll be needing the stack242unsigned Size = LocVT.getSizeInBits() / 8;243if (LocVT == MVT::i32 && State.getStackSize() == 0) {244// If nothing else has used the stack until this point, a non-HFA aggregate245// can be split between regs and stack.246unsigned RegIdx = State.getFirstUnallocated(RegList);247for (auto &It : PendingMembers) {248if (RegIdx >= RegList.size())249It.convertToMem(State.AllocateStack(Size, Align(Size)));250else251It.convertToReg(State.AllocateReg(RegList[RegIdx++]));252253State.addLoc(It);254}255PendingMembers.clear();256return true;257}258259if (LocVT != MVT::i32)260RegList = SRegList;261262// Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)263for (auto Reg : RegList)264State.AllocateReg(Reg);265266// Clamp the alignment between 4 and 8.267if (State.getMachineFunction().getSubtarget<ARMSubtarget>().isTargetAEABI())268Alignment = ArgFlags.getNonZeroMemAlign() <= 4 ? Align(4) : Align(8);269270// After the first item has been allocated, the rest are packed as tightly as271// possible. (E.g. an incoming i64 would have starting Align of 8, but we'll272// be allocating a bunch of i32 slots).273for (auto &It : PendingMembers) {274It.convertToMem(State.AllocateStack(Size, Alignment));275State.addLoc(It);276Alignment = Align(1);277}278279// All pending members have now been allocated280PendingMembers.clear();281282// This will be allocated by the last member of the aggregate283return true;284}285286static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT,287CCValAssign::LocInfo LocInfo, CCState &State,288ArrayRef<MCPhysReg> RegList) {289unsigned Reg = State.AllocateReg(RegList);290if (Reg) {291State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));292return true;293}294return false;295}296297static bool CC_ARM_AAPCS_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,298CCValAssign::LocInfo LocInfo,299ISD::ArgFlagsTy ArgFlags, CCState &State) {300// f16 arguments are extended to i32 and assigned to a register in [r0, r3]301return CustomAssignInRegList(ValNo, ValVT, MVT::i32, LocInfo, State,302RRegList);303}304305static bool CC_ARM_AAPCS_VFP_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,306CCValAssign::LocInfo LocInfo,307ISD::ArgFlagsTy ArgFlags,308CCState &State) {309// f16 arguments are extended to f32 and assigned to a register in [s0, s15]310return CustomAssignInRegList(ValNo, ValVT, MVT::f32, LocInfo, State,311SRegList);312}313314// Include the table generated calling convention implementations.315#include "ARMGenCallingConv.inc"316317318