// Path: blob/main/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the interfaces that VE uses to lower LLVM code into a9// selection DAG.10//11//===----------------------------------------------------------------------===//1213#include "VEISelLowering.h"14#include "MCTargetDesc/VEMCExpr.h"15#include "VECustomDAG.h"16#include "VEInstrBuilder.h"17#include "VEMachineFunctionInfo.h"18#include "VERegisterInfo.h"19#include "VETargetMachine.h"20#include "llvm/ADT/StringSwitch.h"21#include "llvm/CodeGen/CallingConvLower.h"22#include "llvm/CodeGen/MachineFrameInfo.h"23#include "llvm/CodeGen/MachineFunction.h"24#include "llvm/CodeGen/MachineInstrBuilder.h"25#include "llvm/CodeGen/MachineJumpTableInfo.h"26#include "llvm/CodeGen/MachineModuleInfo.h"27#include "llvm/CodeGen/MachineRegisterInfo.h"28#include "llvm/CodeGen/SelectionDAG.h"29#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"30#include "llvm/IR/DerivedTypes.h"31#include "llvm/IR/Function.h"32#include "llvm/IR/IRBuilder.h"33#include "llvm/IR/Module.h"34#include "llvm/Support/ErrorHandling.h"35#include "llvm/Support/KnownBits.h"36using namespace llvm;3738#define DEBUG_TYPE "ve-lower"3940//===----------------------------------------------------------------------===//41// Calling Convention Implementation42//===----------------------------------------------------------------------===//4344#include "VEGenCallingConv.inc"4546CCAssignFn *getReturnCC(CallingConv::ID CallConv) {47switch (CallConv) {48default:49return RetCC_VE_C;50case CallingConv::Fast:51return RetCC_VE_Fast;52}53}5455CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {56if (IsVarArg)57return CC_VE2;58switch (CallConv) 
{59default:60return CC_VE_C;61case CallingConv::Fast:62return CC_VE_Fast;63}64}6566bool VETargetLowering::CanLowerReturn(67CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,68const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {69CCAssignFn *RetCC = getReturnCC(CallConv);70SmallVector<CCValAssign, 16> RVLocs;71CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);72return CCInfo.CheckReturn(Outs, RetCC);73}7475static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,76MVT::v256f32, MVT::v512f32, MVT::v256f64};7778static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};7980static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};8182void VETargetLowering::initRegisterClasses() {83// Set up the register classes.84addRegisterClass(MVT::i32, &VE::I32RegClass);85addRegisterClass(MVT::i64, &VE::I64RegClass);86addRegisterClass(MVT::f32, &VE::F32RegClass);87addRegisterClass(MVT::f64, &VE::I64RegClass);88addRegisterClass(MVT::f128, &VE::F128RegClass);8990if (Subtarget->enableVPU()) {91for (MVT VecVT : AllVectorVTs)92addRegisterClass(VecVT, &VE::V64RegClass);93addRegisterClass(MVT::v256i1, &VE::VMRegClass);94addRegisterClass(MVT::v512i1, &VE::VM512RegClass);95}96}9798void VETargetLowering::initSPUActions() {99const auto &TM = getTargetMachine();100/// Load & Store {101102// VE doesn't have i1 sign extending load.103for (MVT VT : MVT::integer_valuetypes()) {104setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);105setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);106setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);107setTruncStoreAction(VT, MVT::i1, Expand);108}109110// VE doesn't have floating point extload/truncstore, so expand them.111for (MVT FPVT : MVT::fp_valuetypes()) {112for (MVT OtherFPVT : MVT::fp_valuetypes()) {113setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);114setTruncStoreAction(FPVT, OtherFPVT, Expand);115}116}117118// VE doesn't have fp128 load/store, so expand them in custom 
lower.119setOperationAction(ISD::LOAD, MVT::f128, Custom);120setOperationAction(ISD::STORE, MVT::f128, Custom);121122/// } Load & Store123124// Custom legalize address nodes into LO/HI parts.125MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));126setOperationAction(ISD::BlockAddress, PtrVT, Custom);127setOperationAction(ISD::GlobalAddress, PtrVT, Custom);128setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);129setOperationAction(ISD::ConstantPool, PtrVT, Custom);130setOperationAction(ISD::JumpTable, PtrVT, Custom);131132/// VAARG handling {133setOperationAction(ISD::VASTART, MVT::Other, Custom);134// VAARG needs to be lowered to access with 8 bytes alignment.135setOperationAction(ISD::VAARG, MVT::Other, Custom);136// Use the default implementation.137setOperationAction(ISD::VACOPY, MVT::Other, Expand);138setOperationAction(ISD::VAEND, MVT::Other, Expand);139/// } VAARG handling140141/// Stack {142setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);143setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);144145// Use the default implementation.146setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);147setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);148/// } Stack149150/// Branch {151152// VE doesn't have BRCOND153setOperationAction(ISD::BRCOND, MVT::Other, Expand);154155// BR_JT is not implemented yet.156setOperationAction(ISD::BR_JT, MVT::Other, Expand);157158/// } Branch159160/// Int Ops {161for (MVT IntVT : {MVT::i32, MVT::i64}) {162// VE has no REM or DIVREM operations.163setOperationAction(ISD::UREM, IntVT, Expand);164setOperationAction(ISD::SREM, IntVT, Expand);165setOperationAction(ISD::SDIVREM, IntVT, Expand);166setOperationAction(ISD::UDIVREM, IntVT, Expand);167168// VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.169setOperationAction(ISD::SHL_PARTS, IntVT, Expand);170setOperationAction(ISD::SRA_PARTS, IntVT, Expand);171setOperationAction(ISD::SRL_PARTS, IntVT, Expand);172173// VE has no MULHU/S or U/SMUL_LOHI 
operations.174// TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.175setOperationAction(ISD::MULHU, IntVT, Expand);176setOperationAction(ISD::MULHS, IntVT, Expand);177setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);178setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);179180// VE has no CTTZ, ROTL, ROTR operations.181setOperationAction(ISD::CTTZ, IntVT, Expand);182setOperationAction(ISD::ROTL, IntVT, Expand);183setOperationAction(ISD::ROTR, IntVT, Expand);184185// VE has 64 bits instruction which works as i64 BSWAP operation. This186// instruction works fine as i32 BSWAP operation with an additional187// parameter. Use isel patterns to lower BSWAP.188setOperationAction(ISD::BSWAP, IntVT, Legal);189190// VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP191// operations. Use isel patterns for i64, promote for i32.192LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;193setOperationAction(ISD::BITREVERSE, IntVT, Act);194setOperationAction(ISD::CTLZ, IntVT, Act);195setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);196setOperationAction(ISD::CTPOP, IntVT, Act);197198// VE has only 64 bits instructions which work as i64 AND/OR/XOR operations.199// Use isel patterns for i64, promote for i32.200setOperationAction(ISD::AND, IntVT, Act);201setOperationAction(ISD::OR, IntVT, Act);202setOperationAction(ISD::XOR, IntVT, Act);203204// Legal smax and smin205setOperationAction(ISD::SMAX, IntVT, Legal);206setOperationAction(ISD::SMIN, IntVT, Legal);207}208/// } Int Ops209210/// Conversion {211// VE doesn't have instructions for fp<->uint, so expand them by llvm212setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64213setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64214setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);215setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);216217// fp16 not supported218for (MVT FPVT : MVT::fp_valuetypes()) {219setOperationAction(ISD::FP16_TO_FP, FPVT, 
Expand);220setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);221}222/// } Conversion223224/// Floating-point Ops {225/// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,226/// and fcmp.227228// VE doesn't have following floating point operations.229for (MVT VT : MVT::fp_valuetypes()) {230setOperationAction(ISD::FNEG, VT, Expand);231setOperationAction(ISD::FREM, VT, Expand);232}233234// VE doesn't have fdiv of f128.235setOperationAction(ISD::FDIV, MVT::f128, Expand);236237for (MVT FPVT : {MVT::f32, MVT::f64}) {238// f32 and f64 uses ConstantFP. f128 uses ConstantPool.239setOperationAction(ISD::ConstantFP, FPVT, Legal);240}241/// } Floating-point Ops242243/// Floating-point math functions {244245// VE doesn't have following floating point math functions.246for (MVT VT : MVT::fp_valuetypes()) {247setOperationAction(ISD::FABS, VT, Expand);248setOperationAction(ISD::FCOPYSIGN, VT, Expand);249setOperationAction(ISD::FCOS, VT, Expand);250setOperationAction(ISD::FMA, VT, Expand);251setOperationAction(ISD::FPOW, VT, Expand);252setOperationAction(ISD::FSIN, VT, Expand);253setOperationAction(ISD::FSQRT, VT, Expand);254}255256// VE has single and double FMINNUM and FMAXNUM257for (MVT VT : {MVT::f32, MVT::f64}) {258setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);259}260261/// } Floating-point math functions262263/// Atomic instructions {264265setMaxAtomicSizeInBitsSupported(64);266setMinCmpXchgSizeInBits(32);267setSupportsUnalignedAtomics(false);268269// Use custom inserter for ATOMIC_FENCE.270setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);271272// Other atomic instructions.273for (MVT VT : MVT::integer_valuetypes()) {274// Support i8/i16 atomic swap.275setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);276277// FIXME: Support "atmam" instructions.278setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);279setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);280setOperationAction(ISD::ATOMIC_LOAD_AND, VT, 
Expand);281setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);282283// VE doesn't have follwing instructions.284setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);285setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);286setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);287setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);288setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);289setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);290setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);291setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);292}293294/// } Atomic instructions295296/// SJLJ instructions {297setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);298setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);299setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);300if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)301setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");302/// } SJLJ instructions303304// Intrinsic instructions305setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);306}307308void VETargetLowering::initVPUActions() {309for (MVT LegalMaskVT : AllMaskVTs)310setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);311312for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})313setOperationAction(Opc, MVT::v512i1, Custom);314315for (MVT LegalVecVT : AllVectorVTs) {316setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);317setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);318setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);319// Translate all vector instructions with legal element types to VVP_*320// nodes.321// TODO We will custom-widen into VVP_* nodes in the future. 
While we are322// buildling the infrastructure for this, we only do this for legal vector323// VTs.324#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \325setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);326#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \327setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);328setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);329setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);330#include "VVPNodes.def"331}332333for (MVT LegalPackedVT : AllPackedVTs) {334setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);335setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);336}337338// vNt32, vNt64 ops (legal element types)339for (MVT VT : MVT::vector_valuetypes()) {340MVT ElemVT = VT.getVectorElementType();341unsigned ElemBits = ElemVT.getScalarSizeInBits();342if (ElemBits != 32 && ElemBits != 64)343continue;344345for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})346setOperationAction(MemOpc, VT, Custom);347348const ISD::NodeType IntReductionOCs[] = {349ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,350ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,351ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};352353for (unsigned IntRedOpc : IntReductionOCs)354setOperationAction(IntRedOpc, VT, Custom);355}356357// v256i1 and v512i1 ops358for (MVT MaskVT : AllMaskVTs) {359// Custom lower mask ops360setOperationAction(ISD::STORE, MaskVT, Custom);361setOperationAction(ISD::LOAD, MaskVT, Custom);362}363}364365SDValue366VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,367bool IsVarArg,368const SmallVectorImpl<ISD::OutputArg> &Outs,369const SmallVectorImpl<SDValue> &OutVals,370const SDLoc &DL, SelectionDAG &DAG) const {371// CCValAssign - represent the assignment of the return value to locations.372SmallVector<CCValAssign, 16> RVLocs;373374// CCState - Info about the registers and stack slot.375CCState 
CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,376*DAG.getContext());377378// Analyze return values.379CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));380381SDValue Glue;382SmallVector<SDValue, 4> RetOps(1, Chain);383384// Copy the result values into the output registers.385for (unsigned i = 0; i != RVLocs.size(); ++i) {386CCValAssign &VA = RVLocs[i];387assert(VA.isRegLoc() && "Can only return in registers!");388assert(!VA.needsCustom() && "Unexpected custom lowering");389SDValue OutVal = OutVals[i];390391// Integer return values must be sign or zero extended by the callee.392switch (VA.getLocInfo()) {393case CCValAssign::Full:394break;395case CCValAssign::SExt:396OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);397break;398case CCValAssign::ZExt:399OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);400break;401case CCValAssign::AExt:402OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);403break;404case CCValAssign::BCvt: {405// Convert a float return value to i64 with padding.406// 63 31 0407// +------+------+408// | float| 0 |409// +------+------+410assert(VA.getLocVT() == MVT::i64);411assert(VA.getValVT() == MVT::f32);412SDValue Undef = SDValue(413DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);414SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);415OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,416MVT::i64, Undef, OutVal, Sub_f32),4170);418break;419}420default:421llvm_unreachable("Unknown loc info!");422}423424Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);425426// Guarantee that all emitted copies are stuck together with flags.427Glue = Chain.getValue(1);428RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));429}430431RetOps[0] = Chain; // Update chain.432433// Add the glue if we have it.434if (Glue.getNode())435RetOps.push_back(Glue);436437return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, 
RetOps);438}439440SDValue VETargetLowering::LowerFormalArguments(441SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,442const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,443SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {444MachineFunction &MF = DAG.getMachineFunction();445446// Get the base offset of the incoming arguments stack space.447unsigned ArgsBaseOffset = Subtarget->getRsaSize();448// Get the size of the preserved arguments area449unsigned ArgsPreserved = 64;450451// Analyze arguments according to CC_VE.452SmallVector<CCValAssign, 16> ArgLocs;453CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,454*DAG.getContext());455// Allocate the preserved area first.456CCInfo.AllocateStack(ArgsPreserved, Align(8));457// We already allocated the preserved area, so the stack offset computed458// by CC_VE would be correct now.459CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));460461for (const CCValAssign &VA : ArgLocs) {462assert(!VA.needsCustom() && "Unexpected custom lowering");463if (VA.isRegLoc()) {464// This argument is passed in a register.465// All integer register arguments are promoted by the caller to i64.466467// Create a virtual register for the promoted live-in value.468Register VReg =469MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));470SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());471472// The caller promoted the argument, so insert an Assert?ext SDNode so we473// won't promote the value again in this function.474switch (VA.getLocInfo()) {475case CCValAssign::SExt:476Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,477DAG.getValueType(VA.getValVT()));478break;479case CCValAssign::ZExt:480Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,481DAG.getValueType(VA.getValVT()));482break;483case CCValAssign::BCvt: {484// Extract a float argument from i64 with padding.485// 63 31 0486// +------+------+487// | float| 0 |488// 
+------+------+489assert(VA.getLocVT() == MVT::i64);490assert(VA.getValVT() == MVT::f32);491SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);492Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,493MVT::f32, Arg, Sub_f32),4940);495break;496}497default:498break;499}500501// Truncate the register down to the argument type.502if (VA.isExtInLoc())503Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);504505InVals.push_back(Arg);506continue;507}508509// The registers are exhausted. This argument was passed on the stack.510assert(VA.isMemLoc());511// The CC_VE_Full/Half functions compute stack offsets relative to the512// beginning of the arguments area at %fp + the size of reserved area.513unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;514unsigned ValSize = VA.getValVT().getSizeInBits() / 8;515516// Adjust offset for a float argument by adding 4 since the argument is517// stored in 8 bytes buffer with offset like below. LLVM generates518// 4 bytes load instruction, so need to adjust offset here. This519// adjustment is required in only LowerFormalArguments. 
In LowerCall,520// a float argument is converted to i64 first, and stored as 8 bytes521// data, which is required by ABI, so no need for adjustment.522// 0 4523// +------+------+524// | empty| float|525// +------+------+526if (VA.getValVT() == MVT::f32)527Offset += 4;528529int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);530InVals.push_back(531DAG.getLoad(VA.getValVT(), DL, Chain,532DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),533MachinePointerInfo::getFixedStack(MF, FI)));534}535536if (!IsVarArg)537return Chain;538539// This function takes variable arguments, some of which may have been passed540// in registers %s0-%s8.541//542// The va_start intrinsic needs to know the offset to the first variable543// argument.544// TODO: need to calculate offset correctly once we support f128.545unsigned ArgOffset = ArgLocs.size() * 8;546VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();547// Skip the reserved area at the top of stack.548FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);549550return Chain;551}552553// FIXME? 
Maybe this could be a TableGen attribute on some registers and554// this table could be generated automatically from RegInfo.555Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,556const MachineFunction &MF) const {557Register Reg = StringSwitch<Register>(RegName)558.Case("sp", VE::SX11) // Stack pointer559.Case("fp", VE::SX9) // Frame pointer560.Case("sl", VE::SX8) // Stack limit561.Case("lr", VE::SX10) // Link register562.Case("tp", VE::SX14) // Thread pointer563.Case("outer", VE::SX12) // Outer regiser564.Case("info", VE::SX17) // Info area register565.Case("got", VE::SX15) // Global offset table register566.Case("plt", VE::SX16) // Procedure linkage table register567.Default(0);568569if (Reg)570return Reg;571572report_fatal_error("Invalid register name global variable");573}574575//===----------------------------------------------------------------------===//576// TargetLowering Implementation577//===----------------------------------------------------------------------===//578579SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,580SmallVectorImpl<SDValue> &InVals) const {581SelectionDAG &DAG = CLI.DAG;582SDLoc DL = CLI.DL;583SDValue Chain = CLI.Chain;584auto PtrVT = getPointerTy(DAG.getDataLayout());585586// VE target does not yet support tail call optimization.587CLI.IsTailCall = false;588589// Get the base offset of the outgoing arguments stack space.590unsigned ArgsBaseOffset = Subtarget->getRsaSize();591// Get the size of the preserved arguments area592unsigned ArgsPreserved = 8 * 8u;593594// Analyze operands of the call, assigning locations to each operand.595SmallVector<CCValAssign, 16> ArgLocs;596CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,597*DAG.getContext());598// Allocate the preserved area first.599CCInfo.AllocateStack(ArgsPreserved, Align(8));600// We already allocated the preserved area, so the stack offset computed601// by CC_VE would be correct 
now.602CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));603604// VE requires to use both register and stack for varargs or no-prototyped605// functions.606bool UseBoth = CLI.IsVarArg;607608// Analyze operands again if it is required to store BOTH.609SmallVector<CCValAssign, 16> ArgLocs2;610CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),611ArgLocs2, *DAG.getContext());612if (UseBoth)613CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));614615// Get the size of the outgoing arguments stack space requirement.616unsigned ArgsSize = CCInfo.getStackSize();617618// Keep stack frames 16-byte aligned.619ArgsSize = alignTo(ArgsSize, 16);620621// Adjust the stack pointer to make room for the arguments.622// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls623// with more than 6 arguments.624Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);625626// Collect the set of registers to pass to the function and their values.627// This will be emitted as a sequence of CopyToReg nodes glued to the call628// instruction.629SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;630631// Collect chains from all the memory opeations that copy arguments to the632// stack. 
They must follow the stack pointer adjustment above and precede the633// call instruction itself.634SmallVector<SDValue, 8> MemOpChains;635636// VE needs to get address of callee function in a register637// So, prepare to copy it to SX12 here.638639// If the callee is a GlobalAddress node (quite common, every direct call is)640// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.641// Likewise ExternalSymbol -> TargetExternalSymbol.642SDValue Callee = CLI.Callee;643644bool IsPICCall = isPositionIndependent();645646// PC-relative references to external symbols should go through $stub.647// If so, we need to prepare GlobalBaseReg first.648const TargetMachine &TM = DAG.getTarget();649const GlobalValue *GV = nullptr;650auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);651if (CalleeG)652GV = CalleeG->getGlobal();653bool Local = TM.shouldAssumeDSOLocal(GV);654bool UsePlt = !Local;655MachineFunction &MF = DAG.getMachineFunction();656657// Turn GlobalAddress/ExternalSymbol node into a value node658// containing the address of them here.659if (CalleeG) {660if (IsPICCall) {661if (UsePlt)662Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);663Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);664Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);665} else {666Callee =667makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);668}669} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {670if (IsPICCall) {671if (UsePlt)672Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);673Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);674Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);675} else {676Callee =677makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);678}679}680681RegsToPass.push_back(std::make_pair(VE::SX12, Callee));682683for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {684CCValAssign &VA = ArgLocs[i];685SDValue Arg = CLI.OutVals[i];686687// Promote the 
value if needed.688switch (VA.getLocInfo()) {689default:690llvm_unreachable("Unknown location info!");691case CCValAssign::Full:692break;693case CCValAssign::SExt:694Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);695break;696case CCValAssign::ZExt:697Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);698break;699case CCValAssign::AExt:700Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);701break;702case CCValAssign::BCvt: {703// Convert a float argument to i64 with padding.704// 63 31 0705// +------+------+706// | float| 0 |707// +------+------+708assert(VA.getLocVT() == MVT::i64);709assert(VA.getValVT() == MVT::f32);710SDValue Undef = SDValue(711DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);712SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);713Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,714MVT::i64, Undef, Arg, Sub_f32),7150);716break;717}718}719720if (VA.isRegLoc()) {721RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));722if (!UseBoth)723continue;724VA = ArgLocs2[i];725}726727assert(VA.isMemLoc());728729// Create a store off the stack pointer for this argument.730SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);731// The argument area starts at %fp/%sp + the size of reserved area.732SDValue PtrOff =733DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);734PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);735MemOpChains.push_back(736DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));737}738739// Emit all stores, make sure they occur before the call.740if (!MemOpChains.empty())741Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);742743// Build a sequence of CopyToReg nodes glued together with token chain and744// glue operands which copy the outgoing args into registers. 
The InGlue is745// necessary since all emitted instructions must be stuck together in order746// to pass the live physical registers.747SDValue InGlue;748for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {749Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,750RegsToPass[i].second, InGlue);751InGlue = Chain.getValue(1);752}753754// Build the operands for the call instruction itself.755SmallVector<SDValue, 8> Ops;756Ops.push_back(Chain);757for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)758Ops.push_back(DAG.getRegister(RegsToPass[i].first,759RegsToPass[i].second.getValueType()));760761// Add a register mask operand representing the call-preserved registers.762const VERegisterInfo *TRI = Subtarget->getRegisterInfo();763const uint32_t *Mask =764TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);765assert(Mask && "Missing call preserved mask for calling convention");766Ops.push_back(DAG.getRegisterMask(Mask));767768// Make sure the CopyToReg nodes are glued to the call instruction which769// consumes the registers.770if (InGlue.getNode())771Ops.push_back(InGlue);772773// Now the call itself.774SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);775Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);776InGlue = Chain.getValue(1);777778// Revert the stack pointer immediately after the call.779Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);780InGlue = Chain.getValue(1);781782// Now extract the return values. 
This is more or less the same as783// LowerFormalArguments.784785// Assign locations to each value returned by this call.786SmallVector<CCValAssign, 16> RVLocs;787CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,788*DAG.getContext());789790// Set inreg flag manually for codegen generated library calls that791// return float.792if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)793CLI.Ins[0].Flags.setInReg();794795RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));796797// Copy all of the result registers out of their specified physreg.798for (unsigned i = 0; i != RVLocs.size(); ++i) {799CCValAssign &VA = RVLocs[i];800assert(!VA.needsCustom() && "Unexpected custom lowering");801Register Reg = VA.getLocReg();802803// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can804// reside in the same register in the high and low bits. Reuse the805// CopyFromReg previous node to avoid duplicate copies.806SDValue RV;807if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))808if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)809RV = Chain.getValue(0);810811// But usually we'll create a new CopyFromReg for a different register.812if (!RV.getNode()) {813RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);814Chain = RV.getValue(1);815InGlue = Chain.getValue(2);816}817818// The callee promoted the return value, so insert an Assert?ext SDNode so819// we won't promote the value again in this function.820switch (VA.getLocInfo()) {821case CCValAssign::SExt:822RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,823DAG.getValueType(VA.getValVT()));824break;825case CCValAssign::ZExt:826RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,827DAG.getValueType(VA.getValVT()));828break;829case CCValAssign::BCvt: {830// Extract a float return value from i64 with padding.831// 63 31 0832// +------+------+833// | float| 0 |834// 
+------+------+835assert(VA.getLocVT() == MVT::i64);836assert(VA.getValVT() == MVT::f32);837SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);838RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,839MVT::f32, RV, Sub_f32),8400);841break;842}843default:844break;845}846847// Truncate the register down to the return value type.848if (VA.isExtInLoc())849RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);850851InVals.push_back(RV);852}853854return Chain;855}856857bool VETargetLowering::isOffsetFoldingLegal(858const GlobalAddressSDNode *GA) const {859// VE uses 64 bit addressing, so we need multiple instructions to generate860// an address. Folding address with offset increases the number of861// instructions, so that we disable it here. Offsets will be folded in862// the DAG combine later if it worth to do so.863return false;864}865866/// isFPImmLegal - Returns true if the target can instruction select the867/// specified FP immediate natively. If false, the legalizer will868/// materialize the FP immediate as a load from a constant pool.869bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,870bool ForCodeSize) const {871return VT == MVT::f32 || VT == MVT::f64;872}873874/// Determine if the target supports unaligned memory accesses.875///876/// This function returns true if the target allows unaligned memory accesses877/// of the specified type in the given address space. If true, it also returns878/// whether the unaligned memory access is "fast" in the last argument by879/// reference. This is used, for example, in situations where an array880/// copy/move/set is converted to a sequence of store operations. 
/// Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      Align A,
                                                      MachineMemOperand::Flags,
                                                      unsigned *Fast) const {
  if (Fast) {
    // It's fast anytime on VE
    *Fast = 1;
  }
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  initRegisterClasses();
  initSPUActions();
  initVPUActions();

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SELECT_CC);

  // Set function alignment to 16 bytes
  setMinFunctionAlignment(Align(16));

  // VE stores all argument by 8 bytes alignment
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

// Map a VEISD target opcode to its printable name (for DAG dumps).
const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case
VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(CMPI)
    TARGET_NODE_CASE(CMPU)
    TARGET_NODE_CASE(CMPF)
    TARGET_NODE_CASE(CMPQ)
    TARGET_NODE_CASE(CMOV)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(RET_GLUE)
    TARGET_NODE_CASE(TS1AM)
    TARGET_NODE_CASE(VEC_UNPACK_LO)
    TARGET_NODE_CASE(VEC_UNPACK_HI)
    TARGET_NODE_CASE(VEC_PACK)
    TARGET_NODE_CASE(VEC_BROADCAST)
    TARGET_NODE_CASE(REPL_I32)
    TARGET_NODE_CASE(REPL_F32)

    TARGET_NODE_CASE(LEGALAVL)

    // Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
                                     CP->getAlign(), CP->getOffset(), TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a got load for every variable!
  if (isPositionIndependent()) {
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create following instructions for local linkage PIC code.
      //     lea %reg, label@gotoff_lo
      //     and %reg, %reg, (32)0
      //     lea.sl %reg, label@gotoff_hi(%reg, %got)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create following instructions for not local linkage PIC code.
    //     lea %reg, label@got_lo
    //     and %reg, %reg, (32)0
    //     lea.sl %reg, label@got_hi(%reg)
    //     ld %reg, (%reg, %got)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return
DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}

/// Custom Lower {

// The mappings for emitLeading/TrailingFence for VE is designed by following
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                Instruction *Inst,
                                                AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/non-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return nullptr; // Nothing to do
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Release);
  case AtomicOrdering::SequentiallyConsistent:
    if (!Inst->hasAtomicStore())
      return nullptr; // Nothing to do
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}

Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return nullptr; // Nothing to do
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Acquire);
  case AtomicOrdering::SequentiallyConsistent:
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}

// Lower ATOMIC_FENCE.  Cross-thread (SyncScope::System) fences become a
// "fencem" machine node; any other scope codegens to a MEMBARRIER no-op.
SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // VE uses Release consistency, so need a fence instruction if it is a
  // cross-thread fence.
  if (FenceSSID == SyncScope::System) {
    switch (FenceOrdering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No need to generate fencem instruction here.
      break;
    case AtomicOrdering::Acquire:
      // Generate "fencem 2" as acquire fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(2, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::Release:
      // Generate "fencem 1" as release fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(1, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for PCIe devices accesses,
      // so seq_cst may require more instruction for them.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(3, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

TargetLowering::AtomicExpansionKind
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have TS1AM implementation for i8/i16/i32/i64, so use it.
  if (AI->getOperation() == AtomicRMWInst::Xchg)
{
    return AtomicExpansionKind::None;
  }
  // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.

  // Otherwise, expand it using compare and exchange instruction to not call
  // __sync_fetch_and_* functions.
  return AtomicExpansionKind::CmpXChg;
}

// Compute the TS1AM operands for a sub-word atomic swap: the byte-enable
// Flag, the bit shift amount Bits, and the shifted new value (returned).
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
                            SDValue &Bits) {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);
  SDValue Ptr = N->getOperand(1);
  SDValue Val = N->getOperand(2);
  EVT PtrVT = Ptr.getValueType();
  bool Byte = N->getMemoryVT() == MVT::i8;
  //   Remainder = AND Ptr, 3
  //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
  //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
  //   Bits = Remainder << 3
  //   NewVal = Val << Bits
  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
                      : DAG.getConstant(3, DL, MVT::i32);
  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
}

// Shift the TS1AM result back down and mask it to the sub-word width.
static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
                             SDValue Bits) {
  SDLoc DL(Op);
  EVT VT = Data.getValueType();
  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
  //   NewData = Data >> Bits
  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)

  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
  return DAG.getNode(ISD::AND, DL, VT,
                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
}

// Lower i8/i16 ATOMIC_SWAP to the VE "ts1am" instruction on the containing
// 4-byte-aligned word; other widths are left for generic legalization.
SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);

  if (N->getMemoryVT() == MVT::i8) {
    // For i8, use "ts1am"
    //   Input:
    //     ATOMIC_SWAP Ptr, Val, Order
    //
    //   Output:
    //     Remainder = AND Ptr, 3
    //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
    //     Bits = Remainder << 3
    //     NewVal = Val << Bits
    //
    //     Aligned = AND Ptr, -4
    //     Data = TS1AM Aligned, Flag, NewVal
    //
    //     NewData = Data >> Bits
    //     Result = NewData & 0xff ; 1 byte result
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  if (N->getMemoryVT() == MVT::i16) {
    // For i16, use "ts1am"
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  // Otherwise, let llvm legalize it.
  return Op;
}

SDValue
VETargetLowering::lowerGlobalAddress(SDValue Op,
                                     SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerConstantPool(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return lowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

// Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  SDValue Lo64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);
  EVT AddrVT = LdNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  SDValue Hi64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ?
MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
  SDNode *InFP128 =
      DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Hi64, SubRegEven);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Lo64, SubRegOdd);
  SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
                          SDValue(Hi64.getNode(), 1)};
  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
  return DAG.getMergeValues(Ops, DL);
}

// Lower a vXi1 load into following instructions
//   LDrii %1, (,%addr)
//   LVMxir  %vm, 0, %1
//   LDrii %2, 8(,%addr)
//   LVMxir  %vm, 0, %2
//   ...
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = LdNode->getBasePtr();
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 4; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 8; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else {
    // Otherwise, ask llvm to expand it.
    return SDValue();
  }
}

SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
  EVT MemVT = LdNode->getMemoryVT();

  // If VPU is enabled, always expand non-mask vector loads to VVP
  if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = LdNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand load instruction with frame index here because of
    // dependency problems.
We expand it later in eliminateFrameIndex().
    return Op;
  }

  if (MemVT == MVT::f128)
    return lowerLoadF128(Op, DAG);
  if (isMaskType(MemVT))
    return lowerLoadI1(Op, DAG);

  return Op;
}

// Lower a f128 store into two f64 stores.
static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegEven);
  SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegOdd);

  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
  SDValue OutChains[2];
  OutChains[0] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
                   StNode->getBasePtr(), MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  EVT AddrVT = StNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  OutChains[1] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
                   MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}

// Lower a vXi1 store into following instructions
//   SVMi  %1, %vm, 0
//   STrii %1, (,%addr)
//   SVMi  %2, %vm, 1
//   STrii %2, 8(,%addr)
//   ...
static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = StNode->getBasePtr();
  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);
  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = StNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    for (int i = 0; i < 4; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    for (int i = 0; i < 8; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ?
MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else {
    // Otherwise, ask llvm to expand it.
    return SDValue();
  }
}

SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
  EVT MemVT = StNode->getMemoryVT();

  // If VPU is enabled, always expand non-mask vector stores to VVP
  if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = StNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand store instruction with frame index here because of
    // dependency problems.  We expand it later in eliminateFrameIndex().
    return Op;
  }

  if (MemVT == MVT::f128)
    return lowerStoreF128(Op, DAG);
  if (isMaskType(MemVT))
    return lowerStoreI1(Op, DAG);

  // Otherwise, ask llvm to expand it.
  return SDValue();
}

// Lower VASTART: store the address of the variable-arguments area
// (frame pointer SX9 plus VarArgsFrameOffset) to the va_list slot.
SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc DL(Op);
  SDValue Offset =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

// Lower VAARG: read the current va_list pointer, advance it (16-byte
// aligned slot for f128, 8 bytes otherwise; f32 lives in the upper half
// of its 8-byte slot), store the new pointer back, then load the value.
SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  EVT PtrVT = VAListPtr.getValueType();
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc DL(Node);
  SDValue VAList =
      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
  SDValue Chain = VAList.getValue(1);
  SDValue NextPtr;

  if (VT == MVT::f128) {
    // VE f128 values must be stored with 16 bytes alignment.  We don't
    // know the actual alignment of VAList, so we take alignment of it
    // dynamically.
    int Align = 16;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(Align - 1, DL, PtrVT));
    VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
                         DAG.getConstant(-Align, DL, PtrVT));
    // Increment the pointer, VAList, by 16 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
  } else if (VT == MVT::f32) {
    // float --> need special handling like below.
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
    // Then, adjust VAList.
    unsigned InternalOffset = 4;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(InternalOffset, DL, PtrVT));
  } else {
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
  }

  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));

  // Load the actual argument out of the pointer VAList.
  // We can't count on greater alignment than the word size.
  return DAG.getLoad(
      VT, DL,
InChain, VAList, MachinePointerInfo(),
      Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
}

SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Generate following code.
  //   (void)__llvm_grow_stack(size);
  //   ret = GETSTACKTOP;        // pseudo instruction
  SDLoc DL(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);

  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  Align StackAlign = TFI.getStackAlign();
  bool NeedsAlign = Alignment.valueOrOne() > StackAlign;

  // Prepare arguments
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Size;
  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
  Args.push_back(Entry);
  if (NeedsAlign) {
    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Args.push_back(Entry);
  }
  Type *RetTy = Type::getVoidTy(*DAG.getContext());

  EVT PtrVT = Op.getValueType();
  SDValue Callee;
  if (NeedsAlign) {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
  } else {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
  }

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
      .setDiscardResult(true);
  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
  Chain = pair.second;
  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
  if (NeedsAlign) {
    Result = DAG.getNode(ISD::ADD, DL, VT, Result,
                         DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
    Result = DAG.getNode(ISD::AND, DL, VT, Result,
                         DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
  }
  //  Chain = Result.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}

SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1));
}

SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                     Op.getOperand(1));
}

SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
                     Op.getOperand(0));
}

static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
                              const VETargetLowering &TLI,
                              const VESubtarget *Subtarget) {
  SDLoc DL(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  unsigned Depth = Op.getConstantOperandVal(0);
  const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  Register FrameReg = RegInfo->getFrameRegister(MF);
  SDValue FrameAddr =
      DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}

static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
                               const VETargetLowering &TLI,
                               const VESubtarget *Subtarget)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Offset = DAG.getConstant(8, DL, VT);
  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                     MachinePointerInfo());
}

SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned IntNo = Op.getConstantOperandVal(0);
  switch (IntNo) {
  default: // Don't custom lower most intrinsics.
    return SDValue();
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    MVT VT = Op.getSimpleValueType();
    const VETargetMachine *TM =
        static_cast<const VETargetMachine *>(&DAG.getTarget());

    // Create GCC_except_tableXX string.  The real symbol for that will be
    // generated in EHStreamer::emitExceptionTable() later.  So, we just
    // borrow its name here.
    TM->getStrList()->push_back(std::string(
        (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
    SDValue Addr =
        DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
    if (isPositionIndependent()) {
      Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
                          VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
      return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
    }
    return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
  }
}

// Return true (and set UniqueIdx) iff N is a BUILD_VECTOR with exactly one
// non-undef operand.
static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
  if (!isa<BuildVectorSDNode>(N))
    return false;
  const auto *BVN = cast<BuildVectorSDNode>(N);

  // Find first non-undef insertion.
  unsigned Idx;
  for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
    auto ElemV = BVN->getOperand(Idx);
    if (!ElemV->isUndef())
      break;
  }
  // Catch the (hypothetical) all-undef case.
  if (Idx == BVN->getNumOperands())
    return false;
  // Remember insertion.
  UniqueIdx = Idx++;
  // Verify that all other insertions are undef.
  for (; Idx < BVN->getNumOperands(); ++Idx) {
    auto ElemV = BVN->getOperand(Idx);
    if (!ElemV->isUndef())
      return false;
  }
  return true;
}

// Return the splat value of N if it is a splat BUILD_VECTOR, else an empty
// SDValue.
static SDValue getSplatValue(SDNode *N) {
  if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
    return BuildVec->getSplatValue();
  }
  return SDValue();
}

SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                            SelectionDAG &DAG) const {
  VECustomDAG CDAG(DAG, Op);
  MVT ResultVT = Op.getSimpleValueType();

  // If there is just one element, expand to INSERT_VECTOR_ELT.
  unsigned UniqueIdx;
  if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
    SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
    auto ElemV = Op->getOperand(UniqueIdx);
    SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
    return
CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});1839}18401841// Else emit a broadcast.1842if (SDValue ScalarV = getSplatValue(Op.getNode())) {1843unsigned NumEls = ResultVT.getVectorNumElements();1844auto AVL = CDAG.getConstant(NumEls, MVT::i32);1845return CDAG.getBroadcast(ResultVT, ScalarV, AVL);1846}18471848// Expand1849return SDValue();1850}18511852TargetLowering::LegalizeAction1853VETargetLowering::getCustomOperationAction(SDNode &Op) const {1854// Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize1855// these operations (transform nodes such that their AVL parameter refers to1856// packs of 64bit, instead of number of elements.18571858// Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to1859// re-visit them.1860if (isPackingSupportOpcode(Op.getOpcode()))1861return Legal;18621863// Custom lower to legalize AVL for packed mode.1864if (isVVPOrVEC(Op.getOpcode()))1865return Custom;1866return Legal;1867}18681869SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {1870LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));1871unsigned Opcode = Op.getOpcode();18721873/// Scalar isel.1874switch (Opcode) {1875case ISD::ATOMIC_FENCE:1876return lowerATOMIC_FENCE(Op, DAG);1877case ISD::ATOMIC_SWAP:1878return lowerATOMIC_SWAP(Op, DAG);1879case ISD::BlockAddress:1880return lowerBlockAddress(Op, DAG);1881case ISD::ConstantPool:1882return lowerConstantPool(Op, DAG);1883case ISD::DYNAMIC_STACKALLOC:1884return lowerDYNAMIC_STACKALLOC(Op, DAG);1885case ISD::EH_SJLJ_LONGJMP:1886return lowerEH_SJLJ_LONGJMP(Op, DAG);1887case ISD::EH_SJLJ_SETJMP:1888return lowerEH_SJLJ_SETJMP(Op, DAG);1889case ISD::EH_SJLJ_SETUP_DISPATCH:1890return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);1891case ISD::FRAMEADDR:1892return lowerFRAMEADDR(Op, DAG, *this, Subtarget);1893case ISD::GlobalAddress:1894return lowerGlobalAddress(Op, DAG);1895case ISD::GlobalTLSAddress:1896return lowerGlobalTLSAddress(Op, 
DAG);1897case ISD::INTRINSIC_WO_CHAIN:1898return lowerINTRINSIC_WO_CHAIN(Op, DAG);1899case ISD::JumpTable:1900return lowerJumpTable(Op, DAG);1901case ISD::LOAD:1902return lowerLOAD(Op, DAG);1903case ISD::RETURNADDR:1904return lowerRETURNADDR(Op, DAG, *this, Subtarget);1905case ISD::BUILD_VECTOR:1906return lowerBUILD_VECTOR(Op, DAG);1907case ISD::STORE:1908return lowerSTORE(Op, DAG);1909case ISD::VASTART:1910return lowerVASTART(Op, DAG);1911case ISD::VAARG:1912return lowerVAARG(Op, DAG);19131914case ISD::INSERT_VECTOR_ELT:1915return lowerINSERT_VECTOR_ELT(Op, DAG);1916case ISD::EXTRACT_VECTOR_ELT:1917return lowerEXTRACT_VECTOR_ELT(Op, DAG);1918}19191920/// Vector isel.1921if (ISD::isVPOpcode(Opcode))1922return lowerToVVP(Op, DAG);19231924switch (Opcode) {1925default:1926llvm_unreachable("Should not custom lower this!");19271928// Legalize the AVL of this internal node.1929case VEISD::VEC_BROADCAST:1930#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:1931#include "VVPNodes.def"1932// AVL already legalized.1933if (getAnnotatedNodeAVL(Op).second)1934return Op;1935return legalizeInternalVectorOp(Op, DAG);19361937// Translate into a VEC_*/VVP_* layer operation.1938case ISD::MLOAD:1939case ISD::MSTORE:1940#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:1941#include "VVPNodes.def"1942if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))1943return splitMaskArithmetic(Op, DAG);1944return lowerToVVP(Op, DAG);1945}1946}1947/// } Custom Lower19481949void VETargetLowering::ReplaceNodeResults(SDNode *N,1950SmallVectorImpl<SDValue> &Results,1951SelectionDAG &DAG) const {1952switch (N->getOpcode()) {1953case ISD::ATOMIC_SWAP:1954// Let LLVM expand atomic swap instruction through LowerOperation.1955return;1956default:1957LLVM_DEBUG(N->dumpr(&DAG));1958llvm_unreachable("Do not know how to custom type legalize this operation!");1959}1960}19611962/// JumpTable for VE.1963///1964/// VE cannot generate relocatable symbol in jump table. 
VE cannot1965/// generate expressions using symbols in both text segment and data1966/// segment like below.1967/// .4byte .LBB0_2-.LJTI0_01968/// So, we generate offset from the top of function like below as1969/// a custom label.1970/// .4byte .LBB0_2-<function name>19711972unsigned VETargetLowering::getJumpTableEncoding() const {1973// Use custom label for PIC.1974if (isPositionIndependent())1975return MachineJumpTableInfo::EK_Custom32;19761977// Otherwise, use the normal jump table encoding heuristics.1978return TargetLowering::getJumpTableEncoding();1979}19801981const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(1982const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,1983unsigned Uid, MCContext &Ctx) const {1984assert(isPositionIndependent());19851986// Generate custom label for PIC like below.1987// .4bytes .LBB0_2-<function name>1988const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);1989MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());1990const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);1991return MCBinaryExpr::createSub(Value, Base, Ctx);1992}19931994SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,1995SelectionDAG &DAG) const {1996assert(isPositionIndependent());1997SDLoc DL(Table);1998Function *Function = &DAG.getMachineFunction().getFunction();1999assert(Function != nullptr);2000auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());20012002// In the jump table, we have following values in PIC mode.2003// .4bytes .LBB0_2-<function name>2004// We need to add this value and the address of this function to generate2005// .LBB0_2 label correctly under PIC mode. 
So, we want to generate following2006// instructions:2007// lea %reg, fun@gotoff_lo2008// and %reg, %reg, (32)02009// lea.sl %reg, fun@gotoff_hi(%reg, %got)2010// In order to do so, we need to genarate correctly marked DAG node using2011// makeHiLoPair.2012SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);2013SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,2014VEMCExpr::VK_VE_GOTOFF_LO32, DAG);2015SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);2016return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);2017}20182019Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,2020MachineBasicBlock::iterator I,2021MachineBasicBlock *TargetBB,2022const DebugLoc &DL) const {2023MachineFunction *MF = MBB.getParent();2024MachineRegisterInfo &MRI = MF->getRegInfo();2025const VEInstrInfo *TII = Subtarget->getInstrInfo();20262027const TargetRegisterClass *RC = &VE::I64RegClass;2028Register Tmp1 = MRI.createVirtualRegister(RC);2029Register Tmp2 = MRI.createVirtualRegister(RC);2030Register Result = MRI.createVirtualRegister(RC);20312032if (isPositionIndependent()) {2033// Create following instructions for local linkage PIC code.2034// lea %Tmp1, TargetBB@gotoff_lo2035// and %Tmp2, %Tmp1, (32)02036// lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT2037BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2038.addImm(0)2039.addImm(0)2040.addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);2041BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2042.addReg(Tmp1, getKillRegState(true))2043.addImm(M0(32));2044BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)2045.addReg(VE::SX15)2046.addReg(Tmp2, getKillRegState(true))2047.addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);2048} else {2049// Create following instructions for non-PIC code.2050// lea %Tmp1, TargetBB@lo2051// and %Tmp2, %Tmp1, (32)02052// lea.sl %Result, TargetBB@hi(%Tmp2)2053BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2054.addImm(0)2055.addImm(0)2056.addMBB(TargetBB, 
VEMCExpr::VK_VE_LO32);2057BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2058.addReg(Tmp1, getKillRegState(true))2059.addImm(M0(32));2060BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)2061.addReg(Tmp2, getKillRegState(true))2062.addImm(0)2063.addMBB(TargetBB, VEMCExpr::VK_VE_HI32);2064}2065return Result;2066}20672068Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,2069MachineBasicBlock::iterator I,2070StringRef Symbol, const DebugLoc &DL,2071bool IsLocal = false,2072bool IsCall = false) const {2073MachineFunction *MF = MBB.getParent();2074MachineRegisterInfo &MRI = MF->getRegInfo();2075const VEInstrInfo *TII = Subtarget->getInstrInfo();20762077const TargetRegisterClass *RC = &VE::I64RegClass;2078Register Result = MRI.createVirtualRegister(RC);20792080if (isPositionIndependent()) {2081if (IsCall && !IsLocal) {2082// Create following instructions for non-local linkage PIC code function2083// calls. These instructions uses IC and magic number -24, so we expand2084// them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.2085// lea %Reg, Symbol@plt_lo(-24)2086// and %Reg, %Reg, (32)02087// sic %s162088// lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT2089BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)2090.addExternalSymbol("abort");2091} else if (IsLocal) {2092Register Tmp1 = MRI.createVirtualRegister(RC);2093Register Tmp2 = MRI.createVirtualRegister(RC);2094// Create following instructions for local linkage PIC code.2095// lea %Tmp1, Symbol@gotoff_lo2096// and %Tmp2, %Tmp1, (32)02097// lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT2098BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2099.addImm(0)2100.addImm(0)2101.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);2102BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2103.addReg(Tmp1, getKillRegState(true))2104.addImm(M0(32));2105BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)2106.addReg(VE::SX15)2107.addReg(Tmp2, 
getKillRegState(true))2108.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);2109} else {2110Register Tmp1 = MRI.createVirtualRegister(RC);2111Register Tmp2 = MRI.createVirtualRegister(RC);2112// Create following instructions for not local linkage PIC code.2113// lea %Tmp1, Symbol@got_lo2114// and %Tmp2, %Tmp1, (32)02115// lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT2116// ld %Result, 0(%Tmp3)2117Register Tmp3 = MRI.createVirtualRegister(RC);2118BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2119.addImm(0)2120.addImm(0)2121.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);2122BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2123.addReg(Tmp1, getKillRegState(true))2124.addImm(M0(32));2125BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)2126.addReg(VE::SX15)2127.addReg(Tmp2, getKillRegState(true))2128.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);2129BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)2130.addReg(Tmp3, getKillRegState(true))2131.addImm(0)2132.addImm(0);2133}2134} else {2135Register Tmp1 = MRI.createVirtualRegister(RC);2136Register Tmp2 = MRI.createVirtualRegister(RC);2137// Create following instructions for non-PIC code.2138// lea %Tmp1, Symbol@lo2139// and %Tmp2, %Tmp1, (32)02140// lea.sl %Result, Symbol@hi(%Tmp2)2141BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2142.addImm(0)2143.addImm(0)2144.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);2145BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2146.addReg(Tmp1, getKillRegState(true))2147.addImm(M0(32));2148BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)2149.addReg(Tmp2, getKillRegState(true))2150.addImm(0)2151.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);2152}2153return Result;2154}21552156void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,2157MachineBasicBlock *MBB,2158MachineBasicBlock *DispatchBB,2159int FI, int Offset) const {2160DebugLoc DL = MI.getDebugLoc();2161const VEInstrInfo *TII = 
Subtarget->getInstrInfo();21622163Register LabelReg =2164prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);21652166// Store an address of DispatchBB to a given jmpbuf[1] where has next IC2167// referenced by longjmp (throw) later.2168MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));2169addFrameReference(MIB, FI, Offset); // jmpbuf[1]2170MIB.addReg(LabelReg, getKillRegState(true));2171}21722173MachineBasicBlock *2174VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,2175MachineBasicBlock *MBB) const {2176DebugLoc DL = MI.getDebugLoc();2177MachineFunction *MF = MBB->getParent();2178const TargetInstrInfo *TII = Subtarget->getInstrInfo();2179const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();2180MachineRegisterInfo &MRI = MF->getRegInfo();21812182const BasicBlock *BB = MBB->getBasicBlock();2183MachineFunction::iterator I = ++MBB->getIterator();21842185// Memory Reference.2186SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),2187MI.memoperands_end());2188Register BufReg = MI.getOperand(1).getReg();21892190Register DstReg;21912192DstReg = MI.getOperand(0).getReg();2193const TargetRegisterClass *RC = MRI.getRegClass(DstReg);2194assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");2195(void)TRI;2196Register MainDestReg = MRI.createVirtualRegister(RC);2197Register RestoreDestReg = MRI.createVirtualRegister(RC);21982199// For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following2200// instructions. 
SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.2201//2202// ThisMBB:2203// buf[3] = %s17 iff %s17 is used as BP2204// buf[1] = RestoreMBB as IC after longjmp2205// # SjLjSetup RestoreMBB2206//2207// MainMBB:2208// v_main = 02209//2210// SinkMBB:2211// v = phi(v_main, MainMBB, v_restore, RestoreMBB)2212// ...2213//2214// RestoreMBB:2215// %s17 = buf[3] = iff %s17 is used as BP2216// v_restore = 12217// goto SinkMBB22182219MachineBasicBlock *ThisMBB = MBB;2220MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);2221MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);2222MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);2223MF->insert(I, MainMBB);2224MF->insert(I, SinkMBB);2225MF->push_back(RestoreMBB);2226RestoreMBB->setMachineBlockAddressTaken();22272228// Transfer the remainder of BB and its successor edges to SinkMBB.2229SinkMBB->splice(SinkMBB->begin(), MBB,2230std::next(MachineBasicBlock::iterator(MI)), MBB->end());2231SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);22322233// ThisMBB:2234Register LabelReg =2235prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);22362237// Store BP in buf[3] iff this function is using BP.2238const VEFrameLowering *TFI = Subtarget->getFrameLowering();2239if (TFI->hasBP(*MF)) {2240MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));2241MIB.addReg(BufReg);2242MIB.addImm(0);2243MIB.addImm(24);2244MIB.addReg(VE::SX17);2245MIB.setMemRefs(MMOs);2246}22472248// Store IP in buf[1].2249MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));2250MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.2251MIB.addImm(0);2252MIB.addImm(8);2253MIB.addReg(LabelReg, getKillRegState(true));2254MIB.setMemRefs(MMOs);22552256// SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.22572258// Insert setup.2259MIB =2260BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);22612262const VERegisterInfo *RegInfo = 
Subtarget->getRegisterInfo();2263MIB.addRegMask(RegInfo->getNoPreservedMask());2264ThisMBB->addSuccessor(MainMBB);2265ThisMBB->addSuccessor(RestoreMBB);22662267// MainMBB:2268BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)2269.addImm(0)2270.addImm(0)2271.addImm(0);2272MainMBB->addSuccessor(SinkMBB);22732274// SinkMBB:2275BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)2276.addReg(MainDestReg)2277.addMBB(MainMBB)2278.addReg(RestoreDestReg)2279.addMBB(RestoreMBB);22802281// RestoreMBB:2282// Restore BP from buf[3] iff this function is using BP. The address of2283// buf is in SX10.2284// FIXME: Better to not use SX10 here2285if (TFI->hasBP(*MF)) {2286MachineInstrBuilder MIB =2287BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);2288MIB.addReg(VE::SX10);2289MIB.addImm(0);2290MIB.addImm(24);2291MIB.setMemRefs(MMOs);2292}2293BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)2294.addImm(0)2295.addImm(0)2296.addImm(1);2297BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);2298RestoreMBB->addSuccessor(SinkMBB);22992300MI.eraseFromParent();2301return SinkMBB;2302}23032304MachineBasicBlock *2305VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,2306MachineBasicBlock *MBB) const {2307DebugLoc DL = MI.getDebugLoc();2308MachineFunction *MF = MBB->getParent();2309const TargetInstrInfo *TII = Subtarget->getInstrInfo();2310MachineRegisterInfo &MRI = MF->getRegInfo();23112312// Memory Reference.2313SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),2314MI.memoperands_end());2315Register BufReg = MI.getOperand(0).getReg();23162317Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);2318// Since FP is only updated here but NOT referenced, it's treated as GPR.2319Register FP = VE::SX9;2320Register SP = VE::SX11;23212322MachineInstrBuilder MIB;23232324MachineBasicBlock *ThisMBB = MBB;23252326// For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.2327//2328// ThisMBB:2329// %fp = load 
buf[0]2330// %jmp = load buf[1]2331// %s10 = buf ; Store an address of buf to SX10 for RestoreMBB2332// %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.2333// jmp %jmp23342335// Reload FP.2336MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);2337MIB.addReg(BufReg);2338MIB.addImm(0);2339MIB.addImm(0);2340MIB.setMemRefs(MMOs);23412342// Reload IP.2343MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);2344MIB.addReg(BufReg);2345MIB.addImm(0);2346MIB.addImm(8);2347MIB.setMemRefs(MMOs);23482349// Copy BufReg to SX10 for later use in setjmp.2350// FIXME: Better to not use SX10 here2351BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)2352.addReg(BufReg)2353.addImm(0);23542355// Reload SP.2356MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);2357MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.2358MIB.addImm(0);2359MIB.addImm(16);2360MIB.setMemRefs(MMOs);23612362// Jump.2363BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))2364.addReg(Tmp, getKillRegState(true))2365.addImm(0);23662367MI.eraseFromParent();2368return ThisMBB;2369}23702371MachineBasicBlock *2372VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,2373MachineBasicBlock *BB) const {2374DebugLoc DL = MI.getDebugLoc();2375MachineFunction *MF = BB->getParent();2376MachineFrameInfo &MFI = MF->getFrameInfo();2377MachineRegisterInfo &MRI = MF->getRegInfo();2378const VEInstrInfo *TII = Subtarget->getInstrInfo();2379int FI = MFI.getFunctionContextIndex();23802381// Get a mapping of the call site numbers to all of the landing pads they're2382// associated with.2383DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;2384unsigned MaxCSNum = 0;2385for (auto &MBB : *MF) {2386if (!MBB.isEHPad())2387continue;23882389MCSymbol *Sym = nullptr;2390for (const auto &MI : MBB) {2391if (MI.isDebugInstr())2392continue;23932394assert(MI.isEHLabel() && "expected EH_LABEL");2395Sym = MI.getOperand(0).getMCSymbol();2396break;2397}23982399if 
(!MF->hasCallSiteLandingPad(Sym))2400continue;24012402for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {2403CallSiteNumToLPad[CSI].push_back(&MBB);2404MaxCSNum = std::max(MaxCSNum, CSI);2405}2406}24072408// Get an ordered list of the machine basic blocks for the jump table.2409std::vector<MachineBasicBlock *> LPadList;2410SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;2411LPadList.reserve(CallSiteNumToLPad.size());24122413for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {2414for (auto &LP : CallSiteNumToLPad[CSI]) {2415LPadList.push_back(LP);2416InvokeBBs.insert(LP->pred_begin(), LP->pred_end());2417}2418}24192420assert(!LPadList.empty() &&2421"No landing pad destinations for the dispatch jump table!");24222423// The %fn_context is allocated like below (from --print-after=sjljehprepare):2424// %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }2425//2426// This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.2427// First `i64` is callsite, so callsite is FI+8.2428static const int OffsetIC = 72;2429static const int OffsetCS = 8;24302431// Create the MBBs for the dispatch code like following:2432//2433// ThisMBB:2434// Prepare DispatchBB address and store it to buf[1].2435// ...2436//2437// DispatchBB:2438// %s15 = GETGOT iff isPositionIndependent2439// %callsite = load callsite2440// brgt.l.t #size of callsites, %callsite, DispContBB2441//2442// TrapBB:2443// Call abort.2444//2445// DispContBB:2446// %breg = address of jump table2447// %pc = load and calculate next pc from %breg and %callsite2448// jmp %pc24492450// Shove the dispatch's address into the return slot in the function context.2451MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();2452DispatchBB->setIsEHPad(true);24532454// Trap BB will causes trap like `assert(0)`.2455MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();2456DispatchBB->addSuccessor(TrapBB);24572458MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();2459DispatchBB->addSuccessor(DispContBB);24602461// 
Insert MBBs.2462MF->push_back(DispatchBB);2463MF->push_back(DispContBB);2464MF->push_back(TrapBB);24652466// Insert code to call abort in the TrapBB.2467Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,2468/* Local */ false, /* Call */ true);2469BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)2470.addReg(Abort, getKillRegState(true))2471.addImm(0)2472.addImm(0);24732474// Insert code into the entry block that creates and registers the function2475// context.2476setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);24772478// Create the jump table and associated information2479unsigned JTE = getJumpTableEncoding();2480MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);2481unsigned MJTI = JTI->createJumpTableIndex(LPadList);24822483const VERegisterInfo &RI = TII->getRegisterInfo();2484// Add a register mask with no preserved registers. This results in all2485// registers being marked as clobbered.2486BuildMI(DispatchBB, DL, TII->get(VE::NOP))2487.addRegMask(RI.getNoPreservedMask());24882489if (isPositionIndependent()) {2490// Force to generate GETGOT, since current implementation doesn't store GOT2491// register.2492BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);2493}24942495// IReg is used as an index in a memory operand and therefore can't be SP2496const TargetRegisterClass *RC = &VE::I64RegClass;2497Register IReg = MRI.createVirtualRegister(RC);2498addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,2499OffsetCS);2500if (LPadList.size() < 64) {2501BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))2502.addImm(VECC::CC_ILE)2503.addImm(LPadList.size())2504.addReg(IReg)2505.addMBB(TrapBB);2506} else {2507assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");2508Register TmpReg = MRI.createVirtualRegister(RC);2509BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)2510.addImm(0)2511.addImm(0)2512.addImm(LPadList.size());2513BuildMI(DispatchBB, DL, 
TII->get(VE::BRCFLrr_t))2514.addImm(VECC::CC_ILE)2515.addReg(TmpReg, getKillRegState(true))2516.addReg(IReg)2517.addMBB(TrapBB);2518}25192520Register BReg = MRI.createVirtualRegister(RC);2521Register Tmp1 = MRI.createVirtualRegister(RC);2522Register Tmp2 = MRI.createVirtualRegister(RC);25232524if (isPositionIndependent()) {2525// Create following instructions for local linkage PIC code.2526// lea %Tmp1, .LJTI0_0@gotoff_lo2527// and %Tmp2, %Tmp1, (32)02528// lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT2529BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)2530.addImm(0)2531.addImm(0)2532.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);2533BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)2534.addReg(Tmp1, getKillRegState(true))2535.addImm(M0(32));2536BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)2537.addReg(VE::SX15)2538.addReg(Tmp2, getKillRegState(true))2539.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);2540} else {2541// Create following instructions for non-PIC code.2542// lea %Tmp1, .LJTI0_0@lo2543// and %Tmp2, %Tmp1, (32)02544// lea.sl %BReg, .LJTI0_0@hi(%Tmp2)2545BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)2546.addImm(0)2547.addImm(0)2548.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);2549BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)2550.addReg(Tmp1, getKillRegState(true))2551.addImm(M0(32));2552BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)2553.addReg(Tmp2, getKillRegState(true))2554.addImm(0)2555.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);2556}25572558switch (JTE) {2559case MachineJumpTableInfo::EK_BlockAddress: {2560// Generate simple block address code for no-PIC model.2561// sll %Tmp1, %IReg, 32562// lds %TReg, 0(%Tmp1, %BReg)2563// bcfla %TReg25642565Register TReg = MRI.createVirtualRegister(RC);2566Register Tmp1 = MRI.createVirtualRegister(RC);25672568BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)2569.addReg(IReg, getKillRegState(true))2570.addImm(3);2571BuildMI(DispContBB, DL, 
TII->get(VE::LDrri), TReg)2572.addReg(BReg, getKillRegState(true))2573.addReg(Tmp1, getKillRegState(true))2574.addImm(0);2575BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))2576.addReg(TReg, getKillRegState(true))2577.addImm(0);2578break;2579}2580case MachineJumpTableInfo::EK_Custom32: {2581// Generate block address code using differences from the function pointer2582// for PIC model.2583// sll %Tmp1, %IReg, 22584// ldl.zx %OReg, 0(%Tmp1, %BReg)2585// Prepare function address in BReg2.2586// adds.l %TReg, %BReg2, %OReg2587// bcfla %TReg25882589assert(isPositionIndependent());2590Register OReg = MRI.createVirtualRegister(RC);2591Register TReg = MRI.createVirtualRegister(RC);2592Register Tmp1 = MRI.createVirtualRegister(RC);25932594BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)2595.addReg(IReg, getKillRegState(true))2596.addImm(2);2597BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)2598.addReg(BReg, getKillRegState(true))2599.addReg(Tmp1, getKillRegState(true))2600.addImm(0);2601Register BReg2 =2602prepareSymbol(*DispContBB, DispContBB->end(),2603DispContBB->getParent()->getName(), DL, /* Local */ true);2604BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)2605.addReg(OReg, getKillRegState(true))2606.addReg(BReg2, getKillRegState(true));2607BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))2608.addReg(TReg, getKillRegState(true))2609.addImm(0);2610break;2611}2612default:2613llvm_unreachable("Unexpected jump table encoding");2614}26152616// Add the jump table entries as successors to the MBB.2617SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;2618for (auto &LP : LPadList)2619if (SeenMBBs.insert(LP).second)2620DispContBB->addSuccessor(LP);26212622// N.B. 
// the order the invoke BBs are processed in doesn't matter here.
SmallVector<MachineBasicBlock *, 64> MBBLPads;
const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
for (MachineBasicBlock *MBB : InvokeBBs) {
  // Remove the landing pad successor from the invoke block and replace it
  // with the new dispatch block.
  // Keep a copy of Successors since it's modified inside the loop.
  SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
                                                 MBB->succ_rend());
  // FIXME: Avoid quadratic complexity.
  for (auto *MBBS : Successors) {
    if (MBBS->isEHPad()) {
      MBB->removeSuccessor(MBBS);
      MBBLPads.push_back(MBBS);
    }
  }

  // Every former invoke block now branches to the single dispatch block.
  MBB->addSuccessor(DispatchBB);

  // Find the invoke call and mark all of the callee-saved registers as
  // 'implicit defined' so that they're spilled.  This prevents code from
  // moving instructions to before the EH block, where they will never be
  // executed.
  for (auto &II : reverse(*MBB)) {
    if (!II.isCall())
      continue;

    // Registers already mentioned on the call don't need an extra
    // implicit-def operand.
    DenseMap<Register, bool> DefRegs;
    for (auto &MOp : II.operands())
      if (MOp.isReg())
        DefRegs[MOp.getReg()] = true;

    MachineInstrBuilder MIB(*MF, &II);
    for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
      Register Reg = SavedRegs[RI];
      if (!DefRegs[Reg])
        MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
    }

    // Only the last call in the block is the invoke; stop after it.
    break;
  }
}

// Mark all former landing pads as non-landing pads.  The dispatch is the
// only landing pad now.
for (auto &LP : MBBLPads)
  LP->setIsEHPad(false);

// The instruction is gone now.
MI.eraseFromParent();
return BB;
}

/// Expand pseudo instructions that were marked usesCustomInserter at
/// instruction-selection time.  Dispatches on the pseudo's opcode.
MachineBasicBlock *
VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                              MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown Custom Instruction!");
  case VE::EH_SjLj_LongJmp:
    return emitEHSjLjLongJmp(MI, BB);
  case VE::EH_SjLj_SetJmp:
    return emitEHSjLjSetJmp(MI, BB);
  case VE::EH_SjLj_Setup_Dispatch:
    return emitSjLjDispatchBlock(MI, BB);
  }
}

/// Return true iff \p V is a scalar constant encodable as VE's simm7
/// immediate (a 7-bit signed integer operand field).
static bool isSimm7(SDValue V) {
  EVT VT = V.getValueType();
  if (VT.isVector())
    return false;

  if (VT.isInteger()) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
      return isInt<7>(C->getSExtValue());
  } else if (VT.isFloatingPoint()) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
      if (VT == MVT::f32 || VT == MVT::f64) {
        const APInt &Imm = C->getValueAPF().bitcastToAPInt();
        uint64_t Val = Imm.getSExtValue();
        if (Imm.getBitWidth() == 32)
          Val <<= 32; // An f32 immediate is placed in the higher bits on VE.
        return isInt<7>(Val);
      }
    }
  }
  return false;
}

/// Return true iff \p V is a scalar constant encodable as VE's MImm
/// immediate form.
static bool isMImm(SDValue V) {
  EVT VT = V.getValueType();
  if (VT.isVector())
    return false;

  if (VT.isInteger()) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
      return isMImmVal(getImmVal(C));
  } else if (VT.isFloatingPoint()) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
      if (VT == MVT::f32) {
        // A float value is placed at the higher bits, so ignore the lower
        // 32 bits.
        return isMImm32Val(getFpImmVal(C) >> 32);
      } else if (VT == MVT::f64) {
        return isMImmVal(getFpImmVal(C));
      }
    }
  }
  return false;
}

/// Choose the VE comparison opcode for a compare of type \p SrcVT under
/// condition \p CC: CMPQ for f128, CMPF for other FP types, and
/// CMPI/CMPU for signed/unsigned integer conditions respectively.
static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
  if (SrcVT.isFloatingPoint()) {
    if (SrcVT == MVT::f128)
      return VEISD::CMPQ;
    return VEISD::CMPF;
  }
  return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
}

/// Result type of the generated comparison node.  VE's f128 comparison
/// produces an f64 result; every other type compares to itself.
static EVT decideCompType(EVT SrcVT) {
  if (SrcVT == MVT::f128)
    return MVT::f64;
  return SrcVT;
}

/// Return true when an explicit comparison against zero can be omitted,
/// i.e. the LHS value itself can feed the conditional operation directly.
/// \p WithCMov indicates the consumer is a conditional move.
static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
                                    bool WithCMov) {
  if (SrcVT.isFloatingPoint()) {
    // For the case of floating point setcc, only unordered comparison
    // or general comparison with -enable-no-nans-fp-math option reach
    // here, so it is safe even if values are NaN.  Only f128 isn't
    // safe since VE uses the f64 result of an f128 comparison.
    return SrcVT != MVT::f128;
  }
  if (isIntEqualitySetCC(CC)) {
    // For the case of equal or not equal, it is safe without comparison
    // with 0.
    return true;
  }
  if (WithCMov) {
    // For the case of integer setcc with cmov, all signed comparisons with 0
    // are safe.
    return isSignedIntSetCC(CC);
  }
  // For the case of integer setcc, only signed 64-bit comparison is safe.
  // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it
  // becomes less than 0 without CMPU.  For 32 bits, the other half of the
  // 32 bits is unconditional, so it is not safe without CMPI either.
  return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
}

/// Build the comparison node feeding a conditional operation, or return
/// \p LHS directly when comparing with zero is provably redundant.
static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
                                  ISD::CondCode CC, bool WithCMov,
                                  const SDLoc &DL, SelectionDAG &DAG) {
  // Compare values.
  // If RHS is 0 and it is safe to calculate without
  // comparison, we don't generate an instruction for comparison.
  EVT CompVT = decideCompType(VT);
  if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
      (isNullConstant(RHS) || isNullFPConstant(RHS))) {
    return LHS;
  }
  return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
}

/// Combine a scalar ISD::SELECT into VEISD::CMOV, swapping True/False (and
/// inverting the condition) so that an MImm constant lands in the True
/// operand, which VE's conditional move can encode directly.
SDValue VETargetLowering::combineSelect(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SELECT &&
         "Should be called with a SELECT node");
  // SELECT picks True when Cond is non-zero, hence SETNE as the base CC.
  ISD::CondCode CC = ISD::CondCode::SETNE;
  SDValue Cond = N->getOperand(0);
  SDValue True = N->getOperand(1);
  SDValue False = N->getOperand(2);

  // We handle only scalar SELECT.
  EVT VT = N->getValueType(0);
  if (VT.isVector())
    return SDValue();

  // Perform combineSelect after legalize DAG.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  EVT VT0 = Cond.getValueType();
  if (isMImm(True)) {
    // VE's conditional move can handle MImm in the True clause, so nothing
    // to do.
  } else if (isMImm(False)) {
    // VE's conditional move can handle MImm in the True clause, so swap the
    // True and False clauses if False has an MImm value.  And, update the
    // condition code.
    std::swap(True, False);
    CC = getSetCCInverse(CC, VT0);
  }

  SDLoc DL(N);
  SelectionDAG &DAG = DCI.DAG;
  VECC::CondCode VECCVal;
  if (VT0.isFloatingPoint()) {
    VECCVal = fpCondCode2Fcc(CC);
  } else {
    VECCVal = intCondCode2Icc(CC);
  }
  SDValue Ops[] = {Cond, True, False,
                   DAG.getConstant(VECCVal, DL, MVT::i32)};
  return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
}

/// Combine a scalar ISD::SELECT_CC into a VE comparison followed by
/// VEISD::CMOV, rearranging operands so immediates sit where VE's
/// instructions can encode them (MImm in RHS/True, simm7 via swap).
SDValue VETargetLowering::combineSelectCC(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SELECT_CC &&
         "Should be called with a SELECT_CC node");
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue True = N->getOperand(2);
  SDValue False = N->getOperand(3);

  // We handle only scalar SELECT_CC.
  EVT VT = N->getValueType(0);
  if (VT.isVector())
    return SDValue();

  // Perform combineSelectCC after legalize DAG.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // We handle only i32/i64/f32/f64/f128 comparisons.
  EVT LHSVT = LHS.getValueType();
  assert(LHSVT == RHS.getValueType());
  switch (LHSVT.getSimpleVT().SimpleTy) {
  case MVT::i32:
  case MVT::i64:
  case MVT::f32:
  case MVT::f64:
  case MVT::f128:
    break;
  default:
    // Return SDValue to let llvm handle other types.
    return SDValue();
  }

  if (isMImm(RHS)) {
    // VE's comparison can handle MImm in RHS, so nothing to do.
  } else if (isSimm7(RHS)) {
    // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
    // update condition code.
    std::swap(LHS, RHS);
    CC = getSetCCSwappedOperands(CC);
  }
  if (isMImm(True)) {
    // VE's conditional move can handle MImm in the True clause, so nothing
    // to do.
  } else if (isMImm(False)) {
    // VE's conditional move can handle MImm in the True clause, so swap the
    // True and False clauses if False has an MImm value.
    // And, update the condition code.
    std::swap(True, False);
    CC = getSetCCInverse(CC, LHSVT);
  }

  SDLoc DL(N);
  SelectionDAG &DAG = DCI.DAG;

  // The consumer is a CMOV, which widens the set of cases where the
  // comparison-with-zero can be skipped (see safeWithoutCompWithNull).
  bool WithCMov = true;
  SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);

  VECC::CondCode VECCVal;
  if (LHSVT.isFloatingPoint()) {
    VECCVal = fpCondCode2Fcc(CC);
  } else {
    VECCVal = intCondCode2Icc(CC);
  }
  SDValue Ops[] = {CompNode, True, False,
                   DAG.getConstant(VECCVal, DL, MVT::i32)};
  return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
}

static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);

/// Return true if \p User consumes its use of \p N (a truncate's result)
/// purely as an i32 value, so the truncate can later be replaced by a
/// cheap subregister extract (see combineTRUNCATE).
static bool isI32Insn(const SDNode *User, const SDNode *N) {
  switch (User->getOpcode()) {
  default:
    return false;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SETCC:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::BSWAP:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::BR_CC:
  case ISD::BITCAST:
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_SWAP:
  case VEISD::CMPU:
  case VEISD::CMPI:
    return true;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() != ISD::SRL)
      return true;
    // (srl (trunc (srl ...))) may be optimized by combining srl, so
    // doesn't optimize trunc now.
    return false;
  case ISD::SELECT_CC:
    if (User->getOperand(2).getNode() != N &&
        User->getOperand(3).getNode() != N)
      return true;
    return isI32InsnAllUses(User, N);
  case VEISD::CMOV:
    // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
    // However, trunc in true or false clauses is not safe.
    if (User->getOperand(1).getNode() != N &&
        User->getOperand(2).getNode() != N &&
        isa<ConstantSDNode>(User->getOperand(3))) {
      VECC::CondCode VECCVal =
          static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
      return isIntVECondCode(VECCVal);
    }
    [[fallthrough]];
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SELECT:
  case ISD::CopyToReg:
    // Check all use of selections, bit operations, and copies.  If all of
    // them are safe, optimize truncate to extract_subreg.
    return isI32InsnAllUses(User, N);
  }
}

/// Check every use of \p User; return true only when each one either is an
/// i32-safe instruction itself or is an extension that will re-clear the
/// upper 32 bits anyway.
static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
  // Check all use of User node.  If all of them are safe, optimize
  // truncate to extract_subreg.
  for (const SDNode *U : User->uses()) {
    switch (U->getOpcode()) {
    default:
      // If the use is an instruction which treats the source operand as i32,
      // it is safe to avoid truncate here.
      if (isI32Insn(U, N))
        continue;
      break;
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND: {
      // Special optimizations to the combination of ext and trunc.
      // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
      // since this truncate instruction clears higher 32 bits which is filled
      // by one of ext instructions later.
      assert(N->getValueType(0) == MVT::i32 &&
             "find truncate to not i32 integer");
      if (User->getOpcode() == ISD::SELECT_CC ||
          User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
        continue;
      break;
    }
    }
    return false;
  }
  return true;
}

// Optimize TRUNCATE in DAG combining.  Optimizing it in CUSTOM lower is
// sometimes too early.  Optimizing it in DAG pattern matching in
// VEInstrInfo.td is sometimes too late.
So, doing it at here.2987SDValue VETargetLowering::combineTRUNCATE(SDNode *N,2988DAGCombinerInfo &DCI) const {2989assert(N->getOpcode() == ISD::TRUNCATE &&2990"Should be called with a TRUNCATE node");29912992SelectionDAG &DAG = DCI.DAG;2993SDLoc DL(N);2994EVT VT = N->getValueType(0);29952996// We prefer to do this when all types are legal.2997if (!DCI.isAfterLegalizeDAG())2998return SDValue();29993000// Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.3001if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&3002isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&3003isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))3004return SDValue();30053006// Check all use of this TRUNCATE.3007for (const SDNode *User : N->uses()) {3008// Make sure that we're not going to replace TRUNCATE for non i323009// instructions.3010//3011// FIXME: Although we could sometimes handle this, and it does occur in3012// practice that one of the condition inputs to the select is also one of3013// the outputs, we currently can't deal with this.3014if (isI32Insn(User, N))3015continue;30163017return SDValue();3018}30193020SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);3021return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,3022N->getOperand(0), SubI32),30230);3024}30253026SDValue VETargetLowering::PerformDAGCombine(SDNode *N,3027DAGCombinerInfo &DCI) const {3028switch (N->getOpcode()) {3029default:3030break;3031case ISD::SELECT:3032return combineSelect(N, DCI);3033case ISD::SELECT_CC:3034return combineSelectCC(N, DCI);3035case ISD::TRUNCATE:3036return combineTRUNCATE(N, DCI);3037}30383039return SDValue();3040}30413042//===----------------------------------------------------------------------===//3043// VE Inline Assembly Support3044//===----------------------------------------------------------------------===//30453046VETargetLowering::ConstraintType3047VETargetLowering::getConstraintType(StringRef Constraint) const {3048if 
(Constraint.size() == 1) {3049switch (Constraint[0]) {3050default:3051break;3052case 'v': // vector registers3053return C_RegisterClass;3054}3055}3056return TargetLowering::getConstraintType(Constraint);3057}30583059std::pair<unsigned, const TargetRegisterClass *>3060VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,3061StringRef Constraint,3062MVT VT) const {3063const TargetRegisterClass *RC = nullptr;3064if (Constraint.size() == 1) {3065switch (Constraint[0]) {3066default:3067return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);3068case 'r':3069RC = &VE::I64RegClass;3070break;3071case 'v':3072RC = &VE::V64RegClass;3073break;3074}3075return std::make_pair(0U, RC);3076}30773078return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);3079}30803081//===----------------------------------------------------------------------===//3082// VE Target Optimization Support3083//===----------------------------------------------------------------------===//30843085unsigned VETargetLowering::getMinimumJumpTableEntries() const {3086// Specify 8 for PIC model to relieve the impact of PIC load instructions.3087if (isJumpTableRelative())3088return 8;30893090return TargetLowering::getMinimumJumpTableEntries();3091}30923093bool VETargetLowering::hasAndNot(SDValue Y) const {3094EVT VT = Y.getValueType();30953096// VE doesn't have vector and not instruction.3097if (VT.isVector())3098return false;30993100// VE allows different immediate values for X and Y where ~X & Y.3101// Only simm7 works for X, and only mimm works for Y on VE. However, this3102// function is used to check whether an immediate value is OK for and-not3103// instruction as both X and Y. Generating additional instruction to3104// retrieve an immediate value is no good since the purpose of this3105// function is to convert a series of 3 instructions to another series of3106// 3 instructions with better parallelism. 
  // Therefore, we return false
  // for all immediate values now.
  // FIXME: Change hasAndNot function to have two operands to make it work
  // correctly with Aurora VE.
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's ok for generic registers.
  return true;
}

/// Lower EXTRACT_VECTOR_ELT for the packed v512i32/v512f32 types: read the
/// 64-bit element that holds the pair, shift the wanted 32-bit half down,
/// and mask/extract it as an i32 (bitcast to f32 for v512f32).
SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);
  (void)VT;
  // Example of codes:
  //   %packed_v = extractelt %vr, %idx / 2
  //   %v = %packed_v >> (%idx % 2 * 32)
  //   %res = %v & 0xffffffff

  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  SDLoc DL(Op);
  SDValue Result = Op;
  if (false /* Idx->isConstant() */) {
    // TODO: optimized implementation using constant values
  } else {
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    // HalfIdx = Idx / 2 selects the 64-bit element containing the pair.
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    // Shift = ((Idx & 1) ^ 1) << 5, i.e. 32 for even indices (upper half)
    // and 0 for odd indices (lower half).
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
    SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
    Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                        MVT::i32, PackedElt, SubI32),
                     0);

    if (Op.getSimpleValueType() == MVT::f32) {
      Result = DAG.getBitcast(MVT::f32, Result);
    } else {
      assert(Op.getSimpleValueType() == MVT::i32);
    }
  }
  return Result;
}

/// Lower INSERT_VECTOR_ELT for the packed v512i32/v512f32 types: load the
/// 64-bit element holding the pair, clear the targeted 32-bit half, OR in
/// the new value, and store the element back into the vector register.
SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);
  (void)VT;
  // The v512i32 and v512f32 starts from upper bits (0..31).  This "upper
  // bits" required `val << 32` from C implementation's point of view.
  //
  // Example of codes:
  //   %packed_elt = extractelt %vr, (%idx >> 1)
  //   %shift = ((%idx & 1) ^ 1) << 5
  //   %packed_elt &= 0xffffffff00000000 >> shift
  //   %packed_elt |= (zext %val) << shift
  //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)

  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  // Normalize index and value to i64/i32 integer form.
  if (Idx.getSimpleValueType() == MVT::i32)
    Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
  if (Val.getSimpleValueType() == MVT::f32)
    Val = DAG.getBitcast(MVT::i32, Val);
  assert(Val.getSimpleValueType() == MVT::i32);
  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);

  SDValue Result = Op;
  if (false /* Idx->isConstant()*/) {
    // TODO: optimized implementation using constant values
  } else {
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    // HalfIdx = Idx / 2 selects the 64-bit element containing the pair.
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    // Shift = ((Idx & 1) ^ 1) << 5 — 32 for even indices, 0 for odd.
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    // Keep the untouched half, then merge in the new value.
    SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
    Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
    PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
    Result =
        SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
                                   {HalfIdx, PackedElt, Vec}),
                0);
  }
  return Result;
}