Path: contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp

//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
    cl::Hidden, cl::init(false),
    cl::desc("Expand memcpy into load/store pairs in order"));

static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                 SDValue Val = {}) {
  std::string Str;
  if (Val) {
    raw_string_ostream OS(Str);
    Val->print(OS);
    OS << ' ';
  }
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
      MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
}

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM) {

  // Set up the register classes.
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Mark unsupported atomic operations as Custom so we can emit better
  // error messages than the fatal errors SelectionDAG would produce.
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    if (VT == MVT::i32) {
      if (STI.getHasAlu32())
        continue;
    } else {
      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    }

    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
  }
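
  // For example (an illustration, not from the original comments): without
  // ALU32, an IR-level "atomicrmw xor i32" has no BPF encoding, so marking
  // it Custom routes it to ReplaceNodeResults() below, which reports
  // "unsupported atomic operation" at the offending source location instead
  // of SelectionDAG's generic fatal error.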

  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    if (!STI.hasSdivSmod()) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::SREM, VT, Custom);
    }
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!STI.hasMovsx()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  }

  // Extended load operations for i1 types must be promoted.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    if (!STI.hasLdsx()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    }
  }

  setBooleanContents(ZeroOrOneBooleanContent);
  setMaxAtomicSizeInBitsSupported(64);

  // Function alignments
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // LLVM generic code will try to expand memcpy into load/store pairs at
    // this stage, which is before quite a few IR optimization passes. The
    // loads and stores could therefore be moved apart from each other, which
    // causes trouble for the memcpy pattern matchers inside kernel eBPF JIT
    // compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs won't be touched and can be kept in order.
    // Hence, we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
    // expander EmitTargetCodeForMemcpy.
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
    MaxLoadsPerMemcmp = 0;
  } else {
    // Inline memcpy() so that the kernel can see the explicit copy.
    unsigned CommonMaxStores =
        STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
    MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
  }
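
  // Illustrative invocation (an assumption about typical usage, not taken
  // from this file): "llc -mtriple=bpf -bpf-expand-memcpy-in-order foo.ll"
  // takes the first branch above, so memcpy survives until
  // EmitTargetCodeForMemcpy and the kernel JIT sees its loads and stores in
  // their original order.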

  // CPU/Feature control
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
  HasMovsx = STI.hasMovsx();
}

bool BPFTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}
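
// BPF narrow loads zero-fill the upper bits of the 64-bit destination
// register, so zero-extending the result of an i8/i16/i32 load is effectively
// free; reporting that lets the DAG combiner drop redundant zext nodes.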
bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
    MVT MT1 = VT1.getSimpleVT().SimpleTy;
    MVT MT2 = VT2.getSimpleVT().SimpleTy;
    if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
        (MT2 == MVT::i32 || MT2 == MVT::i64))
      return true;
  }
  return TargetLoweringBase::isZExtFree(Val, VT2);
}

BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'w':
      return C_RegisterClass;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    case 'w':
      if (HasAlu32)
        return std::make_pair(0U, &BPF::GPR32RegClass);
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void BPFTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  const char *Msg;
  uint32_t Opcode = N->getOpcode();
  switch (Opcode) {
  default:
    report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
      Msg = "unsupported atomic operation, please use 32/64 bit version";
    else
      Msg = "unsupported atomic operation, please use 64 bit version";
    break;
  }

  SDLoc DL(N);
  // We'll still produce a fatal error downstream, but this diagnostic is more
  // user-friendly.
  fail(DL, DAG, Msg);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SDIV:
  case ISD::SREM:
    return LowerSDIVSREM(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"

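// BPF passes the first five arguments in registers R1-R5 (see MaxArgs below)
// and has no stack argument passing; anything that would need the stack is
// diagnosed via fail() instead of being lowered. For illustration (not from
// the original comments): a prototype such as
//   long f(long a, long b, long c, long d, long e);  // five register args
// lowers cleanly, while a sixth argument triggers a diagnostic in LowerCall().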
SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  bool HasMemArgs = false;
  for (size_t I = 0; I < ArgLocs.size(); ++I) {
    auto &VA = ArgLocs[I];

    if (VA.isRegLoc()) {
      // Arguments passed in registers.
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        std::string Str;
        {
          raw_string_ostream OS(Str);
          RegVT.print(OS);
        }
        report_fatal_error("unhandled argument type: " + Twine(Str));
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert
        // an assert[sz]ext to capture this, then truncate to the right size.
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      if (VA.isMemLoc())
        HasMemArgs = true;
      else
        report_fatal_error("unhandled argument location");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }
  if (HasMemArgs)
    fail(DL, DAG, "stack arguments are not supported");
  if (IsVarArg)
    fail(DL, DAG, "variadic functions are not supported");
  if (MF.getFunction().hasStructRetAttr())
    fail(DL, DAG, "aggregate returns are not supported");

  return Chain;
}

const size_t BPFTargetLowering::MaxArgs = 5;
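
// Outgoing calls use the same five register slots. As a sketch of the
// diagnostics LowerCall() below produces (wording from its fail() calls):
// a sixth argument yields "too many arguments", and passing a struct by
// value yields "pass by value not supported".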
SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // The BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("unsupported calling convention: " + Twine(CallConv));
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getStackSize();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many arguments", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;
    fail(CLI.DL, DAG, "pass by value not supported", Callee);
    break;
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk the argument assignments.
  for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue &Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into the RegsToPass vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      report_fatal_error("stack arguments are not supported");
  }

  SDValue InGlue;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InGlue
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
    fail(CLI.DL, DAG,
         Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
               "' is not supported."));
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
  InGlue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}
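
// Return values travel back in a single register (R0 in the BPF calling
// convention); aggregate and stack returns are diagnosed below rather than
// lowered.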
SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_GLUE;

  // CCValAssign - represents the assignment of the return value to a
  // location.
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "aggregate returns are not supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (size_t i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    if (!VA.isRegLoc())
      report_fatal_error("stack return values are not supported");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);

    // Guarantee that all emitted copies are stuck together,
    // so they are not scheduled apart.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() > 1) {
    fail(DL, DAG, "only small returns supported");
    for (auto &In : Ins)
      InVals.push_back(DAG.getConstant(0, DL, In.VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  // Copy all of the result registers out of their specified physregs.
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InGlue).getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}
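
// Without the jump-extension feature (getHasJmpExt), BPF only has the
// ">"-family conditional jumps (JGT/JGE/JSGT/JSGE), so "<"-family compares
// must be rewritten by swapping their operands, e.g. "a < b" becomes
// "b > a". NegateCC() performs that operand swap.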
static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG,
       "unsupported signed division, please convert to unsigned div/mod.");
  return DAG.getUNDEF(Op->getValueType(0));
}

SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG, "unsupported dynamic stack allocation");
  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
  return DAG.getMergeValues(Ops, SDLoc());
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
}

const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((BPFISD::NodeType)Opcode) {
  case BPFISD::FIRST_NUMBER:
    break;
  case BPFISD::RET_GLUE:
    return "BPFISD::RET_GLUE";
  case BPFISD::CALL:
    return "BPFISD::CALL";
  case BPFISD::SELECT_CC:
    return "BPFISD::SELECT_CC";
  case BPFISD::BR_CC:
    return "BPFISD::BR_CC";
  case BPFISD::Wrapper:
    return "BPFISD::Wrapper";
  case BPFISD::MEMCPY:
    return "BPFISD::MEMCPY";
  }
  return nullptr;
}

static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

template <class NodeTy>
SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                   unsigned Flags) const {
  SDLoc DL(N);

  SDValue GA = getTargetNode(N, DL, MVT::i64, DAG, Flags);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}
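
// With isOffsetFoldingLegal() returning false (see above), global addresses
// are expected to reach this point with a zero offset; anything else is
// treated as a lowering bug rather than a user-facing diagnostic.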
SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  if (N->getOffset() != 0)
    report_fatal_error("invalid offset for global address: " +
                       Twine(N->getOffset()));
  return getAddr(N, DAG);
}

SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }
  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  if (HasMovsx) {
    BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
    // With movsx the result is already fully sign-extended.
    return PromotedReg0;
  }
  BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
  BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
      .addReg(PromotedReg0).addImm(32);
  BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
      .addReg(PromotedReg1).addImm(32);

  return PromotedReg2;
}
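
// For reference, the sequences EmitSubregExt() emits (a sketch):
//   unsigned:               rd = mov_32_64 ws        (upper bits zeroed)
//   signed, with movsx:     rd = movsx_rr_32 ws
//   signed, without movsx:  rd = mov_32_64 ws; rd <<= 32; rd s>>= 32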

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
    const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function does custom insertion while lowering BPFISD::MEMCPY, which
  // only has two register operands from memcpy semantics: the copy source
  // address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
  // a third scratch register to serve as the destination register of the load
  // and the source register of the store.
  //
  // The scratch register carries the Define | Dead | EarlyClobber flags.
  // The EarlyClobber flag has the semantic property that the operand it is
  // attached to is clobbered before the rest of the inputs are read. Hence it
  // must be unique among the operands to the instruction. The Define flag is
  // needed to convince the machine verifier that an Undef value isn't a
  // problem, as we are loading memory into the register anyway. The Dead flag
  // is needed because the value in the scratch register isn't supposed to be
  // used by any other instruction.
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}
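
// After the addition above, the MEMCPY pseudo carries the destination and
// source address registers plus the scratch operand; the expansion into
// actual load/store pairs happens after register allocation (see the post-RA
// pseudo expansion in BPFInstrInfo.cpp).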

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
    report_fatal_error("unhandled instruction type: " + Twine(Opc));
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);

  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  //  ...
  //   TrueVal = ...
  //   jmp_XX r1, r2 goto Copy1MBB
  //   fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert the branch corresponding to the condition code.
  int CC = MI.getOperand(3).getImm();
  int NewCC;
  switch (CC) {
#define SET_NEWCC(X, Y)                                                 \
  case ISD::X:                                                          \
    if (is32BitCmp && HasJmp32)                                         \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32;           \
    else                                                                \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri;                 \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // At the moment eBPF only has 64-bit comparisons, so any 32-bit comparison
  // needs to be promoted. However, if the 32-bit comparison operands are
  // destination registers, they are implicitly zero-extended already and no
  // explicit zero-extend sequence is needed for them.
  //
  // We simply do the extension in all situations in this method, and try to
  // remove the unnecessary ones in the BPFMIPeephole pass.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Check the immediate before we build the J*_ri instruction.
    if (!isInt<32>(imm32))
      report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //  %FalseValue = ...
  //  # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  //  ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}

bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}