Path: blob/main/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

namespace {

// All possible address modes, plus some.
struct Address {
  enum {
    RegBase,
    FrameIndexBase
  } BaseType = RegBase;

  union {
    unsigned Reg;
    int FI;
  } Base;

  int Offset = 0;

  // Innocuous defaults for our address.
  Address() {
    Base.Reg = 0;
  }
};

class ARMFastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  Module &M;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

public:
  explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo),
        Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
        M(const_cast<Module &>(*funcInfo.Fn->getParent())),
        TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
        TLI(*Subtarget->getTargetLowering()) {
    AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
    isThumb2 = AFI->isThumbFunction();
    Context = &funcInfo.Fn->getContext();
  }

private:
  // Code from FastISel.cpp.

  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC, unsigned Op0);
  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, unsigned Op1);
  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, uint64_t Imm);
  unsigned fastEmitInst_i(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC,
                          uint64_t Imm);

  // Backend specific FastISel code.

  bool fastSelectInstruction(const Instruction *I) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;
  bool fastLowerArguments() override;

#include "ARMGenFastISel.inc"

  // Instruction selection routines.

  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectCmp(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectIToFP(const Instruction *I, bool isSigned);
  bool SelectFPToI(const Instruction *I, bool isSigned);
  bool SelectDiv(const Instruction *I, bool isSigned);
  bool SelectRem(const Instruction *I, bool isSigned);
  bool SelectCall(const Instruction *I, const char *IntrMemName);
  bool SelectIntrinsicCall(const IntrinsicInst &I);
  bool SelectSelect(const Instruction *I);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);
  bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

  // Utility routines.

  bool isPositionIndependent() const;
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadTypeLegal(Type *Ty, MVT &VT);
  bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                  bool isZExt);
  bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                   MaybeAlign Alignment = std::nullopt, bool isZExt = true,
                   bool allocReg = true);
  bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                    MaybeAlign Alignment = std::nullopt);
  bool ARMComputeAddress(const Value *Obj, Address &Addr);
  void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
  bool ARMIsMemCpySmall(uint64_t Len);
  bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                             MaybeAlign Alignment);
  unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned ARMMaterializeInt(const Constant *C, MVT VT);
  unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
  unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
  unsigned ARMSelectCallOp(bool UseReg);
  unsigned ARMLowerPICELF(const GlobalValue *GV, MVT VT);

  const TargetLowering *getTargetLowering() { return &TLI; }

  // Call handling routines.

  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                bool Return,
                                bool isVarArg);
  bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                       SmallVectorImpl<Register> &ArgRegs,
                       SmallVectorImpl<MVT> &ArgVTs,
                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                       SmallVectorImpl<Register> &RegArgs,
                       CallingConv::ID CC,
                       unsigned &NumBytes,
                       bool isVarArg);
  unsigned getLibcallReg(const Twine &Name);
  bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                  const Instruction *I, CallingConv::ID CC,
                  unsigned &NumBytes, bool isVarArg);
  bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

  // OptionalDef handling routines.

  bool isARMNEONPred(const MachineInstr *MI);
  bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
  const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
  void AddLoadStoreOperands(MVT VT, Address &Addr,
                            const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags, bool useAM3);
};

} // end anonymous namespace

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
      AFI->isThumb2Function())
    return MI->isPredicable();

  for (const MCOperandInfo &opInfo : MCID.operands())
    if (opInfo.isPredicate())
      return true;

  return false;
}

// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyways.
  if (isARMNEONPred(MI))
    MIB.add(predOps(ARMCC::AL));

  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR))
    MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
  return MIB;
}

unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
                            ResultReg).addReg(Op0));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, unsigned Op1) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operands are sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  Op1 = constrainOperandRegClass(II, Op1, 2);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
            .addReg(Op0)
            .addReg(Op1));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0)
                        .addReg(Op1));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, uint64_t Imm) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
            .addReg(Op0)
            .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0)
                        .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
                            ResultReg).addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(ARM::VMOVSR), MoveReg)
                      .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(ARM::VMOVRS), MoveReg)
                      .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    Register DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), DestReg).addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2Base()) return false;

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx)
          .addReg(0));
  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return 0;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                               &ARM::GPRRegClass;
    Register ImmReg = createResultReg(RC);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), ImmReg)
                        .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                                 &ARM::GPRRegClass;
      Register ImmReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(Opc), ImmReg)
                          .addImm(Imm));
      return ImmReg;
    }
  }

  unsigned ResultReg = 0;
  if (Subtarget->useMovt())
    ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  if (ResultReg)
    return ResultReg;

  // Load from constant pool.  For now 32-bit only.
  if (VT != MVT::i32)
    return 0;

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(C->getType());
  unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
  ResultReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::t2LDRpci), ResultReg)
                        .addConstantPoolIndex(Idx));
  else {
    // The extra immediate is for addrmode2.
    ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::LDRcp), ResultReg)
                        .addConstantPoolIndex(Idx)
                        .addImm(0));
  }
  return ResultReg;
}

bool ARMFastISel::isPositionIndependent() const {
  return TLI.isPositionIndependent();
}

unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32 || GV->isThreadLocal()) return 0;

  // ROPI/RWPI not currently supported.
  if (Subtarget->isROPI() || Subtarget->isRWPI())
    return 0;

  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  Register DestReg = createResultReg(RC);

  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;

  bool IsPositionIndependent = isPositionIndependent();
  // Use movw+movt when possible, it avoids constant pool entries.
  // Non-darwin targets only support static movt relocations in FastISel.
  if (Subtarget->useMovt() &&
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
    unsigned Opc;
    unsigned char TF = 0;
    if (Subtarget->isTargetMachO())
      TF = ARMII::MO_NONLAZY;

    if (IsPositionIndependent)
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    else
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
  } else {
    // MachineConstantPool wants an explicit alignment.
    Align Alignment = DL.getPrefTypeAlign(GV->getType());

    if (Subtarget->isTargetELF() && IsPositionIndependent)
      return ARMLowerPICELF(GV, VT);

    // Grab index.
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Alignment);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                    DestReg).addConstantPoolIndex(Idx);
      if (IsPositionIndependent)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::LDRcp), DestReg)
                .addConstantPoolIndex(Idx)
                .addImm(0);
      AddOptionalDefs(MIB);

      if (IsPositionIndependent) {
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          MIMD, TII.get(Opc), NewDestReg)
                                      .addReg(DestReg)
                                      .addImm(Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
      (Subtarget->isTargetMachO() && IsIndirect)) {
    MachineInstrBuilder MIB;
    Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::t2LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}

unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple()) return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);

unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    Register ResultReg = createResultReg(RC);
    ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);

    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), ResultReg)
                        .addFrameIndex(SI->second)
                        .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e.
a register that will directly hold this678// value.679return TLI.isTypeLegal(VT);680}681682bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {683if (isTypeLegal(Ty, VT)) return true;684685// If this is a type than can be sign or zero-extended to a basic operation686// go ahead and accept it now.687if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)688return true;689690return false;691}692693// Computes the address to get to an object.694bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {695// Some boilerplate from the X86 FastISel.696const User *U = nullptr;697unsigned Opcode = Instruction::UserOp1;698if (const Instruction *I = dyn_cast<Instruction>(Obj)) {699// Don't walk into other basic blocks unless the object is an alloca from700// another block, otherwise it may not have a virtual register assigned.701if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||702FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {703Opcode = I->getOpcode();704U = I;705}706} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {707Opcode = C->getOpcode();708U = C;709}710711if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))712if (Ty->getAddressSpace() > 255)713// Fast instruction selection doesn't support the special714// address spaces.715return false;716717switch (Opcode) {718default:719break;720case Instruction::BitCast:721// Look through bitcasts.722return ARMComputeAddress(U->getOperand(0), Addr);723case Instruction::IntToPtr:724// Look past no-op inttoptrs.725if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==726TLI.getPointerTy(DL))727return ARMComputeAddress(U->getOperand(0), Addr);728break;729case Instruction::PtrToInt:730// Look past no-op ptrtoints.731if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))732return ARMComputeAddress(U->getOperand(0), Addr);733break;734case Instruction::GetElementPtr: {735Address SavedAddr = Addr;736int TmpOffset = Addr.Offset;737738// Iterate through the GEP folding the constants into offsets where739// we can.740gep_type_iterator GTI = gep_type_begin(U);741for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();742i != e; ++i, ++GTI) {743const Value *Op = *i;744if (StructType *STy = GTI.getStructTypeOrNull()) {745const StructLayout *SL = DL.getStructLayout(STy);746unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();747TmpOffset += SL->getElementOffset(Idx);748} else {749uint64_t S = GTI.getSequentialElementStride(DL);750while (true) {751if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {752// Constant-offset addressing.753TmpOffset += CI->getSExtValue() * S;754break;755}756if (canFoldAddIntoGEP(U, Op)) {757// A compatible add with a constant operand. 
Fold the constant.758ConstantInt *CI =759cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));760TmpOffset += CI->getSExtValue() * S;761// Iterate on the other operand.762Op = cast<AddOperator>(Op)->getOperand(0);763continue;764}765// Unsupported766goto unsupported_gep;767}768}769}770771// Try to grab the base operand now.772Addr.Offset = TmpOffset;773if (ARMComputeAddress(U->getOperand(0), Addr)) return true;774775// We failed, restore everything and try the other options.776Addr = SavedAddr;777778unsupported_gep:779break;780}781case Instruction::Alloca: {782const AllocaInst *AI = cast<AllocaInst>(Obj);783DenseMap<const AllocaInst*, int>::iterator SI =784FuncInfo.StaticAllocaMap.find(AI);785if (SI != FuncInfo.StaticAllocaMap.end()) {786Addr.BaseType = Address::FrameIndexBase;787Addr.Base.FI = SI->second;788return true;789}790break;791}792}793794// Try to get this in a register if nothing else has worked.795if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);796return Addr.Base.Reg != 0;797}798799void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {800bool needsLowering = false;801switch (VT.SimpleTy) {802default: llvm_unreachable("Unhandled load/store type!");803case MVT::i1:804case MVT::i8:805case MVT::i16:806case MVT::i32:807if (!useAM3) {808// Integer loads/stores handle 12-bit offsets.809needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);810// Handle negative offsets.811if (needsLowering && isThumb2)812needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&813Addr.Offset > -256);814} else {815// ARM halfword load/stores and signed byte loads use +/-imm8 offsets.816needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);817}818break;819case MVT::f32:820case MVT::f64:821// Floating point operands handle 8-bit offsets.822needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);823break;824}825826// If this is a stack pointer and the offset needs to be simplified then827// put the alloca address into a register, set the base type back to828// register and continue. This should almost never happen.829if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {830const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass831: &ARM::GPRRegClass;832Register ResultReg = createResultReg(RC);833unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;834AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,835TII.get(Opc), ResultReg)836.addFrameIndex(Addr.Base.FI)837.addImm(0));838Addr.Base.Reg = ResultReg;839Addr.BaseType = Address::RegBase;840}841842// Since the offset is too large for the load/store instruction843// get the reg+offset into a register.844if (needsLowering) {845Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,846Addr.Offset, MVT::i32);847Addr.Offset = 0;848}849}850851void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,852const MachineInstrBuilder &MIB,853MachineMemOperand::Flags Flags,854bool useAM3) {855// addrmode5 output depends on the selection dag addressing dividing the856// offset by 4 that it then later multiplies. Do this here as well.857if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)858Addr.Offset /= 4;859860// Frame base works a bit differently. 
Handle it separately.861if (Addr.BaseType == Address::FrameIndexBase) {862int FI = Addr.Base.FI;863int Offset = Addr.Offset;864MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(865MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,866MFI.getObjectSize(FI), MFI.getObjectAlign(FI));867// Now add the rest of the operands.868MIB.addFrameIndex(FI);869870// ARM halfword load/stores and signed byte loads need an additional871// operand.872if (useAM3) {873int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;874MIB.addReg(0);875MIB.addImm(Imm);876} else {877MIB.addImm(Addr.Offset);878}879MIB.addMemOperand(MMO);880} else {881// Now add the rest of the operands.882MIB.addReg(Addr.Base.Reg);883884// ARM halfword load/stores and signed byte loads need an additional885// operand.886if (useAM3) {887int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;888MIB.addReg(0);889MIB.addImm(Imm);890} else {891MIB.addImm(Addr.Offset);892}893}894AddOptionalDefs(MIB);895}896897bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,898MaybeAlign Alignment, bool isZExt,899bool allocReg) {900unsigned Opc;901bool useAM3 = false;902bool needVMOV = false;903const TargetRegisterClass *RC;904switch (VT.SimpleTy) {905// This is mostly going to be Neon/vector support.906default: return false;907case MVT::i1:908case MVT::i8:909if (isThumb2) {910if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())911Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;912else913Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;914} else {915if (isZExt) {916Opc = ARM::LDRBi12;917} else {918Opc = ARM::LDRSB;919useAM3 = true;920}921}922RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;923break;924case MVT::i16:925if (Alignment && *Alignment < Align(2) &&926!Subtarget->allowsUnalignedMem())927return false;928929if (isThumb2) {930if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())931Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;932else933Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;934} else {935Opc = isZExt ? ARM::LDRH : ARM::LDRSH;936useAM3 = true;937}938RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;939break;940case MVT::i32:941if (Alignment && *Alignment < Align(4) &&942!Subtarget->allowsUnalignedMem())943return false;944945if (isThumb2) {946if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())947Opc = ARM::t2LDRi8;948else949Opc = ARM::t2LDRi12;950} else {951Opc = ARM::LDRi12;952}953RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;954break;955case MVT::f32:956if (!Subtarget->hasVFP2Base()) return false;957// Unaligned loads need special handling. Floats require word-alignment.958if (Alignment && *Alignment < Align(4)) {959needVMOV = true;960VT = MVT::i32;961Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;962RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;963} else {964Opc = ARM::VLDRS;965RC = TLI.getRegClassFor(VT);966}967break;968case MVT::f64:969// Can load and store double precision even without FeatureFP64970if (!Subtarget->hasVFP2Base()) return false;971// FIXME: Unaligned loads need special handling. 
Doublewords require972// word-alignment.973if (Alignment && *Alignment < Align(4))974return false;975976Opc = ARM::VLDRD;977RC = TLI.getRegClassFor(VT);978break;979}980// Simplify this down to something we can handle.981ARMSimplifyAddress(Addr, VT, useAM3);982983// Create the base instruction, then add the operands.984if (allocReg)985ResultReg = createResultReg(RC);986assert(ResultReg > 255 && "Expected an allocated virtual register.");987MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,988TII.get(Opc), ResultReg);989AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);990991// If we had an unaligned load of a float we've converted it to an regular992// load. Now we must move from the GRP to the FP register.993if (needVMOV) {994Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));995AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,996TII.get(ARM::VMOVSR), MoveReg)997.addReg(ResultReg));998ResultReg = MoveReg;999}1000return true;1001}10021003bool ARMFastISel::SelectLoad(const Instruction *I) {1004// Atomic loads need special handling.1005if (cast<LoadInst>(I)->isAtomic())1006return false;10071008const Value *SV = I->getOperand(0);1009if (TLI.supportSwiftError()) {1010// Swifterror values can come from either a function parameter with1011// swifterror attribute or an alloca with swifterror attribute.1012if (const Argument *Arg = dyn_cast<Argument>(SV)) {1013if (Arg->hasSwiftErrorAttr())1014return false;1015}10161017if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {1018if (Alloca->isSwiftError())1019return false;1020}1021}10221023// Verify we have a legal type before going any further.1024MVT VT;1025if (!isLoadTypeLegal(I->getType(), VT))1026return false;10271028// See if we can handle this address.1029Address Addr;1030if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;10311032Register ResultReg;1033if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlign()))1034return false;1035updateValueMap(I, ResultReg);1036return true;1037}10381039bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,1040MaybeAlign Alignment) {1041unsigned StrOpc;1042bool useAM3 = false;1043switch (VT.SimpleTy) {1044// This is mostly going to be Neon/vector support.1045default: return false;1046case MVT::i1: {1047Register Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass1048: &ARM::GPRRegClass);1049unsigned Opc = isThumb2 ? 
ARM::t2ANDri : ARM::ANDri;1050SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);1051AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1052TII.get(Opc), Res)1053.addReg(SrcReg).addImm(1));1054SrcReg = Res;1055[[fallthrough]];1056}1057case MVT::i8:1058if (isThumb2) {1059if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())1060StrOpc = ARM::t2STRBi8;1061else1062StrOpc = ARM::t2STRBi12;1063} else {1064StrOpc = ARM::STRBi12;1065}1066break;1067case MVT::i16:1068if (Alignment && *Alignment < Align(2) &&1069!Subtarget->allowsUnalignedMem())1070return false;10711072if (isThumb2) {1073if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())1074StrOpc = ARM::t2STRHi8;1075else1076StrOpc = ARM::t2STRHi12;1077} else {1078StrOpc = ARM::STRH;1079useAM3 = true;1080}1081break;1082case MVT::i32:1083if (Alignment && *Alignment < Align(4) &&1084!Subtarget->allowsUnalignedMem())1085return false;10861087if (isThumb2) {1088if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())1089StrOpc = ARM::t2STRi8;1090else1091StrOpc = ARM::t2STRi12;1092} else {1093StrOpc = ARM::STRi12;1094}1095break;1096case MVT::f32:1097if (!Subtarget->hasVFP2Base()) return false;1098// Unaligned stores need special handling. Floats require word-alignment.1099if (Alignment && *Alignment < Align(4)) {1100Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));1101AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1102TII.get(ARM::VMOVRS), MoveReg)1103.addReg(SrcReg));1104SrcReg = MoveReg;1105VT = MVT::i32;1106StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;1107} else {1108StrOpc = ARM::VSTRS;1109}1110break;1111case MVT::f64:1112// Can load and store double precision even without FeatureFP641113if (!Subtarget->hasVFP2Base()) return false;1114// FIXME: Unaligned stores need special handling. 
Doublewords require1115// word-alignment.1116if (Alignment && *Alignment < Align(4))1117return false;11181119StrOpc = ARM::VSTRD;1120break;1121}1122// Simplify this down to something we can handle.1123ARMSimplifyAddress(Addr, VT, useAM3);11241125// Create the base instruction, then add the operands.1126SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);1127MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1128TII.get(StrOpc))1129.addReg(SrcReg);1130AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);1131return true;1132}11331134bool ARMFastISel::SelectStore(const Instruction *I) {1135Value *Op0 = I->getOperand(0);1136unsigned SrcReg = 0;11371138// Atomic stores need special handling.1139if (cast<StoreInst>(I)->isAtomic())1140return false;11411142const Value *PtrV = I->getOperand(1);1143if (TLI.supportSwiftError()) {1144// Swifterror values can come from either a function parameter with1145// swifterror attribute or an alloca with swifterror attribute.1146if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {1147if (Arg->hasSwiftErrorAttr())1148return false;1149}11501151if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {1152if (Alloca->isSwiftError())1153return false;1154}1155}11561157// Verify we have a legal type before going any further.1158MVT VT;1159if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))1160return false;11611162// Get the value to be stored into a register.1163SrcReg = getRegForValue(Op0);1164if (SrcReg == 0) return false;11651166// See if we can handle this address.1167Address Addr;1168if (!ARMComputeAddress(I->getOperand(1), Addr))1169return false;11701171if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlign()))1172return false;1173return true;1174}11751176static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {1177switch (Pred) {1178// Needs two compares...1179case CmpInst::FCMP_ONE:1180case CmpInst::FCMP_UEQ:1181default:1182// AL is our "false" for now. 
The other two need more compares.1183return ARMCC::AL;1184case CmpInst::ICMP_EQ:1185case CmpInst::FCMP_OEQ:1186return ARMCC::EQ;1187case CmpInst::ICMP_SGT:1188case CmpInst::FCMP_OGT:1189return ARMCC::GT;1190case CmpInst::ICMP_SGE:1191case CmpInst::FCMP_OGE:1192return ARMCC::GE;1193case CmpInst::ICMP_UGT:1194case CmpInst::FCMP_UGT:1195return ARMCC::HI;1196case CmpInst::FCMP_OLT:1197return ARMCC::MI;1198case CmpInst::ICMP_ULE:1199case CmpInst::FCMP_OLE:1200return ARMCC::LS;1201case CmpInst::FCMP_ORD:1202return ARMCC::VC;1203case CmpInst::FCMP_UNO:1204return ARMCC::VS;1205case CmpInst::FCMP_UGE:1206return ARMCC::PL;1207case CmpInst::ICMP_SLT:1208case CmpInst::FCMP_ULT:1209return ARMCC::LT;1210case CmpInst::ICMP_SLE:1211case CmpInst::FCMP_ULE:1212return ARMCC::LE;1213case CmpInst::FCMP_UNE:1214case CmpInst::ICMP_NE:1215return ARMCC::NE;1216case CmpInst::ICMP_UGE:1217return ARMCC::HS;1218case CmpInst::ICMP_ULT:1219return ARMCC::LO;1220}1221}12221223bool ARMFastISel::SelectBranch(const Instruction *I) {1224const BranchInst *BI = cast<BranchInst>(I);1225MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];1226MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];12271228// Simple branch support.12291230// If we can, avoid recomputing the compare - redoing it could lead to wonky1231// behavior.1232if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {1233if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {1234// Get the compare predicate.1235// Try to take advantage of fallthrough opportunities.1236CmpInst::Predicate Predicate = CI->getPredicate();1237if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {1238std::swap(TBB, FBB);1239Predicate = CmpInst::getInversePredicate(Predicate);1240}12411242ARMCC::CondCodes ARMPred = getComparePred(Predicate);12431244// We may not handle every CC for now.1245if (ARMPred == ARMCC::AL) return false;12461247// Emit the compare.1248if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))1249return false;12501251unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;1252BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))1253.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);1254finishCondBranch(BI->getParent(), TBB, FBB);1255return true;1256}1257} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {1258MVT SourceVT;1259if (TI->hasOneUse() && TI->getParent() == I->getParent() &&1260(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {1261unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;1262Register OpReg = getRegForValue(TI->getOperand(0));1263OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);1264AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1265TII.get(TstOpc))1266.addReg(OpReg).addImm(1));12671268unsigned CCMode = ARMCC::NE;1269if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {1270std::swap(TBB, FBB);1271CCMode = ARMCC::EQ;1272}12731274unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;1275BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))1276.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);12771278finishCondBranch(BI->getParent(), TBB, FBB);1279return true;1280}1281} else if (const ConstantInt *CI =1282dyn_cast<ConstantInt>(BI->getCondition())) {1283uint64_t Imm = CI->getZExtValue();1284MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;1285fastEmitBranch(Target, MIMD.getDL());1286return true;1287}12881289Register CmpReg = getRegForValue(BI->getCondition());1290if (CmpReg == 0) return false;12911292// We've been divorced from our compare! 
Our block was split, and1293// now our compare lives in a predecessor block. We musn't1294// re-compare here, as the children of the compare aren't guaranteed1295// live across the block boundary (we *could* check for this).1296// Regardless, the compare has been done in the predecessor block,1297// and it left a value for us in a virtual register. Ergo, we test1298// the one-bit value left in the virtual register.1299unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;1300CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);1301AddOptionalDefs(1302BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TstOpc))1303.addReg(CmpReg)1304.addImm(1));13051306unsigned CCMode = ARMCC::NE;1307if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {1308std::swap(TBB, FBB);1309CCMode = ARMCC::EQ;1310}13111312unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;1313BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))1314.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);1315finishCondBranch(BI->getParent(), TBB, FBB);1316return true;1317}13181319bool ARMFastISel::SelectIndirectBr(const Instruction *I) {1320Register AddrReg = getRegForValue(I->getOperand(0));1321if (AddrReg == 0) return false;13221323unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;1324assert(isThumb2 || Subtarget->hasV4TOps());13251326AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1327TII.get(Opc)).addReg(AddrReg));13281329const IndirectBrInst *IB = cast<IndirectBrInst>(I);1330for (const BasicBlock *SuccBB : IB->successors())1331FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);13321333return true;1334}13351336bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,1337bool isZExt) {1338Type *Ty = Src1Value->getType();1339EVT SrcEVT = TLI.getValueType(DL, Ty, true);1340if (!SrcEVT.isSimple()) return false;1341MVT SrcVT = SrcEVT.getSimpleVT();13421343if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())1344return false;13451346if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))1347return false;13481349// Check to see if the 2nd operand is a constant that we can encode directly1350// in the compare.1351int Imm = 0;1352bool UseImm = false;1353bool isNegativeImm = false;1354// FIXME: At -O0 we don't have anything that canonicalizes operand order.1355// Thus, Src1Value may be a ConstantInt, but we're missing it.1356if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {1357if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||1358SrcVT == MVT::i1) {1359const APInt &CIVal = ConstInt->getValue();1360Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();1361// For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather1362// then a cmn, because there is no way to represent 2147483648 as a1363// signed 32-bit int.1364if (Imm < 0 && Imm != (int)0x80000000) {1365isNegativeImm = true;1366Imm = -Imm;1367}1368UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :1369(ARM_AM::getSOImmVal(Imm) != -1);1370}1371} else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {1372if (SrcVT == MVT::f32 || SrcVT == MVT::f64)1373if (ConstFP->isZero() && !ConstFP->isNegative())1374UseImm = true;1375}13761377unsigned CmpOpc;1378bool isICmp = true;1379bool needsExt = false;1380switch (SrcVT.SimpleTy) {1381default: return false;1382// TODO: Verify compares.1383case MVT::f32:1384isICmp = false;1385CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;1386break;1387case MVT::f64:1388isICmp = false;1389CmpOpc = UseImm ? 
ARM::VCMPZD : ARM::VCMPD;1390break;1391case MVT::i1:1392case MVT::i8:1393case MVT::i16:1394needsExt = true;1395[[fallthrough]];1396case MVT::i32:1397if (isThumb2) {1398if (!UseImm)1399CmpOpc = ARM::t2CMPrr;1400else1401CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;1402} else {1403if (!UseImm)1404CmpOpc = ARM::CMPrr;1405else1406CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;1407}1408break;1409}14101411Register SrcReg1 = getRegForValue(Src1Value);1412if (SrcReg1 == 0) return false;14131414unsigned SrcReg2 = 0;1415if (!UseImm) {1416SrcReg2 = getRegForValue(Src2Value);1417if (SrcReg2 == 0) return false;1418}14191420// We have i1, i8, or i16, we need to either zero extend or sign extend.1421if (needsExt) {1422SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);1423if (SrcReg1 == 0) return false;1424if (!UseImm) {1425SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);1426if (SrcReg2 == 0) return false;1427}1428}14291430const MCInstrDesc &II = TII.get(CmpOpc);1431SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);1432if (!UseImm) {1433SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);1434AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)1435.addReg(SrcReg1).addReg(SrcReg2));1436} else {1437MachineInstrBuilder MIB;1438MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)1439.addReg(SrcReg1);14401441// Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.1442if (isICmp)1443MIB.addImm(Imm);1444AddOptionalDefs(MIB);1445}14461447// For floating point we need to move the result to a comparison register1448// that we can then use for branches.1449if (Ty->isFloatTy() || Ty->isDoubleTy())1450AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1451TII.get(ARM::FMSTAT)));1452return true;1453}14541455bool ARMFastISel::SelectCmp(const Instruction *I) {1456const CmpInst *CI = cast<CmpInst>(I);14571458// Get the compare predicate.1459ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());14601461// We may not handle every CC for now.1462if (ARMPred == ARMCC::AL) return false;14631464// Emit the compare.1465if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))1466return false;14671468// Now set a register based on the comparison. Explicitly set the predicates1469// here.1470unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;1471const TargetRegisterClass *RC = isThumb2 ? 
&ARM::rGPRRegClass1472: &ARM::GPRRegClass;1473Register DestReg = createResultReg(RC);1474Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);1475unsigned ZeroReg = fastMaterializeConstant(Zero);1476// ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.1477BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc), DestReg)1478.addReg(ZeroReg).addImm(1)1479.addImm(ARMPred).addReg(ARM::CPSR);14801481updateValueMap(I, DestReg);1482return true;1483}14841485bool ARMFastISel::SelectFPExt(const Instruction *I) {1486// Make sure we have VFP and that we're extending float to double.1487if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;14881489Value *V = I->getOperand(0);1490if (!I->getType()->isDoubleTy() ||1491!V->getType()->isFloatTy()) return false;14921493Register Op = getRegForValue(V);1494if (Op == 0) return false;14951496Register Result = createResultReg(&ARM::DPRRegClass);1497AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1498TII.get(ARM::VCVTDS), Result)1499.addReg(Op));1500updateValueMap(I, Result);1501return true;1502}15031504bool ARMFastISel::SelectFPTrunc(const Instruction *I) {1505// Make sure we have VFP and that we're truncating double to float.1506if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;15071508Value *V = I->getOperand(0);1509if (!(I->getType()->isFloatTy() &&1510V->getType()->isDoubleTy())) return false;15111512Register Op = getRegForValue(V);1513if (Op == 0) return false;15141515Register Result = createResultReg(&ARM::SPRRegClass);1516AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1517TII.get(ARM::VCVTSD), Result)1518.addReg(Op));1519updateValueMap(I, Result);1520return true;1521}15221523bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {1524// Make sure we have VFP.1525if (!Subtarget->hasVFP2Base()) return false;15261527MVT DstVT;1528Type *Ty = I->getType();1529if (!isTypeLegal(Ty, DstVT))1530return false;15311532Value *Src = I->getOperand(0);1533EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);1534if (!SrcEVT.isSimple())1535return false;1536MVT SrcVT = SrcEVT.getSimpleVT();1537if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)1538return false;15391540Register SrcReg = getRegForValue(Src);1541if (SrcReg == 0) return false;15421543// Handle sign-extension.1544if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {1545SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,1546/*isZExt*/!isSigned);1547if (SrcReg == 0) return false;1548}15491550// The conversion routine works on fp-reg to fp-reg and the operand above1551// was an integer, move it to the fp registers if possible.1552unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);1553if (FP == 0) return false;15541555unsigned Opc;1556if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;1557else if (Ty->isDoubleTy() && Subtarget->hasFP64())1558Opc = isSigned ? 
ARM::VSITOD : ARM::VUITOD;1559else return false;15601561Register ResultReg = createResultReg(TLI.getRegClassFor(DstVT));1562AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1563TII.get(Opc), ResultReg).addReg(FP));1564updateValueMap(I, ResultReg);1565return true;1566}15671568bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {1569// Make sure we have VFP.1570if (!Subtarget->hasVFP2Base()) return false;15711572MVT DstVT;1573Type *RetTy = I->getType();1574if (!isTypeLegal(RetTy, DstVT))1575return false;15761577Register Op = getRegForValue(I->getOperand(0));1578if (Op == 0) return false;15791580unsigned Opc;1581Type *OpTy = I->getOperand(0)->getType();1582if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;1583else if (OpTy->isDoubleTy() && Subtarget->hasFP64())1584Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;1585else return false;15861587// f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.1588Register ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));1589AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,1590TII.get(Opc), ResultReg).addReg(Op));15911592// This result needs to be in an integer register, but the conversion only1593// takes place in fp-regs.1594unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);1595if (IntReg == 0) return false;15961597updateValueMap(I, IntReg);1598return true;1599}16001601bool ARMFastISel::SelectSelect(const Instruction *I) {1602MVT VT;1603if (!isTypeLegal(I->getType(), VT))1604return false;16051606// Things need to be register sized for register moves.1607if (VT != MVT::i32) return false;16081609Register CondReg = getRegForValue(I->getOperand(0));1610if (CondReg == 0) return false;1611Register Op1Reg = getRegForValue(I->getOperand(1));1612if (Op1Reg == 0) return false;16131614// Check to see if we can use an immediate in the conditional move.1615int Imm = 0;1616bool UseImm = false;1617bool isNegativeImm = false;1618if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {1619assert(VT == MVT::i32 && "Expecting an i32.");1620Imm = (int)ConstInt->getValue().getZExtValue();1621if (Imm < 0) {1622isNegativeImm = true;1623Imm = ~Imm;1624}1625UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :1626(ARM_AM::getSOImmVal(Imm) != -1);1627}16281629unsigned Op2Reg = 0;1630if (!UseImm) {1631Op2Reg = getRegForValue(I->getOperand(2));1632if (Op2Reg == 0) return false;1633}16341635unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;1636CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);1637AddOptionalDefs(1638BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TstOpc))1639.addReg(CondReg)1640.addImm(1));16411642unsigned MovCCOpc;1643const TargetRegisterClass *RC;1644if (!UseImm) {1645RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;1646MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;1647} else {1648RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;1649if (!isNegativeImm)1650MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;1651else1652MovCCOpc = isThumb2 ? 
ARM::t2MVNCCi : ARM::MVNCCi;1653}1654Register ResultReg = createResultReg(RC);1655if (!UseImm) {1656Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);1657Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);1658BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc),1659ResultReg)1660.addReg(Op2Reg)1661.addReg(Op1Reg)1662.addImm(ARMCC::NE)1663.addReg(ARM::CPSR);1664} else {1665Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);1666BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc),1667ResultReg)1668.addReg(Op1Reg)1669.addImm(Imm)1670.addImm(ARMCC::EQ)1671.addReg(ARM::CPSR);1672}1673updateValueMap(I, ResultReg);1674return true;1675}16761677bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {1678MVT VT;1679Type *Ty = I->getType();1680if (!isTypeLegal(Ty, VT))1681return false;16821683// If we have integer div support we should have selected this automagically.1684// In case we have a real miss go ahead and return false and we'll pick1685// it up later.1686if (Subtarget->hasDivideInThumbMode())1687return false;16881689// Otherwise emit a libcall.1690RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;1691if (VT == MVT::i8)1692LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;1693else if (VT == MVT::i16)1694LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;1695else if (VT == MVT::i32)1696LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;1697else if (VT == MVT::i64)1698LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;1699else if (VT == MVT::i128)1700LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;1701assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");17021703return ARMEmitLibcall(I, LC);1704}17051706bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {1707MVT VT;1708Type *Ty = I->getType();1709if (!isTypeLegal(Ty, VT))1710return false;17111712// Many ABIs do not provide a libcall for standalone remainder, so we need to1713// use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double1714// multi-reg returns, we'll have to bail out.1715if (!TLI.hasStandaloneRem(VT)) {1716return false;1717}17181719RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;1720if (VT == MVT::i8)1721LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;1722else if (VT == MVT::i16)1723LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;1724else if (VT == MVT::i32)1725LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;1726else if (VT == MVT::i64)1727LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;1728else if (VT == MVT::i128)1729LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;1730assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");17311732return ARMEmitLibcall(I, LC);1733}17341735bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {1736EVT DestVT = TLI.getValueType(DL, I->getType(), true);17371738// We can get here in the case when we have a binary operation on a non-legal1739// type and the target independent selector doesn't know how to handle it.1740if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)1741return false;17421743unsigned Opc;1744switch (ISDOpcode) {1745default: return false;1746case ISD::ADD:1747Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;1748break;1749case ISD::OR:1750Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;1751break;1752case ISD::SUB:1753Opc = isThumb2 ? 

bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned Opc;
  switch (ISDOpcode) {
  default: return false;
  case ISD::ADD:
    Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
    break;
  case ISD::OR:
    Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
    break;
  case ISD::SUB:
    Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
    break;
  }

  Register SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather than materializing the value in a register.
  Register SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
  SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(Opc), ResultReg)
                      .addReg(SrcReg1).addReg(SrcReg2));
  updateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  EVT FPVT = TLI.getValueType(DL, I->getType(), true);
  if (!FPVT.isSimple()) return false;
  MVT VT = FPVT.getSimpleVT();

  // FIXME: Support vector types where possible.
  if (VT.isVector())
    return false;

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;
  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
  default: return false;
  case ISD::FADD:
    Opc = is64bit ? ARM::VADDD : ARM::VADDS;
    break;
  case ISD::FSUB:
    Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
    break;
  case ISD::FMUL:
    Opc = is64bit ? ARM::VMULD : ARM::VMULS;
    break;
  }
  Register Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  Register Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(Opc), ResultReg)
                      .addReg(Op1).addReg(Op2));
  updateValueMap(I, ResultReg);
  return true;
}

// Call Handling Code

// This is largely taken directly from CCAssignFnForNode
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool Return,
                                           bool isVarArg) {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2Base() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    [[fallthrough]];
  case CallingConv::C:
  case CallingConv::CXX_FAST_TLS:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasFPRegs() &&
          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else {
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    }
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fall through to soft float variant, variadic functions don't
    // use hard floating point ABI.
    [[fallthrough]];
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::GHC:
    if (Return)
      report_fatal_error("Can't return in GHC call convention");
    else
      return CC_ARM_APCS_GHC;
  case CallingConv::CFGuard_Check:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  }
}
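
// The helpers below implement FastISel's simplified call lowering:
// ProcessCallArgs runs the CCAssignFn chosen above over the outgoing
// arguments, rejects anything it cannot handle (vector arguments, custom
// locations other than an f64 register pair), and only then emits
// CALLSEQ_START plus the register copies / stack stores; FinishCall emits
// CALLSEQ_END and copies the return value(s) out of the ABI registers.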

bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<Register> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<Register> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool isVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
                             CCAssignFnForCall(CC, false, isVarArg));

  // Check that we can handle all of the arguments. If we can't, then bail out
  // now before we add code to the MBB.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      continue;
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64 ||
          // TODO: Only handle register args for now.
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
        return false;
    } else {
      switch (ArgVT.SimpleTy) {
      default:
        return false;
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      case MVT::f32:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      case MVT::f64:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      }
    }
  }

  // At this point, we are able to handle the call's arguments in fast isel.

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getStackSize();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(AdjStackDown))
                      .addImm(NumBytes).addImm(0));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = Args[VA.getValNo()];
    Register Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
        assert(Arg != 0 && "Failed to emit a sext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::AExt:
        // Intentional fall-through.  Handle AExt and ZExt.
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
        assert(Arg != 0 && "Failed to emit a zext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                          .addReg(NextVA.getLocReg(), RegState::Define)
                          .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}

bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes, bool isVarArg) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(AdjStackUp))
                      .addImm(NumBytes).addImm(-1ULL));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      MVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      Register ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(ARM::VMOVDRR), ResultReg)
                          .addReg(RVLocs[0].getLocReg())
                          .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      updateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      MVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      Register ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      updateValueMap(I, ResultReg);
    }
  }

  return true;
}

bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const bool IsCmseNSEntry = F.hasFnAttribute("cmse_nonsecure_entry");

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
                                                 F.isVarArg()));

    const Value *RV = Ret->getOperand(0);
    Register Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple()) return false;
    MVT RVVT = RVEVT.getSimpleVT();
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext. Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
        if (SrcReg == 0) return false;
      }
    }

    // Make the copy.
    Register DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  unsigned RetOpc;
  if (IsCmseNSEntry)
    if (isThumb2)
      RetOpc = ARM::tBXNS_RET;
    else
      llvm_unreachable("CMSE not valid for non-Thumb targets");
  else
    RetOpc = Subtarget->getReturnOpcode();

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(RetOpc));
  AddOptionalDefs(MIB);
  for (unsigned R : RetRegs)
    MIB.addReg(R, RegState::Implicit);
  return true;
}

unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
  if (UseReg)
    return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF);
  else
    return isThumb2 ? ARM::tBL : ARM::BL;
}

unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
  // Manually compute the global's type to avoid building it when unnecessary.
  Type *GVTy = PointerType::get(*Context, /*AS=*/0);
  EVT LCREVT = TLI.getValueType(DL, GVTy);
  if (!LCREVT.isSimple()) return 0;

  GlobalValue *GV = M.getNamedGlobal(Name.str());
  if (!GV)
    GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
                            GlobalValue::ExternalLinkage, nullptr, Name);

  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}

// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
// of the full call infrastructure since we won't need to worry about things
// like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (Value *Op : I->operands()) {
    Register Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, false))
    return false;

  Register CalleeReg;
  if (Subtarget->genLongCalls()) {
    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    MIMD, TII.get(CallOpc));
  // BL / BLX don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(predOps(ARMCC::AL));
  if (Subtarget->genLongCalls()) {
    CalleeReg =
        constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 
2 : 0);2270MIB.addReg(CalleeReg);2271} else2272MIB.addExternalSymbol(TLI.getLibcallName(Call));22732274// Add implicit physical register uses to the call.2275for (Register R : RegArgs)2276MIB.addReg(R, RegState::Implicit);22772278// Add a register mask with the call-preserved registers.2279// Proper defs for return values will be added by setPhysRegsDeadExcept().2280MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));22812282// Finish off the call including any return values.2283SmallVector<Register, 4> UsedRegs;2284if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;22852286// Set all unused physreg defs as dead.2287static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);22882289return true;2290}22912292bool ARMFastISel::SelectCall(const Instruction *I,2293const char *IntrMemName = nullptr) {2294const CallInst *CI = cast<CallInst>(I);2295const Value *Callee = CI->getCalledOperand();22962297// Can't handle inline asm.2298if (isa<InlineAsm>(Callee)) return false;22992300// Allow SelectionDAG isel to handle tail calls.2301if (CI->isTailCall()) return false;23022303// Check the calling convention.2304CallingConv::ID CC = CI->getCallingConv();23052306// TODO: Avoid some calling conventions?23072308FunctionType *FTy = CI->getFunctionType();2309bool isVarArg = FTy->isVarArg();23102311// Handle *simple* calls for now.2312Type *RetTy = I->getType();2313MVT RetVT;2314if (RetTy->isVoidTy())2315RetVT = MVT::isVoid;2316else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&2317RetVT != MVT::i8 && RetVT != MVT::i1)2318return false;23192320// Can't handle non-double multi-reg retvals.2321if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&2322RetVT != MVT::i16 && RetVT != MVT::i32) {2323SmallVector<CCValAssign, 16> RVLocs;2324CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);2325CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));2326if (RVLocs.size() >= 2 && RetVT != MVT::f64)2327return false;2328}23292330// Set up the argument vectors.2331SmallVector<Value*, 8> Args;2332SmallVector<Register, 8> ArgRegs;2333SmallVector<MVT, 8> ArgVTs;2334SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;2335unsigned arg_size = CI->arg_size();2336Args.reserve(arg_size);2337ArgRegs.reserve(arg_size);2338ArgVTs.reserve(arg_size);2339ArgFlags.reserve(arg_size);2340for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE; ++ArgI) {2341// If we're lowering a memory intrinsic instead of a regular call, skip the2342// last argument, which shouldn't be passed to the underlying function.2343if (IntrMemName && ArgE - ArgI <= 1)2344break;23452346ISD::ArgFlagsTy Flags;2347unsigned ArgIdx = ArgI - CI->arg_begin();2348if (CI->paramHasAttr(ArgIdx, Attribute::SExt))2349Flags.setSExt();2350if (CI->paramHasAttr(ArgIdx, Attribute::ZExt))2351Flags.setZExt();23522353// FIXME: Only handle *easy* calls for now.2354if (CI->paramHasAttr(ArgIdx, Attribute::InReg) ||2355CI->paramHasAttr(ArgIdx, Attribute::StructRet) ||2356CI->paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||2357CI->paramHasAttr(ArgIdx, Attribute::SwiftError) ||2358CI->paramHasAttr(ArgIdx, Attribute::Nest) ||2359CI->paramHasAttr(ArgIdx, Attribute::ByVal))2360return false;23612362Type *ArgTy = (*ArgI)->getType();2363MVT ArgVT;2364if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&2365ArgVT != MVT::i1)2366return false;23672368Register Arg = getRegForValue(*ArgI);2369if (!Arg.isValid())2370return 
false;23712372Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));23732374Args.push_back(*ArgI);2375ArgRegs.push_back(Arg);2376ArgVTs.push_back(ArgVT);2377ArgFlags.push_back(Flags);2378}23792380// Handle the arguments now that we've gotten them.2381SmallVector<Register, 4> RegArgs;2382unsigned NumBytes;2383if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,2384RegArgs, CC, NumBytes, isVarArg))2385return false;23862387bool UseReg = false;2388const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);2389if (!GV || Subtarget->genLongCalls()) UseReg = true;23902391Register CalleeReg;2392if (UseReg) {2393if (IntrMemName)2394CalleeReg = getLibcallReg(IntrMemName);2395else2396CalleeReg = getRegForValue(Callee);23972398if (CalleeReg == 0) return false;2399}24002401// Issue the call.2402unsigned CallOpc = ARMSelectCallOp(UseReg);2403MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,2404MIMD, TII.get(CallOpc));24052406// ARM calls don't take a predicate, but tBL / tBLX do.2407if(isThumb2)2408MIB.add(predOps(ARMCC::AL));2409if (UseReg) {2410CalleeReg =2411constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);2412MIB.addReg(CalleeReg);2413} else if (!IntrMemName)2414MIB.addGlobalAddress(GV, 0, 0);2415else2416MIB.addExternalSymbol(IntrMemName, 0);24172418// Add implicit physical register uses to the call.2419for (Register R : RegArgs)2420MIB.addReg(R, RegState::Implicit);24212422// Add a register mask with the call-preserved registers.2423// Proper defs for return values will be added by setPhysRegsDeadExcept().2424MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));24252426// Finish off the call including any return values.2427SmallVector<Register, 4> UsedRegs;2428if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))2429return false;24302431// Set all unused physreg defs as dead.2432static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);24332434return true;2435}24362437bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {2438return Len <= 16;2439}24402441bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,2442MaybeAlign Alignment) {2443// Make sure we don't bloat code by inlining very large memcpy's.2444if (!ARMIsMemCpySmall(Len))2445return false;24462447while (Len) {2448MVT VT;2449if (!Alignment || *Alignment >= 4) {2450if (Len >= 4)2451VT = MVT::i32;2452else if (Len >= 2)2453VT = MVT::i16;2454else {2455assert(Len == 1 && "Expected a length of 1!");2456VT = MVT::i8;2457}2458} else {2459assert(Alignment && "Alignment is set in this branch");2460// Bound based on alignment.2461if (Len >= 2 && *Alignment == 2)2462VT = MVT::i16;2463else {2464VT = MVT::i8;2465}2466}24672468bool RV;2469Register ResultReg;2470RV = ARMEmitLoad(VT, ResultReg, Src);2471assert(RV && "Should be able to handle this load.");2472RV = ARMEmitStore(VT, ResultReg, Dest);2473assert(RV && "Should be able to handle this store.");2474(void)RV;24752476unsigned Size = VT.getSizeInBits()/8;2477Len -= Size;2478Dest.Offset += Size;2479Src.Offset += Size;2480}24812482return true;2483}24842485bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {2486// FIXME: Handle more intrinsics.2487switch (I.getIntrinsicID()) {2488default: return false;2489case Intrinsic::frameaddress: {2490MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();2491MFI.setFrameAddressIsTaken(true);24922493unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;2494const TargetRegisterClass *RC = isThumb2 ? 
&ARM::tGPRRegClass2495: &ARM::GPRRegClass;24962497const ARMBaseRegisterInfo *RegInfo =2498static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());2499Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));2500unsigned SrcReg = FramePtr;25012502// Recursively load frame address2503// ldr r0 [fp]2504// ldr r0 [r0]2505// ldr r0 [r0]2506// ...2507unsigned DestReg;2508unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();2509while (Depth--) {2510DestReg = createResultReg(RC);2511AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,2512TII.get(LdrOpc), DestReg)2513.addReg(SrcReg).addImm(0));2514SrcReg = DestReg;2515}2516updateValueMap(&I, SrcReg);2517return true;2518}2519case Intrinsic::memcpy:2520case Intrinsic::memmove: {2521const MemTransferInst &MTI = cast<MemTransferInst>(I);2522// Don't handle volatile.2523if (MTI.isVolatile())2524return false;25252526// Disable inlining for memmove before calls to ComputeAddress. Otherwise,2527// we would emit dead code because we don't currently handle memmoves.2528bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);2529if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {2530// Small memcpy's are common enough that we want to do them without a call2531// if possible.2532uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();2533if (ARMIsMemCpySmall(Len)) {2534Address Dest, Src;2535if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||2536!ARMComputeAddress(MTI.getRawSource(), Src))2537return false;2538MaybeAlign Alignment;2539if (MTI.getDestAlign() || MTI.getSourceAlign())2540Alignment = std::min(MTI.getDestAlign().valueOrOne(),2541MTI.getSourceAlign().valueOrOne());2542if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))2543return true;2544}2545}25462547if (!MTI.getLength()->getType()->isIntegerTy(32))2548return false;25492550if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)2551return false;25522553const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";2554return SelectCall(&I, IntrMemName);2555}2556case Intrinsic::memset: {2557const MemSetInst &MSI = cast<MemSetInst>(I);2558// Don't handle volatile.2559if (MSI.isVolatile())2560return false;25612562if (!MSI.getLength()->getType()->isIntegerTy(32))2563return false;25642565if (MSI.getDestAddressSpace() > 255)2566return false;25672568return SelectCall(&I, "memset");2569}2570case Intrinsic::trap: {2571unsigned Opcode;2572if (Subtarget->isThumb())2573Opcode = ARM::tTRAP;2574else2575Opcode = Subtarget->useNaClTrap() ? 
ARM::TRAPNaCl : ARM::TRAP;2576BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opcode));2577return true;2578}2579}2580}25812582bool ARMFastISel::SelectTrunc(const Instruction *I) {2583// The high bits for a type smaller than the register size are assumed to be2584// undefined.2585Value *Op = I->getOperand(0);25862587EVT SrcVT, DestVT;2588SrcVT = TLI.getValueType(DL, Op->getType(), true);2589DestVT = TLI.getValueType(DL, I->getType(), true);25902591if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)2592return false;2593if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)2594return false;25952596Register SrcReg = getRegForValue(Op);2597if (!SrcReg) return false;25982599// Because the high bits are undefined, a truncate doesn't generate2600// any code.2601updateValueMap(I, SrcReg);2602return true;2603}26042605unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,2606bool isZExt) {2607if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)2608return 0;2609if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)2610return 0;26112612// Table of which combinations can be emitted as a single instruction,2613// and which will require two.2614static const uint8_t isSingleInstrTbl[3][2][2][2] = {2615// ARM Thumb2616// !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops2617// ext: s z s z s z s z2618/* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },2619/* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },2620/* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }2621};26222623// Target registers for:2624// - For ARM can never be PC.2625// - For 16-bit Thumb are restricted to lower 8 registers.2626// - For 32-bit Thumb are restricted to non-SP and non-PC.2627static const TargetRegisterClass *RCTbl[2][2] = {2628// Instructions: Two Single2629/* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },2630/* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass }2631};26322633// Table governing the instruction(s) to be emitted.2634static const struct InstructionTable {2635uint32_t Opc : 16;2636uint32_t hasS : 1; // Some instructions have an S bit, always set it to 0.2637uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi.2638uint32_t Imm : 8; // All instructions have either a shift or a mask.2639} IT[2][2][3][2] = {2640{ // Two instructions (first is left shift, second is in this table).2641{ // ARM Opc S Shift Imm2642/* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 },2643/* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } },2644/* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 },2645/* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } },2646/* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 },2647/* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } }2648},2649{ // Thumb Opc S Shift Imm2650/* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },2651/* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },2652/* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },2653/* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },2654/* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },2655/* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }2656}2657},2658{ // Single instruction.2659{ // ARM Opc S Shift Imm2660/* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },2661/* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } },2662/* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 },2663/* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } },2664/* 16 bit 
sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 },2665/* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } }2666},2667{ // Thumb Opc S Shift Imm2668/* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },2669/* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } },2670/* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 },2671/* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },2672/* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 },2673/* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } }2674}2675}2676};26772678unsigned SrcBits = SrcVT.getSizeInBits();2679unsigned DestBits = DestVT.getSizeInBits();2680(void) DestBits;2681assert((SrcBits < DestBits) && "can only extend to larger types");2682assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&2683"other sizes unimplemented");2684assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&2685"other sizes unimplemented");26862687bool hasV6Ops = Subtarget->hasV6Ops();2688unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2}2689assert((Bitness < 3) && "sanity-check table bounds");26902691bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];2692const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];2693const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];2694unsigned Opc = ITP->Opc;2695assert(ARM::KILL != Opc && "Invalid table entry");2696unsigned hasS = ITP->hasS;2697ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;2698assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&2699"only MOVsi has shift operand addressing mode");2700unsigned Imm = ITP->Imm;27012702// 16-bit Thumb instructions always set CPSR (unless they're in an IT block).2703bool setsCPSR = &ARM::tGPRRegClass == RC;2704unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;2705unsigned ResultReg;2706// MOVsi encodes shift and immediate in shift operand addressing mode.2707// The following condition has the same value when emitting two2708// instruction sequences: both are shifts.2709bool ImmIsSO = (Shift != ARM_AM::no_shift);27102711// Either one or two instructions are emitted.2712// They're always of the form:2713// dst = in OP imm2714// CPSR is set only by 16-bit Thumb instructions.2715// Predicate, if any, is AL.2716// S bit, if available, is always 0.2717// When two are emitted the first's result will feed as the second's input,2718// that value is then dead.2719unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;2720for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {2721ResultReg = createResultReg(RC);2722bool isLsl = (0 == Instr) && !isSingleInstr;2723unsigned Opcode = isLsl ? LSLOpc : Opc;2724ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;2725unsigned ImmEnc = ImmIsSO ? 
ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;2726bool isKill = 1 == Instr;2727MachineInstrBuilder MIB = BuildMI(2728*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opcode), ResultReg);2729if (setsCPSR)2730MIB.addReg(ARM::CPSR, RegState::Define);2731SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);2732MIB.addReg(SrcReg, isKill * RegState::Kill)2733.addImm(ImmEnc)2734.add(predOps(ARMCC::AL));2735if (hasS)2736MIB.add(condCodeOp());2737// Second instruction consumes the first's result.2738SrcReg = ResultReg;2739}27402741return ResultReg;2742}27432744bool ARMFastISel::SelectIntExt(const Instruction *I) {2745// On ARM, in general, integer casts don't involve legal types; this code2746// handles promotable integers.2747Type *DestTy = I->getType();2748Value *Src = I->getOperand(0);2749Type *SrcTy = Src->getType();27502751bool isZExt = isa<ZExtInst>(I);2752Register SrcReg = getRegForValue(Src);2753if (!SrcReg) return false;27542755EVT SrcEVT, DestEVT;2756SrcEVT = TLI.getValueType(DL, SrcTy, true);2757DestEVT = TLI.getValueType(DL, DestTy, true);2758if (!SrcEVT.isSimple()) return false;2759if (!DestEVT.isSimple()) return false;27602761MVT SrcVT = SrcEVT.getSimpleVT();2762MVT DestVT = DestEVT.getSimpleVT();2763unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);2764if (ResultReg == 0) return false;2765updateValueMap(I, ResultReg);2766return true;2767}27682769bool ARMFastISel::SelectShift(const Instruction *I,2770ARM_AM::ShiftOpc ShiftTy) {2771// We handle thumb2 mode by target independent selector2772// or SelectionDAG ISel.2773if (isThumb2)2774return false;27752776// Only handle i32 now.2777EVT DestVT = TLI.getValueType(DL, I->getType(), true);2778if (DestVT != MVT::i32)2779return false;27802781unsigned Opc = ARM::MOVsr;2782unsigned ShiftImm;2783Value *Src2Value = I->getOperand(1);2784if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {2785ShiftImm = CI->getZExtValue();27862787// Fall back to selection DAG isel if the shift amount2788// is zero or greater than the width of the value type.2789if (ShiftImm == 0 || ShiftImm >=32)2790return false;27912792Opc = ARM::MOVsi;2793}27942795Value *Src1Value = I->getOperand(0);2796Register Reg1 = getRegForValue(Src1Value);2797if (Reg1 == 0) return false;27982799unsigned Reg2 = 0;2800if (Opc == ARM::MOVsr) {2801Reg2 = getRegForValue(Src2Value);2802if (Reg2 == 0) return false;2803}28042805Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);2806if(ResultReg == 0) return false;28072808MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,2809TII.get(Opc), ResultReg)2810.addReg(Reg1);28112812if (Opc == ARM::MOVsi)2813MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));2814else if (Opc == ARM::MOVsr) {2815MIB.addReg(Reg2);2816MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));2817}28182819AddOptionalDefs(MIB);2820updateValueMap(I, ResultReg);2821return true;2822}28232824// TODO: SoftFP support.2825bool ARMFastISel::fastSelectInstruction(const Instruction *I) {2826switch (I->getOpcode()) {2827case Instruction::Load:2828return SelectLoad(I);2829case Instruction::Store:2830return SelectStore(I);2831case Instruction::Br:2832return SelectBranch(I);2833case Instruction::IndirectBr:2834return SelectIndirectBr(I);2835case Instruction::ICmp:2836case Instruction::FCmp:2837return SelectCmp(I);2838case Instruction::FPExt:2839return SelectFPExt(I);2840case Instruction::FPTrunc:2841return SelectFPTrunc(I);2842case Instruction::SIToFP:2843return SelectIToFP(I, /*isSigned*/ true);2844case Instruction::UIToFP:2845return 
SelectIToFP(I, /*isSigned*/ false);2846case Instruction::FPToSI:2847return SelectFPToI(I, /*isSigned*/ true);2848case Instruction::FPToUI:2849return SelectFPToI(I, /*isSigned*/ false);2850case Instruction::Add:2851return SelectBinaryIntOp(I, ISD::ADD);2852case Instruction::Or:2853return SelectBinaryIntOp(I, ISD::OR);2854case Instruction::Sub:2855return SelectBinaryIntOp(I, ISD::SUB);2856case Instruction::FAdd:2857return SelectBinaryFPOp(I, ISD::FADD);2858case Instruction::FSub:2859return SelectBinaryFPOp(I, ISD::FSUB);2860case Instruction::FMul:2861return SelectBinaryFPOp(I, ISD::FMUL);2862case Instruction::SDiv:2863return SelectDiv(I, /*isSigned*/ true);2864case Instruction::UDiv:2865return SelectDiv(I, /*isSigned*/ false);2866case Instruction::SRem:2867return SelectRem(I, /*isSigned*/ true);2868case Instruction::URem:2869return SelectRem(I, /*isSigned*/ false);2870case Instruction::Call:2871if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))2872return SelectIntrinsicCall(*II);2873return SelectCall(I);2874case Instruction::Select:2875return SelectSelect(I);2876case Instruction::Ret:2877return SelectRet(I);2878case Instruction::Trunc:2879return SelectTrunc(I);2880case Instruction::ZExt:2881case Instruction::SExt:2882return SelectIntExt(I);2883case Instruction::Shl:2884return SelectShift(I, ARM_AM::lsl);2885case Instruction::LShr:2886return SelectShift(I, ARM_AM::lsr);2887case Instruction::AShr:2888return SelectShift(I, ARM_AM::asr);2889default: break;2890}2891return false;2892}28932894// This table describes sign- and zero-extend instructions which can be2895// folded into a preceding load. All of these extends have an immediate2896// (sometimes a mask and sometimes a shift) that's applied after2897// extension.2898static const struct FoldableLoadExtendsStruct {2899uint16_t Opc[2]; // ARM, Thumb.2900uint8_t ExpectedImm;2901uint8_t isZExt : 1;2902uint8_t ExpectedVT : 7;2903} FoldableLoadExtends[] = {2904{ { ARM::SXTH, ARM::t2SXTH }, 0, 0, MVT::i16 },2905{ { ARM::UXTH, ARM::t2UXTH }, 0, 1, MVT::i16 },2906{ { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8 },2907{ { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },2908{ { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }2909};29102911/// The specified machine instr operand is a vreg, and that2912/// vreg is being provided by the specified load instruction. 
If possible,2913/// try to fold the load as an operand to the instruction, returning true if2914/// successful.2915bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,2916const LoadInst *LI) {2917// Verify we have a legal type before going any further.2918MVT VT;2919if (!isLoadTypeLegal(LI->getType(), VT))2920return false;29212922// Combine load followed by zero- or sign-extend.2923// ldrb r1, [r0] ldrb r1, [r0]2924// uxtb r2, r1 =>2925// mov r3, r2 mov r3, r12926if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())2927return false;2928const uint64_t Imm = MI->getOperand(2).getImm();29292930bool Found = false;2931bool isZExt;2932for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {2933if (FLE.Opc[isThumb2] == MI->getOpcode() &&2934(uint64_t)FLE.ExpectedImm == Imm &&2935MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {2936Found = true;2937isZExt = FLE.isZExt;2938}2939}2940if (!Found) return false;29412942// See if we can handle this address.2943Address Addr;2944if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;29452946Register ResultReg = MI->getOperand(0).getReg();2947if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlign(), isZExt, false))2948return false;2949MachineBasicBlock::iterator I(MI);2950removeDeadCode(I, std::next(I));2951return true;2952}29532954unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {2955bool UseGOT_PREL = !GV->isDSOLocal();2956LLVMContext *Context = &MF->getFunction().getContext();2957unsigned ARMPCLabelIndex = AFI->createPICLabelUId();2958unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;2959ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(2960GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,2961UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,2962/*AddCurrentAddress=*/UseGOT_PREL);29632964Align ConstAlign =2965MF->getDataLayout().getPrefTypeAlign(PointerType::get(*Context, 0));2966unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);2967MachineMemOperand *CPMMO =2968MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),2969MachineMemOperand::MOLoad, 4, Align(4));29702971Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);2972unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;2973MachineInstrBuilder MIB =2974BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), TempReg)2975.addConstantPoolIndex(Idx)2976.addMemOperand(CPMMO);2977if (Opc == ARM::LDRcp)2978MIB.addImm(0);2979MIB.add(predOps(ARMCC::AL));29802981// Fix the address by adding pc.2982Register DestReg = createResultReg(TLI.getRegClassFor(VT));2983Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? 
ARM::PICLDR2984: ARM::PICADD;2985DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);2986MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)2987.addReg(TempReg)2988.addImm(ARMPCLabelIndex);29892990if (!Subtarget->isThumb())2991MIB.add(predOps(ARMCC::AL));29922993if (UseGOT_PREL && Subtarget->isThumb()) {2994Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));2995MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,2996TII.get(ARM::t2LDRi12), NewDestReg)2997.addReg(DestReg)2998.addImm(0);2999DestReg = NewDestReg;3000AddOptionalDefs(MIB);3001}3002return DestReg;3003}30043005bool ARMFastISel::fastLowerArguments() {3006if (!FuncInfo.CanLowerReturn)3007return false;30083009const Function *F = FuncInfo.Fn;3010if (F->isVarArg())3011return false;30123013CallingConv::ID CC = F->getCallingConv();3014switch (CC) {3015default:3016return false;3017case CallingConv::Fast:3018case CallingConv::C:3019case CallingConv::ARM_AAPCS_VFP:3020case CallingConv::ARM_AAPCS:3021case CallingConv::ARM_APCS:3022case CallingConv::Swift:3023case CallingConv::SwiftTail:3024break;3025}30263027// Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments3028// which are passed in r0 - r3.3029for (const Argument &Arg : F->args()) {3030if (Arg.getArgNo() >= 4)3031return false;30323033if (Arg.hasAttribute(Attribute::InReg) ||3034Arg.hasAttribute(Attribute::StructRet) ||3035Arg.hasAttribute(Attribute::SwiftSelf) ||3036Arg.hasAttribute(Attribute::SwiftError) ||3037Arg.hasAttribute(Attribute::ByVal))3038return false;30393040Type *ArgTy = Arg.getType();3041if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())3042return false;30433044EVT ArgVT = TLI.getValueType(DL, ArgTy);3045if (!ArgVT.isSimple()) return false;3046switch (ArgVT.getSimpleVT().SimpleTy) {3047case MVT::i8:3048case MVT::i16:3049case MVT::i32:3050break;3051default:3052return false;3053}3054}30553056static const MCPhysReg GPRArgRegs[] = {3057ARM::R0, ARM::R1, ARM::R2, ARM::R33058};30593060const TargetRegisterClass *RC = &ARM::rGPRRegClass;3061for (const Argument &Arg : F->args()) {3062unsigned ArgNo = Arg.getArgNo();3063unsigned SrcReg = GPRArgRegs[ArgNo];3064Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);3065// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.3066// Without this, EmitLiveInCopies may eliminate the livein if its only3067// use is a bitcast (which isn't turned into an instruction).3068Register ResultReg = createResultReg(RC);3069BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3070TII.get(TargetOpcode::COPY),3071ResultReg).addReg(DstReg, getKillRegState(true));3072updateValueMap(&Arg, ResultReg);3073}30743075return true;3076}30773078namespace llvm {30793080FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,3081const TargetLibraryInfo *libInfo) {3082if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())3083return new ARMFastISel(funcInfo, libInfo);30843085return nullptr;3086}30873088} // end namespace llvm308930903091