Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
35268 views
//===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines the AArch64-specific support for the FastISel class. Some9// of the target-specific code is generated by tablegen in the file10// AArch64GenFastISel.inc, which is #included here.11//12//===----------------------------------------------------------------------===//1314#include "AArch64.h"15#include "AArch64CallingConvention.h"16#include "AArch64MachineFunctionInfo.h"17#include "AArch64RegisterInfo.h"18#include "AArch64Subtarget.h"19#include "MCTargetDesc/AArch64AddressingModes.h"20#include "Utils/AArch64BaseInfo.h"21#include "llvm/ADT/APFloat.h"22#include "llvm/ADT/APInt.h"23#include "llvm/ADT/DenseMap.h"24#include "llvm/ADT/SmallVector.h"25#include "llvm/Analysis/BranchProbabilityInfo.h"26#include "llvm/CodeGen/CallingConvLower.h"27#include "llvm/CodeGen/FastISel.h"28#include "llvm/CodeGen/FunctionLoweringInfo.h"29#include "llvm/CodeGen/ISDOpcodes.h"30#include "llvm/CodeGen/MachineBasicBlock.h"31#include "llvm/CodeGen/MachineConstantPool.h"32#include "llvm/CodeGen/MachineFrameInfo.h"33#include "llvm/CodeGen/MachineInstr.h"34#include "llvm/CodeGen/MachineInstrBuilder.h"35#include "llvm/CodeGen/MachineMemOperand.h"36#include "llvm/CodeGen/MachineRegisterInfo.h"37#include "llvm/CodeGen/RuntimeLibcallUtil.h"38#include "llvm/CodeGen/ValueTypes.h"39#include "llvm/CodeGenTypes/MachineValueType.h"40#include "llvm/IR/Argument.h"41#include "llvm/IR/Attributes.h"42#include "llvm/IR/BasicBlock.h"43#include "llvm/IR/CallingConv.h"44#include "llvm/IR/Constant.h"45#include "llvm/IR/Constants.h"46#include "llvm/IR/DataLayout.h"47#include "llvm/IR/DerivedTypes.h"48#include "llvm/IR/Function.h"49#include "llvm/IR/GetElementPtrTypeIterator.h"50#include "llvm/IR/GlobalValue.h"51#include "llvm/IR/InstrTypes.h"52#include "llvm/IR/Instruction.h"53#include "llvm/IR/Instructions.h"54#include "llvm/IR/IntrinsicInst.h"55#include "llvm/IR/Intrinsics.h"56#include "llvm/IR/IntrinsicsAArch64.h"57#include "llvm/IR/Module.h"58#include "llvm/IR/Operator.h"59#include "llvm/IR/Type.h"60#include "llvm/IR/User.h"61#include "llvm/IR/Value.h"62#include "llvm/MC/MCInstrDesc.h"63#include "llvm/MC/MCRegisterInfo.h"64#include "llvm/MC/MCSymbol.h"65#include "llvm/Support/AtomicOrdering.h"66#include "llvm/Support/Casting.h"67#include "llvm/Support/CodeGen.h"68#include "llvm/Support/Compiler.h"69#include "llvm/Support/ErrorHandling.h"70#include "llvm/Support/MathExtras.h"71#include <algorithm>72#include <cassert>73#include <cstdint>74#include <iterator>75#include <utility>7677using namespace llvm;7879namespace {8081class AArch64FastISel final : public FastISel {82class Address {83public:84using BaseKind = enum {85RegBase,86FrameIndexBase87};8889private:90BaseKind Kind = RegBase;91AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;92union {93unsigned Reg;94int FI;95} Base;96unsigned OffsetReg = 0;97unsigned Shift = 0;98int64_t Offset = 0;99const GlobalValue *GV = nullptr;100101public:102Address() { Base.Reg = 0; }103104void setKind(BaseKind K) { Kind = K; }105BaseKind getKind() const { return Kind; }106void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }107AArch64_AM::ShiftExtendType getExtendType() const { return 
ExtType; }108bool isRegBase() const { return Kind == RegBase; }109bool isFIBase() const { return Kind == FrameIndexBase; }110111void setReg(unsigned Reg) {112assert(isRegBase() && "Invalid base register access!");113Base.Reg = Reg;114}115116unsigned getReg() const {117assert(isRegBase() && "Invalid base register access!");118return Base.Reg;119}120121void setOffsetReg(unsigned Reg) {122OffsetReg = Reg;123}124125unsigned getOffsetReg() const {126return OffsetReg;127}128129void setFI(unsigned FI) {130assert(isFIBase() && "Invalid base frame index access!");131Base.FI = FI;132}133134unsigned getFI() const {135assert(isFIBase() && "Invalid base frame index access!");136return Base.FI;137}138139void setOffset(int64_t O) { Offset = O; }140int64_t getOffset() { return Offset; }141void setShift(unsigned S) { Shift = S; }142unsigned getShift() { return Shift; }143144void setGlobalValue(const GlobalValue *G) { GV = G; }145const GlobalValue *getGlobalValue() { return GV; }146};147148/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can149/// make the right decision when generating code for different targets.150const AArch64Subtarget *Subtarget;151LLVMContext *Context;152153bool fastLowerArguments() override;154bool fastLowerCall(CallLoweringInfo &CLI) override;155bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;156157private:158// Selection routines.159bool selectAddSub(const Instruction *I);160bool selectLogicalOp(const Instruction *I);161bool selectLoad(const Instruction *I);162bool selectStore(const Instruction *I);163bool selectBranch(const Instruction *I);164bool selectIndirectBr(const Instruction *I);165bool selectCmp(const Instruction *I);166bool selectSelect(const Instruction *I);167bool selectFPExt(const Instruction *I);168bool selectFPTrunc(const Instruction *I);169bool selectFPToInt(const Instruction *I, bool Signed);170bool selectIntToFP(const Instruction *I, bool Signed);171bool selectRem(const Instruction *I, unsigned ISDOpcode);172bool selectRet(const Instruction *I);173bool selectTrunc(const Instruction *I);174bool selectIntExt(const Instruction *I);175bool selectMul(const Instruction *I);176bool selectShift(const Instruction *I);177bool selectBitCast(const Instruction *I);178bool selectFRem(const Instruction *I);179bool selectSDiv(const Instruction *I);180bool selectGetElementPtr(const Instruction *I);181bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);182183// Utility helper routines.184bool isTypeLegal(Type *Ty, MVT &VT);185bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);186bool isValueAvailable(const Value *V) const;187bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);188bool computeCallAddress(const Value *V, Address &Addr);189bool simplifyAddress(Address &Addr, MVT VT);190void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,191MachineMemOperand::Flags Flags,192unsigned ScaleFactor, MachineMemOperand *MMO);193bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);194bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,195MaybeAlign Alignment);196bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,197const Value *Cond);198bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);199bool optimizeSelect(const SelectInst *SI);200unsigned getRegForGEPIndex(const Value *Idx);201202// Emit helper routines.203unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,204const Value *RHS, bool SetFlags = false,205bool WantResult = true, bool 
IsZExt = false);206unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,207unsigned RHSReg, bool SetFlags = false,208bool WantResult = true);209unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,210uint64_t Imm, bool SetFlags = false,211bool WantResult = true);212unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,213unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,214uint64_t ShiftImm, bool SetFlags = false,215bool WantResult = true);216unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,217unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,218uint64_t ShiftImm, bool SetFlags = false,219bool WantResult = true);220221// Emit functions.222bool emitCompareAndBranch(const BranchInst *BI);223bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);224bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);225bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);226bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);227unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,228MachineMemOperand *MMO = nullptr);229bool emitStore(MVT VT, unsigned SrcReg, Address Addr,230MachineMemOperand *MMO = nullptr);231bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,232MachineMemOperand *MMO = nullptr);233unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);234unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);235unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,236bool SetFlags = false, bool WantResult = true,237bool IsZExt = false);238unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);239unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,240bool SetFlags = false, bool WantResult = true,241bool IsZExt = false);242unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,243bool WantResult = true);244unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,245AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,246bool WantResult = true);247unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,248const Value *RHS);249unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,250uint64_t Imm);251unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,252unsigned RHSReg, uint64_t ShiftImm);253unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);254unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);255unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);256unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);257unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);258unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,259bool IsZExt = true);260unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);261unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,262bool IsZExt = true);263unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);264unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,265bool IsZExt = false);266267unsigned materializeInt(const ConstantInt *CI, MVT VT);268unsigned materializeFP(const ConstantFP *CFP, MVT VT);269unsigned materializeGV(const GlobalValue *GV);270271// Call handling routines.272private:273CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;274bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,275unsigned &NumBytes);276bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);277278public:279// 
Backend specific FastISel code.280unsigned fastMaterializeAlloca(const AllocaInst *AI) override;281unsigned fastMaterializeConstant(const Constant *C) override;282unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;283284explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,285const TargetLibraryInfo *LibInfo)286: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {287Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();288Context = &FuncInfo.Fn->getContext();289}290291bool fastSelectInstruction(const Instruction *I) override;292293#include "AArch64GenFastISel.inc"294};295296} // end anonymous namespace297298/// Check if the sign-/zero-extend will be a noop.299static bool isIntExtFree(const Instruction *I) {300assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&301"Unexpected integer extend instruction.");302assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&303"Unexpected value type.");304bool IsZExt = isa<ZExtInst>(I);305306if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))307if (LI->hasOneUse())308return true;309310if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))311if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))312return true;313314return false;315}316317/// Determine the implicit scale factor that is applied by a memory318/// operation for a given value type.319static unsigned getImplicitScaleFactor(MVT VT) {320switch (VT.SimpleTy) {321default:322return 0; // invalid323case MVT::i1: // fall-through324case MVT::i8:325return 1;326case MVT::i16:327return 2;328case MVT::i32: // fall-through329case MVT::f32:330return 4;331case MVT::i64: // fall-through332case MVT::f64:333return 8;334}335}336337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {338if (CC == CallingConv::GHC)339return CC_AArch64_GHC;340if (CC == CallingConv::CFGuard_Check)341return CC_AArch64_Win64_CFGuard_Check;342if (Subtarget->isTargetDarwin())343return CC_AArch64_DarwinPCS;344if (Subtarget->isTargetWindows())345return CC_AArch64_Win64PCS;346return CC_AArch64_AAPCS;347}348349unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {350assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&351"Alloca should always return a pointer.");352353// Don't handle dynamic allocas.354if (!FuncInfo.StaticAllocaMap.count(AI))355return 0;356357DenseMap<const AllocaInst *, int>::iterator SI =358FuncInfo.StaticAllocaMap.find(AI);359360if (SI != FuncInfo.StaticAllocaMap.end()) {361Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);362BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),363ResultReg)364.addFrameIndex(SI->second)365.addImm(0)366.addImm(0);367return ResultReg;368}369370return 0;371}372373unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {374if (VT > MVT::i64)375return 0;376377if (!CI->isZero())378return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());379380// Create a copy from the zero register to materialize a "0" value.381const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass382: &AArch64::GPR32RegClass;383unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR;384Register ResultReg = createResultReg(RC);385BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),386ResultReg).addReg(ZeroReg, getKillRegState(true));387return ResultReg;388}389390unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {391// Positive zero (+0.0) has to be materialized with a fmov from the zero392// register, because the immediate version of fmov cannot encode zero.393if (CFP->isNullValue())394return fastMaterializeFloatZero(CFP);395396if (VT != MVT::f32 && VT != MVT::f64)397return 0;398399const APFloat Val = CFP->getValueAPF();400bool Is64Bit = (VT == MVT::f64);401// This checks to see if we can use FMOV instructions to materialize402// a constant, otherwise we have to materialize via the constant pool.403int Imm =404Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);405if (Imm != -1) {406unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;407return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);408}409410// For the large code model materialize the FP constant in code.411if (TM.getCodeModel() == CodeModel::Large) {412unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;413const TargetRegisterClass *RC = Is64Bit ?414&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;415416Register TmpReg = createResultReg(RC);417BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)418.addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());419420Register ResultReg = createResultReg(TLI.getRegClassFor(VT));421BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,422TII.get(TargetOpcode::COPY), ResultReg)423.addReg(TmpReg, getKillRegState(true));424425return ResultReg;426}427428// Materialize via constant pool. MachineConstantPool wants an explicit429// alignment.430Align Alignment = DL.getPrefTypeAlign(CFP->getType());431432unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);433Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);434BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),435ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);436437unsigned Opc = Is64Bit ? 
AArch64::LDRDui : AArch64::LDRSui;438Register ResultReg = createResultReg(TLI.getRegClassFor(VT));439BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)440.addReg(ADRPReg)441.addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);442return ResultReg;443}444445unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {446// We can't handle thread-local variables quickly yet.447if (GV->isThreadLocal())448return 0;449450// MachO still uses GOT for large code-model accesses, but ELF requires451// movz/movk sequences, which FastISel doesn't handle yet.452if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())453return 0;454455unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);456457EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);458if (!DestEVT.isSimple())459return 0;460461Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);462unsigned ResultReg;463464if (OpFlags & AArch64II::MO_GOT) {465// ADRP + LDRX466BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),467ADRPReg)468.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);469470unsigned LdrOpc;471if (Subtarget->isTargetILP32()) {472ResultReg = createResultReg(&AArch64::GPR32RegClass);473LdrOpc = AArch64::LDRWui;474} else {475ResultReg = createResultReg(&AArch64::GPR64RegClass);476LdrOpc = AArch64::LDRXui;477}478BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),479ResultReg)480.addReg(ADRPReg)481.addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |482AArch64II::MO_NC | OpFlags);483if (!Subtarget->isTargetILP32())484return ResultReg;485486// LDRWui produces a 32-bit register, but pointers in-register are 64-bits487// so we must extend the result on ILP32.488Register Result64 = createResultReg(&AArch64::GPR64RegClass);489BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,490TII.get(TargetOpcode::SUBREG_TO_REG))491.addDef(Result64)492.addImm(0)493.addReg(ResultReg, RegState::Kill)494.addImm(AArch64::sub_32);495return Result64;496} else {497// ADRP + ADDX498BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),499ADRPReg)500.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);501502if (OpFlags & AArch64II::MO_TAGGED) {503// MO_TAGGED on the page indicates a tagged address. Set the tag now.504// We do so by creating a MOVK that sets bits 48-63 of the register to505// (global address + 0x100000000 - PC) >> 48. This assumes that we're in506// the small code model so we can assume a binary size of <= 4GB, which507// makes the untagged PC relative offset positive. The binary must also be508// loaded into address range [0, 2^48). Both of these properties need to509// be ensured at runtime when using tagged addresses.510//511// TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that512// also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands513// are not exactly 1:1 with FastISel so we cannot easily abstract this514// out. 
At some point, it would be nice to find a way to not have this515// duplciate code.516unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);517BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),518DstReg)519.addReg(ADRPReg)520.addGlobalAddress(GV, /*Offset=*/0x100000000,521AArch64II::MO_PREL | AArch64II::MO_G3)522.addImm(48);523ADRPReg = DstReg;524}525526ResultReg = createResultReg(&AArch64::GPR64spRegClass);527BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),528ResultReg)529.addReg(ADRPReg)530.addGlobalAddress(GV, 0,531AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)532.addImm(0);533}534return ResultReg;535}536537unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {538EVT CEVT = TLI.getValueType(DL, C->getType(), true);539540// Only handle simple types.541if (!CEVT.isSimple())542return 0;543MVT VT = CEVT.getSimpleVT();544// arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,545// 'null' pointers need to have a somewhat special treatment.546if (isa<ConstantPointerNull>(C)) {547assert(VT == MVT::i64 && "Expected 64-bit pointers");548return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);549}550551if (const auto *CI = dyn_cast<ConstantInt>(C))552return materializeInt(CI, VT);553else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))554return materializeFP(CFP, VT);555else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))556return materializeGV(GV);557558return 0;559}560561unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {562assert(CFP->isNullValue() &&563"Floating-point constant is not a positive zero.");564MVT VT;565if (!isTypeLegal(CFP->getType(), VT))566return 0;567568if (VT != MVT::f32 && VT != MVT::f64)569return 0;570571bool Is64Bit = (VT == MVT::f64);572unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;573unsigned Opc = Is64Bit ? 
AArch64::FMOVXDr : AArch64::FMOVWSr;574return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);575}576577/// Check if the multiply is by a power-of-2 constant.578static bool isMulPowOf2(const Value *I) {579if (const auto *MI = dyn_cast<MulOperator>(I)) {580if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))581if (C->getValue().isPowerOf2())582return true;583if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))584if (C->getValue().isPowerOf2())585return true;586}587return false;588}589590// Computes the address to get to an object.591bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)592{593const User *U = nullptr;594unsigned Opcode = Instruction::UserOp1;595if (const Instruction *I = dyn_cast<Instruction>(Obj)) {596// Don't walk into other basic blocks unless the object is an alloca from597// another block, otherwise it may not have a virtual register assigned.598if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||599FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {600Opcode = I->getOpcode();601U = I;602}603} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {604Opcode = C->getOpcode();605U = C;606}607608if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))609if (Ty->getAddressSpace() > 255)610// Fast instruction selection doesn't support the special611// address spaces.612return false;613614switch (Opcode) {615default:616break;617case Instruction::BitCast:618// Look through bitcasts.619return computeAddress(U->getOperand(0), Addr, Ty);620621case Instruction::IntToPtr:622// Look past no-op inttoptrs.623if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==624TLI.getPointerTy(DL))625return computeAddress(U->getOperand(0), Addr, Ty);626break;627628case Instruction::PtrToInt:629// Look past no-op ptrtoints.630if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))631return computeAddress(U->getOperand(0), Addr, Ty);632break;633634case Instruction::GetElementPtr: {635Address SavedAddr = Addr;636uint64_t TmpOffset = Addr.getOffset();637638// Iterate through the GEP folding the constants into offsets where639// we can.640for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);641GTI != E; ++GTI) {642const Value *Op = GTI.getOperand();643if (StructType *STy = GTI.getStructTypeOrNull()) {644const StructLayout *SL = DL.getStructLayout(STy);645unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();646TmpOffset += SL->getElementOffset(Idx);647} else {648uint64_t S = GTI.getSequentialElementStride(DL);649while (true) {650if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {651// Constant-offset addressing.652TmpOffset += CI->getSExtValue() * S;653break;654}655if (canFoldAddIntoGEP(U, Op)) {656// A compatible add with a constant operand. 
Fold the constant.657ConstantInt *CI =658cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));659TmpOffset += CI->getSExtValue() * S;660// Iterate on the other operand.661Op = cast<AddOperator>(Op)->getOperand(0);662continue;663}664// Unsupported665goto unsupported_gep;666}667}668}669670// Try to grab the base operand now.671Addr.setOffset(TmpOffset);672if (computeAddress(U->getOperand(0), Addr, Ty))673return true;674675// We failed, restore everything and try the other options.676Addr = SavedAddr;677678unsupported_gep:679break;680}681case Instruction::Alloca: {682const AllocaInst *AI = cast<AllocaInst>(Obj);683DenseMap<const AllocaInst *, int>::iterator SI =684FuncInfo.StaticAllocaMap.find(AI);685if (SI != FuncInfo.StaticAllocaMap.end()) {686Addr.setKind(Address::FrameIndexBase);687Addr.setFI(SI->second);688return true;689}690break;691}692case Instruction::Add: {693// Adds of constants are common and easy enough.694const Value *LHS = U->getOperand(0);695const Value *RHS = U->getOperand(1);696697if (isa<ConstantInt>(LHS))698std::swap(LHS, RHS);699700if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {701Addr.setOffset(Addr.getOffset() + CI->getSExtValue());702return computeAddress(LHS, Addr, Ty);703}704705Address Backup = Addr;706if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))707return true;708Addr = Backup;709710break;711}712case Instruction::Sub: {713// Subs of constants are common and easy enough.714const Value *LHS = U->getOperand(0);715const Value *RHS = U->getOperand(1);716717if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {718Addr.setOffset(Addr.getOffset() - CI->getSExtValue());719return computeAddress(LHS, Addr, Ty);720}721break;722}723case Instruction::Shl: {724if (Addr.getOffsetReg())725break;726727const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));728if (!CI)729break;730731unsigned Val = CI->getZExtValue();732if (Val < 1 || Val > 3)733break;734735uint64_t NumBytes = 0;736if (Ty && Ty->isSized()) {737uint64_t NumBits = DL.getTypeSizeInBits(Ty);738NumBytes = NumBits / 8;739if (!isPowerOf2_64(NumBits))740NumBytes = 0;741}742743if (NumBytes != (1ULL << Val))744break;745746Addr.setShift(Val);747Addr.setExtendType(AArch64_AM::LSL);748749const Value *Src = U->getOperand(0);750if (const auto *I = dyn_cast<Instruction>(Src)) {751if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {752// Fold the zext or sext when it won't become a noop.753if (const auto *ZE = dyn_cast<ZExtInst>(I)) {754if (!isIntExtFree(ZE) &&755ZE->getOperand(0)->getType()->isIntegerTy(32)) {756Addr.setExtendType(AArch64_AM::UXTW);757Src = ZE->getOperand(0);758}759} else if (const auto *SE = dyn_cast<SExtInst>(I)) {760if (!isIntExtFree(SE) &&761SE->getOperand(0)->getType()->isIntegerTy(32)) {762Addr.setExtendType(AArch64_AM::SXTW);763Src = SE->getOperand(0);764}765}766}767}768769if (const auto *AI = dyn_cast<BinaryOperator>(Src))770if (AI->getOpcode() == Instruction::And) {771const Value *LHS = AI->getOperand(0);772const Value *RHS = AI->getOperand(1);773774if (const auto *C = dyn_cast<ConstantInt>(LHS))775if (C->getValue() == 0xffffffff)776std::swap(LHS, RHS);777778if (const auto *C = dyn_cast<ConstantInt>(RHS))779if (C->getValue() == 0xffffffff) {780Addr.setExtendType(AArch64_AM::UXTW);781Register Reg = getRegForValue(LHS);782if (!Reg)783return false;784Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);785Addr.setOffsetReg(Reg);786return true;787}788}789790Register Reg = getRegForValue(Src);791if (!Reg)792return false;793Addr.setOffsetReg(Reg);794return 
true;795}796case Instruction::Mul: {797if (Addr.getOffsetReg())798break;799800if (!isMulPowOf2(U))801break;802803const Value *LHS = U->getOperand(0);804const Value *RHS = U->getOperand(1);805806// Canonicalize power-of-2 value to the RHS.807if (const auto *C = dyn_cast<ConstantInt>(LHS))808if (C->getValue().isPowerOf2())809std::swap(LHS, RHS);810811assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");812const auto *C = cast<ConstantInt>(RHS);813unsigned Val = C->getValue().logBase2();814if (Val < 1 || Val > 3)815break;816817uint64_t NumBytes = 0;818if (Ty && Ty->isSized()) {819uint64_t NumBits = DL.getTypeSizeInBits(Ty);820NumBytes = NumBits / 8;821if (!isPowerOf2_64(NumBits))822NumBytes = 0;823}824825if (NumBytes != (1ULL << Val))826break;827828Addr.setShift(Val);829Addr.setExtendType(AArch64_AM::LSL);830831const Value *Src = LHS;832if (const auto *I = dyn_cast<Instruction>(Src)) {833if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {834// Fold the zext or sext when it won't become a noop.835if (const auto *ZE = dyn_cast<ZExtInst>(I)) {836if (!isIntExtFree(ZE) &&837ZE->getOperand(0)->getType()->isIntegerTy(32)) {838Addr.setExtendType(AArch64_AM::UXTW);839Src = ZE->getOperand(0);840}841} else if (const auto *SE = dyn_cast<SExtInst>(I)) {842if (!isIntExtFree(SE) &&843SE->getOperand(0)->getType()->isIntegerTy(32)) {844Addr.setExtendType(AArch64_AM::SXTW);845Src = SE->getOperand(0);846}847}848}849}850851Register Reg = getRegForValue(Src);852if (!Reg)853return false;854Addr.setOffsetReg(Reg);855return true;856}857case Instruction::And: {858if (Addr.getOffsetReg())859break;860861if (!Ty || DL.getTypeSizeInBits(Ty) != 8)862break;863864const Value *LHS = U->getOperand(0);865const Value *RHS = U->getOperand(1);866867if (const auto *C = dyn_cast<ConstantInt>(LHS))868if (C->getValue() == 0xffffffff)869std::swap(LHS, RHS);870871if (const auto *C = dyn_cast<ConstantInt>(RHS))872if (C->getValue() == 0xffffffff) {873Addr.setShift(0);874Addr.setExtendType(AArch64_AM::LSL);875Addr.setExtendType(AArch64_AM::UXTW);876877Register Reg = getRegForValue(LHS);878if (!Reg)879return false;880Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);881Addr.setOffsetReg(Reg);882return true;883}884break;885}886case Instruction::SExt:887case Instruction::ZExt: {888if (!Addr.getReg() || Addr.getOffsetReg())889break;890891const Value *Src = nullptr;892// Fold the zext or sext when it won't become a noop.893if (const auto *ZE = dyn_cast<ZExtInst>(U)) {894if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {895Addr.setExtendType(AArch64_AM::UXTW);896Src = ZE->getOperand(0);897}898} else if (const auto *SE = dyn_cast<SExtInst>(U)) {899if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {900Addr.setExtendType(AArch64_AM::SXTW);901Src = SE->getOperand(0);902}903}904905if (!Src)906break;907908Addr.setShift(0);909Register Reg = getRegForValue(Src);910if (!Reg)911return false;912Addr.setOffsetReg(Reg);913return true;914}915} // end switch916917if (Addr.isRegBase() && !Addr.getReg()) {918Register Reg = getRegForValue(Obj);919if (!Reg)920return false;921Addr.setReg(Reg);922return true;923}924925if (!Addr.getOffsetReg()) {926Register Reg = getRegForValue(Obj);927if (!Reg)928return false;929Addr.setOffsetReg(Reg);930return true;931}932933return false;934}935936bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {937const User *U = nullptr;938unsigned Opcode = Instruction::UserOp1;939bool InMBB = true;940941if (const auto *I = dyn_cast<Instruction>(V)) 
{942Opcode = I->getOpcode();943U = I;944InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();945} else if (const auto *C = dyn_cast<ConstantExpr>(V)) {946Opcode = C->getOpcode();947U = C;948}949950switch (Opcode) {951default: break;952case Instruction::BitCast:953// Look past bitcasts if its operand is in the same BB.954if (InMBB)955return computeCallAddress(U->getOperand(0), Addr);956break;957case Instruction::IntToPtr:958// Look past no-op inttoptrs if its operand is in the same BB.959if (InMBB &&960TLI.getValueType(DL, U->getOperand(0)->getType()) ==961TLI.getPointerTy(DL))962return computeCallAddress(U->getOperand(0), Addr);963break;964case Instruction::PtrToInt:965// Look past no-op ptrtoints if its operand is in the same BB.966if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))967return computeCallAddress(U->getOperand(0), Addr);968break;969}970971if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {972Addr.setGlobalValue(GV);973return true;974}975976// If all else fails, try to materialize the value in a register.977if (!Addr.getGlobalValue()) {978Addr.setReg(getRegForValue(V));979return Addr.getReg() != 0;980}981982return false;983}984985bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {986EVT evt = TLI.getValueType(DL, Ty, true);987988if (Subtarget->isTargetILP32() && Ty->isPointerTy())989return false;990991// Only handle simple types.992if (evt == MVT::Other || !evt.isSimple())993return false;994VT = evt.getSimpleVT();995996// This is a legal type, but it's not something we handle in fast-isel.997if (VT == MVT::f128)998return false;9991000// Handle all other legal types, i.e. a register that will directly hold this1001// value.1002return TLI.isTypeLegal(VT);1003}10041005/// Determine if the value type is supported by FastISel.1006///1007/// FastISel for AArch64 can handle more value types than are legal. This adds1008/// simple value type such as i1, i8, and i16.1009bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {1010if (Ty->isVectorTy() && !IsVectorAllowed)1011return false;10121013if (isTypeLegal(Ty, VT))1014return true;10151016// If this is a type than can be sign or zero-extended to a basic operation1017// go ahead and accept it now.1018if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)1019return true;10201021return false;1022}10231024bool AArch64FastISel::isValueAvailable(const Value *V) const {1025if (!isa<Instruction>(V))1026return true;10271028const auto *I = cast<Instruction>(V);1029return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;1030}10311032bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {1033if (Subtarget->isTargetILP32())1034return false;10351036unsigned ScaleFactor = getImplicitScaleFactor(VT);1037if (!ScaleFactor)1038return false;10391040bool ImmediateOffsetNeedsLowering = false;1041bool RegisterOffsetNeedsLowering = false;1042int64_t Offset = Addr.getOffset();1043if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))1044ImmediateOffsetNeedsLowering = true;1045else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&1046!isUInt<12>(Offset / ScaleFactor))1047ImmediateOffsetNeedsLowering = true;10481049// Cannot encode an offset register and an immediate offset in the same1050// instruction. 
Fold the immediate offset into the load/store instruction and1051// emit an additional add to take care of the offset register.1052if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())1053RegisterOffsetNeedsLowering = true;10541055// Cannot encode zero register as base.1056if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())1057RegisterOffsetNeedsLowering = true;10581059// If this is a stack pointer and the offset needs to be simplified then put1060// the alloca address into a register, set the base type back to register and1061// continue. This should almost never happen.1062if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())1063{1064Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);1065BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),1066ResultReg)1067.addFrameIndex(Addr.getFI())1068.addImm(0)1069.addImm(0);1070Addr.setKind(Address::RegBase);1071Addr.setReg(ResultReg);1072}10731074if (RegisterOffsetNeedsLowering) {1075unsigned ResultReg = 0;1076if (Addr.getReg()) {1077if (Addr.getExtendType() == AArch64_AM::SXTW ||1078Addr.getExtendType() == AArch64_AM::UXTW )1079ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),1080Addr.getOffsetReg(), Addr.getExtendType(),1081Addr.getShift());1082else1083ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),1084Addr.getOffsetReg(), AArch64_AM::LSL,1085Addr.getShift());1086} else {1087if (Addr.getExtendType() == AArch64_AM::UXTW)1088ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),1089Addr.getShift(), /*IsZExt=*/true);1090else if (Addr.getExtendType() == AArch64_AM::SXTW)1091ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),1092Addr.getShift(), /*IsZExt=*/false);1093else1094ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),1095Addr.getShift());1096}1097if (!ResultReg)1098return false;10991100Addr.setReg(ResultReg);1101Addr.setOffsetReg(0);1102Addr.setShift(0);1103Addr.setExtendType(AArch64_AM::InvalidShiftExtend);1104}11051106// Since the offset is too large for the load/store instruction get the1107// reg+offset into a register.1108if (ImmediateOffsetNeedsLowering) {1109unsigned ResultReg;1110if (Addr.getReg())1111// Try to fold the immediate into the add instruction.1112ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);1113else1114ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);11151116if (!ResultReg)1117return false;1118Addr.setReg(ResultReg);1119Addr.setOffset(0);1120}1121return true;1122}11231124void AArch64FastISel::addLoadStoreOperands(Address &Addr,1125const MachineInstrBuilder &MIB,1126MachineMemOperand::Flags Flags,1127unsigned ScaleFactor,1128MachineMemOperand *MMO) {1129int64_t Offset = Addr.getOffset() / ScaleFactor;1130// Frame base works a bit differently. Handle it separately.1131if (Addr.isFIBase()) {1132int FI = Addr.getFI();1133// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size1134// and alignment should be based on the VT.1135MMO = FuncInfo.MF->getMachineMemOperand(1136MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,1137MFI.getObjectSize(FI), MFI.getObjectAlign(FI));1138// Now add the rest of the operands.1139MIB.addFrameIndex(FI).addImm(Offset);1140} else {1141assert(Addr.isRegBase() && "Unexpected address kind.");1142const MCInstrDesc &II = MIB->getDesc();1143unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 
1 : 0;1144Addr.setReg(1145constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));1146Addr.setOffsetReg(1147constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));1148if (Addr.getOffsetReg()) {1149assert(Addr.getOffset() == 0 && "Unexpected offset");1150bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||1151Addr.getExtendType() == AArch64_AM::SXTX;1152MIB.addReg(Addr.getReg());1153MIB.addReg(Addr.getOffsetReg());1154MIB.addImm(IsSigned);1155MIB.addImm(Addr.getShift() != 0);1156} else1157MIB.addReg(Addr.getReg()).addImm(Offset);1158}11591160if (MMO)1161MIB.addMemOperand(MMO);1162}11631164unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,1165const Value *RHS, bool SetFlags,1166bool WantResult, bool IsZExt) {1167AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;1168bool NeedExtend = false;1169switch (RetVT.SimpleTy) {1170default:1171return 0;1172case MVT::i1:1173NeedExtend = true;1174break;1175case MVT::i8:1176NeedExtend = true;1177ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;1178break;1179case MVT::i16:1180NeedExtend = true;1181ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;1182break;1183case MVT::i32: // fall-through1184case MVT::i64:1185break;1186}1187MVT SrcVT = RetVT;1188RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);11891190// Canonicalize immediates to the RHS first.1191if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))1192std::swap(LHS, RHS);11931194// Canonicalize mul by power of 2 to the RHS.1195if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))1196if (isMulPowOf2(LHS))1197std::swap(LHS, RHS);11981199// Canonicalize shift immediate to the RHS.1200if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))1201if (const auto *SI = dyn_cast<BinaryOperator>(LHS))1202if (isa<ConstantInt>(SI->getOperand(1)))1203if (SI->getOpcode() == Instruction::Shl ||1204SI->getOpcode() == Instruction::LShr ||1205SI->getOpcode() == Instruction::AShr )1206std::swap(LHS, RHS);12071208Register LHSReg = getRegForValue(LHS);1209if (!LHSReg)1210return 0;12111212if (NeedExtend)1213LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);12141215unsigned ResultReg = 0;1216if (const auto *C = dyn_cast<ConstantInt>(RHS)) {1217uint64_t Imm = IsZExt ? 
C->getZExtValue() : C->getSExtValue();1218if (C->isNegative())1219ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,1220WantResult);1221else1222ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,1223WantResult);1224} else if (const auto *C = dyn_cast<Constant>(RHS))1225if (C->isNullValue())1226ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);12271228if (ResultReg)1229return ResultReg;12301231// Only extend the RHS within the instruction if there is a valid extend type.1232if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&1233isValueAvailable(RHS)) {1234Register RHSReg = getRegForValue(RHS);1235if (!RHSReg)1236return 0;1237return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,1238SetFlags, WantResult);1239}12401241// Check if the mul can be folded into the instruction.1242if (RHS->hasOneUse() && isValueAvailable(RHS)) {1243if (isMulPowOf2(RHS)) {1244const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);1245const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);12461247if (const auto *C = dyn_cast<ConstantInt>(MulLHS))1248if (C->getValue().isPowerOf2())1249std::swap(MulLHS, MulRHS);12501251assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");1252uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();1253Register RHSReg = getRegForValue(MulLHS);1254if (!RHSReg)1255return 0;1256ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,1257ShiftVal, SetFlags, WantResult);1258if (ResultReg)1259return ResultReg;1260}1261}12621263// Check if the shift can be folded into the instruction.1264if (RHS->hasOneUse() && isValueAvailable(RHS)) {1265if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {1266if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {1267AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;1268switch (SI->getOpcode()) {1269default: break;1270case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;1271case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;1272case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;1273}1274uint64_t ShiftVal = C->getZExtValue();1275if (ShiftType != AArch64_AM::InvalidShiftExtend) {1276Register RHSReg = getRegForValue(SI->getOperand(0));1277if (!RHSReg)1278return 0;1279ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,1280ShiftVal, SetFlags, WantResult);1281if (ResultReg)1282return ResultReg;1283}1284}1285}1286}12871288Register RHSReg = getRegForValue(RHS);1289if (!RHSReg)1290return 0;12911292if (NeedExtend)1293RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);12941295return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);1296}12971298unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,1299unsigned RHSReg, bool SetFlags,1300bool WantResult) {1301assert(LHSReg && RHSReg && "Invalid register number.");13021303if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||1304RHSReg == AArch64::SP || RHSReg == AArch64::WSP)1305return 0;13061307if (RetVT != MVT::i32 && RetVT != MVT::i64)1308return 0;13091310static const unsigned OpcTable[2][2][2] = {1311{ { AArch64::SUBWrr, AArch64::SUBXrr },1312{ AArch64::ADDWrr, AArch64::ADDXrr } },1313{ { AArch64::SUBSWrr, AArch64::SUBSXrr },1314{ AArch64::ADDSWrr, AArch64::ADDSXrr } }1315};1316bool Is64Bit = RetVT == MVT::i64;1317unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];1318const TargetRegisterClass *RC =1319Is64Bit ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;1320unsigned ResultReg;1321if (WantResult)1322ResultReg = createResultReg(RC);1323else1324ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;13251326const MCInstrDesc &II = TII.get(Opc);1327LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());1328RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);1329BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)1330.addReg(LHSReg)1331.addReg(RHSReg);1332return ResultReg;1333}13341335unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,1336uint64_t Imm, bool SetFlags,1337bool WantResult) {1338assert(LHSReg && "Invalid register number.");13391340if (RetVT != MVT::i32 && RetVT != MVT::i64)1341return 0;13421343unsigned ShiftImm;1344if (isUInt<12>(Imm))1345ShiftImm = 0;1346else if ((Imm & 0xfff000) == Imm) {1347ShiftImm = 12;1348Imm >>= 12;1349} else1350return 0;13511352static const unsigned OpcTable[2][2][2] = {1353{ { AArch64::SUBWri, AArch64::SUBXri },1354{ AArch64::ADDWri, AArch64::ADDXri } },1355{ { AArch64::SUBSWri, AArch64::SUBSXri },1356{ AArch64::ADDSWri, AArch64::ADDSXri } }1357};1358bool Is64Bit = RetVT == MVT::i64;1359unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];1360const TargetRegisterClass *RC;1361if (SetFlags)1362RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;1363else1364RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;1365unsigned ResultReg;1366if (WantResult)1367ResultReg = createResultReg(RC);1368else1369ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;13701371const MCInstrDesc &II = TII.get(Opc);1372LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());1373BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)1374.addReg(LHSReg)1375.addImm(Imm)1376.addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));1377return ResultReg;1378}13791380unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,1381unsigned RHSReg,1382AArch64_AM::ShiftExtendType ShiftType,1383uint64_t ShiftImm, bool SetFlags,1384bool WantResult) {1385assert(LHSReg && RHSReg && "Invalid register number.");1386assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&1387RHSReg != AArch64::SP && RHSReg != AArch64::WSP);13881389if (RetVT != MVT::i32 && RetVT != MVT::i64)1390return 0;13911392// Don't deal with undefined shifts.1393if (ShiftImm >= RetVT.getSizeInBits())1394return 0;13951396static const unsigned OpcTable[2][2][2] = {1397{ { AArch64::SUBWrs, AArch64::SUBXrs },1398{ AArch64::ADDWrs, AArch64::ADDXrs } },1399{ { AArch64::SUBSWrs, AArch64::SUBSXrs },1400{ AArch64::ADDSWrs, AArch64::ADDSXrs } }1401};1402bool Is64Bit = RetVT == MVT::i64;1403unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];1404const TargetRegisterClass *RC =1405Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;1406unsigned ResultReg;1407if (WantResult)1408ResultReg = createResultReg(RC);1409else1410ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR;14111412const MCInstrDesc &II = TII.get(Opc);1413LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());1414RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);1415BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)1416.addReg(LHSReg)1417.addReg(RHSReg)1418.addImm(getShifterImm(ShiftType, ShiftImm));1419return ResultReg;1420}14211422unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,1423unsigned RHSReg,1424AArch64_AM::ShiftExtendType ExtType,1425uint64_t ShiftImm, bool SetFlags,1426bool WantResult) {1427assert(LHSReg && RHSReg && "Invalid register number.");1428assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&1429RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);14301431if (RetVT != MVT::i32 && RetVT != MVT::i64)1432return 0;14331434if (ShiftImm >= 4)1435return 0;14361437static const unsigned OpcTable[2][2][2] = {1438{ { AArch64::SUBWrx, AArch64::SUBXrx },1439{ AArch64::ADDWrx, AArch64::ADDXrx } },1440{ { AArch64::SUBSWrx, AArch64::SUBSXrx },1441{ AArch64::ADDSWrx, AArch64::ADDSXrx } }1442};1443bool Is64Bit = RetVT == MVT::i64;1444unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];1445const TargetRegisterClass *RC = nullptr;1446if (SetFlags)1447RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;1448else1449RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;1450unsigned ResultReg;1451if (WantResult)1452ResultReg = createResultReg(RC);1453else1454ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;14551456const MCInstrDesc &II = TII.get(Opc);1457LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());1458RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);1459BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)1460.addReg(LHSReg)1461.addReg(RHSReg)1462.addImm(getArithExtendImm(ExtType, ShiftImm));1463return ResultReg;1464}14651466bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {1467Type *Ty = LHS->getType();1468EVT EVT = TLI.getValueType(DL, Ty, true);1469if (!EVT.isSimple())1470return false;1471MVT VT = EVT.getSimpleVT();14721473switch (VT.SimpleTy) {1474default:1475return false;1476case MVT::i1:1477case MVT::i8:1478case MVT::i16:1479case MVT::i32:1480case MVT::i64:1481return emitICmp(VT, LHS, RHS, IsZExt);1482case MVT::f32:1483case MVT::f64:1484return emitFCmp(VT, LHS, RHS);1485}1486}14871488bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,1489bool IsZExt) {1490return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,1491IsZExt) != 0;1492}14931494bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {1495return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,1496/*SetFlags=*/true, /*WantResult=*/false) != 0;1497}14981499bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {1500if (RetVT != MVT::f32 && RetVT != MVT::f64)1501return false;15021503// Check to see if the 2nd operand is a constant that we can encode directly1504// in the compare.1505bool UseImm = false;1506if (const auto *CFP = dyn_cast<ConstantFP>(RHS))1507if (CFP->isZero() && !CFP->isNegative())1508UseImm = true;15091510Register LHSReg = getRegForValue(LHS);1511if (!LHSReg)1512return false;15131514if (UseImm) {1515unsigned Opc = (RetVT == MVT::f64) ? 
AArch64::FCMPDri : AArch64::FCMPSri;1516BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))1517.addReg(LHSReg);1518return true;1519}15201521Register RHSReg = getRegForValue(RHS);1522if (!RHSReg)1523return false;15241525unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;1526BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))1527.addReg(LHSReg)1528.addReg(RHSReg);1529return true;1530}15311532unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,1533bool SetFlags, bool WantResult, bool IsZExt) {1534return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,1535IsZExt);1536}15371538/// This method is a wrapper to simplify add emission.1539///1540/// First try to emit an add with an immediate operand using emitAddSub_ri. If1541/// that fails, then try to materialize the immediate into a register and use1542/// emitAddSub_rr instead.1543unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {1544unsigned ResultReg;1545if (Imm < 0)1546ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);1547else1548ResultReg = emitAddSub_ri(true, VT, Op0, Imm);15491550if (ResultReg)1551return ResultReg;15521553unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);1554if (!CReg)1555return 0;15561557ResultReg = emitAddSub_rr(true, VT, Op0, CReg);1558return ResultReg;1559}15601561unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,1562bool SetFlags, bool WantResult, bool IsZExt) {1563return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,1564IsZExt);1565}15661567unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,1568unsigned RHSReg, bool WantResult) {1569return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,1570/*SetFlags=*/true, WantResult);1571}15721573unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,1574unsigned RHSReg,1575AArch64_AM::ShiftExtendType ShiftType,1576uint64_t ShiftImm, bool WantResult) {1577return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,1578ShiftImm, /*SetFlags=*/true, WantResult);1579}15801581unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,1582const Value *LHS, const Value *RHS) {1583// Canonicalize immediates to the RHS first.1584if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))1585std::swap(LHS, RHS);15861587// Canonicalize mul by power-of-2 to the RHS.1588if (LHS->hasOneUse() && isValueAvailable(LHS))1589if (isMulPowOf2(LHS))1590std::swap(LHS, RHS);15911592// Canonicalize shift immediate to the RHS.1593if (LHS->hasOneUse() && isValueAvailable(LHS))1594if (const auto *SI = dyn_cast<ShlOperator>(LHS))1595if (isa<ConstantInt>(SI->getOperand(1)))1596std::swap(LHS, RHS);15971598Register LHSReg = getRegForValue(LHS);1599if (!LHSReg)1600return 0;16011602unsigned ResultReg = 0;1603if (const auto *C = dyn_cast<ConstantInt>(RHS)) {1604uint64_t Imm = C->getZExtValue();1605ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);1606}1607if (ResultReg)1608return ResultReg;16091610// Check if the mul can be folded into the instruction.1611if (RHS->hasOneUse() && isValueAvailable(RHS)) {1612if (isMulPowOf2(RHS)) {1613const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);1614const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);16151616if (const auto *C = dyn_cast<ConstantInt>(MulLHS))1617if (C->getValue().isPowerOf2())1618std::swap(MulLHS, MulRHS);16191620assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");1621uint64_t ShiftVal = 
cast<ConstantInt>(MulRHS)->getValue().logBase2();16221623Register RHSReg = getRegForValue(MulLHS);1624if (!RHSReg)1625return 0;1626ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);1627if (ResultReg)1628return ResultReg;1629}1630}16311632// Check if the shift can be folded into the instruction.1633if (RHS->hasOneUse() && isValueAvailable(RHS)) {1634if (const auto *SI = dyn_cast<ShlOperator>(RHS))1635if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {1636uint64_t ShiftVal = C->getZExtValue();1637Register RHSReg = getRegForValue(SI->getOperand(0));1638if (!RHSReg)1639return 0;1640ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);1641if (ResultReg)1642return ResultReg;1643}1644}16451646Register RHSReg = getRegForValue(RHS);1647if (!RHSReg)1648return 0;16491650MVT VT = std::max(MVT::i32, RetVT.SimpleTy);1651ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);1652if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {1653uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;1654ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);1655}1656return ResultReg;1657}16581659unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,1660unsigned LHSReg, uint64_t Imm) {1661static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),1662"ISD nodes are not consecutive!");1663static const unsigned OpcTable[3][2] = {1664{ AArch64::ANDWri, AArch64::ANDXri },1665{ AArch64::ORRWri, AArch64::ORRXri },1666{ AArch64::EORWri, AArch64::EORXri }1667};1668const TargetRegisterClass *RC;1669unsigned Opc;1670unsigned RegSize;1671switch (RetVT.SimpleTy) {1672default:1673return 0;1674case MVT::i1:1675case MVT::i8:1676case MVT::i16:1677case MVT::i32: {1678unsigned Idx = ISDOpc - ISD::AND;1679Opc = OpcTable[Idx][0];1680RC = &AArch64::GPR32spRegClass;1681RegSize = 32;1682break;1683}1684case MVT::i64:1685Opc = OpcTable[ISDOpc - ISD::AND][1];1686RC = &AArch64::GPR64spRegClass;1687RegSize = 64;1688break;1689}16901691if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))1692return 0;16931694Register ResultReg =1695fastEmitInst_ri(Opc, RC, LHSReg,1696AArch64_AM::encodeLogicalImmediate(Imm, RegSize));1697if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {1698uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;1699ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);1700}1701return ResultReg;1702}17031704unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,1705unsigned LHSReg, unsigned RHSReg,1706uint64_t ShiftImm) {1707static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),1708"ISD nodes are not consecutive!");1709static const unsigned OpcTable[3][2] = {1710{ AArch64::ANDWrs, AArch64::ANDXrs },1711{ AArch64::ORRWrs, AArch64::ORRXrs },1712{ AArch64::EORWrs, AArch64::EORXrs }1713};17141715// Don't deal with undefined shifts.1716if (ShiftImm >= RetVT.getSizeInBits())1717return 0;17181719const TargetRegisterClass *RC;1720unsigned Opc;1721switch (RetVT.SimpleTy) {1722default:1723return 0;1724case MVT::i1:1725case MVT::i8:1726case MVT::i16:1727case MVT::i32:1728Opc = OpcTable[ISDOpc - ISD::AND][0];1729RC = &AArch64::GPR32RegClass;1730break;1731case MVT::i64:1732Opc = OpcTable[ISDOpc - ISD::AND][1];1733RC = &AArch64::GPR64RegClass;1734break;1735}1736Register ResultReg =1737fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,1738AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));1739if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {1740uint64_t Mask = (RetVT == MVT::i8) ? 
0xff : 0xffff;1741ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);1742}1743return ResultReg;1744}17451746unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,1747uint64_t Imm) {1748return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);1749}17501751unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,1752bool WantZExt, MachineMemOperand *MMO) {1753if (!TLI.allowsMisalignedMemoryAccesses(VT))1754return 0;17551756// Simplify this down to something we can handle.1757if (!simplifyAddress(Addr, VT))1758return 0;17591760unsigned ScaleFactor = getImplicitScaleFactor(VT);1761if (!ScaleFactor)1762llvm_unreachable("Unexpected value type.");17631764// Negative offsets require unscaled, 9-bit, signed immediate offsets.1765// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.1766bool UseScaled = true;1767if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {1768UseScaled = false;1769ScaleFactor = 1;1770}17711772static const unsigned GPOpcTable[2][8][4] = {1773// Sign-extend.1774{ { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,1775AArch64::LDURXi },1776{ AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,1777AArch64::LDURXi },1778{ AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,1779AArch64::LDRXui },1780{ AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,1781AArch64::LDRXui },1782{ AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,1783AArch64::LDRXroX },1784{ AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,1785AArch64::LDRXroX },1786{ AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,1787AArch64::LDRXroW },1788{ AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,1789AArch64::LDRXroW }1790},1791// Zero-extend.1792{ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,1793AArch64::LDURXi },1794{ AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,1795AArch64::LDURXi },1796{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,1797AArch64::LDRXui },1798{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,1799AArch64::LDRXui },1800{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,1801AArch64::LDRXroX },1802{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,1803AArch64::LDRXroX },1804{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,1805AArch64::LDRXroW },1806{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,1807AArch64::LDRXroW }1808}1809};18101811static const unsigned FPOpcTable[4][2] = {1812{ AArch64::LDURSi, AArch64::LDURDi },1813{ AArch64::LDRSui, AArch64::LDRDui },1814{ AArch64::LDRSroX, AArch64::LDRDroX },1815{ AArch64::LDRSroW, AArch64::LDRDroW }1816};18171818unsigned Opc;1819const TargetRegisterClass *RC;1820bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&1821Addr.getOffsetReg();1822unsigned Idx = UseRegOffset ? 2 : UseScaled ? 
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  Register ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}

bool AArch64FastISel::selectAddSub(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::Add:
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Sub:
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::And:
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Or:
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Xor:
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectLoad(const Instruction *I) {
  MVT VT;
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    return false;

  // Fold the following sign-/zero-extend into the load instruction.
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary.
FastISel will remove2011// it when it selects the integer extend.2012Register Reg = lookUpRegForValue(IntExtVal);2013auto *MI = MRI.getUniqueVRegDef(Reg);2014if (!MI) {2015if (RetVT == MVT::i64 && VT <= MVT::i32) {2016if (WantZExt) {2017// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).2018MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));2019ResultReg = std::prev(I)->getOperand(0).getReg();2020removeDeadCode(I, std::next(I));2021} else2022ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,2023AArch64::sub_32);2024}2025updateValueMap(I, ResultReg);2026return true;2027}20282029// The integer extend has already been emitted - delete all the instructions2030// that have been emitted by the integer extend lowering code and use the2031// result from the load instruction directly.2032while (MI) {2033Reg = 0;2034for (auto &Opnd : MI->uses()) {2035if (Opnd.isReg()) {2036Reg = Opnd.getReg();2037break;2038}2039}2040MachineBasicBlock::iterator I(MI);2041removeDeadCode(I, std::next(I));2042MI = nullptr;2043if (Reg)2044MI = MRI.getUniqueVRegDef(Reg);2045}2046updateValueMap(IntExtVal, ResultReg);2047return true;2048}20492050updateValueMap(I, ResultReg);2051return true;2052}20532054bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,2055unsigned AddrReg,2056MachineMemOperand *MMO) {2057unsigned Opc;2058switch (VT.SimpleTy) {2059default: return false;2060case MVT::i8: Opc = AArch64::STLRB; break;2061case MVT::i16: Opc = AArch64::STLRH; break;2062case MVT::i32: Opc = AArch64::STLRW; break;2063case MVT::i64: Opc = AArch64::STLRX; break;2064}20652066const MCInstrDesc &II = TII.get(Opc);2067SrcReg = constrainOperandRegClass(II, SrcReg, 0);2068AddrReg = constrainOperandRegClass(II, AddrReg, 1);2069BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)2070.addReg(SrcReg)2071.addReg(AddrReg)2072.addMemOperand(MMO);2073return true;2074}20752076bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,2077MachineMemOperand *MMO) {2078if (!TLI.allowsMisalignedMemoryAccesses(VT))2079return false;20802081// Simplify this down to something we can handle.2082if (!simplifyAddress(Addr, VT))2083return false;20842085unsigned ScaleFactor = getImplicitScaleFactor(VT);2086if (!ScaleFactor)2087llvm_unreachable("Unexpected value type.");20882089// Negative offsets require unscaled, 9-bit, signed immediate offsets.2090// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.2091bool UseScaled = true;2092if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {2093UseScaled = false;2094ScaleFactor = 1;2095}20962097static const unsigned OpcTable[4][6] = {2098{ AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,2099AArch64::STURSi, AArch64::STURDi },2100{ AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,2101AArch64::STRSui, AArch64::STRDui },2102{ AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,2103AArch64::STRSroX, AArch64::STRDroX },2104{ AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,2105AArch64::STRSroW, AArch64::STRDroW }2106};21072108unsigned Opc;2109bool VTIsi1 = false;2110bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&2111Addr.getOffsetReg();2112unsigned Idx = UseRegOffset ? 2 : UseScaled ? 
1 : 0;2113if (Addr.getExtendType() == AArch64_AM::UXTW ||2114Addr.getExtendType() == AArch64_AM::SXTW)2115Idx++;21162117switch (VT.SimpleTy) {2118default: llvm_unreachable("Unexpected value type.");2119case MVT::i1: VTIsi1 = true; [[fallthrough]];2120case MVT::i8: Opc = OpcTable[Idx][0]; break;2121case MVT::i16: Opc = OpcTable[Idx][1]; break;2122case MVT::i32: Opc = OpcTable[Idx][2]; break;2123case MVT::i64: Opc = OpcTable[Idx][3]; break;2124case MVT::f32: Opc = OpcTable[Idx][4]; break;2125case MVT::f64: Opc = OpcTable[Idx][5]; break;2126}21272128// Storing an i1 requires special handling.2129if (VTIsi1 && SrcReg != AArch64::WZR) {2130unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);2131assert(ANDReg && "Unexpected AND instruction emission failure.");2132SrcReg = ANDReg;2133}2134// Create the base instruction, then add the operands.2135const MCInstrDesc &II = TII.get(Opc);2136SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());2137MachineInstrBuilder MIB =2138BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);2139addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);21402141return true;2142}21432144bool AArch64FastISel::selectStore(const Instruction *I) {2145MVT VT;2146const Value *Op0 = I->getOperand(0);2147// Verify we have a legal type before going any further. Currently, we handle2148// simple types that will directly fit in a register (i32/f32/i64/f64) or2149// those that can be sign or zero-extended to a basic operation (i1/i8/i16).2150if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))2151return false;21522153const Value *PtrV = I->getOperand(1);2154if (TLI.supportSwiftError()) {2155// Swifterror values can come from either a function parameter with2156// swifterror attribute or an alloca with swifterror attribute.2157if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {2158if (Arg->hasSwiftErrorAttr())2159return false;2160}21612162if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {2163if (Alloca->isSwiftError())2164return false;2165}2166}21672168// Get the value to be stored into a register. Use the zero register directly2169// when possible to avoid an unnecessary copy and a wasted register.2170unsigned SrcReg = 0;2171if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {2172if (CI->isZero())2173SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;2174} else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {2175if (CF->isZero() && !CF->isNegative()) {2176VT = MVT::getIntegerVT(VT.getSizeInBits());2177SrcReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR;2178}2179}21802181if (!SrcReg)2182SrcReg = getRegForValue(Op0);21832184if (!SrcReg)2185return false;21862187auto *SI = cast<StoreInst>(I);21882189// Try to emit a STLR for seq_cst/release.2190if (SI->isAtomic()) {2191AtomicOrdering Ord = SI->getOrdering();2192// The non-atomic instructions are sufficient for relaxed stores.2193if (isReleaseOrStronger(Ord)) {2194// The STLR addressing mode only supports a base reg; pass that directly.2195Register AddrReg = getRegForValue(PtrV);2196return emitStoreRelease(VT, SrcReg, AddrReg,2197createMachineMemOperandFor(I));2198}2199}22002201// See if we can handle this address.2202Address Addr;2203if (!computeAddress(PtrV, Addr, Op0->getType()))2204return false;22052206if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))2207return false;2208return true;2209}22102211static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {2212switch (Pred) {2213case CmpInst::FCMP_ONE:2214case CmpInst::FCMP_UEQ:2215default:2216// AL is our "false" for now. The other two need more compares.2217return AArch64CC::AL;2218case CmpInst::ICMP_EQ:2219case CmpInst::FCMP_OEQ:2220return AArch64CC::EQ;2221case CmpInst::ICMP_SGT:2222case CmpInst::FCMP_OGT:2223return AArch64CC::GT;2224case CmpInst::ICMP_SGE:2225case CmpInst::FCMP_OGE:2226return AArch64CC::GE;2227case CmpInst::ICMP_UGT:2228case CmpInst::FCMP_UGT:2229return AArch64CC::HI;2230case CmpInst::FCMP_OLT:2231return AArch64CC::MI;2232case CmpInst::ICMP_ULE:2233case CmpInst::FCMP_OLE:2234return AArch64CC::LS;2235case CmpInst::FCMP_ORD:2236return AArch64CC::VC;2237case CmpInst::FCMP_UNO:2238return AArch64CC::VS;2239case CmpInst::FCMP_UGE:2240return AArch64CC::PL;2241case CmpInst::ICMP_SLT:2242case CmpInst::FCMP_ULT:2243return AArch64CC::LT;2244case CmpInst::ICMP_SLE:2245case CmpInst::FCMP_ULE:2246return AArch64CC::LE;2247case CmpInst::FCMP_UNE:2248case CmpInst::ICMP_NE:2249return AArch64CC::NE;2250case CmpInst::ICMP_UGE:2251return AArch64CC::HS;2252case CmpInst::ICMP_ULT:2253return AArch64CC::LO;2254}2255}22562257/// Try to emit a combined compare-and-branch instruction.2258bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {2259// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions2260// will not be produced, as they are conditional branch instructions that do2261// not set flags.2262if (FuncInfo.MF->getFunction().hasFnAttribute(2263Attribute::SpeculativeLoadHardening))2264return false;22652266assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");2267const CmpInst *CI = cast<CmpInst>(BI->getCondition());2268CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);22692270const Value *LHS = CI->getOperand(0);2271const Value *RHS = CI->getOperand(1);22722273MVT VT;2274if (!isTypeSupported(LHS->getType(), VT))2275return false;22762277unsigned BW = VT.getSizeInBits();2278if (BW > 64)2279return false;22802281MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];2282MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];22832284// Try to take advantage of fallthrough opportunities.2285if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {2286std::swap(TBB, FBB);2287Predicate = CmpInst::getInversePredicate(Predicate);2288}22892290int TestBit = -1;2291bool IsCmpNE;2292switch (Predicate) {2293default:2294return false;2295case CmpInst::ICMP_EQ:2296case CmpInst::ICMP_NE:2297if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())2298std::swap(LHS, RHS);22992300if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())2301return 
false;23022303if (const auto *AI = dyn_cast<BinaryOperator>(LHS))2304if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {2305const Value *AndLHS = AI->getOperand(0);2306const Value *AndRHS = AI->getOperand(1);23072308if (const auto *C = dyn_cast<ConstantInt>(AndLHS))2309if (C->getValue().isPowerOf2())2310std::swap(AndLHS, AndRHS);23112312if (const auto *C = dyn_cast<ConstantInt>(AndRHS))2313if (C->getValue().isPowerOf2()) {2314TestBit = C->getValue().logBase2();2315LHS = AndLHS;2316}2317}23182319if (VT == MVT::i1)2320TestBit = 0;23212322IsCmpNE = Predicate == CmpInst::ICMP_NE;2323break;2324case CmpInst::ICMP_SLT:2325case CmpInst::ICMP_SGE:2326if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())2327return false;23282329TestBit = BW - 1;2330IsCmpNE = Predicate == CmpInst::ICMP_SLT;2331break;2332case CmpInst::ICMP_SGT:2333case CmpInst::ICMP_SLE:2334if (!isa<ConstantInt>(RHS))2335return false;23362337if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))2338return false;23392340TestBit = BW - 1;2341IsCmpNE = Predicate == CmpInst::ICMP_SLE;2342break;2343} // end switch23442345static const unsigned OpcTable[2][2][2] = {2346{ {AArch64::CBZW, AArch64::CBZX },2347{AArch64::CBNZW, AArch64::CBNZX} },2348{ {AArch64::TBZW, AArch64::TBZX },2349{AArch64::TBNZW, AArch64::TBNZX} }2350};23512352bool IsBitTest = TestBit != -1;2353bool Is64Bit = BW == 64;2354if (TestBit < 32 && TestBit >= 0)2355Is64Bit = false;23562357unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];2358const MCInstrDesc &II = TII.get(Opc);23592360Register SrcReg = getRegForValue(LHS);2361if (!SrcReg)2362return false;23632364if (BW == 64 && !Is64Bit)2365SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);23662367if ((BW < 32) && !IsBitTest)2368SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);23692370// Emit the combined compare and branch instruction.2371SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());2372MachineInstrBuilder MIB =2373BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))2374.addReg(SrcReg);2375if (IsBitTest)2376MIB.addImm(TestBit);2377MIB.addMBB(TBB);23782379finishCondBranch(BI->getParent(), TBB, FBB);2380return true;2381}23822383bool AArch64FastISel::selectBranch(const Instruction *I) {2384const BranchInst *BI = cast<BranchInst>(I);2385if (BI->isUnconditional()) {2386MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];2387fastEmitBranch(MSucc, BI->getDebugLoc());2388return true;2389}23902391MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];2392MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];23932394if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {2395if (CI->hasOneUse() && isValueAvailable(CI)) {2396// Try to optimize or fold the cmp.2397CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);2398switch (Predicate) {2399default:2400break;2401case CmpInst::FCMP_FALSE:2402fastEmitBranch(FBB, MIMD.getDL());2403return true;2404case CmpInst::FCMP_TRUE:2405fastEmitBranch(TBB, MIMD.getDL());2406return true;2407}24082409// Try to emit a combined compare-and-branch first.2410if (emitCompareAndBranch(BI))2411return true;24122413// Try to take advantage of fallthrough opportunities.2414if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {2415std::swap(TBB, FBB);2416Predicate = CmpInst::getInversePredicate(Predicate);2417}24182419// Emit the cmp.2420if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))2421return false;24222423// FCMP_UEQ and FCMP_ONE cannot be checked with a single 
branch2424// instruction.2425AArch64CC::CondCode CC = getCompareCC(Predicate);2426AArch64CC::CondCode ExtraCC = AArch64CC::AL;2427switch (Predicate) {2428default:2429break;2430case CmpInst::FCMP_UEQ:2431ExtraCC = AArch64CC::EQ;2432CC = AArch64CC::VS;2433break;2434case CmpInst::FCMP_ONE:2435ExtraCC = AArch64CC::MI;2436CC = AArch64CC::GT;2437break;2438}2439assert((CC != AArch64CC::AL) && "Unexpected condition code.");24402441// Emit the extra branch for FCMP_UEQ and FCMP_ONE.2442if (ExtraCC != AArch64CC::AL) {2443BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))2444.addImm(ExtraCC)2445.addMBB(TBB);2446}24472448// Emit the branch.2449BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))2450.addImm(CC)2451.addMBB(TBB);24522453finishCondBranch(BI->getParent(), TBB, FBB);2454return true;2455}2456} else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {2457uint64_t Imm = CI->getZExtValue();2458MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;2459BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))2460.addMBB(Target);24612462// Obtain the branch probability and add the target to the successor list.2463if (FuncInfo.BPI) {2464auto BranchProbability = FuncInfo.BPI->getEdgeProbability(2465BI->getParent(), Target->getBasicBlock());2466FuncInfo.MBB->addSuccessor(Target, BranchProbability);2467} else2468FuncInfo.MBB->addSuccessorWithoutProb(Target);2469return true;2470} else {2471AArch64CC::CondCode CC = AArch64CC::NE;2472if (foldXALUIntrinsic(CC, I, BI->getCondition())) {2473// Fake request the condition, otherwise the intrinsic might be completely2474// optimized away.2475Register CondReg = getRegForValue(BI->getCondition());2476if (!CondReg)2477return false;24782479// Emit the branch.2480BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))2481.addImm(CC)2482.addMBB(TBB);24832484finishCondBranch(BI->getParent(), TBB, FBB);2485return true;2486}2487}24882489Register CondReg = getRegForValue(BI->getCondition());2490if (CondReg == 0)2491return false;24922493// i1 conditions come as i32 values, test the lowest bit with tb(n)z.2494unsigned Opcode = AArch64::TBNZW;2495if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {2496std::swap(TBB, FBB);2497Opcode = AArch64::TBZW;2498}24992500const MCInstrDesc &II = TII.get(Opcode);2501Register ConstrainedCondReg2502= constrainOperandRegClass(II, CondReg, II.getNumDefs());2503BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)2504.addReg(ConstrainedCondReg)2505.addImm(0)2506.addMBB(TBB);25072508finishCondBranch(BI->getParent(), TBB, FBB);2509return true;2510}25112512bool AArch64FastISel::selectIndirectBr(const Instruction *I) {2513const IndirectBrInst *BI = cast<IndirectBrInst>(I);2514Register AddrReg = getRegForValue(BI->getOperand(0));2515if (AddrReg == 0)2516return false;25172518// Authenticated indirectbr is not implemented yet.2519if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))2520return false;25212522// Emit the indirect branch.2523const MCInstrDesc &II = TII.get(AArch64::BR);2524AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());2525BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);25262527// Make sure the CFG is up-to-date.2528for (const auto *Succ : BI->successors())2529FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);25302531return true;2532}25332534bool AArch64FastISel::selectCmp(const Instruction *I) {2535const CmpInst *CI = cast<CmpInst>(I);25362537// Vectors of i1 are weird: bail out.2538if 
(CI->getType()->isVectorTy())2539return false;25402541// Try to optimize or fold the cmp.2542CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);2543unsigned ResultReg = 0;2544switch (Predicate) {2545default:2546break;2547case CmpInst::FCMP_FALSE:2548ResultReg = createResultReg(&AArch64::GPR32RegClass);2549BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,2550TII.get(TargetOpcode::COPY), ResultReg)2551.addReg(AArch64::WZR, getKillRegState(true));2552break;2553case CmpInst::FCMP_TRUE:2554ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);2555break;2556}25572558if (ResultReg) {2559updateValueMap(I, ResultReg);2560return true;2561}25622563// Emit the cmp.2564if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))2565return false;25662567ResultReg = createResultReg(&AArch64::GPR32RegClass);25682569// FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These2570// condition codes are inverted, because they are used by CSINC.2571static unsigned CondCodeTable[2][2] = {2572{ AArch64CC::NE, AArch64CC::VC },2573{ AArch64CC::PL, AArch64CC::LE }2574};2575unsigned *CondCodes = nullptr;2576switch (Predicate) {2577default:2578break;2579case CmpInst::FCMP_UEQ:2580CondCodes = &CondCodeTable[0][0];2581break;2582case CmpInst::FCMP_ONE:2583CondCodes = &CondCodeTable[1][0];2584break;2585}25862587if (CondCodes) {2588Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);2589BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),2590TmpReg1)2591.addReg(AArch64::WZR, getKillRegState(true))2592.addReg(AArch64::WZR, getKillRegState(true))2593.addImm(CondCodes[0]);2594BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),2595ResultReg)2596.addReg(TmpReg1, getKillRegState(true))2597.addReg(AArch64::WZR, getKillRegState(true))2598.addImm(CondCodes[1]);25992600updateValueMap(I, ResultReg);2601return true;2602}26032604// Now set a register based on the comparison.2605AArch64CC::CondCode CC = getCompareCC(Predicate);2606assert((CC != AArch64CC::AL) && "Unexpected condition code.");2607AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);2608BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),2609ResultReg)2610.addReg(AArch64::WZR, getKillRegState(true))2611.addReg(AArch64::WZR, getKillRegState(true))2612.addImm(invertedCC);26132614updateValueMap(I, ResultReg);2615return true;2616}26172618/// Optimize selects of i1 if one of the operands has a 'true' or 'false'2619/// value.2620bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {2621if (!SI->getType()->isIntegerTy(1))2622return false;26232624const Value *Src1Val, *Src2Val;2625unsigned Opc = 0;2626bool NeedExtraOp = false;2627if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {2628if (CI->isOne()) {2629Src1Val = SI->getCondition();2630Src2Val = SI->getFalseValue();2631Opc = AArch64::ORRWrr;2632} else {2633assert(CI->isZero());2634Src1Val = SI->getFalseValue();2635Src2Val = SI->getCondition();2636Opc = AArch64::BICWrr;2637}2638} else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {2639if (CI->isOne()) {2640Src1Val = SI->getCondition();2641Src2Val = SI->getTrueValue();2642Opc = AArch64::ORRWrr;2643NeedExtraOp = true;2644} else {2645assert(CI->isZero());2646Src1Val = SI->getCondition();2647Src2Val = SI->getTrueValue();2648Opc = AArch64::ANDWrr;2649}2650}26512652if (!Opc)2653return false;26542655Register Src1Reg = getRegForValue(Src1Val);2656if (!Src1Reg)2657return false;26582659Register Src2Reg = getRegForValue(Src2Val);2660if 
(!Src2Reg)2661return false;26622663if (NeedExtraOp)2664Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);26652666Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,2667Src2Reg);2668updateValueMap(SI, ResultReg);2669return true;2670}26712672bool AArch64FastISel::selectSelect(const Instruction *I) {2673assert(isa<SelectInst>(I) && "Expected a select instruction.");2674MVT VT;2675if (!isTypeSupported(I->getType(), VT))2676return false;26772678unsigned Opc;2679const TargetRegisterClass *RC;2680switch (VT.SimpleTy) {2681default:2682return false;2683case MVT::i1:2684case MVT::i8:2685case MVT::i16:2686case MVT::i32:2687Opc = AArch64::CSELWr;2688RC = &AArch64::GPR32RegClass;2689break;2690case MVT::i64:2691Opc = AArch64::CSELXr;2692RC = &AArch64::GPR64RegClass;2693break;2694case MVT::f32:2695Opc = AArch64::FCSELSrrr;2696RC = &AArch64::FPR32RegClass;2697break;2698case MVT::f64:2699Opc = AArch64::FCSELDrrr;2700RC = &AArch64::FPR64RegClass;2701break;2702}27032704const SelectInst *SI = cast<SelectInst>(I);2705const Value *Cond = SI->getCondition();2706AArch64CC::CondCode CC = AArch64CC::NE;2707AArch64CC::CondCode ExtraCC = AArch64CC::AL;27082709if (optimizeSelect(SI))2710return true;27112712// Try to pickup the flags, so we don't have to emit another compare.2713if (foldXALUIntrinsic(CC, I, Cond)) {2714// Fake request the condition to force emission of the XALU intrinsic.2715Register CondReg = getRegForValue(Cond);2716if (!CondReg)2717return false;2718} else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&2719isValueAvailable(Cond)) {2720const auto *Cmp = cast<CmpInst>(Cond);2721// Try to optimize or fold the cmp.2722CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);2723const Value *FoldSelect = nullptr;2724switch (Predicate) {2725default:2726break;2727case CmpInst::FCMP_FALSE:2728FoldSelect = SI->getFalseValue();2729break;2730case CmpInst::FCMP_TRUE:2731FoldSelect = SI->getTrueValue();2732break;2733}27342735if (FoldSelect) {2736Register SrcReg = getRegForValue(FoldSelect);2737if (!SrcReg)2738return false;27392740updateValueMap(I, SrcReg);2741return true;2742}27432744// Emit the cmp.2745if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))2746return false;27472748// FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.2749CC = getCompareCC(Predicate);2750switch (Predicate) {2751default:2752break;2753case CmpInst::FCMP_UEQ:2754ExtraCC = AArch64CC::EQ;2755CC = AArch64CC::VS;2756break;2757case CmpInst::FCMP_ONE:2758ExtraCC = AArch64CC::MI;2759CC = AArch64CC::GT;2760break;2761}2762assert((CC != AArch64CC::AL) && "Unexpected condition code.");2763} else {2764Register CondReg = getRegForValue(Cond);2765if (!CondReg)2766return false;27672768const MCInstrDesc &II = TII.get(AArch64::ANDSWri);2769CondReg = constrainOperandRegClass(II, CondReg, 1);27702771// Emit a TST instruction (ANDS wzr, reg, #imm).2772BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,2773AArch64::WZR)2774.addReg(CondReg)2775.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));2776}27772778Register Src1Reg = getRegForValue(SI->getTrueValue());2779Register Src2Reg = getRegForValue(SI->getFalseValue());27802781if (!Src1Reg || !Src2Reg)2782return false;27832784if (ExtraCC != AArch64CC::AL)2785Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);27862787Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);2788updateValueMap(I, ResultReg);2789return true;2790}27912792bool AArch64FastISel::selectFPExt(const Instruction *I) {2793Value 
*V = I->getOperand(0);2794if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())2795return false;27962797Register Op = getRegForValue(V);2798if (Op == 0)2799return false;28002801Register ResultReg = createResultReg(&AArch64::FPR64RegClass);2802BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),2803ResultReg).addReg(Op);2804updateValueMap(I, ResultReg);2805return true;2806}28072808bool AArch64FastISel::selectFPTrunc(const Instruction *I) {2809Value *V = I->getOperand(0);2810if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())2811return false;28122813Register Op = getRegForValue(V);2814if (Op == 0)2815return false;28162817Register ResultReg = createResultReg(&AArch64::FPR32RegClass);2818BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),2819ResultReg).addReg(Op);2820updateValueMap(I, ResultReg);2821return true;2822}28232824// FPToUI and FPToSI2825bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {2826MVT DestVT;2827if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())2828return false;28292830Register SrcReg = getRegForValue(I->getOperand(0));2831if (SrcReg == 0)2832return false;28332834EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);2835if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)2836return false;28372838unsigned Opc;2839if (SrcVT == MVT::f64) {2840if (Signed)2841Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;2842else2843Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;2844} else {2845if (Signed)2846Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;2847else2848Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;2849}2850Register ResultReg = createResultReg(2851DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);2852BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)2853.addReg(SrcReg);2854updateValueMap(I, ResultReg);2855return true;2856}28572858bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {2859MVT DestVT;2860if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())2861return false;2862// Let regular ISEL handle FP162863if (DestVT == MVT::f16 || DestVT == MVT::bf16)2864return false;28652866assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&2867"Unexpected value type.");28682869Register SrcReg = getRegForValue(I->getOperand(0));2870if (!SrcReg)2871return false;28722873EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);28742875// Handle sign-extension.2876if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {2877SrcReg =2878emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);2879if (!SrcReg)2880return false;2881}28822883unsigned Opc;2884if (SrcVT == MVT::i64) {2885if (Signed)2886Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;2887else2888Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;2889} else {2890if (Signed)2891Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;2892else2893Opc = (DestVT == MVT::f32) ? 
AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;2894}28952896Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);2897updateValueMap(I, ResultReg);2898return true;2899}29002901bool AArch64FastISel::fastLowerArguments() {2902if (!FuncInfo.CanLowerReturn)2903return false;29042905const Function *F = FuncInfo.Fn;2906if (F->isVarArg())2907return false;29082909CallingConv::ID CC = F->getCallingConv();2910if (CC != CallingConv::C && CC != CallingConv::Swift)2911return false;29122913if (Subtarget->hasCustomCallingConv())2914return false;29152916// Only handle simple cases of up to 8 GPR and FPR each.2917unsigned GPRCnt = 0;2918unsigned FPRCnt = 0;2919for (auto const &Arg : F->args()) {2920if (Arg.hasAttribute(Attribute::ByVal) ||2921Arg.hasAttribute(Attribute::InReg) ||2922Arg.hasAttribute(Attribute::StructRet) ||2923Arg.hasAttribute(Attribute::SwiftSelf) ||2924Arg.hasAttribute(Attribute::SwiftAsync) ||2925Arg.hasAttribute(Attribute::SwiftError) ||2926Arg.hasAttribute(Attribute::Nest))2927return false;29282929Type *ArgTy = Arg.getType();2930if (ArgTy->isStructTy() || ArgTy->isArrayTy())2931return false;29322933EVT ArgVT = TLI.getValueType(DL, ArgTy);2934if (!ArgVT.isSimple())2935return false;29362937MVT VT = ArgVT.getSimpleVT().SimpleTy;2938if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())2939return false;29402941if (VT.isVector() &&2942(!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))2943return false;29442945if (VT >= MVT::i1 && VT <= MVT::i64)2946++GPRCnt;2947else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||2948VT.is128BitVector())2949++FPRCnt;2950else2951return false;29522953if (GPRCnt > 8 || FPRCnt > 8)2954return false;2955}29562957static const MCPhysReg Registers[6][8] = {2958{ AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,2959AArch64::W5, AArch64::W6, AArch64::W7 },2960{ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,2961AArch64::X5, AArch64::X6, AArch64::X7 },2962{ AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,2963AArch64::H5, AArch64::H6, AArch64::H7 },2964{ AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,2965AArch64::S5, AArch64::S6, AArch64::S7 },2966{ AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,2967AArch64::D5, AArch64::D6, AArch64::D7 },2968{ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,2969AArch64::Q5, AArch64::Q6, AArch64::Q7 }2970};29712972unsigned GPRIdx = 0;2973unsigned FPRIdx = 0;2974for (auto const &Arg : F->args()) {2975MVT VT = TLI.getSimpleValueType(DL, Arg.getType());2976unsigned SrcReg;2977const TargetRegisterClass *RC;2978if (VT >= MVT::i1 && VT <= MVT::i32) {2979SrcReg = Registers[0][GPRIdx++];2980RC = &AArch64::GPR32RegClass;2981VT = MVT::i32;2982} else if (VT == MVT::i64) {2983SrcReg = Registers[1][GPRIdx++];2984RC = &AArch64::GPR64RegClass;2985} else if (VT == MVT::f16 || VT == MVT::bf16) {2986SrcReg = Registers[2][FPRIdx++];2987RC = &AArch64::FPR16RegClass;2988} else if (VT == MVT::f32) {2989SrcReg = Registers[3][FPRIdx++];2990RC = &AArch64::FPR32RegClass;2991} else if ((VT == MVT::f64) || VT.is64BitVector()) {2992SrcReg = Registers[4][FPRIdx++];2993RC = &AArch64::FPR64RegClass;2994} else if (VT.is128BitVector()) {2995SrcReg = Registers[5][FPRIdx++];2996RC = &AArch64::FPR128RegClass;2997} else2998llvm_unreachable("Unexpected value type.");29993000Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);3001// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.3002// Without this, EmitLiveInCopies 
may eliminate the livein if its only3003// use is a bitcast (which isn't turned into an instruction).3004Register ResultReg = createResultReg(RC);3005BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3006TII.get(TargetOpcode::COPY), ResultReg)3007.addReg(DstReg, getKillRegState(true));3008updateValueMap(&Arg, ResultReg);3009}3010return true;3011}30123013bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,3014SmallVectorImpl<MVT> &OutVTs,3015unsigned &NumBytes) {3016CallingConv::ID CC = CLI.CallConv;3017SmallVector<CCValAssign, 16> ArgLocs;3018CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);3019CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));30203021// Get a count of how many bytes are to be pushed on the stack.3022NumBytes = CCInfo.getStackSize();30233024// Issue CALLSEQ_START3025unsigned AdjStackDown = TII.getCallFrameSetupOpcode();3026BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))3027.addImm(NumBytes).addImm(0);30283029// Process the args.3030for (CCValAssign &VA : ArgLocs) {3031const Value *ArgVal = CLI.OutVals[VA.getValNo()];3032MVT ArgVT = OutVTs[VA.getValNo()];30333034Register ArgReg = getRegForValue(ArgVal);3035if (!ArgReg)3036return false;30373038// Handle arg promotion: SExt, ZExt, AExt.3039switch (VA.getLocInfo()) {3040case CCValAssign::Full:3041break;3042case CCValAssign::SExt: {3043MVT DestVT = VA.getLocVT();3044MVT SrcVT = ArgVT;3045ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);3046if (!ArgReg)3047return false;3048break;3049}3050case CCValAssign::AExt:3051// Intentional fall-through.3052case CCValAssign::ZExt: {3053MVT DestVT = VA.getLocVT();3054MVT SrcVT = ArgVT;3055ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);3056if (!ArgReg)3057return false;3058break;3059}3060default:3061llvm_unreachable("Unknown arg promotion!");3062}30633064// Now copy/store arg to correct locations.3065if (VA.isRegLoc() && !VA.needsCustom()) {3066BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3067TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);3068CLI.OutRegs.push_back(VA.getLocReg());3069} else if (VA.needsCustom()) {3070// FIXME: Handle custom args.3071return false;3072} else {3073assert(VA.isMemLoc() && "Assuming store on stack.");30743075// Don't emit stores for undef values.3076if (isa<UndefValue>(ArgVal))3077continue;30783079// Need to store on the stack.3080unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;30813082unsigned BEAlign = 0;3083if (ArgSize < 8 && !Subtarget->isLittleEndian())3084BEAlign = 8 - ArgSize;30853086Address Addr;3087Addr.setKind(Address::RegBase);3088Addr.setReg(AArch64::SP);3089Addr.setOffset(VA.getLocMemOffset() + BEAlign);30903091Align Alignment = DL.getABITypeAlign(ArgVal->getType());3092MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(3093MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),3094MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);30953096if (!emitStore(ArgVT, ArgReg, Addr, MMO))3097return false;3098}3099}3100return true;3101}31023103bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {3104CallingConv::ID CC = CLI.CallConv;31053106// Issue CALLSEQ_END3107unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();3108BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))3109.addImm(NumBytes).addImm(0);31103111// Now the return values.3112SmallVector<CCValAssign, 16> RVLocs;3113CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);3114CCInfo.AnalyzeCallResult(CLI.Ins, 
CCAssignFnForCall(CC));31153116Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);3117for (unsigned i = 0; i != RVLocs.size(); ++i) {3118CCValAssign &VA = RVLocs[i];3119MVT CopyVT = VA.getValVT();3120unsigned CopyReg = ResultReg + i;31213122// TODO: Handle big-endian results3123if (CopyVT.isVector() && !Subtarget->isLittleEndian())3124return false;31253126// Copy result out of their specified physreg.3127BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),3128CopyReg)3129.addReg(VA.getLocReg());3130CLI.InRegs.push_back(VA.getLocReg());3131}31323133CLI.ResultReg = ResultReg;3134CLI.NumResultRegs = RVLocs.size();31353136return true;3137}31383139bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {3140CallingConv::ID CC = CLI.CallConv;3141bool IsTailCall = CLI.IsTailCall;3142bool IsVarArg = CLI.IsVarArg;3143const Value *Callee = CLI.Callee;3144MCSymbol *Symbol = CLI.Symbol;31453146if (!Callee && !Symbol)3147return false;31483149// Allow SelectionDAG isel to handle calls to functions like setjmp that need3150// a bti instruction following the call.3151if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&3152!Subtarget->noBTIAtReturnTwice() &&3153MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())3154return false;31553156// Allow SelectionDAG isel to handle indirect calls with KCFI checks.3157if (CLI.CB && CLI.CB->isIndirectCall() &&3158CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))3159return false;31603161// Allow SelectionDAG isel to handle tail calls.3162if (IsTailCall)3163return false;31643165// FIXME: we could and should support this, but for now correctness at -O0 is3166// more important.3167if (Subtarget->isTargetILP32())3168return false;31693170CodeModel::Model CM = TM.getCodeModel();3171// Only support the small-addressing and large code models.3172if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())3173return false;31743175// FIXME: Add large code model support for ELF.3176if (CM == CodeModel::Large && !Subtarget->isTargetMachO())3177return false;31783179// ELF -fno-plt compiled intrinsic calls do not have the nonlazybind3180// attribute. 
Check "RtLibUseGOT" instead.3181if (MF->getFunction().getParent()->getRtLibUseGOT())3182return false;31833184// Let SDISel handle vararg functions.3185if (IsVarArg)3186return false;31873188if (Subtarget->isWindowsArm64EC())3189return false;31903191for (auto Flag : CLI.OutFlags)3192if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||3193Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())3194return false;31953196// Set up the argument vectors.3197SmallVector<MVT, 16> OutVTs;3198OutVTs.reserve(CLI.OutVals.size());31993200for (auto *Val : CLI.OutVals) {3201MVT VT;3202if (!isTypeLegal(Val->getType(), VT) &&3203!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))3204return false;32053206// We don't handle vector parameters yet.3207if (VT.isVector() || VT.getSizeInBits() > 64)3208return false;32093210OutVTs.push_back(VT);3211}32123213Address Addr;3214if (Callee && !computeCallAddress(Callee, Addr))3215return false;32163217// The weak function target may be zero; in that case we must use indirect3218// addressing via a stub on windows as it may be out of range for a3219// PC-relative jump.3220if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&3221Addr.getGlobalValue()->hasExternalWeakLinkage())3222return false;32233224// Handle the arguments now that we've gotten them.3225unsigned NumBytes;3226if (!processCallArgs(CLI, OutVTs, NumBytes))3227return false;32283229const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();3230if (RegInfo->isAnyArgRegReserved(*MF))3231RegInfo->emitReservedArgRegCallError(*MF);32323233// Issue the call.3234MachineInstrBuilder MIB;3235if (Subtarget->useSmallAddressing()) {3236const MCInstrDesc &II =3237TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);3238MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);3239if (Symbol)3240MIB.addSym(Symbol, 0);3241else if (Addr.getGlobalValue())3242MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);3243else if (Addr.getReg()) {3244Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);3245MIB.addReg(Reg);3246} else3247return false;3248} else {3249unsigned CallReg = 0;3250if (Symbol) {3251Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);3252BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),3253ADRPReg)3254.addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);32553256CallReg = createResultReg(&AArch64::GPR64RegClass);3257BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3258TII.get(AArch64::LDRXui), CallReg)3259.addReg(ADRPReg)3260.addSym(Symbol,3261AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);3262} else if (Addr.getGlobalValue())3263CallReg = materializeGV(Addr.getGlobalValue());3264else if (Addr.getReg())3265CallReg = Addr.getReg();32663267if (!CallReg)3268return false;32693270const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));3271CallReg = constrainOperandRegClass(II, CallReg, 0);3272MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);3273}32743275// Add implicit physical register uses to the call.3276for (auto Reg : CLI.OutRegs)3277MIB.addReg(Reg, RegState::Implicit);32783279// Add a register mask with the call-preserved registers.3280// Proper defs for return values will be added by setPhysRegsDeadExcept().3281MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));32823283CLI.Call = MIB;32843285// Finish off the call including any return values.3286return finishCall(CLI, NumBytes);3287}32883289bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) 
{3290if (Alignment)3291return Len / Alignment->value() <= 4;3292else3293return Len < 32;3294}32953296bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,3297uint64_t Len, MaybeAlign Alignment) {3298// Make sure we don't bloat code by inlining very large memcpy's.3299if (!isMemCpySmall(Len, Alignment))3300return false;33013302int64_t UnscaledOffset = 0;3303Address OrigDest = Dest;3304Address OrigSrc = Src;33053306while (Len) {3307MVT VT;3308if (!Alignment || *Alignment >= 8) {3309if (Len >= 8)3310VT = MVT::i64;3311else if (Len >= 4)3312VT = MVT::i32;3313else if (Len >= 2)3314VT = MVT::i16;3315else {3316VT = MVT::i8;3317}3318} else {3319assert(Alignment && "Alignment is set in this branch");3320// Bound based on alignment.3321if (Len >= 4 && *Alignment == 4)3322VT = MVT::i32;3323else if (Len >= 2 && *Alignment == 2)3324VT = MVT::i16;3325else {3326VT = MVT::i8;3327}3328}33293330unsigned ResultReg = emitLoad(VT, VT, Src);3331if (!ResultReg)3332return false;33333334if (!emitStore(VT, ResultReg, Dest))3335return false;33363337int64_t Size = VT.getSizeInBits() / 8;3338Len -= Size;3339UnscaledOffset += Size;33403341// We need to recompute the unscaled offset for each iteration.3342Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);3343Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);3344}33453346return true;3347}33483349/// Check if it is possible to fold the condition from the XALU intrinsic3350/// into the user. The condition code will only be updated on success.3351bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,3352const Instruction *I,3353const Value *Cond) {3354if (!isa<ExtractValueInst>(Cond))3355return false;33563357const auto *EV = cast<ExtractValueInst>(Cond);3358if (!isa<IntrinsicInst>(EV->getAggregateOperand()))3359return false;33603361const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());3362MVT RetVT;3363const Function *Callee = II->getCalledFunction();3364Type *RetTy =3365cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);3366if (!isTypeLegal(RetTy, RetVT))3367return false;33683369if (RetVT != MVT::i32 && RetVT != MVT::i64)3370return false;33713372const Value *LHS = II->getArgOperand(0);3373const Value *RHS = II->getArgOperand(1);33743375// Canonicalize immediate to the RHS.3376if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())3377std::swap(LHS, RHS);33783379// Simplify multiplies.3380Intrinsic::ID IID = II->getIntrinsicID();3381switch (IID) {3382default:3383break;3384case Intrinsic::smul_with_overflow:3385if (const auto *C = dyn_cast<ConstantInt>(RHS))3386if (C->getValue() == 2)3387IID = Intrinsic::sadd_with_overflow;3388break;3389case Intrinsic::umul_with_overflow:3390if (const auto *C = dyn_cast<ConstantInt>(RHS))3391if (C->getValue() == 2)3392IID = Intrinsic::uadd_with_overflow;3393break;3394}33953396AArch64CC::CondCode TmpCC;3397switch (IID) {3398default:3399return false;3400case Intrinsic::sadd_with_overflow:3401case Intrinsic::ssub_with_overflow:3402TmpCC = AArch64CC::VS;3403break;3404case Intrinsic::uadd_with_overflow:3405TmpCC = AArch64CC::HS;3406break;3407case Intrinsic::usub_with_overflow:3408TmpCC = AArch64CC::LO;3409break;3410case Intrinsic::smul_with_overflow:3411case Intrinsic::umul_with_overflow:3412TmpCC = AArch64CC::NE;3413break;3414}34153416// Check if both instructions are in the same basic block.3417if (!isValueAvailable(II))3418return false;34193420// Make sure nothing is in the way3421BasicBlock::const_iterator Start(I);3422BasicBlock::const_iterator End(II);3423for (auto Itr = 
std::prev(Start); Itr != End; --Itr) {3424// We only expect extractvalue instructions between the intrinsic and the3425// instruction to be selected.3426if (!isa<ExtractValueInst>(Itr))3427return false;34283429// Check that the extractvalue operand comes from the intrinsic.3430const auto *EVI = cast<ExtractValueInst>(Itr);3431if (EVI->getAggregateOperand() != II)3432return false;3433}34343435CC = TmpCC;3436return true;3437}34383439bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {3440// FIXME: Handle more intrinsics.3441switch (II->getIntrinsicID()) {3442default: return false;3443case Intrinsic::frameaddress: {3444MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();3445MFI.setFrameAddressIsTaken(true);34463447const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();3448Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));3449Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);3450BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3451TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);3452// Recursively load frame address3453// ldr x0, [fp]3454// ldr x0, [x0]3455// ldr x0, [x0]3456// ...3457unsigned DestReg;3458unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();3459while (Depth--) {3460DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,3461SrcReg, 0);3462assert(DestReg && "Unexpected LDR instruction emission failure.");3463SrcReg = DestReg;3464}34653466updateValueMap(II, SrcReg);3467return true;3468}3469case Intrinsic::sponentry: {3470MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();34713472// SP = FP + Fixed Object + 163473int FI = MFI.CreateFixedObject(4, 0, false);3474Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);3475BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3476TII.get(AArch64::ADDXri), ResultReg)3477.addFrameIndex(FI)3478.addImm(0)3479.addImm(0);34803481updateValueMap(II, ResultReg);3482return true;3483}3484case Intrinsic::memcpy:3485case Intrinsic::memmove: {3486const auto *MTI = cast<MemTransferInst>(II);3487// Don't handle volatile.3488if (MTI->isVolatile())3489return false;34903491// Disable inlining for memmove before calls to ComputeAddress. Otherwise,3492// we would emit dead code because we don't currently handle memmoves.3493bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);3494if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {3495// Small memcpy's are common enough that we want to do them without a call3496// if possible.3497uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();3498MaybeAlign Alignment;3499if (MTI->getDestAlign() || MTI->getSourceAlign())3500Alignment = std::min(MTI->getDestAlign().valueOrOne(),3501MTI->getSourceAlign().valueOrOne());3502if (isMemCpySmall(Len, Alignment)) {3503Address Dest, Src;3504if (!computeAddress(MTI->getRawDest(), Dest) ||3505!computeAddress(MTI->getRawSource(), Src))3506return false;3507if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))3508return true;3509}3510}35113512if (!MTI->getLength()->getType()->isIntegerTy(64))3513return false;35143515if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)3516// Fast instruction selection doesn't support the special3517// address spaces.3518return false;35193520const char *IntrMemName = isa<MemCpyInst>(II) ? 
"memcpy" : "memmove";3521return lowerCallTo(II, IntrMemName, II->arg_size() - 1);3522}3523case Intrinsic::memset: {3524const MemSetInst *MSI = cast<MemSetInst>(II);3525// Don't handle volatile.3526if (MSI->isVolatile())3527return false;35283529if (!MSI->getLength()->getType()->isIntegerTy(64))3530return false;35313532if (MSI->getDestAddressSpace() > 255)3533// Fast instruction selection doesn't support the special3534// address spaces.3535return false;35363537return lowerCallTo(II, "memset", II->arg_size() - 1);3538}3539case Intrinsic::sin:3540case Intrinsic::cos:3541case Intrinsic::tan:3542case Intrinsic::pow: {3543MVT RetVT;3544if (!isTypeLegal(II->getType(), RetVT))3545return false;35463547if (RetVT != MVT::f32 && RetVT != MVT::f64)3548return false;35493550static const RTLIB::Libcall LibCallTable[4][2] = {3551{RTLIB::SIN_F32, RTLIB::SIN_F64},3552{RTLIB::COS_F32, RTLIB::COS_F64},3553{RTLIB::TAN_F32, RTLIB::TAN_F64},3554{RTLIB::POW_F32, RTLIB::POW_F64}};3555RTLIB::Libcall LC;3556bool Is64Bit = RetVT == MVT::f64;3557switch (II->getIntrinsicID()) {3558default:3559llvm_unreachable("Unexpected intrinsic.");3560case Intrinsic::sin:3561LC = LibCallTable[0][Is64Bit];3562break;3563case Intrinsic::cos:3564LC = LibCallTable[1][Is64Bit];3565break;3566case Intrinsic::tan:3567LC = LibCallTable[2][Is64Bit];3568break;3569case Intrinsic::pow:3570LC = LibCallTable[3][Is64Bit];3571break;3572}35733574ArgListTy Args;3575Args.reserve(II->arg_size());35763577// Populate the argument list.3578for (auto &Arg : II->args()) {3579ArgListEntry Entry;3580Entry.Val = Arg;3581Entry.Ty = Arg->getType();3582Args.push_back(Entry);3583}35843585CallLoweringInfo CLI;3586MCContext &Ctx = MF->getContext();3587CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),3588TLI.getLibcallName(LC), std::move(Args));3589if (!lowerCallTo(CLI))3590return false;3591updateValueMap(II, CLI.ResultReg);3592return true;3593}3594case Intrinsic::fabs: {3595MVT VT;3596if (!isTypeLegal(II->getType(), VT))3597return false;35983599unsigned Opc;3600switch (VT.SimpleTy) {3601default:3602return false;3603case MVT::f32:3604Opc = AArch64::FABSSr;3605break;3606case MVT::f64:3607Opc = AArch64::FABSDr;3608break;3609}3610Register SrcReg = getRegForValue(II->getOperand(0));3611if (!SrcReg)3612return false;3613Register ResultReg = createResultReg(TLI.getRegClassFor(VT));3614BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)3615.addReg(SrcReg);3616updateValueMap(II, ResultReg);3617return true;3618}3619case Intrinsic::trap:3620BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))3621.addImm(1);3622return true;3623case Intrinsic::debugtrap:3624BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))3625.addImm(0xF000);3626return true;36273628case Intrinsic::sqrt: {3629Type *RetTy = II->getCalledFunction()->getReturnType();36303631MVT VT;3632if (!isTypeLegal(RetTy, VT))3633return false;36343635Register Op0Reg = getRegForValue(II->getOperand(0));3636if (!Op0Reg)3637return false;36383639unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);3640if (!ResultReg)3641return false;36423643updateValueMap(II, ResultReg);3644return true;3645}3646case Intrinsic::sadd_with_overflow:3647case Intrinsic::uadd_with_overflow:3648case Intrinsic::ssub_with_overflow:3649case Intrinsic::usub_with_overflow:3650case Intrinsic::smul_with_overflow:3651case Intrinsic::umul_with_overflow: {3652// This implements the basic lowering of the xalu with overflow intrinsics.3653const Function *Callee = 
II->getCalledFunction();3654auto *Ty = cast<StructType>(Callee->getReturnType());3655Type *RetTy = Ty->getTypeAtIndex(0U);36563657MVT VT;3658if (!isTypeLegal(RetTy, VT))3659return false;36603661if (VT != MVT::i32 && VT != MVT::i64)3662return false;36633664const Value *LHS = II->getArgOperand(0);3665const Value *RHS = II->getArgOperand(1);3666// Canonicalize immediate to the RHS.3667if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())3668std::swap(LHS, RHS);36693670// Simplify multiplies.3671Intrinsic::ID IID = II->getIntrinsicID();3672switch (IID) {3673default:3674break;3675case Intrinsic::smul_with_overflow:3676if (const auto *C = dyn_cast<ConstantInt>(RHS))3677if (C->getValue() == 2) {3678IID = Intrinsic::sadd_with_overflow;3679RHS = LHS;3680}3681break;3682case Intrinsic::umul_with_overflow:3683if (const auto *C = dyn_cast<ConstantInt>(RHS))3684if (C->getValue() == 2) {3685IID = Intrinsic::uadd_with_overflow;3686RHS = LHS;3687}3688break;3689}36903691unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;3692AArch64CC::CondCode CC = AArch64CC::Invalid;3693switch (IID) {3694default: llvm_unreachable("Unexpected intrinsic!");3695case Intrinsic::sadd_with_overflow:3696ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);3697CC = AArch64CC::VS;3698break;3699case Intrinsic::uadd_with_overflow:3700ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);3701CC = AArch64CC::HS;3702break;3703case Intrinsic::ssub_with_overflow:3704ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);3705CC = AArch64CC::VS;3706break;3707case Intrinsic::usub_with_overflow:3708ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);3709CC = AArch64CC::LO;3710break;3711case Intrinsic::smul_with_overflow: {3712CC = AArch64CC::NE;3713Register LHSReg = getRegForValue(LHS);3714if (!LHSReg)3715return false;37163717Register RHSReg = getRegForValue(RHS);3718if (!RHSReg)3719return false;37203721if (VT == MVT::i32) {3722MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);3723Register MulSubReg =3724fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);3725// cmp xreg, wreg, sxtw3726emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,3727AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,3728/*WantResult=*/false);3729MulReg = MulSubReg;3730} else {3731assert(VT == MVT::i64 && "Unexpected value type.");3732// LHSReg and RHSReg cannot be killed by this Mul, since they are3733// reused in the next instruction.3734MulReg = emitMul_rr(VT, LHSReg, RHSReg);3735unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);3736emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,3737/*WantResult=*/false);3738}3739break;3740}3741case Intrinsic::umul_with_overflow: {3742CC = AArch64CC::NE;3743Register LHSReg = getRegForValue(LHS);3744if (!LHSReg)3745return false;37463747Register RHSReg = getRegForValue(RHS);3748if (!RHSReg)3749return false;37503751if (VT == MVT::i32) {3752MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);3753// tst xreg, #0xffffffff000000003754BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3755TII.get(AArch64::ANDSXri), AArch64::XZR)3756.addReg(MulReg)3757.addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));3758MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);3759} else {3760assert(VT == MVT::i64 && "Unexpected value type.");3761// LHSReg and RHSReg cannot be killed by this Mul, since they are3762// reused in the next instruction.3763MulReg = emitMul_rr(VT, LHSReg, RHSReg);3764unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);3765emitSubs_rr(VT, 
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
        emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
      }
      break;
    }
    }

    if (MulReg) {
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    }

    if (!ResultReg1)
      return false;

    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, AArch64::WZR,
                                  getInvertedCondCode(CC));
    (void)ResultReg2;
    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
    return true;
  }
  case Intrinsic::aarch64_crc32b:
  case Intrinsic::aarch64_crc32h:
  case Intrinsic::aarch64_crc32w:
  case Intrinsic::aarch64_crc32x:
  case Intrinsic::aarch64_crc32cb:
  case Intrinsic::aarch64_crc32ch:
  case Intrinsic::aarch64_crc32cw:
  case Intrinsic::aarch64_crc32cx: {
    if (!Subtarget->hasCRC())
      return false;

    unsigned Opc;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::aarch64_crc32b:
      Opc = AArch64::CRC32Brr;
      break;
    case Intrinsic::aarch64_crc32h:
      Opc = AArch64::CRC32Hrr;
      break;
    case Intrinsic::aarch64_crc32w:
      Opc = AArch64::CRC32Wrr;
      break;
    case Intrinsic::aarch64_crc32x:
      Opc = AArch64::CRC32Xrr;
      break;
    case Intrinsic::aarch64_crc32cb:
      Opc = AArch64::CRC32CBrr;
      break;
    case Intrinsic::aarch64_crc32ch:
      Opc = AArch64::CRC32CHrr;
      break;
    case Intrinsic::aarch64_crc32cw:
      Opc = AArch64::CRC32CWrr;
      break;
    case Intrinsic::aarch64_crc32cx:
      Opc = AArch64::CRC32CXrr;
      break;
    }

    Register LHSReg = getRegForValue(II->getArgOperand(0));
    Register RHSReg = getRegForValue(II->getArgOperand(1));
    if (!LHSReg || !RHSReg)
      return false;

    Register ResultReg =
        fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
    updateValueMap(II, ResultReg);
    return true;
  }
  }
  return false;
}

bool AArch64FastISel::selectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(0);

    // Don't bother handling odd stuff for now.
    if ((VA.getLocInfo() != CCValAssign::Full) &&
        (VA.getLocInfo() != CCValAssign::BCvt))
      return false;

    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    Register Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    Register DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
        !Subtarget->isLittleEndian())
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;

    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // "Callee" (i.e. value producer) zero extends pointers at function
    // boundary.
    if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
      SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
    MIB.addReg(RetReg, RegState::Implicit);
  return true;
}

bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  Register SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate only
  // generates a COPY. We cannot mark the source register also as the result
  // register, because this can incorrectly transfer the kill flag onto the
  // source register.
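  // For example, truncating an i64 value to i8 becomes an EXTRACT_SUBREG of
  // the low 32 bits followed by an AND with 0xff.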
  unsigned ResultReg;
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32-bits.
    Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
                                                AArch64::sub_32);
    // Create the AND instruction which performs the actual truncation.
    ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
  } else {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(SrcReg);
  }

  updateValueMap(I, ResultReg);
  return true;
}

unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (IsZExt) {
    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
    if (DestVT == MVT::i64) {
      // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
      // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
      Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(0)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return 0;
    }
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
                            0, 0);
  }
}

unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
  unsigned Opc, ZReg;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
  case MVT::i64:
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
  }
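  // The multiply is emitted as MADD with the zero register as the accumulator
  // (MUL Rd, Rn, Rm is an alias of MADD Rd, Rn, Rm, ZR).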
  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
}

unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op1, AArch64::XZR);
}

unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op1, AArch64::XZR);
}

unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
                                     unsigned Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSLVWr; break;
  case MVT::i64: Opc = AArch64::LSLVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc)
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);

  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}

unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
                                     unsigned Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSRVWr; break;
  case MVT::i64: Opc = AArch64::LSRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
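  // For i8/i16 the shift is performed in a 32-bit register: clear the bits
  // above the narrow type in both operands first so the right shift pulls in
  // zeros, then AND the result back down to the narrow width.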
  if (NeedTrunc) {
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
  }
  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}

unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
                                     unsigned Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::ASRVWr; break;
  case MVT::i64: Opc = AArch64::ASRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
  }
  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}

unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }
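  // The bitfield move emitted below uses immr = 0 and imms = Imm, which
  // zero-/sign-extends bits [Imm:0]; these are the uxtb/uxth and
  // sxtb/sxth/sxtw aliases.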
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
}

static bool isZExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRWroX:
  case AArch64::LDRBBroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRWroW:
    return true;
  }
}

static bool isSExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSWi:
  case AArch64::LDRSBWui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSBXui:
  case AArch64::LDRSHXui:
  case AArch64::LDRSWui:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
    return true;
  }
}

bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI || !LI->hasOneUse())
    return false;

  // Check if the load instruction has already been selected.
  Register Reg = lookUpRegForValue(LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check if the correct load instruction has been emitted - SelectionDAG might
  // have emitted a zero-extending load, but we need a sign-extending load.
  bool IsZExt = isa<ZExtInst>(I);
  const auto *LoadMI = MI;
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    Register LoadReg = MI->getOperand(1).getReg();
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
    assert(LoadMI && "Expected valid instruction");
  }
  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    return false;

  // Nothing to be done.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    Reg = MI->getOperand(1).getReg();
    MachineBasicBlock::iterator I(MI);
    removeDeadCode(I, std::next(I));
  }
  updateValueMap(I, Reg);
  return true;
}

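// Lower a zext/sext. If the value was produced by a load that already
// performed the extension, or comes from an argument marked zeroext/signext,
// it is reused directly instead of emitting a redundant extend.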
bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  Register SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool Is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
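  // For example, a 32-bit srem becomes:  sdiv wQ, wN, wD;  msub wR, wQ, wD, wN.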
  Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
  // The remainder is computed as numerator - (quotient * denominator) using the
  // MSUB instruction.
  Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectBinaryOp(I, ISD::MUL);

  const Value *Src0 = I->getOperand(0);
  const Value *Src1 = I->getOperand(1);
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
    if (C->getValue().isPowerOf2())
      std::swap(Src0, Src1);

  // Try to simplify to a shift instruction.
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        if (!isIntExtFree(ZExt)) {
          MVT VT;
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        if (!isIntExtFree(SExt)) {
          MVT VT;
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(0);
          }
        }
      }

      Register Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);

      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    Register Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  RTLIB::Libcall LC;
  switch (RetVT.SimpleTy) {
  default:
    return false;
  case MVT::f32:
    LC = RTLIB::REM_F32;
    break;
  case MVT::f64:
    LC = RTLIB::REM_F64;
    break;
  }

  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Arg;
    Entry.Ty = Arg->getType();
    Args.push_back(Entry);
  }

  CallLoweringInfo CLI;
  MCContext &Ctx = MF->getContext();
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}

bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || C.isNegatedPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countr_zero();
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

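  // An exact sdiv cannot have a remainder, so no rounding adjustment is
  // needed; a single arithmetic shift is enough.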
  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
                                        AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
                              AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return 0;

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return IdxN;
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
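/// For example, a GEP with a single variable i64 index into an i32 array is
/// lowered here to a MUL of the index by the element size followed by an ADD,
/// while constant indices are folded into one immediate ADD of the accumulated
/// offset.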
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  if (Subtarget->isTargetILP32())
    return false;

  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += GTI.getSequentialElementStride(DL) *
                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOptLevel::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  const Register AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const Register DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const Register NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  const Register ResultReg1 = createResultReg(ResRC);
  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
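  // The CMP_SWAP_* pseudo yields the loaded value in ResultReg1; the SUBS and
  // CSINC emitted after it compare that value against the expected one and
  // materialize the i1 'success' result of the cmpxchg.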
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  if (TLI.fallBackToDAGISel(*I))
    return false;
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // fall-back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {

  SMEAttrs CallerAttrs(*FuncInfo.Fn);
  if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
      CallerAttrs.hasStreamingInterfaceOrBody() ||
      CallerAttrs.hasStreamingCompatibleInterface())
    return nullptr;
  return new AArch64FastISel(FuncInfo, LibInfo);
}