Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
35269 views
//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains a pass that expands pseudo instructions into target9// instructions to allow proper scheduling and other late optimizations. This10// pass should be run after register allocation but before the post-regalloc11// scheduling pass.12//13//===----------------------------------------------------------------------===//1415#include "AArch64ExpandImm.h"16#include "AArch64InstrInfo.h"17#include "AArch64MachineFunctionInfo.h"18#include "AArch64Subtarget.h"19#include "MCTargetDesc/AArch64AddressingModes.h"20#include "Utils/AArch64BaseInfo.h"21#include "llvm/CodeGen/LivePhysRegs.h"22#include "llvm/CodeGen/MachineBasicBlock.h"23#include "llvm/CodeGen/MachineConstantPool.h"24#include "llvm/CodeGen/MachineFunction.h"25#include "llvm/CodeGen/MachineFunctionPass.h"26#include "llvm/CodeGen/MachineInstr.h"27#include "llvm/CodeGen/MachineInstrBuilder.h"28#include "llvm/CodeGen/MachineOperand.h"29#include "llvm/CodeGen/TargetSubtargetInfo.h"30#include "llvm/IR/DebugLoc.h"31#include "llvm/MC/MCInstrDesc.h"32#include "llvm/Pass.h"33#include "llvm/Support/CodeGen.h"34#include "llvm/Support/MathExtras.h"35#include "llvm/Target/TargetMachine.h"36#include "llvm/TargetParser/Triple.h"37#include <cassert>38#include <cstdint>39#include <iterator>40#include <utility>4142using namespace llvm;4344#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"4546namespace {4748class AArch64ExpandPseudo : public MachineFunctionPass {49public:50const AArch64InstrInfo *TII;5152static char ID;5354AArch64ExpandPseudo() : MachineFunctionPass(ID) 
{55initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());56}5758bool runOnMachineFunction(MachineFunction &Fn) override;5960StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }6162private:63bool expandMBB(MachineBasicBlock &MBB);64bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,65MachineBasicBlock::iterator &NextMBBI);66bool expandMultiVecPseudo(MachineBasicBlock &MBB,67MachineBasicBlock::iterator MBBI,68TargetRegisterClass ContiguousClass,69TargetRegisterClass StridedClass,70unsigned ContiguousOpc, unsigned StridedOpc);71bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,72unsigned BitSize);7374bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,75MachineBasicBlock::iterator MBBI);76bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,77unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,78unsigned ExtendImm, unsigned ZeroReg,79MachineBasicBlock::iterator &NextMBBI);80bool expandCMP_SWAP_128(MachineBasicBlock &MBB,81MachineBasicBlock::iterator MBBI,82MachineBasicBlock::iterator &NextMBBI);83bool expandSetTagLoop(MachineBasicBlock &MBB,84MachineBasicBlock::iterator MBBI,85MachineBasicBlock::iterator &NextMBBI);86bool expandSVESpillFill(MachineBasicBlock &MBB,87MachineBasicBlock::iterator MBBI, unsigned Opc,88unsigned N);89bool expandCALL_RVMARKER(MachineBasicBlock &MBB,90MachineBasicBlock::iterator MBBI);91bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);92bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,93MachineBasicBlock::iterator MBBI);94MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,95MachineBasicBlock::iterator MBBI);96MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,97MachineBasicBlock::iterator MBBI);98};99100} // end anonymous namespace101102char AArch64ExpandPseudo::ID = 0;103104INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",105AARCH64_EXPAND_PSEUDO_NAME, false, 
false)106107/// Transfer implicit operands on the pseudo instruction to the108/// instructions created from the expansion.109static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,110MachineInstrBuilder &DefMI) {111const MCInstrDesc &Desc = OldMI.getDesc();112for (const MachineOperand &MO :113llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {114assert(MO.isReg() && MO.getReg());115if (MO.isUse())116UseMI.add(MO);117else118DefMI.add(MO);119}120}121122/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more123/// real move-immediate instructions to synthesize the immediate.124bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,125MachineBasicBlock::iterator MBBI,126unsigned BitSize) {127MachineInstr &MI = *MBBI;128Register DstReg = MI.getOperand(0).getReg();129uint64_t RenamableState =130MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;131uint64_t Imm = MI.getOperand(1).getImm();132133if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {134// Useless def, and we don't want to risk creating an invalid ORR (which135// would really write to sp).136MI.eraseFromParent();137return true;138}139140SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;141AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);142assert(Insn.size() != 0);143144SmallVector<MachineInstrBuilder, 4> MIBS;145for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {146bool LastItem = std::next(I) == E;147switch (I->Opcode)148{149default: llvm_unreachable("unhandled!"); break;150151case AArch64::ORRWri:152case AArch64::ORRXri:153if (I->Op1 == 0) {154MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))155.add(MI.getOperand(0))156.addReg(BitSize == 32 ? 
AArch64::WZR : AArch64::XZR)157.addImm(I->Op2));158} else {159Register DstReg = MI.getOperand(0).getReg();160bool DstIsDead = MI.getOperand(0).isDead();161MIBS.push_back(162BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))163.addReg(DstReg, RegState::Define |164getDeadRegState(DstIsDead && LastItem) |165RenamableState)166.addReg(DstReg)167.addImm(I->Op2));168}169break;170case AArch64::ORRWrs:171case AArch64::ORRXrs: {172Register DstReg = MI.getOperand(0).getReg();173bool DstIsDead = MI.getOperand(0).isDead();174MIBS.push_back(175BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))176.addReg(DstReg, RegState::Define |177getDeadRegState(DstIsDead && LastItem) |178RenamableState)179.addReg(DstReg)180.addReg(DstReg)181.addImm(I->Op2));182} break;183case AArch64::ANDXri:184case AArch64::EORXri:185if (I->Op1 == 0) {186MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))187.add(MI.getOperand(0))188.addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)189.addImm(I->Op2));190} else {191Register DstReg = MI.getOperand(0).getReg();192bool DstIsDead = MI.getOperand(0).isDead();193MIBS.push_back(194BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))195.addReg(DstReg, RegState::Define |196getDeadRegState(DstIsDead && LastItem) |197RenamableState)198.addReg(DstReg)199.addImm(I->Op2));200}201break;202case AArch64::MOVNWi:203case AArch64::MOVNXi:204case AArch64::MOVZWi:205case AArch64::MOVZXi: {206bool DstIsDead = MI.getOperand(0).isDead();207MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))208.addReg(DstReg, RegState::Define |209getDeadRegState(DstIsDead && LastItem) |210RenamableState)211.addImm(I->Op1)212.addImm(I->Op2));213} break;214case AArch64::MOVKWi:215case AArch64::MOVKXi: {216Register DstReg = MI.getOperand(0).getReg();217bool DstIsDead = MI.getOperand(0).isDead();218MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))219.addReg(DstReg,220RegState::Define |221getDeadRegState(DstIsDead && 
LastItem) |222RenamableState)223.addReg(DstReg)224.addImm(I->Op1)225.addImm(I->Op2));226} break;227}228}229transferImpOps(MI, MIBS.front(), MIBS.back());230MI.eraseFromParent();231return true;232}233234bool AArch64ExpandPseudo::expandCMP_SWAP(235MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,236unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,237MachineBasicBlock::iterator &NextMBBI) {238MachineInstr &MI = *MBBI;239MIMetadata MIMD(MI);240const MachineOperand &Dest = MI.getOperand(0);241Register StatusReg = MI.getOperand(1).getReg();242bool StatusDead = MI.getOperand(1).isDead();243// Duplicating undef operands into 2 instructions does not guarantee the same244// value on both; However undef should be replaced by xzr anyway.245assert(!MI.getOperand(2).isUndef() && "cannot handle undef");246Register AddrReg = MI.getOperand(2).getReg();247Register DesiredReg = MI.getOperand(3).getReg();248Register NewReg = MI.getOperand(4).getReg();249250MachineFunction *MF = MBB.getParent();251auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());252auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());253auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());254255MF->insert(++MBB.getIterator(), LoadCmpBB);256MF->insert(++LoadCmpBB->getIterator(), StoreBB);257MF->insert(++StoreBB->getIterator(), DoneBB);258259// .Lloadcmp:260// mov wStatus, 0261// ldaxr xDest, [xAddr]262// cmp xDest, xDesired263// b.ne .Ldone264if (!StatusDead)265BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)266.addImm(0).addImm(0);267BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())268.addReg(AddrReg);269BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)270.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))271.addReg(DesiredReg)272.addImm(ExtendImm);273BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))274.addImm(AArch64CC::NE)275.addMBB(DoneBB)276.addReg(AArch64::NZCV, RegState::Implicit | 
RegState::Kill);277LoadCmpBB->addSuccessor(DoneBB);278LoadCmpBB->addSuccessor(StoreBB);279280// .Lstore:281// stlxr wStatus, xNew, [xAddr]282// cbnz wStatus, .Lloadcmp283BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)284.addReg(NewReg)285.addReg(AddrReg);286BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))287.addReg(StatusReg, getKillRegState(StatusDead))288.addMBB(LoadCmpBB);289StoreBB->addSuccessor(LoadCmpBB);290StoreBB->addSuccessor(DoneBB);291292DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());293DoneBB->transferSuccessors(&MBB);294295MBB.addSuccessor(LoadCmpBB);296297NextMBBI = MBB.end();298MI.eraseFromParent();299300// Recompute livein lists.301LivePhysRegs LiveRegs;302computeAndAddLiveIns(LiveRegs, *DoneBB);303computeAndAddLiveIns(LiveRegs, *StoreBB);304computeAndAddLiveIns(LiveRegs, *LoadCmpBB);305// Do an extra pass around the loop to get loop carried registers right.306StoreBB->clearLiveIns();307computeAndAddLiveIns(LiveRegs, *StoreBB);308LoadCmpBB->clearLiveIns();309computeAndAddLiveIns(LiveRegs, *LoadCmpBB);310311return true;312}313314bool AArch64ExpandPseudo::expandCMP_SWAP_128(315MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,316MachineBasicBlock::iterator &NextMBBI) {317MachineInstr &MI = *MBBI;318MIMetadata MIMD(MI);319MachineOperand &DestLo = MI.getOperand(0);320MachineOperand &DestHi = MI.getOperand(1);321Register StatusReg = MI.getOperand(2).getReg();322bool StatusDead = MI.getOperand(2).isDead();323// Duplicating undef operands into 2 instructions does not guarantee the same324// value on both; However undef should be replaced by xzr anyway.325assert(!MI.getOperand(3).isUndef() && "cannot handle undef");326Register AddrReg = MI.getOperand(3).getReg();327Register DesiredLoReg = MI.getOperand(4).getReg();328Register DesiredHiReg = MI.getOperand(5).getReg();329Register NewLoReg = MI.getOperand(6).getReg();330Register NewHiReg = MI.getOperand(7).getReg();331332unsigned LdxpOp, StxpOp;333334switch (MI.getOpcode()) {335case 
AArch64::CMP_SWAP_128_MONOTONIC:336LdxpOp = AArch64::LDXPX;337StxpOp = AArch64::STXPX;338break;339case AArch64::CMP_SWAP_128_RELEASE:340LdxpOp = AArch64::LDXPX;341StxpOp = AArch64::STLXPX;342break;343case AArch64::CMP_SWAP_128_ACQUIRE:344LdxpOp = AArch64::LDAXPX;345StxpOp = AArch64::STXPX;346break;347case AArch64::CMP_SWAP_128:348LdxpOp = AArch64::LDAXPX;349StxpOp = AArch64::STLXPX;350break;351default:352llvm_unreachable("Unexpected opcode");353}354355MachineFunction *MF = MBB.getParent();356auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());357auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());358auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());359auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());360361MF->insert(++MBB.getIterator(), LoadCmpBB);362MF->insert(++LoadCmpBB->getIterator(), StoreBB);363MF->insert(++StoreBB->getIterator(), FailBB);364MF->insert(++FailBB->getIterator(), DoneBB);365366// .Lloadcmp:367// ldaxp xDestLo, xDestHi, [xAddr]368// cmp xDestLo, xDesiredLo369// sbcs xDestHi, xDesiredHi370// b.ne .Ldone371BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))372.addReg(DestLo.getReg(), RegState::Define)373.addReg(DestHi.getReg(), RegState::Define)374.addReg(AddrReg);375BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)376.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))377.addReg(DesiredLoReg)378.addImm(0);379BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)380.addUse(AArch64::WZR)381.addUse(AArch64::WZR)382.addImm(AArch64CC::EQ);383BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)384.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))385.addReg(DesiredHiReg)386.addImm(0);387BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)388.addUse(StatusReg, RegState::Kill)389.addUse(StatusReg, RegState::Kill)390.addImm(AArch64CC::EQ);391BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))392.addUse(StatusReg, 
getKillRegState(StatusDead))393.addMBB(FailBB);394LoadCmpBB->addSuccessor(FailBB);395LoadCmpBB->addSuccessor(StoreBB);396397// .Lstore:398// stlxp wStatus, xNewLo, xNewHi, [xAddr]399// cbnz wStatus, .Lloadcmp400BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)401.addReg(NewLoReg)402.addReg(NewHiReg)403.addReg(AddrReg);404BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))405.addReg(StatusReg, getKillRegState(StatusDead))406.addMBB(LoadCmpBB);407BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);408StoreBB->addSuccessor(LoadCmpBB);409StoreBB->addSuccessor(DoneBB);410411// .Lfail:412// stlxp wStatus, xDestLo, xDestHi, [xAddr]413// cbnz wStatus, .Lloadcmp414BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)415.addReg(DestLo.getReg())416.addReg(DestHi.getReg())417.addReg(AddrReg);418BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))419.addReg(StatusReg, getKillRegState(StatusDead))420.addMBB(LoadCmpBB);421FailBB->addSuccessor(LoadCmpBB);422FailBB->addSuccessor(DoneBB);423424DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());425DoneBB->transferSuccessors(&MBB);426427MBB.addSuccessor(LoadCmpBB);428429NextMBBI = MBB.end();430MI.eraseFromParent();431432// Recompute liveness bottom up.433LivePhysRegs LiveRegs;434computeAndAddLiveIns(LiveRegs, *DoneBB);435computeAndAddLiveIns(LiveRegs, *FailBB);436computeAndAddLiveIns(LiveRegs, *StoreBB);437computeAndAddLiveIns(LiveRegs, *LoadCmpBB);438439// Do an extra pass in the loop to get the loop carried dependencies right.440FailBB->clearLiveIns();441computeAndAddLiveIns(LiveRegs, *FailBB);442StoreBB->clearLiveIns();443computeAndAddLiveIns(LiveRegs, *StoreBB);444LoadCmpBB->clearLiveIns();445computeAndAddLiveIns(LiveRegs, *LoadCmpBB);446447return true;448}449450/// \brief Expand Pseudos to Instructions with destructive operands.451///452/// This mechanism uses MOVPRFX instructions for zeroing the false lanes453/// or for fixing relaxed register allocation conditions to comply with454/// the instructions register constraints. 
The latter case may be cheaper455/// than setting the register constraints in the register allocator,456/// since that will insert regular MOV instructions rather than MOVPRFX.457///458/// Example (after register allocation):459///460/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0461///462/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.463/// * We cannot map directly to FSUB_ZPmZ_B because the register464/// constraints of the instruction are not met.465/// * Also the _ZERO specifies the false lanes need to be zeroed.466///467/// We first try to see if the destructive operand == result operand,468/// if not, we try to swap the operands, e.g.469///470/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1471///472/// But because FSUB_ZPmZ is not commutative, this is semantically473/// different, so we need a reverse instruction:474///475/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1476///477/// Then we implement the zeroing of the false lanes of Z0 by adding478/// a zeroing MOVPRFX instruction:479///480/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0481/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1482///483/// Note that this can only be done for _ZERO or _UNDEF variants where484/// we can guarantee the false lanes to be zeroed (by implementing this)485/// or that they are undef (don't care / not used), otherwise the486/// swapping of operands is illegal because the operation is not487/// (or cannot be emulated to be) fully commutative.488bool AArch64ExpandPseudo::expand_DestructiveOp(489MachineInstr &MI,490MachineBasicBlock &MBB,491MachineBasicBlock::iterator MBBI) {492unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());493uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;494uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;495bool FalseZero = FalseLanes == AArch64::FalseLanesZero;496Register DstReg = MI.getOperand(0).getReg();497bool DstIsDead = MI.getOperand(0).isDead();498bool UseRev = false;499unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;500501switch (DType) {502case 
AArch64::DestructiveBinaryComm:503case AArch64::DestructiveBinaryCommWithRev:504if (DstReg == MI.getOperand(3).getReg()) {505// FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1506std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);507UseRev = true;508break;509}510[[fallthrough]];511case AArch64::DestructiveBinary:512case AArch64::DestructiveBinaryImm:513std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);514break;515case AArch64::DestructiveUnaryPassthru:516std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);517break;518case AArch64::DestructiveTernaryCommWithRev:519std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);520if (DstReg == MI.getOperand(3).getReg()) {521// FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za522std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);523UseRev = true;524} else if (DstReg == MI.getOperand(4).getReg()) {525// FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za526std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);527UseRev = true;528}529break;530default:531llvm_unreachable("Unsupported Destructive Operand type");532}533534// MOVPRFX can only be used if the destination operand535// is the destructive operand, not as any other operand,536// so the Destructive Operand must be unique.537bool DOPRegIsUnique = false;538switch (DType) {539case AArch64::DestructiveBinary:540DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();541break;542case AArch64::DestructiveBinaryComm:543case AArch64::DestructiveBinaryCommWithRev:544DOPRegIsUnique =545DstReg != MI.getOperand(DOPIdx).getReg() ||546MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();547break;548case AArch64::DestructiveUnaryPassthru:549case AArch64::DestructiveBinaryImm:550DOPRegIsUnique = true;551break;552case AArch64::DestructiveTernaryCommWithRev:553DOPRegIsUnique =554DstReg != MI.getOperand(DOPIdx).getReg() ||555(MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() 
&&556MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());557break;558}559560// Resolve the reverse opcode561if (UseRev) {562int NewOpcode;563// e.g. DIV -> DIVR564if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)565Opcode = NewOpcode;566// e.g. DIVR -> DIV567else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)568Opcode = NewOpcode;569}570571// Get the right MOVPRFX572uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);573unsigned MovPrfx, LSLZero, MovPrfxZero;574switch (ElementSize) {575case AArch64::ElementSizeNone:576case AArch64::ElementSizeB:577MovPrfx = AArch64::MOVPRFX_ZZ;578LSLZero = AArch64::LSL_ZPmI_B;579MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;580break;581case AArch64::ElementSizeH:582MovPrfx = AArch64::MOVPRFX_ZZ;583LSLZero = AArch64::LSL_ZPmI_H;584MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;585break;586case AArch64::ElementSizeS:587MovPrfx = AArch64::MOVPRFX_ZZ;588LSLZero = AArch64::LSL_ZPmI_S;589MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;590break;591case AArch64::ElementSizeD:592MovPrfx = AArch64::MOVPRFX_ZZ;593LSLZero = AArch64::LSL_ZPmI_D;594MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;595break;596default:597llvm_unreachable("Unsupported ElementSize");598}599600//601// Create the destructive operation (if required)602//603MachineInstrBuilder PRFX, DOP;604if (FalseZero) {605// If we cannot prefix the requested instruction we'll instead emit a606// prefixed_zeroing_mov for DestructiveBinary.607assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||608DType == AArch64::DestructiveBinaryComm ||609DType == AArch64::DestructiveBinaryCommWithRev) &&610"The destructive operand should be unique");611assert(ElementSize != AArch64::ElementSizeNone &&612"This instruction is unpredicated");613614// Merge source operand into destination register615PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))616.addReg(DstReg, 
RegState::Define)617.addReg(MI.getOperand(PredIdx).getReg())618.addReg(MI.getOperand(DOPIdx).getReg());619620// After the movprfx, the destructive operand is same as Dst621DOPIdx = 0;622623// Create the additional LSL to zero the lanes when the DstReg is not624// unique. Zeros the lanes in z0 that aren't active in p0 with sequence625// movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;626if ((DType == AArch64::DestructiveBinary ||627DType == AArch64::DestructiveBinaryComm ||628DType == AArch64::DestructiveBinaryCommWithRev) &&629!DOPRegIsUnique) {630BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))631.addReg(DstReg, RegState::Define)632.add(MI.getOperand(PredIdx))633.addReg(DstReg)634.addImm(0);635}636} else if (DstReg != MI.getOperand(DOPIdx).getReg()) {637assert(DOPRegIsUnique && "The destructive operand should be unique");638PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))639.addReg(DstReg, RegState::Define)640.addReg(MI.getOperand(DOPIdx).getReg());641DOPIdx = 0;642}643644//645// Create the destructive operation646//647DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))648.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));649650switch (DType) {651case AArch64::DestructiveUnaryPassthru:652DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)653.add(MI.getOperand(PredIdx))654.add(MI.getOperand(SrcIdx));655break;656case AArch64::DestructiveBinary:657case AArch64::DestructiveBinaryImm:658case AArch64::DestructiveBinaryComm:659case AArch64::DestructiveBinaryCommWithRev:660DOP.add(MI.getOperand(PredIdx))661.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)662.add(MI.getOperand(SrcIdx));663break;664case AArch64::DestructiveTernaryCommWithRev:665DOP.add(MI.getOperand(PredIdx))666.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)667.add(MI.getOperand(SrcIdx))668.add(MI.getOperand(Src2Idx));669break;670}671672if (PRFX) {673finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());674transferImpOps(MI, PRFX, 
DOP);675} else676transferImpOps(MI, DOP, DOP);677678MI.eraseFromParent();679return true;680}681682bool AArch64ExpandPseudo::expandSetTagLoop(683MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,684MachineBasicBlock::iterator &NextMBBI) {685MachineInstr &MI = *MBBI;686DebugLoc DL = MI.getDebugLoc();687Register SizeReg = MI.getOperand(0).getReg();688Register AddressReg = MI.getOperand(1).getReg();689690MachineFunction *MF = MBB.getParent();691692bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;693const unsigned OpCode1 =694ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;695const unsigned OpCode2 =696ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;697698unsigned Size = MI.getOperand(2).getImm();699assert(Size > 0 && Size % 16 == 0);700if (Size % (16 * 2) != 0) {701BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)702.addReg(AddressReg)703.addReg(AddressReg)704.addImm(1);705Size -= 16;706}707MachineBasicBlock::iterator I =708BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)709.addImm(Size);710expandMOVImm(MBB, I, 64);711712auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());713auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());714715MF->insert(++MBB.getIterator(), LoopBB);716MF->insert(++LoopBB->getIterator(), DoneBB);717718BuildMI(LoopBB, DL, TII->get(OpCode2))719.addDef(AddressReg)720.addReg(AddressReg)721.addReg(AddressReg)722.addImm(2)723.cloneMemRefs(MI)724.setMIFlags(MI.getFlags());725BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))726.addDef(SizeReg)727.addReg(SizeReg)728.addImm(16 * 2)729.addImm(0);730BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))731.addImm(AArch64CC::NE)732.addMBB(LoopBB)733.addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);734735LoopBB->addSuccessor(LoopBB);736LoopBB->addSuccessor(DoneBB);737738DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());739DoneBB->transferSuccessors(&MBB);740741MBB.addSuccessor(LoopBB);742743NextMBBI = 
MBB.end();744MI.eraseFromParent();745// Recompute liveness bottom up.746LivePhysRegs LiveRegs;747computeAndAddLiveIns(LiveRegs, *DoneBB);748computeAndAddLiveIns(LiveRegs, *LoopBB);749// Do an extra pass in the loop to get the loop carried dependencies right.750// FIXME: is this necessary?751LoopBB->clearLiveIns();752computeAndAddLiveIns(LiveRegs, *LoopBB);753DoneBB->clearLiveIns();754computeAndAddLiveIns(LiveRegs, *DoneBB);755756return true;757}758759bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,760MachineBasicBlock::iterator MBBI,761unsigned Opc, unsigned N) {762assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||763Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&764"Unexpected opcode");765unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)766? RegState::Define767: 0;768unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)769? AArch64::zsub0770: AArch64::psub0;771const TargetRegisterInfo *TRI =772MBB.getParent()->getSubtarget().getRegisterInfo();773MachineInstr &MI = *MBBI;774for (unsigned Offset = 0; Offset < N; ++Offset) {775int ImmOffset = MI.getOperand(2).getImm() + Offset;776bool Kill = (Offset + 1 == N) ? 
MI.getOperand(1).isKill() : false;777assert(ImmOffset >= -256 && ImmOffset < 256 &&778"Immediate spill offset out of range");779BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))780.addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),781RState)782.addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))783.addImm(ImmOffset);784}785MI.eraseFromParent();786return true;787}788789// Create a call with the passed opcode and explicit operands, copying over all790// the implicit operands from *MBBI, starting at the regmask.791static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,792MachineBasicBlock::iterator MBBI,793const AArch64InstrInfo *TII,794unsigned Opcode,795ArrayRef<MachineOperand> ExplicitOps,796unsigned RegMaskStartIdx) {797// Build the MI, with explicit operands first (including the call target).798MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))799.add(ExplicitOps)800.getInstr();801802// Register arguments are added during ISel, but cannot be added as explicit803// operands of the branch as it expects to be B <target> which is only one804// operand. 
Instead they are implicit operands used by the branch.805while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {806const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);807assert(MOP.isReg() && "can only add register operands");808Call->addOperand(MachineOperand::CreateReg(809MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,810/*isDead=*/false, /*isUndef=*/MOP.isUndef()));811RegMaskStartIdx++;812}813for (const MachineOperand &MO :814llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))815Call->addOperand(MO);816817return Call;818}819820// Create a call to CallTarget, copying over all the operands from *MBBI,821// starting at the regmask.822static MachineInstr *createCall(MachineBasicBlock &MBB,823MachineBasicBlock::iterator MBBI,824const AArch64InstrInfo *TII,825MachineOperand &CallTarget,826unsigned RegMaskStartIdx) {827unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;828829assert((CallTarget.isGlobal() || CallTarget.isReg()) &&830"invalid operand for regular call");831return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);832}833834bool AArch64ExpandPseudo::expandCALL_RVMARKER(835MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {836// Expand CALL_RVMARKER pseudo to:837// - a branch to the call target, followed by838// - the special `mov x29, x29` marker, and839// - another branch, to the runtime function840// Mark the sequence as bundle, to avoid passes moving other code in between.841MachineInstr &MI = *MBBI;842MachineOperand &RVTarget = MI.getOperand(0);843assert(RVTarget.isGlobal() && "invalid operand for attached call");844845MachineInstr *OriginalCall = nullptr;846847if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {848// ptrauth call.849const MachineOperand &CallTarget = MI.getOperand(1);850const MachineOperand &Key = MI.getOperand(2);851const MachineOperand &IntDisc = MI.getOperand(3);852const MachineOperand &AddrDisc = MI.getOperand(4);853854assert((Key.getImm() == AArch64PACKey::IA 
||855Key.getImm() == AArch64PACKey::IB) &&856"Invalid auth call key");857858MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};859860OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,861/*RegMaskStartIdx=*/5);862} else {863assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");864OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),865// Regmask starts after the RV and call targets.866/*RegMaskStartIdx=*/2);867}868869BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))870.addReg(AArch64::FP, RegState::Define)871.addReg(AArch64::XZR)872.addReg(AArch64::FP)873.addImm(0);874875auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))876.add(RVTarget)877.getInstr();878879if (MI.shouldUpdateCallSiteInfo())880MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);881882MI.eraseFromParent();883finalizeBundle(MBB, OriginalCall->getIterator(),884std::next(RVCall->getIterator()));885return true;886}887888bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,889MachineBasicBlock::iterator MBBI) {890// Expand CALL_BTI pseudo to:891// - a branch to the call target892// - a BTI instruction893// Mark the sequence as a bundle, to avoid passes moving other code in894// between.895MachineInstr &MI = *MBBI;896MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),897// Regmask starts after the call target.898/*RegMaskStartIdx=*/1);899900Call->setCFIType(*MBB.getParent(), MI.getCFIType());901902MachineInstr *BTI =903BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))904// BTI J so that setjmp can to BR to this.905.addImm(36)906.getInstr();907908if (MI.shouldUpdateCallSiteInfo())909MBB.getParent()->moveCallSiteInfo(&MI, Call);910911MI.eraseFromParent();912finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));913return true;914}915916bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(917MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {918Register 
CtxReg = MBBI->getOperand(0).getReg();919Register BaseReg = MBBI->getOperand(1).getReg();920int Offset = MBBI->getOperand(2).getImm();921DebugLoc DL(MBBI->getDebugLoc());922auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();923924if (STI.getTargetTriple().getArchName() != "arm64e") {925BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))926.addUse(CtxReg)927.addUse(BaseReg)928.addImm(Offset / 8)929.setMIFlag(MachineInstr::FrameSetup);930MBBI->eraseFromParent();931return true;932}933934// We need to sign the context in an address-discriminated way. 0xc31a is a935// fixed random value, chosen as part of the ABI.936// add x16, xBase, #Offset937// movk x16, #0xc31a, lsl #48938// mov x17, x22/xzr939// pacdb x17, x16940// str x17, [xBase, #Offset]941unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;942BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)943.addUse(BaseReg)944.addImm(abs(Offset))945.addImm(0)946.setMIFlag(MachineInstr::FrameSetup);947BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)948.addUse(AArch64::X16)949.addImm(0xc31a)950.addImm(48)951.setMIFlag(MachineInstr::FrameSetup);952// We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so953// move it somewhere before signing.954BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)955.addUse(AArch64::XZR)956.addUse(CtxReg)957.addImm(0)958.setMIFlag(MachineInstr::FrameSetup);959BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)960.addUse(AArch64::X17)961.addUse(AArch64::X16)962.setMIFlag(MachineInstr::FrameSetup);963BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))964.addUse(AArch64::X17)965.addUse(BaseReg)966.addImm(Offset / 8)967.setMIFlag(MachineInstr::FrameSetup);968969MBBI->eraseFromParent();970return true;971}972973MachineBasicBlock *974AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,975MachineBasicBlock::iterator MBBI) {976MachineInstr &MI = *MBBI;977assert((std::next(MBBI) != MBB.end() 
|| MI.getParent()->successors().begin() !=
              MI.getParent()->successors().end()) &&
         "Unexpected unreachable in block that restores ZA");

  // Compare TPIDR2_EL0 value against 0.
  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
                                .add(MI.getOperand(0));

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before RestoreZAPseudo.
  //  - SMBB contains the RestoreZAPseudo instruction only.
  //  - EndBB contains all instructions after RestoreZAPseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Cbz.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  // NOTE(review): operand 1 is attached as an implicit register use of the
  // BL; the remaining operands (e.g. the regmask) are copied through as-is.
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

/// Expand MSRpstatePseudo into a conditional SMSTART/SMSTOP, guarded by a
/// test of the live PSTATE.SM value. Returns the block execution resumes in.
MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens.
// Note that this is not just an optimisation: the code below expects a
  // successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into smstart or smstop instruction. The pseudo has the
  // following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not
  // the expected value for the callee (0 for a normal callee and 1 for a
  // streaming callee).
  unsigned Opc;
  switch (MI.getOperand(2).getImm()) {
  case AArch64SME::Always:
    llvm_unreachable("Should have matched to instruction directly");
  case AArch64SME::IfCallerIsStreaming:
    Opc = AArch64::TBNZW;
    break;
  case AArch64SME::IfCallerIsNonStreaming:
    Opc = AArch64::TBZW;
    break;
  }
  auto PStateSM = MI.getOperand(3).getReg();
  auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  // Test bit 0 of the 32-bit sub-register holding pstate.sm.
  unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
  //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
  //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Tbx.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

/// Pick the contiguous or strided form of a multi-vector load pseudo based on
/// which register class contains the tuple register assigned to operand 0.
bool AArch64ExpandPseudo::expandMultiVecPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
    unsigned ContiguousOp, unsigned StridedOpc) {
  MachineInstr &MI = *MBBI;
  Register Tuple = MI.getOperand(0).getReg();

  auto ContiguousRange =
ContiguousClass.getRegisters();
  auto StridedRange = StridedClass.getRegisters();
  unsigned Opc;
  // The register class the allocator picked for the tuple decides which
  // encoding to emit.
  if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
    Opc = ContiguousOp;
  } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
    Opc = StridedOpc;
  } else
    llvm_unreachable("Cannot expand Multi-Vector pseudo");

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
                                .add(MI.getOperand(0))
                                .add(MI.getOperand(1))
                                .add(MI.getOperand(2))
                                .add(MI.getOperand(3));
  transferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    // Bitwise-select pseudo: pick BIT/BIF/BSL depending on which source
    // register the destination was coalesced with.
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ?
AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        // Copy the mask operand into the destination first (ORR), then BSL
        // with that copy killed.
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    // Rewrite the plain register-register ("rr") form into the
    // shifted-register ("rs") form with an LSL #0 shift amount.
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs;  break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs;  break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs;  break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs;  break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs;  break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs;  break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs;  break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs;  break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs;  break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs;  break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs;  break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs;  break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs;  break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs;  break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs;  break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs;  break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    // Keep the debug instruction number, if any, for debug value tracking.
    if (auto DebugNumber = MI.peekDebugInstrNum())
      NewMI->setDebugInstrNum(DebugNumber);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg =
MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP),
                  DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        // ILP32: the GOT entry is 32 bits, so load into the sub_32 of the
        // destination and mark the full register as implicitly defined.
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF |
                                  AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(),
                               Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because
      // the largest addend (and hence offset within a function) allowed for
      // ADRP is only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP),
                  DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    // Non-MachO blockaddresses fit the generic ADRP + ADD expansion below.
    [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address.
// Set the tag now. We do so by creating a MOVK that sets bits 48-63 of
      // the register to (global address + 0x100000000 - PC) >> 48. This
      // assumes that we're in the small code model so we can assume a binary
      // size of <= 4GB, which makes the untagged PC relative offset positive.
      // The binary must also be loaded into address range [0, 2^48). Both of
      // these properties need to be ensured at runtime when using tagged
      // addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    // The thread pointer may live in a different system register depending
    // on the subtarget's configured exception level.
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
      SysReg = AArch64SysReg::TPIDRRO_EL0;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return
expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    // Replace the tied pseudo with the plain (untied) AES instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
                                                          AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    // ADDG/SUBG take an unsigned immediate, so select by the offset's sign.
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ?
AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  // SVE register-tuple spills/fills expand to N consecutive single-register
  // stores/loads.
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::STR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::LDR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
  case AArch64::BLR_RVMARKER:
  case AArch64::BLRA_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  case AArch64::RestoreZAPseudo: {
    auto *NewMBB = expandRestoreZA(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = expandCondSMToggle(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::COALESCER_BARRIER_FPR16:
  case AArch64::COALESCER_BARRIER_FPR32:
  case AArch64::COALESCER_BARRIER_FPR64:
  case
AArch64::COALESCER_BARRIER_FPR128:
    // Coalescer barriers have served their purpose by this point; delete.
    MI.eraseFromParent();
    return true;
  // Multi-vector load pseudos: pick the contiguous or strided opcode based
  // on the register class assigned to the destination tuple.
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case
AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Capture the next iterator up front: expandMI may erase MBBI, and may
    // update NMBBI itself when it splits the block.
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

/// Entry point: expand pseudos in every block of the function.
bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}