Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISC-V atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
    initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
  unsigned getInstSizeInBytes(const MachineFunction &MF) const {
    unsigned Size = 0;
    for (auto &MBB : MF)
      for (auto &MI : MBB)
        Size += TII->getInstSizeInBytes(MI);
    return Size;
  }
#endif
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<RISCVSubtarget>();
  TII = STI->getInstrInfo();

#ifndef NDEBUG
  const unsigned OldSize = getInstSizeInBytes(MF);
#endif

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);

#ifndef NDEBUG
  const unsigned NewSize = getInstSizeInBytes(MF);
  assert(OldSize >= NewSize);
#endif
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
  switch (MBBI->getOpcode()) {
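  // The "Masked" pseudos implement sub-word (8- and 16-bit) atomics:
  // AtomicExpandPass lowers those to masked operations on the containing
  // aligned 32-bit word, which is why every masked expansion below uses
  // Width == 32.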
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

static unsigned getLRForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQ_RL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQ_RL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
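  // As in the 32-bit helpers above: on subtargets implementing Ztso (total
  // store ordering), the hardware's ordering guarantees make the explicit
  // .aq/.rl annotations unnecessary for acquire/release, so they are only
  // kept for seq_cst.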
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getLRForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getLRForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getSCForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getSCForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected SC width\n");
}

static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width,
                                   const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                                         MachineInstr &MI, DebugLoc DL,
                                         MachineBasicBlock *ThisMBB,
                                         MachineBasicBlock *LoopMBB,
                                         MachineBasicBlock *DoneMBB,
                                         AtomicRMWInst::BinOp BinOp, int Width,
                                         const RISCVSubtarget *STI) {
  assert(Width == 32 &&
         "Should never need to expand masked 64-bit operations");
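  // Operand order for the masked binop pseudos, matching the unpacking below:
  // result, scratch, aligned word address, increment, mask, and the atomic
  // ordering encoded as an immediate.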
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
                           STI);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}
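// A masked min/max cannot be computed with a straight-line ALU sequence, so
// the expansion below uses four blocks: .loophead performs the LR and the
// comparison, .loopifbody merges the new value into the loaded word only when
// an update is needed, and .looptail performs the SC and retries on failure.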
bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 &&
         "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loophead
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
      .addReg(AddrReg)
      .addReg(Scratch1Reg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
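  // This pass runs after register allocation, so the live-in lists of the
  // newly created blocks have to be recomputed explicitly.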
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by
// modifying the branch within 'LoopHead' (which performs the same
// comparison). This is a valid transformation because after altering the
// LoopHead's BNE destination, the BNE following the cmpxchg becomes
// redundant and can be deleted. In the case of a masked cmpxchg, an
// appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;

  // Make sure the branch is the only user of the AND.
  if (MaskReg.isValid()) {
    if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
      return false;
    if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
      return false;
  }

  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}

bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();
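  // If a BNE on the cmpxchg result directly follows the pseudo, fold it by
  // retargeting the loop-head branch; otherwise the failure path falls
  // through to DoneMBB.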
  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
                              LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

namespace llvm {

FunctionPass *createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}

} // end of namespace llvm