Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
213799 views
//===- RISCVFoldMemOffset.cpp - Fold ADDI into memory offsets ------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===---------------------------------------------------------------------===//7//8// Look for ADDIs that can be removed by folding their immediate into later9// load/store addresses. There may be other arithmetic instructions between the10// addi and load/store that we need to reassociate through. If the final result11// of the arithmetic is only used by load/store addresses, we can fold the12// offset into the all the load/store as long as it doesn't create an offset13// that is too large.14//15//===---------------------------------------------------------------------===//1617#include "RISCV.h"18#include "RISCVSubtarget.h"19#include "llvm/CodeGen/MachineFunctionPass.h"20#include <queue>2122using namespace llvm;2324#define DEBUG_TYPE "riscv-fold-mem-offset"25#define RISCV_FOLD_MEM_OFFSET_NAME "RISC-V Fold Memory Offset"2627namespace {2829class RISCVFoldMemOffset : public MachineFunctionPass {30public:31static char ID;3233RISCVFoldMemOffset() : MachineFunctionPass(ID) {}3435bool runOnMachineFunction(MachineFunction &MF) override;3637bool foldOffset(Register OrigReg, int64_t InitialOffset,38const MachineRegisterInfo &MRI,39DenseMap<MachineInstr *, int64_t> &FoldableInstrs);4041void getAnalysisUsage(AnalysisUsage &AU) const override {42AU.setPreservesCFG();43MachineFunctionPass::getAnalysisUsage(AU);44}4546StringRef getPassName() const override { return RISCV_FOLD_MEM_OFFSET_NAME; }47};4849// Wrapper class around a std::optional to allow accumulation.50class FoldableOffset {51std::optional<int64_t> Offset;5253public:54bool hasValue() const { return Offset.has_value(); }55int64_t getValue() const { return *Offset; }5657FoldableOffset &operator=(int64_t RHS) {58Offset = RHS;59return *this;60}6162FoldableOffset &operator+=(int64_t RHS) {63if (!Offset)64Offset = 0;65Offset = (uint64_t)*Offset + (uint64_t)RHS;66return *this;67}6869int64_t operator*() { return *Offset; }70};7172} // end anonymous namespace7374char RISCVFoldMemOffset::ID = 0;75INITIALIZE_PASS(RISCVFoldMemOffset, DEBUG_TYPE, RISCV_FOLD_MEM_OFFSET_NAME,76false, false)7778FunctionPass *llvm::createRISCVFoldMemOffsetPass() {79return new RISCVFoldMemOffset();80}8182// Walk forward from the ADDI looking for arithmetic instructions we can83// analyze or memory instructions that use it as part of their address84// calculation. For each arithmetic instruction we lookup how the offset85// contributes to the value in that register use that information to86// calculate the contribution to the output of this instruction.87// Only addition and left shift are supported.88// FIXME: Add multiplication by constant. The constant will be in a register.89bool RISCVFoldMemOffset::foldOffset(90Register OrigReg, int64_t InitialOffset, const MachineRegisterInfo &MRI,91DenseMap<MachineInstr *, int64_t> &FoldableInstrs) {92// Map to hold how much the offset contributes to the value of this register.93DenseMap<Register, int64_t> RegToOffsetMap;9495// Insert root offset into the map.96RegToOffsetMap[OrigReg] = InitialOffset;9798std::queue<Register> Worklist;99Worklist.push(OrigReg);100101while (!Worklist.empty()) {102Register Reg = Worklist.front();103Worklist.pop();104105if (!Reg.isVirtual())106return false;107108for (auto &User : MRI.use_nodbg_instructions(Reg)) {109FoldableOffset Offset;110111switch (User.getOpcode()) {112default:113return false;114case RISCV::ADD:115if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());116I != RegToOffsetMap.end())117Offset = I->second;118if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());119I != RegToOffsetMap.end())120Offset += I->second;121break;122case RISCV::SH1ADD:123if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());124I != RegToOffsetMap.end())125Offset = (uint64_t)I->second << 1;126if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());127I != RegToOffsetMap.end())128Offset += I->second;129break;130case RISCV::SH2ADD:131if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());132I != RegToOffsetMap.end())133Offset = (uint64_t)I->second << 2;134if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());135I != RegToOffsetMap.end())136Offset += I->second;137break;138case RISCV::SH3ADD:139if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());140I != RegToOffsetMap.end())141Offset = (uint64_t)I->second << 3;142if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());143I != RegToOffsetMap.end())144Offset += I->second;145break;146case RISCV::ADD_UW:147case RISCV::SH1ADD_UW:148case RISCV::SH2ADD_UW:149case RISCV::SH3ADD_UW:150// Don't fold through the zero extended input.151if (User.getOperand(1).getReg() == Reg)152return false;153if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());154I != RegToOffsetMap.end())155Offset = I->second;156break;157case RISCV::SLLI: {158unsigned ShAmt = User.getOperand(2).getImm();159if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());160I != RegToOffsetMap.end())161Offset = (uint64_t)I->second << ShAmt;162break;163}164case RISCV::LB:165case RISCV::LBU:166case RISCV::SB:167case RISCV::LH:168case RISCV::LH_INX:169case RISCV::LHU:170case RISCV::FLH:171case RISCV::SH:172case RISCV::SH_INX:173case RISCV::FSH:174case RISCV::LW:175case RISCV::LW_INX:176case RISCV::LWU:177case RISCV::FLW:178case RISCV::SW:179case RISCV::SW_INX:180case RISCV::FSW:181case RISCV::LD:182case RISCV::FLD:183case RISCV::SD:184case RISCV::FSD: {185// Can't fold into store value.186if (User.getOperand(0).getReg() == Reg)187return false;188189// Existing offset must be immediate.190if (!User.getOperand(2).isImm())191return false;192193// Require at least one operation between the ADDI and the load/store.194// We have other optimizations that should handle the simple case.195if (User.getOperand(1).getReg() == OrigReg)196return false;197198auto I = RegToOffsetMap.find(User.getOperand(1).getReg());199if (I == RegToOffsetMap.end())200return false;201202int64_t LocalOffset = User.getOperand(2).getImm();203assert(isInt<12>(LocalOffset));204int64_t CombinedOffset = (uint64_t)LocalOffset + (uint64_t)I->second;205if (!isInt<12>(CombinedOffset))206return false;207208FoldableInstrs[&User] = CombinedOffset;209continue;210}211}212213// If we reach here we should have an accumulated offset.214assert(Offset.hasValue() && "Expected an offset");215216// If the offset is new or changed, add the destination register to the217// work list.218int64_t OffsetVal = Offset.getValue();219auto P =220RegToOffsetMap.try_emplace(User.getOperand(0).getReg(), OffsetVal);221if (P.second) {222Worklist.push(User.getOperand(0).getReg());223} else if (P.first->second != OffsetVal) {224P.first->second = OffsetVal;225Worklist.push(User.getOperand(0).getReg());226}227}228}229230return true;231}232233bool RISCVFoldMemOffset::runOnMachineFunction(MachineFunction &MF) {234if (skipFunction(MF.getFunction()))235return false;236237// This optimization may increase size by preventing compression.238if (MF.getFunction().hasOptSize())239return false;240241MachineRegisterInfo &MRI = MF.getRegInfo();242243bool MadeChange = false;244for (MachineBasicBlock &MBB : MF) {245for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {246// FIXME: We can support ADDIW from an LUI+ADDIW pair if the result is247// equivalent to LUI+ADDI.248if (MI.getOpcode() != RISCV::ADDI)249continue;250251// We only want to optimize register ADDIs.252if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())253continue;254255// Ignore 'li'.256if (MI.getOperand(1).getReg() == RISCV::X0)257continue;258259int64_t Offset = MI.getOperand(2).getImm();260assert(isInt<12>(Offset));261262DenseMap<MachineInstr *, int64_t> FoldableInstrs;263264if (!foldOffset(MI.getOperand(0).getReg(), Offset, MRI, FoldableInstrs))265continue;266267if (FoldableInstrs.empty())268continue;269270// We can fold this ADDI.271// Rewrite all the instructions.272for (auto [MemMI, NewOffset] : FoldableInstrs)273MemMI->getOperand(2).setImm(NewOffset);274275MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());276MRI.clearKillFlags(MI.getOperand(1).getReg());277MI.eraseFromParent();278}279}280281return MadeChange;282}283284285