// Path: blob/main/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
// 35267 views
//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===---------------------------------------------------------------------===//7//8// This pass does some optimizations for *W instructions at the MI level.9//10// First it removes unneeded sext(addi.w rd, rs, 0) instructions. Either11// because the sign extended bits aren't consumed or because the input was12// already sign extended by an earlier instruction.13//14// Then:15// 1. Unless explicit disabled or the target prefers instructions with W suffix,16// it removes the -w suffix from opw instructions whenever all users are17// dependent only on the lower word of the result of the instruction.18// The cases handled are:19// * addi.w because it helps reduce test differences between LA32 and LA6420// w/o being a pessimization.21//22// 2. 
Or if explicit enabled or the target prefers instructions with W suffix,23// it adds the W suffix to the instruction whenever all users are dependent24// only on the lower word of the result of the instruction.25// The cases handled are:26// * add.d/addi.d/sub.d/mul.d.27// * slli.d with imm < 32.28// * ld.d/ld.wu.29//===---------------------------------------------------------------------===//3031#include "LoongArch.h"32#include "LoongArchMachineFunctionInfo.h"33#include "LoongArchSubtarget.h"34#include "llvm/ADT/SmallSet.h"35#include "llvm/ADT/Statistic.h"36#include "llvm/CodeGen/MachineFunctionPass.h"37#include "llvm/CodeGen/TargetInstrInfo.h"3839using namespace llvm;4041#define DEBUG_TYPE "loongarch-opt-w-instrs"42#define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"4344STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");45STATISTIC(NumTransformedToWInstrs,46"Number of instructions transformed to W-ops");4748static cl::opt<bool>49DisableSExtWRemoval("loongarch-disable-sextw-removal",50cl::desc("Disable removal of sign-extend insn"),51cl::init(false), cl::Hidden);52static cl::opt<bool>53DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",54cl::desc("Disable convert to D suffix"),55cl::init(false), cl::Hidden);5657namespace {5859class LoongArchOptWInstrs : public MachineFunctionPass {60public:61static char ID;6263LoongArchOptWInstrs() : MachineFunctionPass(ID) {}6465bool runOnMachineFunction(MachineFunction &MF) override;66bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,67const LoongArchSubtarget &ST,68MachineRegisterInfo &MRI);69bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,70const LoongArchSubtarget &ST,71MachineRegisterInfo &MRI);72bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,73const LoongArchSubtarget &ST,74MachineRegisterInfo &MRI);7576void getAnalysisUsage(AnalysisUsage &AU) const override 
{77AU.setPreservesCFG();78MachineFunctionPass::getAnalysisUsage(AU);79}8081StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }82};8384} // end anonymous namespace8586char LoongArchOptWInstrs::ID = 0;87INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME,88false, false)8990FunctionPass *llvm::createLoongArchOptWInstrsPass() {91return new LoongArchOptWInstrs();92}9394// Checks if all users only demand the lower \p OrigBits of the original95// instruction's result.96// TODO: handle multiple interdependent transformations97static bool hasAllNBitUsers(const MachineInstr &OrigMI,98const LoongArchSubtarget &ST,99const MachineRegisterInfo &MRI, unsigned OrigBits) {100101SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;102SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;103104Worklist.push_back(std::make_pair(&OrigMI, OrigBits));105106while (!Worklist.empty()) {107auto P = Worklist.pop_back_val();108const MachineInstr *MI = P.first;109unsigned Bits = P.second;110111if (!Visited.insert(P).second)112continue;113114// Only handle instructions with one def.115if (MI->getNumExplicitDefs() != 1)116return false;117118Register DestReg = MI->getOperand(0).getReg();119if (!DestReg.isVirtual())120return false;121122for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) {123const MachineInstr *UserMI = UserOp.getParent();124unsigned OpIdx = UserOp.getOperandNo();125126switch (UserMI->getOpcode()) {127default:128// TODO: Add vector129return false;130131case LoongArch::ADD_W:132case LoongArch::ADDI_W:133case LoongArch::SUB_W:134case LoongArch::ALSL_W:135case LoongArch::ALSL_WU:136case LoongArch::MUL_W:137case LoongArch::MULH_W:138case LoongArch::MULH_WU:139case LoongArch::MULW_D_W:140case LoongArch::MULW_D_WU:141// TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+.142// case LoongArch::DIV_W:143// case LoongArch::DIV_WU:144// case LoongArch::MOD_W:145// case LoongArch::MOD_WU:146case 
LoongArch::SLL_W:147case LoongArch::SLLI_W:148case LoongArch::SRL_W:149case LoongArch::SRLI_W:150case LoongArch::SRA_W:151case LoongArch::SRAI_W:152case LoongArch::ROTR_W:153case LoongArch::ROTRI_W:154case LoongArch::CLO_W:155case LoongArch::CLZ_W:156case LoongArch::CTO_W:157case LoongArch::CTZ_W:158case LoongArch::BYTEPICK_W:159case LoongArch::REVB_2H:160case LoongArch::BITREV_4B:161case LoongArch::BITREV_W:162case LoongArch::BSTRINS_W:163case LoongArch::BSTRPICK_W:164case LoongArch::CRC_W_W_W:165case LoongArch::CRCC_W_W_W:166case LoongArch::MOVGR2FCSR:167case LoongArch::MOVGR2FRH_W:168case LoongArch::MOVGR2FR_W_64:169if (Bits >= 32)170break;171return false;172case LoongArch::MOVGR2CF:173if (Bits >= 1)174break;175return false;176case LoongArch::EXT_W_B:177if (Bits >= 8)178break;179return false;180case LoongArch::EXT_W_H:181if (Bits >= 16)182break;183return false;184185case LoongArch::SRLI_D: {186// If we are shifting right by less than Bits, and users don't demand187// any bits that were shifted into [Bits-1:0], then we can consider this188// as an N-Bit user.189unsigned ShAmt = UserMI->getOperand(2).getImm();190if (Bits > ShAmt) {191Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));192break;193}194return false;195}196197// these overwrite higher input bits, otherwise the lower word of output198// depends only on the lower word of input. 
So check their uses read W.199case LoongArch::SLLI_D:200if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm()))201break;202Worklist.push_back(std::make_pair(UserMI, Bits));203break;204case LoongArch::ANDI: {205uint64_t Imm = UserMI->getOperand(2).getImm();206if (Bits >= (unsigned)llvm::bit_width(Imm))207break;208Worklist.push_back(std::make_pair(UserMI, Bits));209break;210}211case LoongArch::ORI: {212uint64_t Imm = UserMI->getOperand(2).getImm();213if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))214break;215Worklist.push_back(std::make_pair(UserMI, Bits));216break;217}218219case LoongArch::SLL_D:220// Operand 2 is the shift amount which uses log2(grlen) bits.221if (OpIdx == 2) {222if (Bits >= Log2_32(ST.getGRLen()))223break;224return false;225}226Worklist.push_back(std::make_pair(UserMI, Bits));227break;228229case LoongArch::SRA_D:230case LoongArch::SRL_D:231case LoongArch::ROTR_D:232// Operand 2 is the shift amount which uses 6 bits.233if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen()))234break;235return false;236237case LoongArch::ST_B:238case LoongArch::STX_B:239case LoongArch::STGT_B:240case LoongArch::STLE_B:241case LoongArch::IOCSRWR_B:242// The first argument is the value to store.243if (OpIdx == 0 && Bits >= 8)244break;245return false;246case LoongArch::ST_H:247case LoongArch::STX_H:248case LoongArch::STGT_H:249case LoongArch::STLE_H:250case LoongArch::IOCSRWR_H:251// The first argument is the value to store.252if (OpIdx == 0 && Bits >= 16)253break;254return false;255case LoongArch::ST_W:256case LoongArch::STX_W:257case LoongArch::SCREL_W:258case LoongArch::STPTR_W:259case LoongArch::STGT_W:260case LoongArch::STLE_W:261case LoongArch::IOCSRWR_W:262// The first argument is the value to store.263if (OpIdx == 0 && Bits >= 32)264break;265return false;266267case LoongArch::CRC_W_B_W:268case LoongArch::CRCC_W_B_W:269if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32))270break;271return false;272case LoongArch::CRC_W_H_W:273case 
LoongArch::CRCC_W_H_W:274if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32))275break;276return false;277case LoongArch::CRC_W_D_W:278case LoongArch::CRCC_W_D_W:279if (OpIdx == 2 && Bits >= 32)280break;281return false;282283// For these, lower word of output in these operations, depends only on284// the lower word of input. So, we check all uses only read lower word.285case LoongArch::COPY:286case LoongArch::PHI:287case LoongArch::ADD_D:288case LoongArch::ADDI_D:289case LoongArch::SUB_D:290case LoongArch::MUL_D:291case LoongArch::AND:292case LoongArch::OR:293case LoongArch::NOR:294case LoongArch::XOR:295case LoongArch::XORI:296case LoongArch::ANDN:297case LoongArch::ORN:298Worklist.push_back(std::make_pair(UserMI, Bits));299break;300301case LoongArch::MASKNEZ:302case LoongArch::MASKEQZ:303if (OpIdx != 1)304return false;305Worklist.push_back(std::make_pair(UserMI, Bits));306break;307}308}309}310311return true;312}313314static bool hasAllWUsers(const MachineInstr &OrigMI,315const LoongArchSubtarget &ST,316const MachineRegisterInfo &MRI) {317return hasAllNBitUsers(OrigMI, ST, MRI, 32);318}319320// This function returns true if the machine instruction always outputs a value321// where bits 63:32 match bit 31.322static bool isSignExtendingOpW(const MachineInstr &MI,323const MachineRegisterInfo &MRI, unsigned OpNo) {324switch (MI.getOpcode()) {325// Normal cases326case LoongArch::ADD_W:327case LoongArch::SUB_W:328case LoongArch::ADDI_W:329case LoongArch::ALSL_W:330case LoongArch::LU12I_W:331case LoongArch::SLT:332case LoongArch::SLTU:333case LoongArch::SLTI:334case LoongArch::SLTUI:335case LoongArch::ANDI:336case LoongArch::MUL_W:337case LoongArch::MULH_W:338case LoongArch::MULH_WU:339case LoongArch::DIV_W:340case LoongArch::MOD_W:341case LoongArch::DIV_WU:342case LoongArch::MOD_WU:343case LoongArch::SLL_W:344case LoongArch::SRL_W:345case LoongArch::SRA_W:346case LoongArch::ROTR_W:347case LoongArch::SLLI_W:348case LoongArch::SRLI_W:349case 
LoongArch::SRAI_W:350case LoongArch::ROTRI_W:351case LoongArch::EXT_W_B:352case LoongArch::EXT_W_H:353case LoongArch::CLO_W:354case LoongArch::CLZ_W:355case LoongArch::CTO_W:356case LoongArch::CTZ_W:357case LoongArch::BYTEPICK_W:358case LoongArch::REVB_2H:359case LoongArch::BITREV_4B:360case LoongArch::BITREV_W:361case LoongArch::BSTRINS_W:362case LoongArch::BSTRPICK_W:363case LoongArch::LD_B:364case LoongArch::LD_H:365case LoongArch::LD_W:366case LoongArch::LD_BU:367case LoongArch::LD_HU:368case LoongArch::LL_W:369case LoongArch::LLACQ_W:370case LoongArch::RDTIMEL_W:371case LoongArch::RDTIMEH_W:372case LoongArch::CPUCFG:373case LoongArch::LDX_B:374case LoongArch::LDX_H:375case LoongArch::LDX_W:376case LoongArch::LDX_BU:377case LoongArch::LDX_HU:378case LoongArch::LDPTR_W:379case LoongArch::LDGT_B:380case LoongArch::LDGT_H:381case LoongArch::LDGT_W:382case LoongArch::LDLE_B:383case LoongArch::LDLE_H:384case LoongArch::LDLE_W:385case LoongArch::AMSWAP_B:386case LoongArch::AMSWAP_H:387case LoongArch::AMSWAP_W:388case LoongArch::AMADD_B:389case LoongArch::AMADD_H:390case LoongArch::AMADD_W:391case LoongArch::AMAND_W:392case LoongArch::AMOR_W:393case LoongArch::AMXOR_W:394case LoongArch::AMMAX_W:395case LoongArch::AMMIN_W:396case LoongArch::AMMAX_WU:397case LoongArch::AMMIN_WU:398case LoongArch::AMSWAP__DB_B:399case LoongArch::AMSWAP__DB_H:400case LoongArch::AMSWAP__DB_W:401case LoongArch::AMADD__DB_B:402case LoongArch::AMADD__DB_H:403case LoongArch::AMADD__DB_W:404case LoongArch::AMAND__DB_W:405case LoongArch::AMOR__DB_W:406case LoongArch::AMXOR__DB_W:407case LoongArch::AMMAX__DB_W:408case LoongArch::AMMIN__DB_W:409case LoongArch::AMMAX__DB_WU:410case LoongArch::AMMIN__DB_WU:411case LoongArch::AMCAS_B:412case LoongArch::AMCAS_H:413case LoongArch::AMCAS_W:414case LoongArch::AMCAS__DB_B:415case LoongArch::AMCAS__DB_H:416case LoongArch::AMCAS__DB_W:417case LoongArch::CRC_W_B_W:418case LoongArch::CRC_W_H_W:419case LoongArch::CRC_W_W_W:420case LoongArch::CRC_W_D_W:421case 
LoongArch::CRCC_W_B_W:422case LoongArch::CRCC_W_H_W:423case LoongArch::CRCC_W_W_W:424case LoongArch::CRCC_W_D_W:425case LoongArch::IOCSRRD_B:426case LoongArch::IOCSRRD_H:427case LoongArch::IOCSRRD_W:428case LoongArch::MOVFR2GR_S:429case LoongArch::MOVFCSR2GR:430case LoongArch::MOVCF2GR:431case LoongArch::MOVFRH2GR_S:432case LoongArch::MOVFR2GR_S_64:433// TODO: Add vector434return true;435// Special cases that require checking operands.436// shifting right sufficiently makes the value 32-bit sign-extended437case LoongArch::SRAI_D:438return MI.getOperand(2).getImm() >= 32;439case LoongArch::SRLI_D:440return MI.getOperand(2).getImm() > 32;441// The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.442case LoongArch::ADDI_D:443case LoongArch::ORI:444return MI.getOperand(1).isReg() &&445MI.getOperand(1).getReg() == LoongArch::R0;446// A bits extract is sign extended if the msb is less than 31.447case LoongArch::BSTRPICK_D:448return MI.getOperand(2).getImm() < 31;449// Copying from R0 produces zero.450case LoongArch::COPY:451return MI.getOperand(1).getReg() == LoongArch::R0;452// Ignore the scratch register destination.453case LoongArch::PseudoMaskedAtomicSwap32:454case LoongArch::PseudoAtomicSwap32:455case LoongArch::PseudoMaskedAtomicLoadAdd32:456case LoongArch::PseudoMaskedAtomicLoadSub32:457case LoongArch::PseudoAtomicLoadNand32:458case LoongArch::PseudoMaskedAtomicLoadNand32:459case LoongArch::PseudoAtomicLoadAdd32:460case LoongArch::PseudoAtomicLoadSub32:461case LoongArch::PseudoAtomicLoadAnd32:462case LoongArch::PseudoAtomicLoadOr32:463case LoongArch::PseudoAtomicLoadXor32:464case LoongArch::PseudoMaskedAtomicLoadUMax32:465case LoongArch::PseudoMaskedAtomicLoadUMin32:466case LoongArch::PseudoCmpXchg32:467case LoongArch::PseudoMaskedCmpXchg32:468case LoongArch::PseudoMaskedAtomicLoadMax32:469case LoongArch::PseudoMaskedAtomicLoadMin32:470return OpNo == 0;471}472473return false;474}475476static bool isSignExtendedW(Register SrcReg, const 
LoongArchSubtarget &ST,477const MachineRegisterInfo &MRI,478SmallPtrSetImpl<MachineInstr *> &FixableDef) {479SmallSet<Register, 4> Visited;480SmallVector<Register, 4> Worklist;481482auto AddRegToWorkList = [&](Register SrcReg) {483if (!SrcReg.isVirtual())484return false;485Worklist.push_back(SrcReg);486return true;487};488489if (!AddRegToWorkList(SrcReg))490return false;491492while (!Worklist.empty()) {493Register Reg = Worklist.pop_back_val();494495// If we already visited this register, we don't need to check it again.496if (!Visited.insert(Reg).second)497continue;498499MachineInstr *MI = MRI.getVRegDef(Reg);500if (!MI)501continue;502503int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);504assert(OpNo != -1 && "Couldn't find register");505506// If this is a sign extending operation we don't need to look any further.507if (isSignExtendingOpW(*MI, MRI, OpNo))508continue;509510// Is this an instruction that propagates sign extend?511switch (MI->getOpcode()) {512default:513// Unknown opcode, give up.514return false;515case LoongArch::COPY: {516const MachineFunction *MF = MI->getMF();517const LoongArchMachineFunctionInfo *LAFI =518MF->getInfo<LoongArchMachineFunctionInfo>();519520// If this is the entry block and the register is livein, see if we know521// it is sign extended.522if (MI->getParent() == &MF->front()) {523Register VReg = MI->getOperand(0).getReg();524if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg))525continue;526}527528Register CopySrcReg = MI->getOperand(1).getReg();529if (CopySrcReg == LoongArch::R4) {530// For a method return value, we check the ZExt/SExt flags in attribute.531// We assume the following code sequence for method call.532// PseudoCALL @bar, ...533// ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3534// %0:gpr = COPY $r4535//536// We use the PseudoCall to look up the IR function being called to find537// its return attributes.538const MachineBasicBlock *MBB = MI->getParent();539auto II = 
MI->getIterator();540if (II == MBB->instr_begin() ||541(--II)->getOpcode() != LoongArch::ADJCALLSTACKUP)542return false;543544const MachineInstr &CallMI = *(--II);545if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())546return false;547548auto *CalleeFn =549dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());550if (!CalleeFn)551return false;552553auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());554if (!IntTy)555return false;556557const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();558unsigned BitWidth = IntTy->getBitWidth();559if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||560(BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))561continue;562}563564if (!AddRegToWorkList(CopySrcReg))565return false;566567break;568}569570// For these, we just need to check if the 1st operand is sign extended.571case LoongArch::MOD_D:572case LoongArch::ANDI:573case LoongArch::ORI:574case LoongArch::XORI:575// |Remainder| is always <= |Dividend|. 
If D is 32-bit, then so is R.576// DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1577// Logical operations use a sign extended 12-bit immediate.578if (!AddRegToWorkList(MI->getOperand(1).getReg()))579return false;580581break;582case LoongArch::MOD_DU:583case LoongArch::AND:584case LoongArch::OR:585case LoongArch::XOR:586case LoongArch::ANDN:587case LoongArch::ORN:588case LoongArch::PHI: {589// If all incoming values are sign-extended, the output of AND, OR, XOR,590// or PHI is also sign-extended.591592// The input registers for PHI are operand 1, 3, ...593// The input registers for others are operand 1 and 2.594unsigned B = 1, E = 3, D = 1;595switch (MI->getOpcode()) {596case LoongArch::PHI:597E = MI->getNumOperands();598D = 2;599break;600}601602for (unsigned I = B; I != E; I += D) {603if (!MI->getOperand(I).isReg())604return false;605606if (!AddRegToWorkList(MI->getOperand(I).getReg()))607return false;608}609610break;611}612613case LoongArch::MASKEQZ:614case LoongArch::MASKNEZ:615// Instructions return zero or operand 1. 
Result is sign extended if616// operand 1 is sign extended.617if (!AddRegToWorkList(MI->getOperand(1).getReg()))618return false;619break;620621// With these opcode, we can "fix" them with the W-version622// if we know all users of the result only rely on bits 31:0623case LoongArch::SLLI_D:624// SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits625if (MI->getOperand(2).getImm() >= 32)626return false;627[[fallthrough]];628case LoongArch::ADDI_D:629case LoongArch::ADD_D:630case LoongArch::LD_D:631case LoongArch::LD_WU:632case LoongArch::MUL_D:633case LoongArch::SUB_D:634if (hasAllWUsers(*MI, ST, MRI)) {635FixableDef.insert(MI);636break;637}638return false;639// If all incoming values are sign-extended and all users only use640// the lower 32 bits, then convert them to W versions.641case LoongArch::DIV_D: {642if (!AddRegToWorkList(MI->getOperand(1).getReg()))643return false;644if (!AddRegToWorkList(MI->getOperand(2).getReg()))645return false;646if (hasAllWUsers(*MI, ST, MRI)) {647FixableDef.insert(MI);648break;649}650return false;651}652}653}654655// If we get here, then every node we visited produces a sign extended value656// or propagated sign extended values. 
So the result must be sign extended.657return true;658}659660static unsigned getWOp(unsigned Opcode) {661switch (Opcode) {662case LoongArch::ADDI_D:663return LoongArch::ADDI_W;664case LoongArch::ADD_D:665return LoongArch::ADD_W;666case LoongArch::DIV_D:667return LoongArch::DIV_W;668case LoongArch::LD_D:669case LoongArch::LD_WU:670return LoongArch::LD_W;671case LoongArch::MUL_D:672return LoongArch::MUL_W;673case LoongArch::SLLI_D:674return LoongArch::SLLI_W;675case LoongArch::SUB_D:676return LoongArch::SUB_W;677default:678llvm_unreachable("Unexpected opcode for replacement with W variant");679}680}681682bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF,683const LoongArchInstrInfo &TII,684const LoongArchSubtarget &ST,685MachineRegisterInfo &MRI) {686if (DisableSExtWRemoval)687return false;688689bool MadeChange = false;690for (MachineBasicBlock &MBB : MF) {691for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {692// We're looking for the sext.w pattern ADDI.W rd, rs, 0.693if (!LoongArch::isSEXT_W(MI))694continue;695696Register SrcReg = MI.getOperand(1).getReg();697698SmallPtrSet<MachineInstr *, 4> FixableDefs;699700// If all users only use the lower bits, this sext.w is redundant.701// Or if all definitions reaching MI sign-extend their output,702// then sext.w is redundant.703if (!hasAllWUsers(MI, ST, MRI) &&704!isSignExtendedW(SrcReg, ST, MRI, FixableDefs))705continue;706707Register DstReg = MI.getOperand(0).getReg();708if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))709continue;710711// Convert Fixable instructions to their W versions.712for (MachineInstr *Fixable : FixableDefs) {713LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);714Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode())));715Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);716Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);717Fixable->clearFlag(MachineInstr::MIFlag::IsExact);718LLVM_DEBUG(dbgs() << " with " << 
*Fixable);719++NumTransformedToWInstrs;720}721722LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");723MRI.replaceRegWith(DstReg, SrcReg);724MRI.clearKillFlags(SrcReg);725MI.eraseFromParent();726++NumRemovedSExtW;727MadeChange = true;728}729}730731return MadeChange;732}733734bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF,735const LoongArchInstrInfo &TII,736const LoongArchSubtarget &ST,737MachineRegisterInfo &MRI) {738bool MadeChange = false;739for (MachineBasicBlock &MBB : MF) {740for (MachineInstr &MI : MBB) {741unsigned Opc;742switch (MI.getOpcode()) {743default:744continue;745case LoongArch::ADDI_W:746Opc = LoongArch::ADDI_D;747break;748}749750if (hasAllWUsers(MI, ST, MRI)) {751MI.setDesc(TII.get(Opc));752MadeChange = true;753}754}755}756757return MadeChange;758}759760bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF,761const LoongArchInstrInfo &TII,762const LoongArchSubtarget &ST,763MachineRegisterInfo &MRI) {764bool MadeChange = false;765for (MachineBasicBlock &MBB : MF) {766for (MachineInstr &MI : MBB) {767unsigned WOpc;768// TODO: Add more?769switch (MI.getOpcode()) {770default:771continue;772case LoongArch::ADD_D:773WOpc = LoongArch::ADD_W;774break;775case LoongArch::ADDI_D:776WOpc = LoongArch::ADDI_W;777break;778case LoongArch::SUB_D:779WOpc = LoongArch::SUB_W;780break;781case LoongArch::MUL_D:782WOpc = LoongArch::MUL_W;783break;784case LoongArch::SLLI_D:785// SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits786if (MI.getOperand(2).getImm() >= 32)787continue;788WOpc = LoongArch::SLLI_W;789break;790case LoongArch::LD_D:791case LoongArch::LD_WU:792WOpc = LoongArch::LD_W;793break;794}795796if (hasAllWUsers(MI, ST, MRI)) {797LLVM_DEBUG(dbgs() << "Replacing " << MI);798MI.setDesc(TII.get(WOpc));799MI.clearFlag(MachineInstr::MIFlag::NoSWrap);800MI.clearFlag(MachineInstr::MIFlag::NoUWrap);801MI.clearFlag(MachineInstr::MIFlag::IsExact);802LLVM_DEBUG(dbgs() << " with " << 
MI);803++NumTransformedToWInstrs;804MadeChange = true;805}806}807}808809return MadeChange;810}811812bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) {813if (skipFunction(MF.getFunction()))814return false;815816MachineRegisterInfo &MRI = MF.getRegInfo();817const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>();818const LoongArchInstrInfo &TII = *ST.getInstrInfo();819820if (!ST.is64Bit())821return false;822823bool MadeChange = false;824MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);825826if (!(DisableCvtToDSuffix || ST.preferWInst()))827MadeChange |= convertToDSuffixes(MF, TII, ST, MRI);828829if (ST.preferWInst())830MadeChange |= convertToWSuffixes(MF, TII, ST, MRI);831832return MadeChange;833}834835836