Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
35294 views
//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass tries to make consecutive compares of values use same operands to9// allow CSE pass to remove duplicated instructions. For this it analyzes10// branches and adjusts comparisons with immediate values by converting:11// * GE -> GT12// * GT -> GE13// * LT -> LE14// * LE -> LT15// and adjusting immediate values appropriately. It basically corrects two16// immediate values towards each other to make them equal.17//18// Consider the following example in C:19//20// if ((a < 5 && ...) || (a > 5 && ...)) {21// ~~~~~ ~~~~~22// ^ ^23// x y24//25// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates26// to "false", "y" can just check flags set by the first comparison. As a27// result of the canonicalization employed by28// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific29// code, assembly ends up in the form that is not CSE friendly:30//31// ...32// cmp w8, #433// b.gt .LBB0_334// ...35// .LBB0_3:36// cmp w8, #637// b.lt .LBB0_638// ...39//40// Same assembly after the pass:41//42// ...43// cmp w8, #544// b.ge .LBB0_345// ...46// .LBB0_3:47// cmp w8, #5 // <-- CSE pass removes this instruction48// b.le .LBB0_649// ...50//51// Currently only SUBS and ADDS followed by b.?? are supported.52//53// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"54// TODO: handle other conditional instructions (e.g. CSET)55// TODO: allow second branching to be anything if it doesn't require adjusting56//57//===----------------------------------------------------------------------===//5859#include "AArch64.h"60#include "MCTargetDesc/AArch64AddressingModes.h"61#include "Utils/AArch64BaseInfo.h"62#include "llvm/ADT/ArrayRef.h"63#include "llvm/ADT/DepthFirstIterator.h"64#include "llvm/ADT/SmallVector.h"65#include "llvm/ADT/Statistic.h"66#include "llvm/CodeGen/MachineBasicBlock.h"67#include "llvm/CodeGen/MachineDominators.h"68#include "llvm/CodeGen/MachineFunction.h"69#include "llvm/CodeGen/MachineFunctionPass.h"70#include "llvm/CodeGen/MachineInstr.h"71#include "llvm/CodeGen/MachineInstrBuilder.h"72#include "llvm/CodeGen/MachineOperand.h"73#include "llvm/CodeGen/MachineRegisterInfo.h"74#include "llvm/CodeGen/TargetInstrInfo.h"75#include "llvm/CodeGen/TargetSubtargetInfo.h"76#include "llvm/InitializePasses.h"77#include "llvm/Pass.h"78#include "llvm/Support/Debug.h"79#include "llvm/Support/ErrorHandling.h"80#include "llvm/Support/raw_ostream.h"81#include <cassert>82#include <cstdlib>83#include <tuple>8485using namespace llvm;8687#define DEBUG_TYPE "aarch64-condopt"8889STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");9091namespace {9293class AArch64ConditionOptimizer : public MachineFunctionPass {94const TargetInstrInfo *TII;95MachineDominatorTree *DomTree;96const MachineRegisterInfo *MRI;9798public:99// Stores immediate, compare instruction opcode and branch condition (in this100// order) of adjusted comparison.101using CmpInfo = std::tuple<int, unsigned, AArch64CC::CondCode>;102103static char ID;104105AArch64ConditionOptimizer() : MachineFunctionPass(ID) {106initializeAArch64ConditionOptimizerPass(*PassRegistry::getPassRegistry());107}108109void getAnalysisUsage(AnalysisUsage &AU) const override;110MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);111CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);112void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);113bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,114int ToImm);115bool runOnMachineFunction(MachineFunction &MF) override;116117StringRef getPassName() const override {118return "AArch64 Condition Optimizer";119}120};121122} // end anonymous namespace123124char AArch64ConditionOptimizer::ID = 0;125126INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",127"AArch64 CondOpt Pass", false, false)128INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)129INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",130"AArch64 CondOpt Pass", false, false)131132FunctionPass *llvm::createAArch64ConditionOptimizerPass() {133return new AArch64ConditionOptimizer();134}135136void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {137AU.addRequired<MachineDominatorTreeWrapperPass>();138AU.addPreserved<MachineDominatorTreeWrapperPass>();139MachineFunctionPass::getAnalysisUsage(AU);140}141142// Finds compare instruction that corresponds to supported types of branching.143// Returns the instruction or nullptr on failures or detecting unsupported144// instructions.145MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(146MachineBasicBlock *MBB) {147MachineBasicBlock::iterator Term = MBB->getFirstTerminator();148if (Term == MBB->end())149return nullptr;150151if (Term->getOpcode() != AArch64::Bcc)152return nullptr;153154// Since we may modify cmp of this MBB, make sure NZCV does not live out.155for (auto *SuccBB : MBB->successors())156if (SuccBB->isLiveIn(AArch64::NZCV))157return nullptr;158159// Now find the instruction controlling the terminator.160for (MachineBasicBlock::iterator B = MBB->begin(), It = Term; It != B;) {161It = prev_nodbg(It, B);162MachineInstr &I = *It;163assert(!I.isTerminator() && "Spurious terminator");164// Check if there is any use of NZCV between CMP and Bcc.165if (I.readsRegister(AArch64::NZCV, /*TRI=*/nullptr))166return nullptr;167switch (I.getOpcode()) {168// cmp is an alias for subs with a dead destination register.169case AArch64::SUBSWri:170case AArch64::SUBSXri:171// cmn is an alias for adds with a dead destination register.172case AArch64::ADDSWri:173case AArch64::ADDSXri: {174unsigned ShiftAmt = AArch64_AM::getShiftValue(I.getOperand(3).getImm());175if (!I.getOperand(2).isImm()) {176LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << I << '\n');177return nullptr;178} else if (I.getOperand(2).getImm() << ShiftAmt >= 0xfff) {179LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << I180<< '\n');181return nullptr;182} else if (!MRI->use_nodbg_empty(I.getOperand(0).getReg())) {183LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << I << '\n');184return nullptr;185}186return &I;187}188// Prevent false positive case like:189// cmp w19, #0190// cinc w0, w19, gt191// ...192// fcmp d8, #0.0193// b.gt .LBB0_5194case AArch64::FCMPDri:195case AArch64::FCMPSri:196case AArch64::FCMPESri:197case AArch64::FCMPEDri:198199case AArch64::SUBSWrr:200case AArch64::SUBSXrr:201case AArch64::ADDSWrr:202case AArch64::ADDSXrr:203case AArch64::FCMPSrr:204case AArch64::FCMPDrr:205case AArch64::FCMPESrr:206case AArch64::FCMPEDrr:207// Skip comparison instructions without immediate operands.208return nullptr;209}210}211LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)212<< '\n');213return nullptr;214}215216// Changes opcode adds <-> subs considering register operand width.217static int getComplementOpc(int Opc) {218switch (Opc) {219case AArch64::ADDSWri: return AArch64::SUBSWri;220case AArch64::ADDSXri: return AArch64::SUBSXri;221case AArch64::SUBSWri: return AArch64::ADDSWri;222case AArch64::SUBSXri: return AArch64::ADDSXri;223default:224llvm_unreachable("Unexpected opcode");225}226}227228// Changes form of comparison inclusive <-> exclusive.229static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {230switch (Cmp) {231case AArch64CC::GT: return AArch64CC::GE;232case AArch64CC::GE: return AArch64CC::GT;233case AArch64CC::LT: return AArch64CC::LE;234case AArch64CC::LE: return AArch64CC::LT;235default:236llvm_unreachable("Unexpected condition code");237}238}239240// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating comparison241// operator and condition code.242AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(243MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {244unsigned Opc = CmpMI->getOpcode();245246// CMN (compare with negative immediate) is an alias to ADDS (as247// "operand - negative" == "operand + positive")248bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);249250int Correction = (Cmp == AArch64CC::GT) ? 1 : -1;251// Negate Correction value for comparison with negative immediate (CMN).252if (Negative) {253Correction = -Correction;254}255256const int OldImm = (int)CmpMI->getOperand(2).getImm();257const int NewImm = std::abs(OldImm + Correction);258259// Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by260// adjusting compare instruction opcode.261if (OldImm == 0 && ((Negative && Correction == 1) ||262(!Negative && Correction == -1))) {263Opc = getComplementOpc(Opc);264}265266return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));267}268269// Applies changes to comparison instruction suggested by adjustCmp().270void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,271const CmpInfo &Info) {272int Imm;273unsigned Opc;274AArch64CC::CondCode Cmp;275std::tie(Imm, Opc, Cmp) = Info;276277MachineBasicBlock *const MBB = CmpMI->getParent();278279// Change immediate in comparison instruction (ADDS or SUBS).280BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))281.add(CmpMI->getOperand(0))282.add(CmpMI->getOperand(1))283.addImm(Imm)284.add(CmpMI->getOperand(3));285CmpMI->eraseFromParent();286287// The fact that this comparison was picked ensures that it's related to the288// first terminator instruction.289MachineInstr &BrMI = *MBB->getFirstTerminator();290291// Change condition in branch instruction.292BuildMI(*MBB, BrMI, BrMI.getDebugLoc(), TII->get(AArch64::Bcc))293.addImm(Cmp)294.add(BrMI.getOperand(1));295BrMI.eraseFromParent();296297++NumConditionsAdjusted;298}299300// Parse a condition code returned by analyzeBranch, and compute the CondCode301// corresponding to TBB.302// Returns true if parsing was successful, otherwise false is returned.303static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {304// A normal br.cond simply has the condition code.305if (Cond[0].getImm() != -1) {306assert(Cond.size() == 1 && "Unknown Cond array format");307CC = (AArch64CC::CondCode)(int)Cond[0].getImm();308return true;309}310return false;311}312313// Adjusts one cmp instruction to another one if result of adjustment will allow314// CSE. Returns true if compare instruction was changed, otherwise false is315// returned.316bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,317AArch64CC::CondCode Cmp, MachineInstr *To, int ToImm)318{319CmpInfo Info = adjustCmp(CmpMI, Cmp);320if (std::get<0>(Info) == ToImm && std::get<1>(Info) == To->getOpcode()) {321modifyCmp(CmpMI, Info);322return true;323}324return false;325}326327bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {328LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"329<< "********** Function: " << MF.getName() << '\n');330if (skipFunction(MF.getFunction()))331return false;332333TII = MF.getSubtarget().getInstrInfo();334DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();335MRI = &MF.getRegInfo();336337bool Changed = false;338339// Visit blocks in dominator tree pre-order. The pre-order enables multiple340// cmp-conversions from the same head block.341// Note that updateDomTree() modifies the children of the DomTree node342// currently being visited. The df_iterator supports that; it doesn't look at343// child_begin() / child_end() until after a node has been visited.344for (MachineDomTreeNode *I : depth_first(DomTree)) {345MachineBasicBlock *HBB = I->getBlock();346347SmallVector<MachineOperand, 4> HeadCond;348MachineBasicBlock *TBB = nullptr, *FBB = nullptr;349if (TII->analyzeBranch(*HBB, TBB, FBB, HeadCond)) {350continue;351}352353// Equivalence check is to skip loops.354if (!TBB || TBB == HBB) {355continue;356}357358SmallVector<MachineOperand, 4> TrueCond;359MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;360if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {361continue;362}363364MachineInstr *HeadCmpMI = findSuitableCompare(HBB);365if (!HeadCmpMI) {366continue;367}368369MachineInstr *TrueCmpMI = findSuitableCompare(TBB);370if (!TrueCmpMI) {371continue;372}373374AArch64CC::CondCode HeadCmp;375if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {376continue;377}378379AArch64CC::CondCode TrueCmp;380if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {381continue;382}383384const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();385const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();386387LLVM_DEBUG(dbgs() << "Head branch:\n");388LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)389<< '\n');390LLVM_DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');391392LLVM_DEBUG(dbgs() << "True branch:\n");393LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)394<< '\n');395LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');396397if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||398(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&399std::abs(TrueImm - HeadImm) == 2) {400// This branch transforms machine instructions that correspond to401//402// 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)403// 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)404//405// into406//407// 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)408// 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)409410CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);411CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);412if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&413std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {414modifyCmp(HeadCmpMI, HeadCmpInfo);415modifyCmp(TrueCmpMI, TrueCmpInfo);416Changed = true;417}418} else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||419(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&420std::abs(TrueImm - HeadImm) == 1) {421// This branch transforms machine instructions that correspond to422//423// 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)424// 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)425//426// into427//428// 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)429// 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)430431// GT -> GE transformation increases immediate value, so picking the432// smaller one; LT -> LE decreases immediate value so invert the choice.433bool adjustHeadCond = (HeadImm < TrueImm);434if (HeadCmp == AArch64CC::LT) {435adjustHeadCond = !adjustHeadCond;436}437438if (adjustHeadCond) {439Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);440} else {441Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);442}443}444// Other transformation cases almost never occur due to generation of < or >445// comparisons instead of <= and >=.446}447448return Changed;449}450451452