Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
35266 views
//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains a pass that scans a machine function to determine which9// conditional branches need more than 16 bits of displacement to reach their10// target basic block. It does this in two passes; a calculation of basic block11// positions pass, and a branch pseudo op to machine branch opcode pass. This12// pass should be run last, just before the assembly printer.13//14//===----------------------------------------------------------------------===//1516#include "MCTargetDesc/PPCPredicates.h"17#include "PPC.h"18#include "PPCInstrBuilder.h"19#include "PPCInstrInfo.h"20#include "PPCSubtarget.h"21#include "llvm/ADT/Statistic.h"22#include "llvm/CodeGen/MachineFunctionPass.h"23#include "llvm/CodeGen/MachineRegisterInfo.h"24#include "llvm/CodeGen/TargetSubtargetInfo.h"25#include "llvm/Support/MathExtras.h"26#include "llvm/Target/TargetMachine.h"27#include <algorithm>28using namespace llvm;2930#define DEBUG_TYPE "ppc-branch-select"3132STATISTIC(NumExpanded, "Number of branches expanded to long format");33STATISTIC(NumPrefixed, "Number of prefixed instructions");34STATISTIC(NumPrefixedAligned,35"Number of prefixed instructions that have been aligned");3637namespace {38struct PPCBSel : public MachineFunctionPass {39static char ID;40PPCBSel() : MachineFunctionPass(ID) {41initializePPCBSelPass(*PassRegistry::getPassRegistry());42}4344// The sizes of the basic blocks in the function (the first45// element of the pair); the second element of the pair is the amount of the46// size that is due to potential padding.47std::vector<std::pair<unsigned, unsigned>> BlockSizes;4849// The first block number which has imprecise instruction address.50int FirstImpreciseBlock = -1;5152unsigned GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset);53unsigned ComputeBlockSizes(MachineFunction &Fn);54void modifyAdjustment(MachineFunction &Fn);55int computeBranchSize(MachineFunction &Fn,56const MachineBasicBlock *Src,57const MachineBasicBlock *Dest,58unsigned BrOffset);5960bool runOnMachineFunction(MachineFunction &Fn) override;6162MachineFunctionProperties getRequiredProperties() const override {63return MachineFunctionProperties().set(64MachineFunctionProperties::Property::NoVRegs);65}6667StringRef getPassName() const override { return "PowerPC Branch Selector"; }68};69char PPCBSel::ID = 0;70}7172INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",73false, false)7475/// createPPCBranchSelectionPass - returns an instance of the Branch Selection76/// Pass77///78FunctionPass *llvm::createPPCBranchSelectionPass() {79return new PPCBSel();80}8182/// In order to make MBB aligned, we need to add an adjustment value to the83/// original Offset.84unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,85unsigned Offset) {86const Align Alignment = MBB.getAlignment();87if (Alignment == Align(1))88return 0;8990const Align ParentAlign = MBB.getParent()->getAlignment();9192if (Alignment <= ParentAlign)93return offsetToAlignment(Offset, Alignment);9495// The alignment of this MBB is larger than the function's alignment, so we96// can't tell whether or not it will insert nops. Assume that it will.97if (FirstImpreciseBlock < 0)98FirstImpreciseBlock = MBB.getNumber();99return Alignment.value() + offsetToAlignment(Offset, Alignment);100}101102/// We need to be careful about the offset of the first block in the function103/// because it might not have the function's alignment. This happens because,104/// under the ELFv2 ABI, for functions which require a TOC pointer, we add a105/// two-instruction sequence to the start of the function.106/// Note: This needs to be synchronized with the check in107/// PPCLinuxAsmPrinter::EmitFunctionBodyStart.108static inline unsigned GetInitialOffset(MachineFunction &Fn) {109unsigned InitialOffset = 0;110if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() &&111!Fn.getRegInfo().use_empty(PPC::X2))112InitialOffset = 8;113return InitialOffset;114}115116/// Measure each MBB and compute a size for the entire function.117unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {118const PPCInstrInfo *TII =119static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());120unsigned FuncSize = GetInitialOffset(Fn);121122for (MachineBasicBlock &MBB : Fn) {123// The end of the previous block may have extra nops if this block has an124// alignment requirement.125if (MBB.getNumber() > 0) {126unsigned AlignExtra = GetAlignmentAdjustment(MBB, FuncSize);127128auto &BS = BlockSizes[MBB.getNumber()-1];129BS.first += AlignExtra;130BS.second = AlignExtra;131132FuncSize += AlignExtra;133}134135unsigned BlockSize = 0;136unsigned UnalignedBytesRemaining = 0;137for (MachineInstr &MI : MBB) {138unsigned MINumBytes = TII->getInstSizeInBytes(MI);139if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))140FirstImpreciseBlock = MBB.getNumber();141if (TII->isPrefixed(MI.getOpcode())) {142NumPrefixed++;143144// All 8 byte instructions may require alignment. Each 8 byte145// instruction may be aligned by another 4 bytes.146// This means that an 8 byte instruction may require 12 bytes147// (8 for the instruction itself and 4 for the alignment nop).148// This will happen if an 8 byte instruction can be aligned to 64 bytes149// by only adding a 4 byte nop.150// We don't know the alignment at this point in the code so we have to151// adopt a more pessimistic approach. If an instruction may need152// alignment we assume that it does need alignment and add 4 bytes to153// it. As a result we may end up with more long branches than before154// but we are in the safe position where if we need a long branch we155// have one.156// The if statement checks to make sure that two 8 byte instructions157// are at least 64 bytes away from each other. It is not possible for158// two instructions that both need alignment to be within 64 bytes of159// each other.160if (!UnalignedBytesRemaining) {161BlockSize += 4;162UnalignedBytesRemaining = 60;163NumPrefixedAligned++;164}165}166UnalignedBytesRemaining -= std::min(UnalignedBytesRemaining, MINumBytes);167BlockSize += MINumBytes;168}169170BlockSizes[MBB.getNumber()].first = BlockSize;171FuncSize += BlockSize;172}173174return FuncSize;175}176177/// Modify the basic block align adjustment.178void PPCBSel::modifyAdjustment(MachineFunction &Fn) {179unsigned Offset = GetInitialOffset(Fn);180for (MachineBasicBlock &MBB : Fn) {181if (MBB.getNumber() > 0) {182auto &BS = BlockSizes[MBB.getNumber()-1];183BS.first -= BS.second;184Offset -= BS.second;185186unsigned AlignExtra = GetAlignmentAdjustment(MBB, Offset);187188BS.first += AlignExtra;189BS.second = AlignExtra;190191Offset += AlignExtra;192}193194Offset += BlockSizes[MBB.getNumber()].first;195}196}197198/// Determine the offset from the branch in Src block to the Dest block.199/// BrOffset is the offset of the branch instruction inside Src block.200int PPCBSel::computeBranchSize(MachineFunction &Fn,201const MachineBasicBlock *Src,202const MachineBasicBlock *Dest,203unsigned BrOffset) {204int BranchSize;205Align MaxAlign = Align(4);206bool NeedExtraAdjustment = false;207if (Dest->getNumber() <= Src->getNumber()) {208// If this is a backwards branch, the delta is the offset from the209// start of this block to this branch, plus the sizes of all blocks210// from this block to the dest.211BranchSize = BrOffset;212MaxAlign = std::max(MaxAlign, Src->getAlignment());213214int DestBlock = Dest->getNumber();215BranchSize += BlockSizes[DestBlock].first;216for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {217BranchSize += BlockSizes[i].first;218MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());219}220221NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&222(DestBlock >= FirstImpreciseBlock);223} else {224// Otherwise, add the size of the blocks between this block and the225// dest to the number of bytes left in this block.226unsigned StartBlock = Src->getNumber();227BranchSize = BlockSizes[StartBlock].first - BrOffset;228229MaxAlign = std::max(MaxAlign, Dest->getAlignment());230for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {231BranchSize += BlockSizes[i].first;232MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());233}234235NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&236(Src->getNumber() >= FirstImpreciseBlock);237}238239// We tend to over estimate code size due to large alignment and240// inline assembly. Usually it causes larger computed branch offset.241// But sometimes it may also causes smaller computed branch offset242// than actual branch offset. If the offset is close to the limit of243// encoding, it may cause problem at run time.244// Following is a simplified example.245//246// actual estimated247// address address248// ...249// bne Far 100 10c250// .p2align 4251// Near: 110 110252// ...253// Far: 8108 8108254//255// Actual offset: 0x8108 - 0x100 = 0x8008256// Computed offset: 0x8108 - 0x10c = 0x7ffc257//258// This example also shows when we can get the largest gap between259// estimated offset and actual offset. If there is an aligned block260// ABB between branch and target, assume its alignment is <align>261// bits. Now consider the accumulated function size FSIZE till the end262// of previous block PBB. If the estimated FSIZE is multiple of263// 2^<align>, we don't need any padding for the estimated address of264// ABB. If actual FSIZE at the end of PBB is 4 bytes more than265// multiple of 2^<align>, then we need (2^<align> - 4) bytes of266// padding. It also means the actual branch offset is (2^<align> - 4)267// larger than computed offset. Other actual FSIZE needs less padding268// bytes, so causes smaller gap between actual and computed offset.269//270// On the other hand, if the inline asm or large alignment occurs271// between the branch block and destination block, the estimated address272// can be <delta> larger than actual address. If padding bytes are273// needed for a later aligned block, the actual number of padding bytes274// is at most <delta> more than estimated padding bytes. So the actual275// aligned block address is less than or equal to the estimated aligned276// block address. So the actual branch offset is less than or equal to277// computed branch offset.278//279// The computed offset is at most ((1 << alignment) - 4) bytes smaller280// than actual offset. So we add this number to the offset for safety.281if (NeedExtraAdjustment)282BranchSize += MaxAlign.value() - 4;283284return BranchSize;285}286287bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {288const PPCInstrInfo *TII =289static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());290// Give the blocks of the function a dense, in-order, numbering.291Fn.RenumberBlocks();292BlockSizes.resize(Fn.getNumBlockIDs());293FirstImpreciseBlock = -1;294295// Measure each MBB and compute a size for the entire function.296unsigned FuncSize = ComputeBlockSizes(Fn);297298// If the entire function is smaller than the displacement of a branch field,299// we know we don't need to shrink any branches in this function. This is a300// common case.301if (FuncSize < (1 << 15)) {302BlockSizes.clear();303return false;304}305306// For each conditional branch, if the offset to its destination is larger307// than the offset field allows, transform it into a long branch sequence308// like this:309// short branch:310// bCC MBB311// long branch:312// b!CC $PC+8313// b MBB314//315bool MadeChange = true;316bool EverMadeChange = false;317while (MadeChange) {318// Iteratively expand branches until we reach a fixed point.319MadeChange = false;320321for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;322++MFI) {323MachineBasicBlock &MBB = *MFI;324unsigned MBBStartOffset = 0;325for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();326I != E; ++I) {327MachineBasicBlock *Dest = nullptr;328if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())329Dest = I->getOperand(2).getMBB();330else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&331!I->getOperand(1).isImm())332Dest = I->getOperand(1).getMBB();333else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||334I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) &&335!I->getOperand(0).isImm())336Dest = I->getOperand(0).getMBB();337338if (!Dest) {339MBBStartOffset += TII->getInstSizeInBytes(*I);340continue;341}342343// Determine the offset from the current branch to the destination344// block.345int BranchSize = computeBranchSize(Fn, &MBB, Dest, MBBStartOffset);346347// If this branch is in range, ignore it.348if (isInt<16>(BranchSize)) {349MBBStartOffset += 4;350continue;351}352353// Otherwise, we have to expand it to a long branch.354MachineInstr &OldBranch = *I;355DebugLoc dl = OldBranch.getDebugLoc();356357if (I->getOpcode() == PPC::BCC) {358// The BCC operands are:359// 0. PPC branch predicate360// 1. CR register361// 2. Target MBB362PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();363Register CRReg = I->getOperand(1).getReg();364365// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.366BuildMI(MBB, I, dl, TII->get(PPC::BCC))367.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);368} else if (I->getOpcode() == PPC::BC) {369Register CRBit = I->getOperand(0).getReg();370BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);371} else if (I->getOpcode() == PPC::BCn) {372Register CRBit = I->getOperand(0).getReg();373BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);374} else if (I->getOpcode() == PPC::BDNZ) {375BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);376} else if (I->getOpcode() == PPC::BDNZ8) {377BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);378} else if (I->getOpcode() == PPC::BDZ) {379BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);380} else if (I->getOpcode() == PPC::BDZ8) {381BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);382} else {383llvm_unreachable("Unhandled branch type!");384}385386// Uncond branch to the real destination.387I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);388389// Remove the old branch from the function.390OldBranch.eraseFromParent();391392// Remember that this instruction is 8-bytes, increase the size of the393// block by 4, remember to iterate.394BlockSizes[MBB.getNumber()].first += 4;395MBBStartOffset += 8;396++NumExpanded;397MadeChange = true;398}399}400401if (MadeChange) {402// If we're going to iterate again, make sure we've updated our403// padding-based contributions to the block sizes.404modifyAdjustment(Fn);405}406407EverMadeChange |= MadeChange;408}409410BlockSizes.clear();411return EverMadeChange;412}413414415