Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
35269 views
//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains a pass that collect the Linker Optimization Hint (LOH).9// This pass should be run at the very end of the compilation flow, just before10// assembly printer.11// To be useful for the linker, the LOH must be printed into the assembly file.12//13// A LOH describes a sequence of instructions that may be optimized by the14// linker.15// This same sequence cannot be optimized by the compiler because some of16// the information will be known at link time.17// For instance, consider the following sequence:18// L1: adrp xA, sym@PAGE19// L2: add xB, xA, sym@PAGEOFF20// L3: ldr xC, [xB, #imm]21// This sequence can be turned into:22// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:23// L3: ldr xC, sym+#imm24// It may also be turned into either the following more efficient25// code sequences:26// - If sym@PAGEOFF + #imm fits the encoding space of L3.27// L1: adrp xA, sym@PAGE28// L3: ldr xC, [xB, sym@PAGEOFF + #imm]29// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:30// L1: adr xA, sym31// L3: ldr xC, [xB, #imm]32//33// To be valid a LOH must meet all the requirements needed by all the related34// possible linker transformations.35// For instance, using the running example, the constraints to emit36// ".loh AdrpAddLdr" are:37// - L1, L2, and L3 instructions are of the expected type, i.e.,38// respectively ADRP, ADD (immediate), and LD.39// - The result of L1 is used only by L2.40// - The register argument (xA) used in the ADD instruction is defined41// only by L1.42// - The result of L2 is used only by L3.43// - The base address (xB) in L3 is defined only L2.44// - The ADRP in L1 and the ADD in L2 must reference the same symbol using45// @PAGE/@PAGEOFF with no additional constants46//47// Currently supported LOHs are:48// * So called non-ADRP-related:49// - .loh AdrpAddLdr L1, L2, L3:50// L1: adrp xA, sym@PAGE51// L2: add xB, xA, sym@PAGEOFF52// L3: ldr xC, [xB, #imm]53// - .loh AdrpLdrGotLdr L1, L2, L3:54// L1: adrp xA, sym@GOTPAGE55// L2: ldr xB, [xA, sym@GOTPAGEOFF]56// L3: ldr xC, [xB, #imm]57// - .loh AdrpLdr L1, L3:58// L1: adrp xA, sym@PAGE59// L3: ldr xC, [xA, sym@PAGEOFF]60// - .loh AdrpAddStr L1, L2, L3:61// L1: adrp xA, sym@PAGE62// L2: add xB, xA, sym@PAGEOFF63// L3: str xC, [xB, #imm]64// - .loh AdrpLdrGotStr L1, L2, L3:65// L1: adrp xA, sym@GOTPAGE66// L2: ldr xB, [xA, sym@GOTPAGEOFF]67// L3: str xC, [xB, #imm]68// - .loh AdrpAdd L1, L2:69// L1: adrp xA, sym@PAGE70// L2: add xB, xA, sym@PAGEOFF71// For all these LOHs, L1, L2, L3 form a simple chain:72// L1 result is used only by L2 and L2 result by L3.73// L3 LOH-related argument is defined only by L2 and L2 LOH-related argument74// by L1.75// All these LOHs aim at using more efficient load/store patterns by folding76// some instructions used to compute the address directly into the load/store.77//78// * So called ADRP-related:79// - .loh AdrpAdrp L2, L1:80// L2: ADRP xA, sym1@PAGE81// L1: ADRP xA, sym2@PAGE82// L2 dominates L1 and xA is not redifined between L2 and L183// This LOH aims at getting rid of redundant ADRP instructions.84//85// The overall design for emitting the LOHs is:86// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.87// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:88// 1. Associates them a label.89// 2. Emits them in a MCStreamer (EmitLOHDirective).90// - The MCMachOStreamer records them into the MCAssembler.91// - The MCAsmStreamer prints them.92// - Other MCStreamers ignore them.93// 3. Closes the MCStreamer:94// - The MachObjectWriter gets them from the MCAssembler and writes95// them in the object file.96// - Other ObjectWriters ignore them.97//===----------------------------------------------------------------------===//9899#include "AArch64.h"100#include "AArch64InstrInfo.h"101#include "AArch64MachineFunctionInfo.h"102#include "llvm/ADT/SmallSet.h"103#include "llvm/ADT/Statistic.h"104#include "llvm/CodeGen/MachineBasicBlock.h"105#include "llvm/CodeGen/MachineFunctionPass.h"106#include "llvm/CodeGen/MachineInstr.h"107#include "llvm/CodeGen/TargetRegisterInfo.h"108#include "llvm/Support/Debug.h"109#include "llvm/Support/ErrorHandling.h"110#include "llvm/Support/raw_ostream.h"111#include "llvm/Target/TargetMachine.h"112using namespace llvm;113114#define DEBUG_TYPE "aarch64-collect-loh"115116STATISTIC(NumADRPSimpleCandidate,117"Number of simplifiable ADRP dominate by another");118STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");119STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");120STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");121STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");122STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");123STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");124125#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"126127namespace {128129struct AArch64CollectLOH : public MachineFunctionPass {130static char ID;131AArch64CollectLOH() : MachineFunctionPass(ID) {}132133bool runOnMachineFunction(MachineFunction &MF) override;134135MachineFunctionProperties getRequiredProperties() const override {136return MachineFunctionProperties().set(137MachineFunctionProperties::Property::NoVRegs);138}139140StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }141142void getAnalysisUsage(AnalysisUsage &AU) const override {143MachineFunctionPass::getAnalysisUsage(AU);144AU.setPreservesAll();145}146};147148char AArch64CollectLOH::ID = 0;149150} // end anonymous namespace.151152INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh",153AARCH64_COLLECT_LOH_NAME, false, false)154155static bool canAddBePartOfLOH(const MachineInstr &MI) {156// Check immediate to see if the immediate is an address.157switch (MI.getOperand(2).getType()) {158default:159return false;160case MachineOperand::MO_GlobalAddress:161case MachineOperand::MO_JumpTableIndex:162case MachineOperand::MO_ConstantPoolIndex:163case MachineOperand::MO_BlockAddress:164return true;165}166}167168/// Answer the following question: Can Def be one of the definition169/// involved in a part of a LOH?170static bool canDefBePartOfLOH(const MachineInstr &MI) {171// Accept ADRP, ADDLow and LOADGot.172switch (MI.getOpcode()) {173default:174return false;175case AArch64::ADRP:176return true;177case AArch64::ADDXri:178return canAddBePartOfLOH(MI);179case AArch64::LDRXui:180case AArch64::LDRWui:181// Check immediate to see if the immediate is an address.182switch (MI.getOperand(2).getType()) {183default:184return false;185case MachineOperand::MO_GlobalAddress:186return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;187}188}189}190191/// Check whether the given instruction can the end of a LOH chain involving a192/// store.193static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {194switch (MI.getOpcode()) {195default:196return false;197case AArch64::STRBBui:198case AArch64::STRHHui:199case AArch64::STRBui:200case AArch64::STRHui:201case AArch64::STRWui:202case AArch64::STRXui:203case AArch64::STRSui:204case AArch64::STRDui:205case AArch64::STRQui:206// We can only optimize the index operand.207// In case we have str xA, [xA, #imm], this is two different uses208// of xA and we cannot fold, otherwise the xA stored may be wrong,209// even if #imm == 0.210return MO.getOperandNo() == 1 &&211MI.getOperand(0).getReg() != MI.getOperand(1).getReg();212}213}214215/// Check whether the given instruction can be the end of a LOH chain216/// involving a load.217static bool isCandidateLoad(const MachineInstr &MI) {218switch (MI.getOpcode()) {219default:220return false;221case AArch64::LDRSBWui:222case AArch64::LDRSBXui:223case AArch64::LDRSHWui:224case AArch64::LDRSHXui:225case AArch64::LDRSWui:226case AArch64::LDRBui:227case AArch64::LDRHui:228case AArch64::LDRWui:229case AArch64::LDRXui:230case AArch64::LDRSui:231case AArch64::LDRDui:232case AArch64::LDRQui:233return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);234}235}236237/// Check whether the given instruction can load a litteral.238static bool supportLoadFromLiteral(const MachineInstr &MI) {239switch (MI.getOpcode()) {240default:241return false;242case AArch64::LDRSWui:243case AArch64::LDRWui:244case AArch64::LDRXui:245case AArch64::LDRSui:246case AArch64::LDRDui:247case AArch64::LDRQui:248return true;249}250}251252/// Number of GPR registers traked by mapRegToGPRIndex()253static const unsigned N_GPR_REGS = 31;254/// Map register number to index from 0-30.255static int mapRegToGPRIndex(MCPhysReg Reg) {256static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");257static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");258if (AArch64::X0 <= Reg && Reg <= AArch64::X28)259return Reg - AArch64::X0;260if (AArch64::W0 <= Reg && Reg <= AArch64::W30)261return Reg - AArch64::W0;262// TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to263// handle them as special cases.264if (Reg == AArch64::FP)265return 29;266if (Reg == AArch64::LR)267return 30;268return -1;269}270271/// State tracked per register.272/// The main algorithm walks backwards over a basic block maintaining this273/// datastructure for each tracked general purpose register.274struct LOHInfo {275MCLOHType Type : 8; ///< "Best" type of LOH possible.276bool IsCandidate : 1; ///< Possible LOH candidate.277bool OneUser : 1; ///< Found exactly one user (yet).278bool MultiUsers : 1; ///< Found multiple users.279const MachineInstr *MI0; ///< First instruction involved in the LOH.280const MachineInstr *MI1; ///< Second instruction involved in the LOH281/// (if any).282const MachineInstr *LastADRP; ///< Last ADRP in same register.283};284285/// Update state \p Info given \p MI uses the tracked register.286static void handleUse(const MachineInstr &MI, const MachineOperand &MO,287LOHInfo &Info) {288// We have multiple uses if we already found one before.289if (Info.MultiUsers || Info.OneUser) {290Info.IsCandidate = false;291Info.MultiUsers = true;292return;293}294Info.OneUser = true;295296// Start new LOHInfo if applicable.297if (isCandidateLoad(MI)) {298Info.Type = MCLOH_AdrpLdr;299Info.IsCandidate = true;300Info.MI0 = &MI;301// Note that even this is AdrpLdr now, we can switch to a Ldr variant302// later.303} else if (isCandidateStore(MI, MO)) {304Info.Type = MCLOH_AdrpAddStr;305Info.IsCandidate = true;306Info.MI0 = &MI;307Info.MI1 = nullptr;308} else if (MI.getOpcode() == AArch64::ADDXri) {309Info.Type = MCLOH_AdrpAdd;310Info.IsCandidate = true;311Info.MI0 = &MI;312} else if ((MI.getOpcode() == AArch64::LDRXui ||313MI.getOpcode() == AArch64::LDRWui) &&314MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {315Info.Type = MCLOH_AdrpLdrGot;316Info.IsCandidate = true;317Info.MI0 = &MI;318}319}320321/// Update state \p Info given the tracked register is clobbered.322static void handleClobber(LOHInfo &Info) {323Info.IsCandidate = false;324Info.OneUser = false;325Info.MultiUsers = false;326Info.LastADRP = nullptr;327}328329/// Update state \p Info given that \p MI is possibly the middle instruction330/// of an LOH involving 3 instructions.331static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,332LOHInfo &OpInfo) {333if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser))334return false;335// Copy LOHInfo for dest register to LOHInfo for source register.336if (&DefInfo != &OpInfo) {337OpInfo = DefInfo;338// Invalidate \p DefInfo because we track it in \p OpInfo now.339handleClobber(DefInfo);340} else341DefInfo.LastADRP = nullptr;342343// Advance state machine.344assert(OpInfo.IsCandidate && "Expect valid state");345if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) {346if (OpInfo.Type == MCLOH_AdrpLdr) {347OpInfo.Type = MCLOH_AdrpAddLdr;348OpInfo.IsCandidate = true;349OpInfo.MI1 = &MI;350return true;351} else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {352OpInfo.Type = MCLOH_AdrpAddStr;353OpInfo.IsCandidate = true;354OpInfo.MI1 = &MI;355return true;356}357} else {358assert((MI.getOpcode() == AArch64::LDRXui ||359MI.getOpcode() == AArch64::LDRWui) &&360"Expect LDRXui or LDRWui");361assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&362"Expected GOT relocation");363if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {364OpInfo.Type = MCLOH_AdrpLdrGotStr;365OpInfo.IsCandidate = true;366OpInfo.MI1 = &MI;367return true;368} else if (OpInfo.Type == MCLOH_AdrpLdr) {369OpInfo.Type = MCLOH_AdrpLdrGotLdr;370OpInfo.IsCandidate = true;371OpInfo.MI1 = &MI;372return true;373}374}375return false;376}377378/// Update state when seeing and ADRP instruction.379static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,380LOHInfo &Info, LOHInfo *LOHInfos) {381if (Info.LastADRP != nullptr) {382LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n"383<< '\t' << MI << '\t' << *Info.LastADRP);384AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});385++NumADRPSimpleCandidate;386}387388// Produce LOH directive if possible.389if (Info.IsCandidate) {390switch (Info.Type) {391case MCLOH_AdrpAdd: {392// ADRPs and ADDs for this candidate may be split apart if using393// GlobalISel instead of pseudo-expanded. If that happens, the394// def register of the ADD may have a use in between. Adding an LOH in395// this case can cause the linker to rewrite the ADRP to write to that396// register, clobbering the use.397const MachineInstr *AddMI = Info.MI0;398int DefIdx = mapRegToGPRIndex(MI.getOperand(0).getReg());399int OpIdx = mapRegToGPRIndex(AddMI->getOperand(0).getReg());400LOHInfo DefInfo = LOHInfos[OpIdx];401if (DefIdx != OpIdx && (DefInfo.OneUser || DefInfo.MultiUsers))402break;403LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n"404<< '\t' << MI << '\t' << *Info.MI0);405AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});406++NumADRSimpleCandidate;407break;408}409case MCLOH_AdrpLdr:410if (supportLoadFromLiteral(*Info.MI0)) {411LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n"412<< '\t' << MI << '\t' << *Info.MI0);413AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});414++NumADRPToLDR;415}416break;417case MCLOH_AdrpAddLdr: {418// There is a possibility that the linker may try to rewrite:419// adrp x0, @sym@PAGE420// add x1, x0, @sym@PAGEOFF421// [x0 = some other def]422// ldr x2, [x1]423// ...into...424// adrp x0, @sym425// nop426// [x0 = some other def]427// ldr x2, [x0]428// ...if the offset to the symbol won't fit within a literal load.429// This causes the load to use the result of the adrp, which in this430// case has already been clobbered.431// FIXME: Implement proper liveness tracking for all registers. For now,432// don't emit the LOH if there are any instructions between the add and433// the ldr.434MachineInstr *AddMI = const_cast<MachineInstr *>(Info.MI1);435const MachineInstr *LdrMI = Info.MI0;436auto AddIt = MachineBasicBlock::iterator(AddMI);437auto EndIt = AddMI->getParent()->end();438if (AddMI->getIterator() == EndIt || LdrMI != &*next_nodbg(AddIt, EndIt))439break;440441LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n"442<< '\t' << MI << '\t' << *Info.MI1 << '\t'443<< *Info.MI0);444AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});445++NumADDToLDR;446break;447}448case MCLOH_AdrpAddStr:449if (Info.MI1 != nullptr) {450LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n"451<< '\t' << MI << '\t' << *Info.MI1 << '\t'452<< *Info.MI0);453AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});454++NumADDToSTR;455}456break;457case MCLOH_AdrpLdrGotLdr:458LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n"459<< '\t' << MI << '\t' << *Info.MI1 << '\t'460<< *Info.MI0);461AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});462++NumLDRToLDR;463break;464case MCLOH_AdrpLdrGotStr:465LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n"466<< '\t' << MI << '\t' << *Info.MI1 << '\t'467<< *Info.MI0);468AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});469++NumLDRToSTR;470break;471case MCLOH_AdrpLdrGot:472LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n"473<< '\t' << MI << '\t' << *Info.MI0);474AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});475break;476case MCLOH_AdrpAdrp:477llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");478}479}480481handleClobber(Info);482Info.LastADRP = &MI;483}484485static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg,486LOHInfo *LOHInfos) {487if (!MachineOperand::clobbersPhysReg(RegMask, Reg))488return;489int Idx = mapRegToGPRIndex(Reg);490if (Idx >= 0)491handleClobber(LOHInfos[Idx]);492}493494static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {495// Handle defs and regmasks.496for (const MachineOperand &MO : MI.operands()) {497if (MO.isRegMask()) {498const uint32_t *RegMask = MO.getRegMask();499for (MCPhysReg Reg : AArch64::GPR32RegClass)500handleRegMaskClobber(RegMask, Reg, LOHInfos);501for (MCPhysReg Reg : AArch64::GPR64RegClass)502handleRegMaskClobber(RegMask, Reg, LOHInfos);503continue;504}505if (!MO.isReg() || !MO.isDef())506continue;507int Idx = mapRegToGPRIndex(MO.getReg());508if (Idx < 0)509continue;510handleClobber(LOHInfos[Idx]);511}512// Handle uses.513514SmallSet<int, 4> UsesSeen;515for (const MachineOperand &MO : MI.uses()) {516if (!MO.isReg() || !MO.readsReg())517continue;518int Idx = mapRegToGPRIndex(MO.getReg());519if (Idx < 0)520continue;521522// Multiple uses of the same register within a single instruction don't523// count as MultiUser or block optimization. This is especially important on524// arm64_32, where any memory operation is likely to be an explicit use of525// xN and an implicit use of wN (the base address register).526if (UsesSeen.insert(Idx).second)527handleUse(MI, MO, LOHInfos[Idx]);528}529}530531bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {532if (skipFunction(MF.getFunction()))533return false;534535LLVM_DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"536<< "Looking in function " << MF.getName() << '\n');537538LOHInfo LOHInfos[N_GPR_REGS];539AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();540for (const MachineBasicBlock &MBB : MF) {541// Reset register tracking state.542memset(LOHInfos, 0, sizeof(LOHInfos));543// Live-out registers are used.544for (const MachineBasicBlock *Succ : MBB.successors()) {545for (const auto &LI : Succ->liveins()) {546int RegIdx = mapRegToGPRIndex(LI.PhysReg);547if (RegIdx >= 0)548LOHInfos[RegIdx].OneUser = true;549}550}551552// Walk the basic block backwards and update the per register state machine553// in the process.554for (const MachineInstr &MI :555instructionsWithoutDebug(MBB.instr_rbegin(), MBB.instr_rend())) {556unsigned Opcode = MI.getOpcode();557switch (Opcode) {558case AArch64::ADDXri:559case AArch64::LDRXui:560case AArch64::LDRWui:561if (canDefBePartOfLOH(MI)) {562const MachineOperand &Def = MI.getOperand(0);563const MachineOperand &Op = MI.getOperand(1);564assert(Def.isReg() && Def.isDef() && "Expected reg def");565assert(Op.isReg() && Op.isUse() && "Expected reg use");566int DefIdx = mapRegToGPRIndex(Def.getReg());567int OpIdx = mapRegToGPRIndex(Op.getReg());568if (DefIdx >= 0 && OpIdx >= 0 &&569handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx]))570continue;571}572break;573case AArch64::ADRP:574const MachineOperand &Op0 = MI.getOperand(0);575int Idx = mapRegToGPRIndex(Op0.getReg());576if (Idx >= 0) {577handleADRP(MI, AFI, LOHInfos[Idx], LOHInfos);578continue;579}580break;581}582handleNormalInst(MI, LOHInfos);583}584}585586// Return "no change": The pass only collects information.587return false;588}589590FunctionPass *llvm::createAArch64CollectLOHPass() {591return new AArch64CollectLOH();592}593594595