Path: blob/main/contrib/llvm-project/lld/MachO/SectionPriorities.cpp
34878 views
//===- SectionPriorities.cpp ----------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details9/// about the algorithm.10///11//===----------------------------------------------------------------------===//1213#include "SectionPriorities.h"14#include "Config.h"15#include "InputFiles.h"16#include "Symbols.h"17#include "Target.h"1819#include "lld/Common/Args.h"20#include "lld/Common/CommonLinkerContext.h"21#include "lld/Common/ErrorHandler.h"22#include "llvm/ADT/DenseMap.h"23#include "llvm/ADT/MapVector.h"24#include "llvm/Support/Path.h"25#include "llvm/Support/TimeProfiler.h"26#include "llvm/Support/raw_ostream.h"2728#include <numeric>2930using namespace llvm;31using namespace llvm::MachO;32using namespace llvm::sys;33using namespace lld;34using namespace lld::macho;3536PriorityBuilder macho::priorityBuilder;3738namespace {3940size_t highestAvailablePriority = std::numeric_limits<size_t>::max();4142struct Edge {43int from;44uint64_t weight;45};4647struct Cluster {48Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}4950double getDensity() const {51if (size == 0)52return 0;53return double(weight) / double(size);54}5556int next;57int prev;58uint64_t size;59uint64_t weight = 0;60uint64_t initialWeight = 0;61Edge bestPred = {-1, 0};62};6364class CallGraphSort {65public:66CallGraphSort(const MapVector<SectionPair, uint64_t> &profile);6768DenseMap<const InputSection *, size_t> run();6970private:71std::vector<Cluster> clusters;72std::vector<const InputSection *> sections;73};74// Maximum amount the combined cluster density can be worse than the original75// cluster to consider merging.76constexpr int MAX_DENSITY_DEGRADATION = 8;77} // end anonymous namespace7879// Take the edge list in callGraphProfile, resolve symbol names to Symbols, and80// generate a graph between InputSections with the provided weights.81CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) {82DenseMap<const InputSection *, int> secToCluster;8384auto getOrCreateCluster = [&](const InputSection *isec) -> int {85auto res = secToCluster.try_emplace(isec, clusters.size());86if (res.second) {87sections.push_back(isec);88clusters.emplace_back(clusters.size(), isec->getSize());89}90return res.first->second;91};9293// Create the graph94for (const std::pair<SectionPair, uint64_t> &c : profile) {95const auto fromSec = c.first.first->canonical();96const auto toSec = c.first.second->canonical();97uint64_t weight = c.second;98// Ignore edges between input sections belonging to different output99// sections. This is done because otherwise we would end up with clusters100// containing input sections that can't actually be placed adjacently in the101// output. This messes with the cluster size and density calculations. We102// would also end up moving input sections in other output sections without103// moving them closer to what calls them.104if (fromSec->parent != toSec->parent)105continue;106107int from = getOrCreateCluster(fromSec);108int to = getOrCreateCluster(toSec);109110clusters[to].weight += weight;111112if (from == to)113continue;114115// Remember the best edge.116Cluster &toC = clusters[to];117if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {118toC.bestPred.from = from;119toC.bestPred.weight = weight;120}121}122for (Cluster &c : clusters)123c.initialWeight = c.weight;124}125126// It's bad to merge clusters which would degrade the density too much.127static bool isNewDensityBad(Cluster &a, Cluster &b) {128double newDensity = double(a.weight + b.weight) / double(a.size + b.size);129return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;130}131132// Find the leader of V's belonged cluster (represented as an equivalence133// class). We apply union-find path-halving technique (simple to implement) in134// the meantime as it decreases depths and the time complexity.135static int getLeader(std::vector<int> &leaders, int v) {136while (leaders[v] != v) {137leaders[v] = leaders[leaders[v]];138v = leaders[v];139}140return v;141}142143static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,144Cluster &from, int fromIdx) {145int tail1 = into.prev, tail2 = from.prev;146into.prev = tail2;147cs[tail2].next = intoIdx;148from.prev = tail1;149cs[tail1].next = fromIdx;150into.size += from.size;151into.weight += from.weight;152from.size = 0;153from.weight = 0;154}155156// Group InputSections into clusters using the Call-Chain Clustering heuristic157// then sort the clusters by density.158DenseMap<const InputSection *, size_t> CallGraphSort::run() {159const uint64_t maxClusterSize = target->getPageSize();160161// Cluster indices sorted by density.162std::vector<int> sorted(clusters.size());163// For union-find.164std::vector<int> leaders(clusters.size());165166std::iota(leaders.begin(), leaders.end(), 0);167std::iota(sorted.begin(), sorted.end(), 0);168169llvm::stable_sort(sorted, [&](int a, int b) {170return clusters[a].getDensity() > clusters[b].getDensity();171});172173for (int l : sorted) {174// The cluster index is the same as the index of its leader here because175// clusters[L] has not been merged into another cluster yet.176Cluster &c = clusters[l];177178// Don't consider merging if the edge is unlikely.179if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)180continue;181182int predL = getLeader(leaders, c.bestPred.from);183// Already in the same cluster.184if (l == predL)185continue;186187Cluster *predC = &clusters[predL];188if (c.size + predC->size > maxClusterSize)189continue;190191if (isNewDensityBad(*predC, c))192continue;193194leaders[l] = predL;195mergeClusters(clusters, *predC, predL, c, l);196}197// Sort remaining non-empty clusters by density.198sorted.clear();199for (int i = 0, e = (int)clusters.size(); i != e; ++i)200if (clusters[i].size > 0)201sorted.push_back(i);202llvm::stable_sort(sorted, [&](int a, int b) {203return clusters[a].getDensity() > clusters[b].getDensity();204});205206DenseMap<const InputSection *, size_t> orderMap;207208// Sections will be sorted by decreasing order. Absent sections will have209// priority 0 and be placed at the end of sections.210// NB: This is opposite from COFF/ELF to be compatible with the existing211// order-file code.212int curOrder = highestAvailablePriority;213for (int leader : sorted) {214for (int i = leader;;) {215orderMap[sections[i]] = curOrder--;216i = clusters[i].next;217if (i == leader)218break;219}220}221if (!config->printSymbolOrder.empty()) {222std::error_code ec;223raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);224if (ec) {225error("cannot open " + config->printSymbolOrder + ": " + ec.message());226return orderMap;227}228// Print the symbols ordered by C3, in the order of decreasing curOrder229// Instead of sorting all the orderMap, just repeat the loops above.230for (int leader : sorted)231for (int i = leader;;) {232const InputSection *isec = sections[i];233// Search all the symbols in the file of the section234// and find out a Defined symbol with name that is within the235// section.236for (Symbol *sym : isec->getFile()->symbols) {237if (auto *d = dyn_cast_or_null<Defined>(sym)) {238if (d->isec() == isec)239os << sym->getName() << "\n";240}241}242i = clusters[i].next;243if (i == leader)244break;245}246}247248return orderMap;249}250251std::optional<size_t>252macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {253if (sym->isAbsolute())254return std::nullopt;255256auto it = priorities.find(sym->getName());257if (it == priorities.end())258return std::nullopt;259const SymbolPriorityEntry &entry = it->second;260const InputFile *f = sym->isec()->getFile();261if (!f)262return entry.anyObjectFile;263// We don't use toString(InputFile *) here because it returns the full path264// for object files, and we only want the basename.265StringRef filename;266if (f->archiveName.empty())267filename = path::filename(f->getName());268else269filename = saver().save(path::filename(f->archiveName) + "(" +270path::filename(f->getName()) + ")");271return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);272}273274void macho::PriorityBuilder::extractCallGraphProfile() {275TimeTraceScope timeScope("Extract call graph profile");276bool hasOrderFile = !priorities.empty();277for (const InputFile *file : inputFiles) {278auto *obj = dyn_cast_or_null<ObjFile>(file);279if (!obj)280continue;281for (const CallGraphEntry &entry : obj->callGraph) {282assert(entry.fromIndex < obj->symbols.size() &&283entry.toIndex < obj->symbols.size());284auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]);285auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]);286if (fromSym && toSym &&287(!hasOrderFile ||288(!getSymbolPriority(fromSym) && !getSymbolPriority(toSym))))289callGraphProfile[{fromSym->isec(), toSym->isec()}] += entry.count;290}291}292}293294void macho::PriorityBuilder::parseOrderFile(StringRef path) {295assert(callGraphProfile.empty() &&296"Order file must be parsed before call graph profile is processed");297std::optional<MemoryBufferRef> buffer = readFile(path);298if (!buffer) {299error("Could not read order file at " + path);300return;301}302303MemoryBufferRef mbref = *buffer;304for (StringRef line : args::getLines(mbref)) {305StringRef objectFile, symbol;306line = line.take_until([](char c) { return c == '#'; }); // ignore comments307line = line.ltrim();308309CPUType cpuType = StringSwitch<CPUType>(line)310.StartsWith("i386:", CPU_TYPE_I386)311.StartsWith("x86_64:", CPU_TYPE_X86_64)312.StartsWith("arm:", CPU_TYPE_ARM)313.StartsWith("arm64:", CPU_TYPE_ARM64)314.StartsWith("ppc:", CPU_TYPE_POWERPC)315.StartsWith("ppc64:", CPU_TYPE_POWERPC64)316.Default(CPU_TYPE_ANY);317318if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType)319continue;320321// Drop the CPU type as well as the colon322if (cpuType != CPU_TYPE_ANY)323line = line.drop_until([](char c) { return c == ':'; }).drop_front();324325constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"};326for (StringRef fileEnd : fileEnds) {327size_t pos = line.find(fileEnd);328if (pos != StringRef::npos) {329// Split the string around the colon330objectFile = line.take_front(pos + fileEnd.size() - 1);331line = line.drop_front(pos + fileEnd.size());332break;333}334}335symbol = line.trim();336337if (!symbol.empty()) {338SymbolPriorityEntry &entry = priorities[symbol];339if (!objectFile.empty())340entry.objectFiles.insert(341std::make_pair(objectFile, highestAvailablePriority));342else343entry.anyObjectFile =344std::max(entry.anyObjectFile, highestAvailablePriority);345}346347--highestAvailablePriority;348}349}350351DenseMap<const InputSection *, size_t>352macho::PriorityBuilder::buildInputSectionPriorities() {353DenseMap<const InputSection *, size_t> sectionPriorities;354if (config->callGraphProfileSort) {355// Sort sections by the profile data provided by __LLVM,__cg_profile356// sections.357//358// This first builds a call graph based on the profile data then merges359// sections according to the C³ heuristic. All clusters are then sorted by a360// density metric to further improve locality.361TimeTraceScope timeScope("Call graph profile sort");362sectionPriorities = CallGraphSort(callGraphProfile).run();363}364365if (priorities.empty())366return sectionPriorities;367368auto addSym = [&](const Defined *sym) {369std::optional<size_t> symbolPriority = getSymbolPriority(sym);370if (!symbolPriority)371return;372size_t &priority = sectionPriorities[sym->isec()];373priority = std::max(priority, *symbolPriority);374};375376// TODO: Make sure this handles weak symbols correctly.377for (const InputFile *file : inputFiles) {378if (isa<ObjFile>(file))379for (Symbol *sym : file->symbols)380if (auto *d = dyn_cast_or_null<Defined>(sym))381addSym(d);382}383384return sectionPriorities;385}386387388