Path: blob/main/contrib/llvm-project/lld/COFF/CallGraphSort.cpp
34879 views
//===- CallGraphSort.cpp --------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details9/// about the algorithm.10///11//===----------------------------------------------------------------------===//1213#include "CallGraphSort.h"14#include "COFFLinkerContext.h"15#include "InputFiles.h"16#include "SymbolTable.h"17#include "Symbols.h"18#include "lld/Common/ErrorHandler.h"1920#include <numeric>2122using namespace llvm;23using namespace lld;24using namespace lld::coff;2526namespace {27struct Edge {28int from;29uint64_t weight;30};3132struct Cluster {33Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}3435double getDensity() const {36if (size == 0)37return 0;38return double(weight) / double(size);39}4041int next;42int prev;43uint64_t size;44uint64_t weight = 0;45uint64_t initialWeight = 0;46Edge bestPred = {-1, 0};47};4849class CallGraphSort {50public:51CallGraphSort(const COFFLinkerContext &ctx);5253DenseMap<const SectionChunk *, int> run();5455private:56std::vector<Cluster> clusters;57std::vector<const SectionChunk *> sections;5859const COFFLinkerContext &ctx;60};6162// Maximum amount the combined cluster density can be worse than the original63// cluster to consider merging.64constexpr int MAX_DENSITY_DEGRADATION = 8;6566// Maximum cluster size in bytes.67constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;68} // end anonymous namespace6970using SectionPair = std::pair<const SectionChunk *, const SectionChunk *>;7172// Take the edge list in Config->CallGraphProfile, resolve symbol names to73// Symbols, and generate a graph between InputSections with the provided74// weights.75CallGraphSort::CallGraphSort(const COFFLinkerContext &ctx) : ctx(ctx) {76const MapVector<SectionPair, uint64_t> &profile = ctx.config.callGraphProfile;77DenseMap<const SectionChunk *, int> secToCluster;7879auto getOrCreateNode = [&](const SectionChunk *isec) -> int {80auto res = secToCluster.try_emplace(isec, clusters.size());81if (res.second) {82sections.push_back(isec);83clusters.emplace_back(clusters.size(), isec->getSize());84}85return res.first->second;86};8788// Create the graph.89for (const std::pair<SectionPair, uint64_t> &c : profile) {90const auto *fromSec = cast<SectionChunk>(c.first.first->repl);91const auto *toSec = cast<SectionChunk>(c.first.second->repl);92uint64_t weight = c.second;9394// Ignore edges between input sections belonging to different output95// sections. This is done because otherwise we would end up with clusters96// containing input sections that can't actually be placed adjacently in the97// output. This messes with the cluster size and density calculations. We98// would also end up moving input sections in other output sections without99// moving them closer to what calls them.100if (ctx.getOutputSection(fromSec) != ctx.getOutputSection(toSec))101continue;102103int from = getOrCreateNode(fromSec);104int to = getOrCreateNode(toSec);105106clusters[to].weight += weight;107108if (from == to)109continue;110111// Remember the best edge.112Cluster &toC = clusters[to];113if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {114toC.bestPred.from = from;115toC.bestPred.weight = weight;116}117}118for (Cluster &c : clusters)119c.initialWeight = c.weight;120}121122// It's bad to merge clusters which would degrade the density too much.123static bool isNewDensityBad(Cluster &a, Cluster &b) {124double newDensity = double(a.weight + b.weight) / double(a.size + b.size);125return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;126}127128// Find the leader of V's belonged cluster (represented as an equivalence129// class). We apply union-find path-halving technique (simple to implement) in130// the meantime as it decreases depths and the time complexity.131static int getLeader(std::vector<int> &leaders, int v) {132while (leaders[v] != v) {133leaders[v] = leaders[leaders[v]];134v = leaders[v];135}136return v;137}138139static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,140Cluster &from, int fromIdx) {141int tail1 = into.prev, tail2 = from.prev;142into.prev = tail2;143cs[tail2].next = intoIdx;144from.prev = tail1;145cs[tail1].next = fromIdx;146into.size += from.size;147into.weight += from.weight;148from.size = 0;149from.weight = 0;150}151152// Group InputSections into clusters using the Call-Chain Clustering heuristic153// then sort the clusters by density.154DenseMap<const SectionChunk *, int> CallGraphSort::run() {155std::vector<int> sorted(clusters.size());156std::vector<int> leaders(clusters.size());157158std::iota(leaders.begin(), leaders.end(), 0);159std::iota(sorted.begin(), sorted.end(), 0);160llvm::stable_sort(sorted, [&](int a, int b) {161return clusters[a].getDensity() > clusters[b].getDensity();162});163164for (int l : sorted) {165// The cluster index is the same as the index of its leader here because166// clusters[L] has not been merged into another cluster yet.167Cluster &c = clusters[l];168169// Don't consider merging if the edge is unlikely.170if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)171continue;172173int predL = getLeader(leaders, c.bestPred.from);174if (l == predL)175continue;176177Cluster *predC = &clusters[predL];178if (c.size + predC->size > MAX_CLUSTER_SIZE)179continue;180181if (isNewDensityBad(*predC, c))182continue;183184leaders[l] = predL;185mergeClusters(clusters, *predC, predL, c, l);186}187188// Sort remaining non-empty clusters by density.189sorted.clear();190for (int i = 0, e = (int)clusters.size(); i != e; ++i)191if (clusters[i].size > 0)192sorted.push_back(i);193llvm::stable_sort(sorted, [&](int a, int b) {194return clusters[a].getDensity() > clusters[b].getDensity();195});196197DenseMap<const SectionChunk *, int> orderMap;198// Sections will be sorted by increasing order. Absent sections will have199// priority 0 and be placed at the end of sections.200int curOrder = INT_MIN;201for (int leader : sorted) {202for (int i = leader;;) {203orderMap[sections[i]] = curOrder++;204i = clusters[i].next;205if (i == leader)206break;207}208}209if (!ctx.config.printSymbolOrder.empty()) {210std::error_code ec;211raw_fd_ostream os(ctx.config.printSymbolOrder, ec, sys::fs::OF_None);212if (ec) {213error("cannot open " + ctx.config.printSymbolOrder + ": " + ec.message());214return orderMap;215}216// Print the symbols ordered by C3, in the order of increasing curOrder217// Instead of sorting all the orderMap, just repeat the loops above.218for (int leader : sorted)219for (int i = leader;;) {220const SectionChunk *sc = sections[i];221222// Search all the symbols in the file of the section223// and find out a DefinedCOFF symbol with name that is within the224// section.225for (Symbol *sym : sc->file->getSymbols())226if (auto *d = dyn_cast_or_null<DefinedCOFF>(sym))227// Filter out non-COMDAT symbols and section symbols.228if (d->isCOMDAT && !d->getCOFFSymbol().isSection() &&229sc == d->getChunk())230os << sym->getName() << "\n";231i = clusters[i].next;232if (i == leader)233break;234}235}236237return orderMap;238}239240// Sort sections by the profile data provided by /call-graph-ordering-file241//242// This first builds a call graph based on the profile data then merges sections243// according to the C³ heuristic. All clusters are then sorted by a density244// metric to further improve locality.245DenseMap<const SectionChunk *, int>246coff::computeCallGraphProfileOrder(const COFFLinkerContext &ctx) {247return CallGraphSort(ctx).run();248}249250251