Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/ProfileData/MemProfRadixTree.cpp
213765 views
1
//===- MemProfRadixTree.cpp - Radix tree encoded callstacks ---------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
// This file contains logic that implements a space efficient radix tree
9
// encoding for callstacks used by MemProf.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/ProfileData/MemProfRadixTree.h"
14
15
namespace llvm {
16
namespace memprof {
17
// Encode a call stack into RadixArray. Return the starting index within
18
// RadixArray. For each call stack we encode, we emit two or three components
19
// into RadixArray. If a given call stack doesn't have a common prefix relative
20
// to the previous one, we emit:
21
//
22
// - the frames in the given call stack in the root-to-leaf order
23
//
24
// - the length of the given call stack
25
//
26
// If a given call stack has a non-empty common prefix relative to the previous
27
// one, we emit:
28
//
29
// - the relative location of the common prefix, encoded as a negative number.
30
//
31
// - a portion of the given call stack that's beyond the common prefix
32
//
33
// - the length of the given call stack, including the length of the common
34
// prefix.
35
//
36
// The resulting RadixArray requires a somewhat unintuitive backward traversal
37
// to reconstruct a call stack -- read the call stack length and scan backward
38
// while collecting frames in the leaf to root order. build, the caller of this
39
// function, reverses RadixArray in place so that we can reconstruct a call
40
// stack as if we were deserializing an array in a typical way -- the call stack
41
// length followed by the frames in the leaf-to-root order except that we need
42
// to handle pointers to parents along the way.
43
//
44
// To quickly determine the location of the common prefix within RadixArray,
45
// Indexes caches the indexes of the previous call stack's frames within
46
// RadixArray.
47
template <typename FrameIdTy>
48
LinearCallStackId CallStackRadixTreeBuilder<FrameIdTy>::encodeCallStack(
49
const llvm::SmallVector<FrameIdTy> *CallStack,
50
const llvm::SmallVector<FrameIdTy> *Prev,
51
const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes) {
52
// Compute the length of the common root prefix between Prev and CallStack.
53
uint32_t CommonLen = 0;
54
if (Prev) {
55
auto Pos = std::mismatch(Prev->rbegin(), Prev->rend(), CallStack->rbegin(),
56
CallStack->rend());
57
CommonLen = std::distance(CallStack->rbegin(), Pos.second);
58
}
59
60
// Drop the portion beyond CommonLen.
61
assert(CommonLen <= Indexes.size());
62
Indexes.resize(CommonLen);
63
64
// Append a pointer to the parent.
65
if (CommonLen) {
66
uint32_t CurrentIndex = RadixArray.size();
67
uint32_t ParentIndex = Indexes.back();
68
// The offset to the parent must be negative because we are pointing to an
69
// element we've already added to RadixArray.
70
assert(ParentIndex < CurrentIndex);
71
RadixArray.push_back(ParentIndex - CurrentIndex);
72
}
73
74
// Copy the part of the call stack beyond the common prefix to RadixArray.
75
assert(CommonLen <= CallStack->size());
76
for (FrameIdTy F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
77
// Remember the index of F in RadixArray.
78
Indexes.push_back(RadixArray.size());
79
RadixArray.push_back(
80
MemProfFrameIndexes ? MemProfFrameIndexes->find(F)->second : F);
81
}
82
assert(CallStack->size() == Indexes.size());
83
84
// End with the call stack length.
85
RadixArray.push_back(CallStack->size());
86
87
// Return the index within RadixArray where we can start reconstructing a
88
// given call stack from.
89
return RadixArray.size() - 1;
90
}
91
92
template <typename FrameIdTy>
93
void CallStackRadixTreeBuilder<FrameIdTy>::build(
94
llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
95
&&MemProfCallStackData,
96
const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
97
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram) {
98
// Take the vector portion of MemProfCallStackData. The vector is exactly
99
// what we need to sort. Also, we no longer need its lookup capability.
100
llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();
101
102
// Return early if we have no work to do.
103
if (CallStacks.empty()) {
104
RadixArray.clear();
105
CallStackPos.clear();
106
return;
107
}
108
109
// Sorting the list of call stacks in the dictionary order is sufficient to
110
// maximize the length of the common prefix between two adjacent call stacks
111
// and thus minimize the length of RadixArray. However, we go one step
112
// further and try to reduce the number of times we follow pointers to parents
113
// during deserilization. Consider a poorly encoded radix tree:
114
//
115
// CallStackId 1: f1 -> f2 -> f3
116
// |
117
// CallStackId 2: +--- f4 -> f5
118
// |
119
// CallStackId 3: +--> f6
120
//
121
// Here, f2 and f4 appear once and twice, respectively, in the call stacks.
122
// Once we encode CallStackId 1 into RadixArray, every other call stack with
123
// common prefix f1 ends up pointing to CallStackId 1. Since CallStackId 3
124
// share "f1 f4" with CallStackId 2, CallStackId 3 needs to follow pointers to
125
// parents twice.
126
//
127
// We try to alleviate the situation by sorting the list of call stacks by
128
// comparing the popularity of frames rather than the integer values of
129
// FrameIds. In the example above, f4 is more popular than f2, so we sort the
130
// call stacks and encode them as:
131
//
132
// CallStackId 2: f1 -- f4 -> f5
133
// | |
134
// CallStackId 3: | +--> f6
135
// |
136
// CallStackId 1: +--> f2 -> f3
137
//
138
// Notice that CallStackId 3 follows a pointer to a parent only once.
139
//
140
// All this is a quick-n-dirty trick to reduce the number of jumps. The
141
// proper way would be to compute the weight of each radix tree node -- how
142
// many call stacks use a given radix tree node, and encode a radix tree from
143
// the heaviest node first. We do not do so because that's a lot of work.
144
llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) {
145
// Call stacks are stored from leaf to root. Perform comparisons from the
146
// root.
147
return std::lexicographical_compare(
148
L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
149
[&](FrameIdTy F1, FrameIdTy F2) {
150
uint64_t H1 = FrameHistogram[F1].Count;
151
uint64_t H2 = FrameHistogram[F2].Count;
152
// Popular frames should come later because we encode call stacks from
153
// the last one in the list.
154
if (H1 != H2)
155
return H1 < H2;
156
// For sort stability.
157
return F1 < F2;
158
});
159
});
160
161
// Reserve some reasonable amount of storage.
162
RadixArray.clear();
163
RadixArray.reserve(CallStacks.size() * 8);
164
165
// Indexes will grow as long as the longest call stack.
166
Indexes.clear();
167
Indexes.reserve(512);
168
169
// CallStackPos will grow to exactly CallStacks.size() entries.
170
CallStackPos.clear();
171
CallStackPos.reserve(CallStacks.size());
172
173
// Compute the radix array. We encode one call stack at a time, computing the
174
// longest prefix that's shared with the previous call stack we encode. For
175
// each call stack we encode, we remember a mapping from CallStackId to its
176
// position within RadixArray.
177
//
178
// As an optimization, we encode from the last call stack in CallStacks to
179
// reduce the number of times we follow pointers to the parents. Consider the
180
// list of call stacks that has been sorted in the dictionary order:
181
//
182
// Call Stack 1: F1
183
// Call Stack 2: F1 -> F2
184
// Call Stack 3: F1 -> F2 -> F3
185
//
186
// If we traversed CallStacks in the forward order, we would end up with a
187
// radix tree like:
188
//
189
// Call Stack 1: F1
190
// |
191
// Call Stack 2: +---> F2
192
// |
193
// Call Stack 3: +---> F3
194
//
195
// Notice that each call stack jumps to the previous one. However, if we
196
// traverse CallStacks in the reverse order, then Call Stack 3 has the
197
// complete call stack encoded without any pointers. Call Stack 1 and 2 point
198
// to appropriate prefixes of Call Stack 3.
199
const llvm::SmallVector<FrameIdTy> *Prev = nullptr;
200
for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
201
LinearCallStackId Pos =
202
encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
203
CallStackPos.insert({CSId, Pos});
204
Prev = &CallStack;
205
}
206
207
// "RadixArray.size() - 1" below is problematic if RadixArray is empty.
208
assert(!RadixArray.empty());
209
210
// Reverse the radix array in place. We do so mostly for intuitive
211
// deserialization where we would read the length field and then the call
212
// stack frames proper just like any other array deserialization, except
213
// that we have occasional jumps to take advantage of prefixes.
214
for (size_t I = 0, J = RadixArray.size() - 1; I < J; ++I, --J)
215
std::swap(RadixArray[I], RadixArray[J]);
216
217
// "Reverse" the indexes stored in CallStackPos.
218
for (auto &[K, V] : CallStackPos)
219
V = RadixArray.size() - 1 - V;
220
}
221
222
// Explicitly instantiate class with the utilized FrameIdTy.
223
template class LLVM_EXPORT_TEMPLATE CallStackRadixTreeBuilder<FrameId>;
224
template class LLVM_EXPORT_TEMPLATE CallStackRadixTreeBuilder<LinearFrameId>;
225
226
template <typename FrameIdTy>
227
llvm::DenseMap<FrameIdTy, FrameStat>
228
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
229
&MemProfCallStackData) {
230
llvm::DenseMap<FrameIdTy, FrameStat> Histogram;
231
232
for (const auto &KV : MemProfCallStackData) {
233
const auto &CS = KV.second;
234
for (unsigned I = 0, E = CS.size(); I != E; ++I) {
235
auto &S = Histogram[CS[I]];
236
++S.Count;
237
S.PositionSum += I;
238
}
239
}
240
return Histogram;
241
}
242
243
// Explicitly instantiate function with the utilized FrameIdTy.
244
template LLVM_ABI llvm::DenseMap<FrameId, FrameStat>
245
computeFrameHistogram<FrameId>(
246
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
247
&MemProfCallStackData);
248
template LLVM_ABI llvm::DenseMap<LinearFrameId, FrameStat>
249
computeFrameHistogram<LinearFrameId>(
250
llvm::MapVector<CallStackId, llvm::SmallVector<LinearFrameId>>
251
&MemProfCallStackData);
252
} // namespace memprof
253
} // namespace llvm
254
255