GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h
//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <cstddef> // For std::byte.
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach because computing the hash every time we
/// need to update a function's call information, while handling the entry and
/// exit events, takes too much time.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to the minimum.
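///
/// A minimal usage sketch (illustrative only; the FId, TSC, and CPU values
/// below are made up, and real callers obtain them from the XRay runtime):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie T(A);
///   T.enterFunction(/*FId=*/1, /*TSC=*/1000, /*CPU=*/0); // enter f
///   T.enterFunction(/*FId=*/2, /*TSC=*/1100, /*CPU=*/0); // f calls g
///   T.exitFunction(/*FId=*/2, /*TSC=*/1400, /*CPU=*/0);  // g returns
///   T.exitFunction(/*FId=*/1, /*TSC=*/1500, /*CPU=*/0);  // f returns
///
/// After this sequence the trie has a root node for f with a single callee
/// node for g, each with a call count of 1 and a TSC delta accumulated from
/// its matching entry and exit events.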
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, and the cumulative time
  // recorded for this particular node. Each Node also carries the ID of the
  // XRay instrumented function that it is associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    alignas(NodeAllocatorType) std::byte
        NodeAllocatorStorage[sizeof(NodeAllocatorType)];
    alignas(RootAllocatorType) std::byte
        RootAllocatorStorage[sizeof(RootAllocatorType)];
    alignas(ShadowStackAllocatorType) std::byte
        ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
    alignas(NodeIdPairAllocatorType) std::byte
        NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }
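
  // Illustrative sketch of the buffer-backed initialization path (the buffer
  // fields are assumed to have been populated from a BufferQueue elsewhere in
  // the profiling runtime):
  //
  //   FunctionCallTrie::Allocators::Buffers Bufs;
  //   ... // fill Bufs.NodeBuffer, Bufs.RootsBuffer, etc.
  //   auto A = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);
  //   FunctionCallTrie Trie(A);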

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, we access that node and place it on
    // top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // We've never seen this stack before, so create a new node here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting a function that "overflowed", i.e. one that did not fit
    // into the shadow stack due to allocator constraints, decrement that count
    // first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get the accurate measurement of cycles, we need to check whether
      // we've overflowed (TSC < Top.EntryTSC) and then account the difference
      // between the entry TSC and the max for the TSC counter (max of uint64_t)
      // then add the value of TSC. We can prove that the maximum delta we will
      // get is at most the 64-bit unsigned value, since the difference between
      // a TSC of 0 and a Top.EntryTSC of 1 is (numeric_limits<uint64_t>::max()
      // - 1) + 1.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
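      //
      // As a purely illustrative example of the wrapped branch below (the
      // numbers are made up): if Top.EntryTSC == max() - 10 and TSC == 5,
      // LocalTime comes out to (max() - (max() - 10)) + 5 == 15 ticks.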
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It does this with a depth-first traversal from the
  // roots, recreating each visited node in the provided FunctionCallTrie as it
  // goes.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // some duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
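  //
  // An illustrative usage sketch (the destination trie and its allocators here
  // are hypothetical; the profiling runtime wires these up itself):
  //
  //   auto DestAllocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Dest(DestAllocators);
  //   Trie.deepCopyInto(Dest); // Dest must still be empty at this point.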
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We push each root into a stack, to use as the parent marker for new
    // nodes we create as we traverse depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // If we cannot allocate more memory, we should bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and merging (i.e. accumulating) the
  // data in its nodes into the corresponding nodes of the target trie. If a
  // node doesn't exist in the provided trie, we add a new one in the right
  // position, and inherit the data from the original (current) trie, along
  // with all its callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
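  //
  // An illustrative usage sketch (the per-thread and aggregate tries here are
  // hypothetical; the profiling runtime maintains its own during collection):
  //
  //   auto AggAllocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Aggregate(AggAllocators);
  //   ThreadTrie.mergeInto(Aggregate); // accumulate per-thread data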
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H