//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the
/// hash every time we need to update a function's call information while
/// handling the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact
/// and quick to update) to enable us to keep the overheads of this
/// implementation to the minimum.
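///
/// A minimal usage sketch (illustrative only; the function IDs, TSC and CPU
/// values below are arbitrary and would normally come from the XRay runtime):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(A);
///   Trie.enterFunction(/*FId=*/1, /*TSC=*/100, /*CPU=*/0);
///   Trie.enterFunction(/*FId=*/2, /*TSC=*/120, /*CPU=*/0);
///   Trie.exitFunction(/*FId=*/2, /*TSC=*/160, /*CPU=*/0);
///   Trie.exitFunction(/*FId=*/1, /*TSC=*/200, /*CPU=*/0);
///   for (const auto Root : Trie.getRoots()) {
///     // Root->FId == 1, Root->CallCount == 1, and Root->Callees holds the
///     // node for function 2.
///   }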
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of
  // time for this particular node including its children call times, and just
  // the local time spent on this node. Each Node will have the ID of the XRay
  // instrumented function that it is associated to.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    alignas(NodeAllocatorType) std::byte
        NodeAllocatorStorage[sizeof(NodeAllocatorType)];
    alignas(RootAllocatorType) std::byte
        RootAllocatorStorage[sizeof(RootAllocatorType)];
    alignas(ShadowStackAllocatorType) std::byte
        ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
    alignas(NodeIdPairAllocatorType) std::byte
        NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };
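
    // The two explicit constructors below build the allocators in place: each
    // placement-new constructs an allocator object in its aligned storage
    // member (from either a provided buffer or a maximum element count), and
    // the corresponding raw pointer above is then pointed at that storage.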
    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }
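
  // An illustrative sketch of buffer-backed initialisation (the buffers are
  // assumed to have been obtained elsewhere, e.g. from a BufferQueue):
  //
  //   FunctionCallTrie::Allocators::Buffers Bufs;
  //   Bufs.NodeBuffer = ...;        // each member is a BufferQueue::Buffer
  //   Bufs.RootsBuffer = ...;
  //   Bufs.ShadowStackBuffer = ...;
  //   Bufs.NodeIdPairBuffer = ...;
  //   auto A = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);
  //   FunctionCallTrie T(A);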

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, then we access that node and place
    // that on the top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before, create a new node here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed" or don't fit into the
    // stack due to allocator constraints, we then decrement that count first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get the accurate measurement of cycles, we need to check whether
      // we've overflowed (TSC < Top.EntryTSC) and then account the difference
      // between the entry TSC and the max for the TSC counter (max of
      // uint64_t) then add the value of TSC. We can prove that the maximum
      // delta we will get is at most the 64-bit unsigned value, since the
      // difference between a TSC of 0 and a Top.EntryTSC of 1 is
      // (numeric_limits<uint64_t>::max() - 1) + 1.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
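      //
      // For example (illustrative numbers only): with Top.EntryTSC ==
      // max - 10 and a post-wraparound TSC == 5, the expression below
      // computes (max - (max - 10)) + 5 == 15 elapsed ticks.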
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It will do this using a Depth First Traversal from the
  // roots, and while doing so recreating the traversal in the provided
  // FunctionCallTrie.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // some duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
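  //
  // An illustrative sketch (`Source` stands for any existing trie):
  //
  //   auto A2 = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Copy(A2);
  //   Source.deepCopyInto(Copy); // Copy now mirrors Source's nodes.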
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We then push the root into a stack, to use as the parent marker for new
    // nodes we push in as we're traversing depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // Because we cannot allocate more memory we should bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data
  // in the nodes with the data in the target's nodes. If the node doesn't
  // exist in the provided trie, we add a new one in the right position, and
  // inherit the data from the original (current) trie, along with all its
  // callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
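  //
  // An illustrative sketch (`ThreadTrie` and `GlobalTrie` stand for any two
  // tries, e.g. a per-thread trie being folded into an aggregate):
  //
  //   ThreadTrie.mergeInto(GlobalTrie);
  //   // Matching paths have their CallCount and CumulativeLocalTime summed;
  //   // paths missing from GlobalTrie are created first.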
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H