Path: blob/main/contrib/llvm-project/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
/*===- CtxInstrProfiling.h- Contextual instrumentation-based PGO ---------===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/

#ifndef CTX_PROFILE_CTXINSTRPROFILING_H_
#define CTX_PROFILE_CTXINSTRPROFILING_H_

#include "CtxInstrContextNode.h"
#include "sanitizer_common/sanitizer_mutex.h"
#include <sanitizer/common_interface_defs.h>

using namespace llvm::ctx_profile;

// Forward-declare for the one unittest checking Arena construction zeroes out
// its allocatable space.
class ArenaTest_ZeroInit_Test;
namespace __ctx_profile {

static constexpr size_t ExpectedAlignment = 8;
// We really depend on this, see further below. We currently support x86_64.
// When we want to support other archs, we need to trace the places
// ExpectedAlignment is used and adjust accordingly.
static_assert(sizeof(void *) == ExpectedAlignment);

/// Arena (bump allocator) forming a linked list. Intentionally not thread
/// safe. Allocation and de-allocation happen using sanitizer APIs. We make
/// that explicit.
class Arena final {
public:
  // When allocating a new Arena, optionally specify an existing one to append
  // to, assumed to be the last in the Arena list. We only need to support
  // appending to the arena list.
  static Arena *allocateNewArena(size_t Size, Arena *Prev = nullptr);
  static void freeArenaList(Arena *&A);

  uint64_t size() const { return Size; }

  // Allocate S bytes or return nullptr if we don't have that many available.
  char *tryBumpAllocate(size_t S) {
    if (Pos + S > Size)
      return nullptr;
    Pos += S;
    return start() + (Pos - S);
  }

  Arena *next() const { return Next; }

  // The beginning of allocatable memory.
  const char *start() const { return const_cast<Arena *>(this)->start(); }
  const char *pos() const { return start() + Pos; }

private:
  friend class ::ArenaTest_ZeroInit_Test;
  explicit Arena(uint32_t Size);
  ~Arena() = delete;

  char *start() { return reinterpret_cast<char *>(&this[1]); }

  Arena *Next = nullptr;
  uint64_t Pos = 0;
  const uint64_t Size;
};
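// Illustrative usage sketch (not part of this header's API): bump-allocating
// from an arena list and chaining a new Arena when the current one runs out of
// space. `NeededSize`, `FirstArena` and `CurrentArena` are placeholder names
// used only for this example; sizes are assumed to be multiples of
// ExpectedAlignment.
//
//   Arena *FirstArena = Arena::allocateNewArena(/*Size=*/1 << 16);
//   Arena *CurrentArena = FirstArena;
//   char *Mem = CurrentArena->tryBumpAllocate(NeededSize);
//   if (!Mem) {
//     // The current Arena is full: append a fresh one to the list and retry.
//     CurrentArena = Arena::allocateNewArena(1 << 16, CurrentArena);
//     Mem = CurrentArena->tryBumpAllocate(NeededSize);
//   }
//   // ... use Mem ...
//   Arena::freeArenaList(FirstArena); // releases the whole list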
// The memory available for allocation follows the Arena header, and we expect
// it to be thus aligned.
static_assert(alignof(Arena) == ExpectedAlignment);

// Verify maintenance to ContextNode doesn't change this invariant, which makes
// sure the inlined vectors are appropriately aligned.
static_assert(alignof(ContextNode) == ExpectedAlignment);

/// ContextRoots are allocated by LLVM for entrypoints. LLVM is only concerned
/// with allocating and zero-initializing the global value (as in, GlobalValue)
/// for it.
struct ContextRoot {
  ContextNode *FirstNode = nullptr;
  Arena *FirstMemBlock = nullptr;
  Arena *CurrentMem = nullptr;
  // This is init-ed by the static zero initializer in LLVM.
  // Taken is used to ensure only one thread traverses the contextual graph -
  // either to read it or to write it. On the server side, the same entrypoint
  // will be entered by numerous threads, but over time, the profile aggregated
  // by collecting sequentially on one thread at a time is expected to converge
  // to the aggregate profile that may have been observable on all the threads.
  // Note that this is node-by-node aggregation, i.e. summing counters of nodes
  // at the same position in the graph, not flattening.
  // Threads that cannot lock Taken (fail TryLock) are given a "scratch
  // context" - a buffer they can clobber, safely from a memory access
  // perspective.
  //
  // Note about "scratch"-ness: we currently ignore the data written in them
  // (which is anyway clobbered). The design allows for that not to be the case
  // - because "scratch"-ness is first and foremost about not trying to build
  // subcontexts, and is captured by tainting the pointer value (pointer to the
  // memory treated as context), but right now, we drop that info.
  //
  // We could consider relaxing the single-collector requirement when more than
  // one thread enters, by holding a few context trees per entrypoint and then
  // aggregating them (as explained above) at the end of the profile collection
  // - it's a tradeoff between collection time and memory use: higher precision
  // can be obtained with either fewer concurrent collections but more
  // collection time, or with more concurrent collections (==more memory) and
  // less collection time. Note that concurrent collection does happen for
  // different entrypoints, regardless.
  ::__sanitizer::StaticSpinMutex Taken;

  // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
  // instrumentation lowering side because it is responsible for allocating and
  // zero-initializing ContextRoots.
  static_assert(sizeof(Taken) == 1);
};

/// This API is exposed for testing. See the APIs below about the contract with
/// LLVM.
inline bool isScratch(const void *Ctx) {
  return (reinterpret_cast<uint64_t>(Ctx) & 1);
}

} // namespace __ctx_profile

extern "C" {

// LLVM fills these in when lowering a llvm.instrprof.callsite intrinsic.
// Position 0 is used when the current context isn't scratch, 1 when it is.
// They are volatile because of signal handlers - we mean to specifically
// control when the data is loaded.
//
/// TLS where LLVM stores the pointer of the called value, as part of lowering
/// a llvm.instrprof.callsite.
extern __thread void *volatile __llvm_ctx_profile_expected_callee[2];
/// TLS where LLVM stores the pointer inside a caller's subcontexts vector that
/// corresponds to the callsite being lowered.
extern __thread ContextNode **volatile __llvm_ctx_profile_callsite[2];

// __llvm_ctx_profile_current_context_root is exposed for unit testing,
// otherwise it's only used internally by compiler-rt/ctx_profile.
extern __thread __ctx_profile::ContextRoot
    *volatile __llvm_ctx_profile_current_context_root;

/// Called by LLVM in the entry BB of an "entry point" function. The returned
/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
ContextNode *__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
                                              GUID Guid, uint32_t Counters,
                                              uint32_t Callsites);

/// Paired with __llvm_ctx_profile_start_context, and called at the exit of the
/// entry point function.
void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);

/// Called for any function other than entry points, in the entry BB of such a
/// function. Same consideration about the LSB of the returned value as for
/// .._start_context.
ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
                                            uint32_t NrCounters,
                                            uint32_t NrCallsites);
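// Illustrative sketch (a hand-written approximation of what the instrumented
// code conceptually does, not the actual LLVM lowering; MyRoot, MyGuid,
// OtherGuid, NumCounters and NumCallsites are placeholder names):
//
//   void someEntryPoint() {
//     ContextNode *Ctx = __llvm_ctx_profile_start_context(
//         &MyRoot, MyGuid, NumCounters, NumCallsites);
//     // ... instrumented body; Ctx may be scratch (LSB-tainted) ...
//     __llvm_ctx_profile_release_context(&MyRoot);
//   }
//
//   void someOtherFunction() {
//     // Non-entry-point functions ask for a context in their entry BB,
//     // passing their own address so the runtime can relate it to the
//     // caller's expected-callee TLS slot.
//     ContextNode *Ctx = __llvm_ctx_profile_get_context(
//         reinterpret_cast<void *>(&someOtherFunction), OtherGuid,
//         NumCounters, NumCallsites);
//     // ...
//   }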
/// Prepares for collection. Currently this resets counter values but preserves
/// internal context tree structure.
void __llvm_ctx_profile_start_collection();

/// Completely free allocated memory.
void __llvm_ctx_profile_free();

/// Used to obtain the profile. The Writer is called for each root ContextNode,
/// with the ContextRoot::Taken taken. The Writer is responsible for traversing
/// the structure underneath.
/// The Writer's first parameter plays the role of closure for Writer, and is
/// what the caller of __llvm_ctx_profile_fetch passes as the Data parameter.
/// The second parameter is the root of a context tree.
bool __llvm_ctx_profile_fetch(void *Data,
                              bool (*Writer)(void *, const ContextNode &));
}
#endif // CTX_PROFILE_CTXINSTRPROFILING_H_
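// Usage sketch (illustrative only; `MyState` and `countRoots` are hypothetical
// caller-side names): a minimal Writer callback for __llvm_ctx_profile_fetch.
// The runtime passes the Data pointer back as the Writer's first parameter.
//
//   struct MyState {
//     uint64_t RootsSeen = 0;
//   };
//
//   static bool countRoots(void *Data, const ContextNode &Root) {
//     auto *State = reinterpret_cast<MyState *>(Data);
//     ++State->RootsSeen;
//     // A real Writer would serialize Root and walk its subcontexts here.
//     return true;
//   }
//
//   MyState State;
//   bool Ok = __llvm_ctx_profile_fetch(&State, countRoots);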