GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp
//===-- xray_profile_collector.cpp -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the interface for the profileCollectorService.
//
//===----------------------------------------------------------------------===//
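
// Rough call flow (a sketch inferred from the functions below, not an
// exhaustive contract): the profiling mode hands each thread's
// FunctionCallTrie to post() when that thread's data is retired; a flush calls
// serialize() to build one block per thread, then walks the result with
// nextBuffer(), starting from an empty XRayBuffer; reset() tears down and
// re-initializes the collector's global state.
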
#include "xray_profile_collector.h"
#include "sanitizer_common/sanitizer_common.h"
#include "xray_allocator.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <memory>
#include <pthread.h>
#include <utility>

namespace __xray {
namespace profileCollectorService {

namespace {

SpinMutex GlobalMutex;
struct ThreadTrie {
  tid_t TId;
  alignas(FunctionCallTrie) std::byte TrieStorage[sizeof(FunctionCallTrie)];
};

struct ProfileBuffer {
  void *Data;
  size_t Size;
};

// Current version of the profile format.
constexpr u64 XRayProfilingVersion = 0x20180424;

// Identifier for XRay profiling files 'xrayprof' in hex.
constexpr u64 XRayMagicBytes = 0x7872617970726f66;

struct XRayProfilingFileHeader {
  const u64 MagicBytes = XRayMagicBytes;
  const u64 Version = XRayProfilingVersion;
  u64 Timestamp = 0; // System time in nanoseconds.
  u64 PID = 0;       // Process ID.
};

struct BlockHeader {
  u32 BlockSize;
  u32 BlockNum;
  u64 ThreadId;
};
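
// Each serialized block begins with one of these headers. BlockSize counts
// every byte in the block, including the header itself, and nextBuffer()
// below reads BlockNum back out of a block to locate the block that follows.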

struct ThreadData {
  BufferQueue *BQ;
  FunctionCallTrie::Allocators::Buffers Buffers;
  FunctionCallTrie::Allocators Allocators;
  FunctionCallTrie FCT;
  tid_t TId;
};
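
// One ThreadData instance holds everything post() moves in for a single
// thread: the queue its buffers came from, the buffers and allocators backing
// the trie, the trie itself, and the originating thread id.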

using ThreadDataArray = Array<ThreadData>;
using ThreadDataAllocator = ThreadDataArray::AllocatorType;

// We use a separate buffer queue for the backing store for the allocator used
// by the ThreadData array. This lets us host the buffers, allocators, and tries
// associated with a thread by moving the data into the array instead of
// attempting to copy the data to a separately backed set of tries.
alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)];
static BufferQueue *BQ = nullptr;
static BufferQueue::Buffer Buffer;
alignas(ThreadDataAllocator) static std::byte
    ThreadDataAllocatorStorage[sizeof(ThreadDataAllocator)];
alignas(ThreadDataArray) static std::byte
    ThreadDataArrayStorage[sizeof(ThreadDataArray)];

static ThreadDataAllocator *TDAllocator = nullptr;
static ThreadDataArray *TDArray = nullptr;

using ProfileBufferArray = Array<ProfileBuffer>;
using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType;

// These need to be global aligned storage to avoid dynamic initialization. We
// need these to be aligned to allow us to placement new objects into the
// storage, and have pointers to those objects be appropriately aligned.
alignas(ProfileBufferArray) static std::byte
    ProfileBuffersStorage[sizeof(ProfileBufferArray)];
alignas(ProfileBufferArrayAllocator) static std::byte
    ProfileBufferArrayAllocatorStorage[sizeof(ProfileBufferArrayAllocator)];

static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr;
static ProfileBufferArray *ProfileBuffers = nullptr;

// Use a global flag to determine whether the collector implementation has been
// initialized.
static atomic_uint8_t CollectorInitialized{0};

} // namespace

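// Takes ownership of a thread's FunctionCallTrie along with its allocators and
// backing buffers, and stashes it in the global TDArray under GlobalMutex. If
// the collector has not been initialized, or if appending to the array fails,
// the objects are destroyed and their buffers returned to the queue instead.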
void post(BufferQueue *Q, FunctionCallTrie &&T,
          FunctionCallTrie::Allocators &&A,
          FunctionCallTrie::Allocators::Buffers &&B,
          tid_t TId) XRAY_NEVER_INSTRUMENT {
  DCHECK_NE(Q, nullptr);

  // Bail out early if the collector has not been initialized.
  if (!atomic_load(&CollectorInitialized, memory_order_acquire)) {
    T.~FunctionCallTrie();
    A.~Allocators();
    Q->releaseBuffer(B.NodeBuffer);
    Q->releaseBuffer(B.RootsBuffer);
    Q->releaseBuffer(B.ShadowStackBuffer);
    Q->releaseBuffer(B.NodeIdPairBuffer);
    B.~Buffers();
    return;
  }

  {
    SpinMutexLock Lock(&GlobalMutex);
    DCHECK_NE(TDAllocator, nullptr);
    DCHECK_NE(TDArray, nullptr);

    if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T),
                               TId) == nullptr) {
      // If we fail to add the data to the array, we should destroy the objects
      // handed us.
      T.~FunctionCallTrie();
      A.~Allocators();
      Q->releaseBuffer(B.NodeBuffer);
      Q->releaseBuffer(B.RootsBuffer);
      Q->releaseBuffer(B.ShadowStackBuffer);
      Q->releaseBuffer(B.NodeIdPairBuffer);
      B.~Buffers();
    }
  }
}

// A PathArray represents the function id's representing a stack trace. In this
// context a path is almost always represented from the leaf function in a call
// stack to a root of the call trie.
using PathArray = Array<int32_t>;

struct ProfileRecord {
  using PathAllocator = typename PathArray::AllocatorType;

  // The Path in this record is the function id's from the leaf to the root of
  // the function call stack as represented from a FunctionCallTrie.
  PathArray Path;
  const FunctionCallTrie::Node *Node;
};

namespace {

using ProfileRecordArray = Array<ProfileRecord>;

// Walk a depth-first traversal of each root of the FunctionCallTrie to generate
// the path(s) and the data associated with the path.
static void
populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
                const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT {
  using StackArray = Array<const FunctionCallTrie::Node *>;
  using StackAllocator = typename StackArray::AllocatorType;
  StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
  StackArray DFSStack(StackAlloc);
  for (const auto *R : Trie.getRoots()) {
    DFSStack.Append(R);
    while (!DFSStack.empty()) {
      auto *Node = DFSStack.back();
      DFSStack.trim(1);
      if (Node == nullptr)
        continue;
      auto Record = PRs.AppendEmplace(PathArray{PA}, Node);
      if (Record == nullptr)
        return;
      DCHECK_NE(Record, nullptr);

      // Traverse the Node's parents and as we're doing so, get the FIds in
      // the order they appear.
      for (auto N = Node; N != nullptr; N = N->Parent)
        Record->Path.Append(N->FId);
      DCHECK(!Record->Path.empty());

      for (const auto C : Node->Callees)
        DFSStack.Append(C.NodePtr);
    }
  }
}

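// Writes one block into Buffer->Data: the BlockHeader first, then for each
// record the path of function ids from leaf to root, a zero sentinel id, the
// node's CallCount, and its CumulativeLocalTime (both 64-bit values).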
static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
                             const ProfileRecordArray &ProfileRecords)
    XRAY_NEVER_INSTRUMENT {
  auto NextPtr = static_cast<uint8_t *>(
                     internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
                 sizeof(Header);
  for (const auto &Record : ProfileRecords) {
    // List of IDs follow:
    for (const auto FId : Record.Path)
      NextPtr =
          static_cast<uint8_t *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) +
          sizeof(FId);

    // Add the sentinel here.
    constexpr int32_t SentinelFId = 0;
    NextPtr = static_cast<uint8_t *>(
                  internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) +
              sizeof(SentinelFId);

    // Add the node data here.
    NextPtr =
        static_cast<uint8_t *>(internal_memcpy(
            NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) +
        sizeof(Record.Node->CallCount);
    NextPtr = static_cast<uint8_t *>(
                  internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime,
                                  sizeof(Record.Node->CumulativeLocalTime))) +
              sizeof(Record.Node->CumulativeLocalTime);
  }

  DCHECK_EQ(NextPtr - static_cast<uint8_t *>(Buffer->Data), Buffer->Size);
}

} // namespace

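// Rebuilds the global ProfileBuffers from the tries gathered in TDArray: any
// previously serialized blocks are freed, then each thread's trie is flattened
// into ProfileRecords and serialized into a freshly allocated block of exactly
// the size computed from the records.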
void serialize() XRAY_NEVER_INSTRUMENT {
  if (!atomic_load(&CollectorInitialized, memory_order_acquire))
    return;

  SpinMutexLock Lock(&GlobalMutex);

  // Clear out the global ProfileBuffers, if it's not empty.
  for (auto &B : *ProfileBuffers)
    deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size);
  ProfileBuffers->trim(ProfileBuffers->size());

  DCHECK_NE(TDArray, nullptr);
  if (TDArray->empty())
    return;

  // Then repopulate the global ProfileBuffers.
  u32 I = 0;
  auto MaxSize = profilingFlags()->global_allocator_max;
  auto ProfileArena = allocateBuffer(MaxSize);
  if (ProfileArena == nullptr)
    return;

  auto ProfileArenaCleanup = at_scope_exit(
      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); });

  auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max);
  if (PathArena == nullptr)
    return;

  auto PathArenaCleanup = at_scope_exit(
      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); });

  for (const auto &ThreadTrie : *TDArray) {
    using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
    ProfileRecordAllocator PRAlloc(ProfileArena,
                                   profilingFlags()->global_allocator_max);
    ProfileRecord::PathAllocator PathAlloc(
        PathArena, profilingFlags()->global_allocator_max);
    ProfileRecordArray ProfileRecords(PRAlloc);

    // First, we want to compute the amount of space we're going to need. We'll
    // use a local allocator and an __xray::Array<...> to store the intermediary
    // data, then compute the size as we're going along. Then we'll allocate the
    // contiguous space to contain the thread buffer data.
    if (ThreadTrie.FCT.getRoots().empty())
      continue;

    populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT);
    DCHECK(!ThreadTrie.FCT.getRoots().empty());
    DCHECK(!ProfileRecords.empty());

    // Go through each record, to compute the sizes.
    //
    // header size = block size (4 bytes)
    //               + block number (4 bytes)
    //               + thread id (8 bytes)
    // record size = path ids (4 bytes * number of ids + sentinel 4 bytes)
    //               + call count (8 bytes)
    //               + local time (8 bytes)
    //               + end of record (8 bytes)
    u32 CumulativeSizes = 0;
    for (const auto &Record : ProfileRecords)
      CumulativeSizes += 20 + (4 * Record.Path.size());

    BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId};
    auto B = ProfileBuffers->Append({});
    B->Size = sizeof(Header) + CumulativeSizes;
    B->Data = allocateBuffer(B->Size);
    DCHECK_NE(B->Data, nullptr);
    serializeRecords(B, Header, ProfileRecords);
  }
}

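// Tears down and re-initializes the collector's global state: serialized
// blocks and per-thread buffers are released, the buffer queue is (re)created,
// and fresh allocator and array objects are placement-new'ed into their static
// aligned storage before the initialized flag is set again.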
void reset() XRAY_NEVER_INSTRUMENT {
  atomic_store(&CollectorInitialized, 0, memory_order_release);
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers != nullptr) {
    // Clear out the profile buffers that have been serialized.
    for (auto &B : *ProfileBuffers)
      deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
    ProfileBuffers->trim(ProfileBuffers->size());
    ProfileBuffers = nullptr;
  }

  if (TDArray != nullptr) {
    // Release the resources as required.
    for (auto &TD : *TDArray) {
      TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer);
    }
    // We don't bother destroying the array here because we've already
    // potentially freed the backing store for the array. Instead we're going to
    // reset the pointer to nullptr, and re-use the storage later instead
    // (placement-new'ing into the storage as-is).
    TDArray = nullptr;
  }

  if (TDAllocator != nullptr) {
    TDAllocator->~Allocator();
    TDAllocator = nullptr;
  }

  if (Buffer.Data != nullptr) {
    BQ->releaseBuffer(Buffer);
  }

  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->global_allocator_max, 1, Success);
    if (!Success)
      return;
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();

    if (BQ->init(profilingFlags()->global_allocator_max, 1) !=
        BufferQueue::ErrorCode::Ok)
      return;
  }

  if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok)
    return;

  new (&ProfileBufferArrayAllocatorStorage)
      ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max);
  ProfileBuffersAllocator = reinterpret_cast<ProfileBufferArrayAllocator *>(
      &ProfileBufferArrayAllocatorStorage);

  new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator);
  ProfileBuffers =
      reinterpret_cast<ProfileBufferArray *>(&ProfileBuffersStorage);

  new (&ThreadDataAllocatorStorage)
      ThreadDataAllocator(Buffer.Data, Buffer.Size);
  TDAllocator =
      reinterpret_cast<ThreadDataAllocator *>(&ThreadDataAllocatorStorage);
  new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator);
  TDArray = reinterpret_cast<ThreadDataArray *>(&ThreadDataArrayStorage);

  atomic_store(&CollectorInitialized, 1, memory_order_release);
}

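// Iterates over the serialized output. Given an empty XRayBuffer it returns
// the file header; given the file header it returns block 0; otherwise it
// reads the BlockHeader from the buffer it was handed and returns the block
// numbered BlockNum + 1, or {nullptr, 0} once past the last block.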
XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0)
    return {nullptr, 0};

  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  alignas(XRayProfilingFileHeader) static std::byte
      FileHeaderStorage[sizeof(XRayProfilingFileHeader)];
  pthread_once(
      &Once, +[]() XRAY_NEVER_INSTRUMENT {
        new (&FileHeaderStorage) XRayProfilingFileHeader{};
      });

  if (UNLIKELY(B.Data == nullptr)) {
    // The first buffer should always contain the file header information.
    auto &FileHeader =
        *reinterpret_cast<XRayProfilingFileHeader *>(&FileHeaderStorage);
    FileHeader.Timestamp = NanoTime();
    FileHeader.PID = internal_getpid();
    return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)};
  }

  if (UNLIKELY(B.Data == &FileHeaderStorage))
    return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};

  BlockHeader Header;
  internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
  auto NextBlock = Header.BlockNum + 1;
  if (NextBlock < ProfileBuffers->size())
    return {(*ProfileBuffers)[NextBlock].Data,
            (*ProfileBuffers)[NextBlock].Size};
  return {nullptr, 0};
}

} // namespace profileCollectorService
} // namespace __xray