Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp
35265 views
1
//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file is a part of XRay, a dynamic runtime instrumentation system.
10
//
11
// This is the implementation of a profiling handler.
12
//
13
//===----------------------------------------------------------------------===//
14
#include <memory>
15
#include <time.h>
16
17
#include "sanitizer_common/sanitizer_atomic.h"
18
#include "sanitizer_common/sanitizer_flags.h"
19
#include "xray/xray_interface.h"
20
#include "xray/xray_log_interface.h"
21
#include "xray_buffer_queue.h"
22
#include "xray_flags.h"
23
#include "xray_profile_collector.h"
24
#include "xray_profiling_flags.h"
25
#include "xray_recursion_guard.h"
26
#include "xray_tsc.h"
27
#include "xray_utils.h"
28
#include <pthread.h>
29
30
namespace __xray {
31
32
namespace {
33
34
static atomic_sint32_t ProfilerLogFlushStatus = {
35
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
36
37
static atomic_sint32_t ProfilerLogStatus = {
38
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
39
40
static SpinMutex ProfilerOptionsMutex;
41
42
struct ProfilingData {
43
atomic_uintptr_t Allocators;
44
atomic_uintptr_t FCT;
45
};
46
47
static pthread_key_t ProfilingKey;
48
49
// We use a global buffer queue, which gets initialized once at initialisation
50
// time, and gets reset when profiling is "done".
51
alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)];
52
static BufferQueue *BQ = nullptr;
53
54
thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
55
alignas(FunctionCallTrie::Allocators) thread_local std::byte
56
AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)];
57
alignas(FunctionCallTrie) thread_local std::byte
58
FunctionCallTrieStorage[sizeof(FunctionCallTrie)];
59
thread_local ProfilingData TLD{{0}, {0}};
60
thread_local atomic_uint8_t ReentranceGuard{0};
61
62
// We use a separate guard for ensuring that for this thread, if we're already
63
// cleaning up, that any signal handlers don't attempt to cleanup nor
64
// initialise.
65
thread_local atomic_uint8_t TLDInitGuard{0};
66
67
// We also use a separate latch to signal that the thread is exiting, and
68
// non-essential work should be ignored (things like recording events, etc.).
69
thread_local atomic_uint8_t ThreadExitingLatch{0};
70
71
static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
72
thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
73
pthread_setspecific(ProfilingKey, &TLD);
74
return false;
75
}();
76
(void)ThreadOnce;
77
78
RecursionGuard TLDInit(TLDInitGuard);
79
if (!TLDInit)
80
return nullptr;
81
82
if (atomic_load_relaxed(&ThreadExitingLatch))
83
return nullptr;
84
85
uptr Allocators = 0;
86
if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
87
memory_order_acq_rel)) {
88
bool Success = false;
89
auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
90
if (!Success)
91
atomic_store(&TLD.Allocators, 0, memory_order_release);
92
});
93
94
// Acquire a set of buffers for this thread.
95
if (BQ == nullptr)
96
return nullptr;
97
98
if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
99
return nullptr;
100
auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
101
if (!Success)
102
BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
103
});
104
105
if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
106
return nullptr;
107
auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
108
if (!Success)
109
BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
110
});
111
112
if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
113
BufferQueue::ErrorCode::Ok)
114
return nullptr;
115
auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
116
if (!Success)
117
BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
118
});
119
120
if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
121
BufferQueue::ErrorCode::Ok)
122
return nullptr;
123
124
Success = true;
125
new (&AllocatorsStorage) FunctionCallTrie::Allocators(
126
FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
127
Allocators = reinterpret_cast<uptr>(
128
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
129
atomic_store(&TLD.Allocators, Allocators, memory_order_release);
130
}
131
132
if (Allocators == 1)
133
return nullptr;
134
135
uptr FCT = 0;
136
if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
137
new (&FunctionCallTrieStorage)
138
FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
139
atomic_load_relaxed(&TLD.Allocators)));
140
FCT = reinterpret_cast<uptr>(
141
reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
142
atomic_store(&TLD.FCT, FCT, memory_order_release);
143
}
144
145
if (FCT == 1)
146
return nullptr;
147
148
return &TLD;
149
}
150
151
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
152
auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
153
if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
154
&FunctionCallTrieStorage)))
155
reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
156
157
auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
158
if (Allocators ==
159
reinterpret_cast<uptr>(
160
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
161
reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
162
}
163
164
static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
165
RecursionGuard TLDInit(TLDInitGuard);
166
if (!TLDInit)
167
return;
168
169
uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
170
if (P != reinterpret_cast<uptr>(
171
reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
172
return;
173
174
auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
175
DCHECK_NE(FCT, nullptr);
176
177
uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
178
if (A !=
179
reinterpret_cast<uptr>(
180
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
181
return;
182
183
auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
184
DCHECK_NE(Allocators, nullptr);
185
186
// Always move the data into the profile collector.
187
profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
188
std::move(ThreadBuffers), GetTid());
189
190
// Re-initialize the ThreadBuffers object to a known "default" state.
191
ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
192
}
193
194
} // namespace
195
196
const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
197
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
198
return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
199
#else
200
return "";
201
#endif
202
}
203
204
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
205
if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
206
XRayLogInitStatus::XRAY_LOG_FINALIZED) {
207
if (Verbosity())
208
Report("Not flushing profiles, profiling not been finalized.\n");
209
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
210
}
211
212
RecursionGuard SignalGuard(ReentranceGuard);
213
if (!SignalGuard) {
214
if (Verbosity())
215
Report("Cannot finalize properly inside a signal handler!\n");
216
atomic_store(&ProfilerLogFlushStatus,
217
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
218
memory_order_release);
219
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
220
}
221
222
s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
223
XRayLogFlushStatus::XRAY_LOG_FLUSHING,
224
memory_order_acq_rel);
225
if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
226
if (Verbosity())
227
Report("Not flushing profiles, implementation still flushing.\n");
228
return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
229
}
230
231
// At this point, we'll create the file that will contain the profile, but
232
// only if the options say so.
233
if (!profilingFlags()->no_flush) {
234
// First check whether we have data in the profile collector service
235
// before we try and write anything down.
236
XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
237
if (B.Data == nullptr) {
238
if (Verbosity())
239
Report("profiling: No data to flush.\n");
240
} else {
241
LogWriter *LW = LogWriter::Open();
242
if (LW == nullptr) {
243
if (Verbosity())
244
Report("profiling: Failed to flush to file, dropping data.\n");
245
} else {
246
// Now for each of the buffers, write out the profile data as we would
247
// see it in memory, verbatim.
248
while (B.Data != nullptr && B.Size != 0) {
249
LW->WriteAll(reinterpret_cast<const char *>(B.Data),
250
reinterpret_cast<const char *>(B.Data) + B.Size);
251
B = profileCollectorService::nextBuffer(B);
252
}
253
LogWriter::Close(LW);
254
}
255
}
256
}
257
258
profileCollectorService::reset();
259
260
atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
261
memory_order_release);
262
atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
263
memory_order_release);
264
265
return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
266
}
267
268
void profilingHandleArg0(int32_t FuncId,
269
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
270
unsigned char CPU;
271
auto TSC = readTSC(CPU);
272
RecursionGuard G(ReentranceGuard);
273
if (!G)
274
return;
275
276
auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
277
if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
278
Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
279
return;
280
281
if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
282
Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
283
postCurrentThreadFCT(TLD);
284
return;
285
}
286
287
auto T = getThreadLocalData();
288
if (T == nullptr)
289
return;
290
291
auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
292
switch (Entry) {
293
case XRayEntryType::ENTRY:
294
case XRayEntryType::LOG_ARGS_ENTRY:
295
FCT->enterFunction(FuncId, TSC, CPU);
296
break;
297
case XRayEntryType::EXIT:
298
case XRayEntryType::TAIL:
299
FCT->exitFunction(FuncId, TSC, CPU);
300
break;
301
default:
302
// FIXME: Handle bugs.
303
break;
304
}
305
}
306
307
void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
308
uint64_t) XRAY_NEVER_INSTRUMENT {
309
return profilingHandleArg0(FuncId, Entry);
310
}
311
312
XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
313
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
314
if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
315
XRayLogInitStatus::XRAY_LOG_FINALIZING,
316
memory_order_release)) {
317
if (Verbosity())
318
Report("Cannot finalize profile, the profiling is not initialized.\n");
319
return static_cast<XRayLogInitStatus>(CurrentStatus);
320
}
321
322
// Mark then finalize the current generation of buffers. This allows us to let
323
// the threads currently holding onto new buffers still use them, but let the
324
// last reference do the memory cleanup.
325
DCHECK_NE(BQ, nullptr);
326
BQ->finalize();
327
328
// Wait a grace period to allow threads to see that we're finalizing.
329
SleepForMillis(profilingFlags()->grace_period_ms);
330
331
// If we for some reason are entering this function from an instrumented
332
// handler, we bail out.
333
RecursionGuard G(ReentranceGuard);
334
if (!G)
335
return static_cast<XRayLogInitStatus>(CurrentStatus);
336
337
// Post the current thread's data if we have any.
338
postCurrentThreadFCT(TLD);
339
340
// Then we force serialize the log data.
341
profileCollectorService::serialize();
342
343
atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
344
memory_order_release);
345
return XRayLogInitStatus::XRAY_LOG_FINALIZED;
346
}
347
348
XRayLogInitStatus
349
profilingLoggingInit(size_t, size_t, void *Options,
350
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
351
RecursionGuard G(ReentranceGuard);
352
if (!G)
353
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
354
355
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
356
if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
357
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
358
memory_order_acq_rel)) {
359
if (Verbosity())
360
Report("Cannot initialize already initialised profiling "
361
"implementation.\n");
362
return static_cast<XRayLogInitStatus>(CurrentStatus);
363
}
364
365
{
366
SpinMutexLock Lock(&ProfilerOptionsMutex);
367
FlagParser ConfigParser;
368
ProfilerFlags Flags;
369
Flags.setDefaults();
370
registerProfilerFlags(&ConfigParser, &Flags);
371
ConfigParser.ParseString(profilingCompilerDefinedFlags());
372
const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
373
if (Env == nullptr)
374
Env = "";
375
ConfigParser.ParseString(Env);
376
377
// Then parse the configuration string provided.
378
ConfigParser.ParseString(static_cast<const char *>(Options));
379
if (Verbosity())
380
ReportUnrecognizedFlags();
381
*profilingFlags() = Flags;
382
}
383
384
// We need to reset the profile data collection implementation now.
385
profileCollectorService::reset();
386
387
// Then also reset the buffer queue implementation.
388
if (BQ == nullptr) {
389
bool Success = false;
390
new (&BufferQueueStorage)
391
BufferQueue(profilingFlags()->per_thread_allocator_max,
392
profilingFlags()->buffers_max, Success);
393
if (!Success) {
394
if (Verbosity())
395
Report("Failed to initialize preallocated memory buffers!");
396
atomic_store(&ProfilerLogStatus,
397
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
398
memory_order_release);
399
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
400
}
401
402
// If we've succeeded, set the global pointer to the initialised storage.
403
BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
404
} else {
405
BQ->finalize();
406
auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
407
profilingFlags()->buffers_max);
408
409
if (InitStatus != BufferQueue::ErrorCode::Ok) {
410
if (Verbosity())
411
Report("Failed to initialize preallocated memory buffers; error: %s",
412
BufferQueue::getErrorString(InitStatus));
413
atomic_store(&ProfilerLogStatus,
414
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
415
memory_order_release);
416
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
417
}
418
419
DCHECK(!BQ->finalizing());
420
}
421
422
// We need to set up the exit handlers.
423
static pthread_once_t Once = PTHREAD_ONCE_INIT;
424
pthread_once(
425
&Once, +[] {
426
pthread_key_create(
427
&ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
428
if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
429
return;
430
431
if (P == nullptr)
432
return;
433
434
auto T = reinterpret_cast<ProfilingData *>(P);
435
if (atomic_load_relaxed(&T->Allocators) == 0)
436
return;
437
438
{
439
// If we're somehow executing this while inside a
440
// non-reentrant-friendly context, we skip attempting to post
441
// the current thread's data.
442
RecursionGuard G(ReentranceGuard);
443
if (!G)
444
return;
445
446
postCurrentThreadFCT(*T);
447
}
448
});
449
450
// We also need to set up an exit handler, so that we can get the
451
// profile information at exit time. We use the C API to do this, to not
452
// rely on C++ ABI functions for registering exit handlers.
453
Atexit(+[]() XRAY_NEVER_INSTRUMENT {
454
if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
455
return;
456
457
auto Cleanup =
458
at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
459
460
// Finalize and flush.
461
if (profilingFinalize() != XRAY_LOG_FINALIZED ||
462
profilingFlush() != XRAY_LOG_FLUSHED)
463
return;
464
465
if (Verbosity())
466
Report("XRay Profile flushed at exit.");
467
});
468
});
469
470
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
471
__xray_set_handler(profilingHandleArg0);
472
__xray_set_handler_arg1(profilingHandleArg1);
473
474
atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
475
memory_order_release);
476
if (Verbosity())
477
Report("XRay Profiling init successful.\n");
478
479
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
480
}
481
482
bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
483
// Set up the flag defaults from the static defaults and the
484
// compiler-provided defaults.
485
{
486
SpinMutexLock Lock(&ProfilerOptionsMutex);
487
auto *F = profilingFlags();
488
F->setDefaults();
489
FlagParser ProfilingParser;
490
registerProfilerFlags(&ProfilingParser, F);
491
ProfilingParser.ParseString(profilingCompilerDefinedFlags());
492
}
493
494
XRayLogImpl Impl{
495
profilingLoggingInit,
496
profilingFinalize,
497
profilingHandleArg0,
498
profilingFlush,
499
};
500
auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
501
if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
502
if (Verbosity())
503
Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
504
"%d\n",
505
RegistrationResult);
506
return false;
507
}
508
509
if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
510
__xray_log_select_mode("xray_profiling");
511
return true;
512
}
513
514
} // namespace __xray
515
516
static auto UNUSED Unused = __xray::profilingDynamicInitializer();
517
518