Path: blob/main/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp
35265 views
//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file is a part of XRay, a dynamic runtime instrumentation system.9//10// This is the implementation of a profiling handler.11//12//===----------------------------------------------------------------------===//13#include <memory>14#include <time.h>1516#include "sanitizer_common/sanitizer_atomic.h"17#include "sanitizer_common/sanitizer_flags.h"18#include "xray/xray_interface.h"19#include "xray/xray_log_interface.h"20#include "xray_buffer_queue.h"21#include "xray_flags.h"22#include "xray_profile_collector.h"23#include "xray_profiling_flags.h"24#include "xray_recursion_guard.h"25#include "xray_tsc.h"26#include "xray_utils.h"27#include <pthread.h>2829namespace __xray {3031namespace {3233static atomic_sint32_t ProfilerLogFlushStatus = {34XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};3536static atomic_sint32_t ProfilerLogStatus = {37XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};3839static SpinMutex ProfilerOptionsMutex;4041struct ProfilingData {42atomic_uintptr_t Allocators;43atomic_uintptr_t FCT;44};4546static pthread_key_t ProfilingKey;4748// We use a global buffer queue, which gets initialized once at initialisation49// time, and gets reset when profiling is "done".50alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)];51static BufferQueue *BQ = nullptr;5253thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;54alignas(FunctionCallTrie::Allocators) thread_local std::byte55AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)];56alignas(FunctionCallTrie) thread_local std::byte57FunctionCallTrieStorage[sizeof(FunctionCallTrie)];58thread_local ProfilingData TLD{{0}, {0}};59thread_local atomic_uint8_t ReentranceGuard{0};6061// We use a separate guard for ensuring that for this thread, if we're already62// cleaning up, that any signal handlers don't attempt to cleanup nor63// initialise.64thread_local atomic_uint8_t TLDInitGuard{0};6566// We also use a separate latch to signal that the thread is exiting, and67// non-essential work should be ignored (things like recording events, etc.).68thread_local atomic_uint8_t ThreadExitingLatch{0};6970static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {71thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {72pthread_setspecific(ProfilingKey, &TLD);73return false;74}();75(void)ThreadOnce;7677RecursionGuard TLDInit(TLDInitGuard);78if (!TLDInit)79return nullptr;8081if (atomic_load_relaxed(&ThreadExitingLatch))82return nullptr;8384uptr Allocators = 0;85if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,86memory_order_acq_rel)) {87bool Success = false;88auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {89if (!Success)90atomic_store(&TLD.Allocators, 0, memory_order_release);91});9293// Acquire a set of buffers for this thread.94if (BQ == nullptr)95return nullptr;9697if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)98return nullptr;99auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {100if (!Success)101BQ->releaseBuffer(ThreadBuffers.NodeBuffer);102});103104if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)105return nullptr;106auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {107if (!Success)108BQ->releaseBuffer(ThreadBuffers.RootsBuffer);109});110111if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=112BufferQueue::ErrorCode::Ok)113return nullptr;114auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {115if (!Success)116BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);117});118119if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=120BufferQueue::ErrorCode::Ok)121return nullptr;122123Success = true;124new (&AllocatorsStorage) FunctionCallTrie::Allocators(125FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));126Allocators = reinterpret_cast<uptr>(127reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));128atomic_store(&TLD.Allocators, Allocators, memory_order_release);129}130131if (Allocators == 1)132return nullptr;133134uptr FCT = 0;135if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {136new (&FunctionCallTrieStorage)137FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(138atomic_load_relaxed(&TLD.Allocators)));139FCT = reinterpret_cast<uptr>(140reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));141atomic_store(&TLD.FCT, FCT, memory_order_release);142}143144if (FCT == 1)145return nullptr;146147return &TLD;148}149150static void cleanupTLD() XRAY_NEVER_INSTRUMENT {151auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);152if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(153&FunctionCallTrieStorage)))154reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();155156auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);157if (Allocators ==158reinterpret_cast<uptr>(159reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))160reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();161}162163static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {164RecursionGuard TLDInit(TLDInitGuard);165if (!TLDInit)166return;167168uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);169if (P != reinterpret_cast<uptr>(170reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))171return;172173auto FCT = reinterpret_cast<FunctionCallTrie *>(P);174DCHECK_NE(FCT, nullptr);175176uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);177if (A !=178reinterpret_cast<uptr>(179reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))180return;181182auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);183DCHECK_NE(Allocators, nullptr);184185// Always move the data into the profile collector.186profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),187std::move(ThreadBuffers), GetTid());188189// Re-initialize the ThreadBuffers object to a known "default" state.190ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};191}192193} // namespace194195const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {196#ifdef XRAY_PROFILER_DEFAULT_OPTIONS197return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);198#else199return "";200#endif201}202203XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {204if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=205XRayLogInitStatus::XRAY_LOG_FINALIZED) {206if (Verbosity())207Report("Not flushing profiles, profiling not been finalized.\n");208return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;209}210211RecursionGuard SignalGuard(ReentranceGuard);212if (!SignalGuard) {213if (Verbosity())214Report("Cannot finalize properly inside a signal handler!\n");215atomic_store(&ProfilerLogFlushStatus,216XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,217memory_order_release);218return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;219}220221s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,222XRayLogFlushStatus::XRAY_LOG_FLUSHING,223memory_order_acq_rel);224if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {225if (Verbosity())226Report("Not flushing profiles, implementation still flushing.\n");227return XRayLogFlushStatus::XRAY_LOG_FLUSHING;228}229230// At this point, we'll create the file that will contain the profile, but231// only if the options say so.232if (!profilingFlags()->no_flush) {233// First check whether we have data in the profile collector service234// before we try and write anything down.235XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});236if (B.Data == nullptr) {237if (Verbosity())238Report("profiling: No data to flush.\n");239} else {240LogWriter *LW = LogWriter::Open();241if (LW == nullptr) {242if (Verbosity())243Report("profiling: Failed to flush to file, dropping data.\n");244} else {245// Now for each of the buffers, write out the profile data as we would246// see it in memory, verbatim.247while (B.Data != nullptr && B.Size != 0) {248LW->WriteAll(reinterpret_cast<const char *>(B.Data),249reinterpret_cast<const char *>(B.Data) + B.Size);250B = profileCollectorService::nextBuffer(B);251}252LogWriter::Close(LW);253}254}255}256257profileCollectorService::reset();258259atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,260memory_order_release);261atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,262memory_order_release);263264return XRayLogFlushStatus::XRAY_LOG_FLUSHED;265}266267void profilingHandleArg0(int32_t FuncId,268XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {269unsigned char CPU;270auto TSC = readTSC(CPU);271RecursionGuard G(ReentranceGuard);272if (!G)273return;274275auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);276if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||277Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))278return;279280if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||281Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {282postCurrentThreadFCT(TLD);283return;284}285286auto T = getThreadLocalData();287if (T == nullptr)288return;289290auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));291switch (Entry) {292case XRayEntryType::ENTRY:293case XRayEntryType::LOG_ARGS_ENTRY:294FCT->enterFunction(FuncId, TSC, CPU);295break;296case XRayEntryType::EXIT:297case XRayEntryType::TAIL:298FCT->exitFunction(FuncId, TSC, CPU);299break;300default:301// FIXME: Handle bugs.302break;303}304}305306void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,307uint64_t) XRAY_NEVER_INSTRUMENT {308return profilingHandleArg0(FuncId, Entry);309}310311XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {312s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;313if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,314XRayLogInitStatus::XRAY_LOG_FINALIZING,315memory_order_release)) {316if (Verbosity())317Report("Cannot finalize profile, the profiling is not initialized.\n");318return static_cast<XRayLogInitStatus>(CurrentStatus);319}320321// Mark then finalize the current generation of buffers. This allows us to let322// the threads currently holding onto new buffers still use them, but let the323// last reference do the memory cleanup.324DCHECK_NE(BQ, nullptr);325BQ->finalize();326327// Wait a grace period to allow threads to see that we're finalizing.328SleepForMillis(profilingFlags()->grace_period_ms);329330// If we for some reason are entering this function from an instrumented331// handler, we bail out.332RecursionGuard G(ReentranceGuard);333if (!G)334return static_cast<XRayLogInitStatus>(CurrentStatus);335336// Post the current thread's data if we have any.337postCurrentThreadFCT(TLD);338339// Then we force serialize the log data.340profileCollectorService::serialize();341342atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,343memory_order_release);344return XRayLogInitStatus::XRAY_LOG_FINALIZED;345}346347XRayLogInitStatus348profilingLoggingInit(size_t, size_t, void *Options,349size_t OptionsSize) XRAY_NEVER_INSTRUMENT {350RecursionGuard G(ReentranceGuard);351if (!G)352return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;353354s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;355if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,356XRayLogInitStatus::XRAY_LOG_INITIALIZING,357memory_order_acq_rel)) {358if (Verbosity())359Report("Cannot initialize already initialised profiling "360"implementation.\n");361return static_cast<XRayLogInitStatus>(CurrentStatus);362}363364{365SpinMutexLock Lock(&ProfilerOptionsMutex);366FlagParser ConfigParser;367ProfilerFlags Flags;368Flags.setDefaults();369registerProfilerFlags(&ConfigParser, &Flags);370ConfigParser.ParseString(profilingCompilerDefinedFlags());371const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");372if (Env == nullptr)373Env = "";374ConfigParser.ParseString(Env);375376// Then parse the configuration string provided.377ConfigParser.ParseString(static_cast<const char *>(Options));378if (Verbosity())379ReportUnrecognizedFlags();380*profilingFlags() = Flags;381}382383// We need to reset the profile data collection implementation now.384profileCollectorService::reset();385386// Then also reset the buffer queue implementation.387if (BQ == nullptr) {388bool Success = false;389new (&BufferQueueStorage)390BufferQueue(profilingFlags()->per_thread_allocator_max,391profilingFlags()->buffers_max, Success);392if (!Success) {393if (Verbosity())394Report("Failed to initialize preallocated memory buffers!");395atomic_store(&ProfilerLogStatus,396XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,397memory_order_release);398return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;399}400401// If we've succeeded, set the global pointer to the initialised storage.402BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);403} else {404BQ->finalize();405auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,406profilingFlags()->buffers_max);407408if (InitStatus != BufferQueue::ErrorCode::Ok) {409if (Verbosity())410Report("Failed to initialize preallocated memory buffers; error: %s",411BufferQueue::getErrorString(InitStatus));412atomic_store(&ProfilerLogStatus,413XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,414memory_order_release);415return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;416}417418DCHECK(!BQ->finalizing());419}420421// We need to set up the exit handlers.422static pthread_once_t Once = PTHREAD_ONCE_INIT;423pthread_once(424&Once, +[] {425pthread_key_create(426&ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {427if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))428return;429430if (P == nullptr)431return;432433auto T = reinterpret_cast<ProfilingData *>(P);434if (atomic_load_relaxed(&T->Allocators) == 0)435return;436437{438// If we're somehow executing this while inside a439// non-reentrant-friendly context, we skip attempting to post440// the current thread's data.441RecursionGuard G(ReentranceGuard);442if (!G)443return;444445postCurrentThreadFCT(*T);446}447});448449// We also need to set up an exit handler, so that we can get the450// profile information at exit time. We use the C API to do this, to not451// rely on C++ ABI functions for registering exit handlers.452Atexit(+[]() XRAY_NEVER_INSTRUMENT {453if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))454return;455456auto Cleanup =457at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });458459// Finalize and flush.460if (profilingFinalize() != XRAY_LOG_FINALIZED ||461profilingFlush() != XRAY_LOG_FLUSHED)462return;463464if (Verbosity())465Report("XRay Profile flushed at exit.");466});467});468469__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);470__xray_set_handler(profilingHandleArg0);471__xray_set_handler_arg1(profilingHandleArg1);472473atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,474memory_order_release);475if (Verbosity())476Report("XRay Profiling init successful.\n");477478return XRayLogInitStatus::XRAY_LOG_INITIALIZED;479}480481bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {482// Set up the flag defaults from the static defaults and the483// compiler-provided defaults.484{485SpinMutexLock Lock(&ProfilerOptionsMutex);486auto *F = profilingFlags();487F->setDefaults();488FlagParser ProfilingParser;489registerProfilerFlags(&ProfilingParser, F);490ProfilingParser.ParseString(profilingCompilerDefinedFlags());491}492493XRayLogImpl Impl{494profilingLoggingInit,495profilingFinalize,496profilingHandleArg0,497profilingFlush,498};499auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);500if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {501if (Verbosity())502Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "503"%d\n",504RegistrationResult);505return false;506}507508if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))509__xray_log_select_mode("xray_profiling");510return true;511}512513} // namespace __xray514515static auto UNUSED Unused = __xray::profilingDynamicInitializer();516517518