Path: blob/main/contrib/llvm-project/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
/*===- CtxInstrProfiling.h- Contextual instrumentation-based PGO ---------===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/

#ifndef CTX_PROFILE_CTXINSTRPROFILING_H_
#define CTX_PROFILE_CTXINSTRPROFILING_H_

#include "CtxInstrContextNode.h"
#include "sanitizer_common/sanitizer_mutex.h"
#include <sanitizer/common_interface_defs.h>

using namespace llvm::ctx_profile;

// Forward-declare for the one unittest checking Arena construction zeroes out
// its allocatable space.
class ArenaTest_ZeroInit_Test;
namespace __ctx_profile {

static constexpr size_t ExpectedAlignment = 8;
// We really depend on this, see further below. We currently support x86_64.
// When we want to support other archs, we need to trace the places
// ExpectedAlignment is used and adjust accordingly.
static_assert(sizeof(void *) == ExpectedAlignment);

/// Arena (bump allocator) forming a linked list. Intentionally not thread
/// safe. Allocation and de-allocation happen using sanitizer APIs. We make
/// that explicit.
class Arena final {
public:
  // When allocating a new Arena, optionally specify an existing one to append
  // to, assumed to be the last in the Arena list. We only need to support
  // appending to the arena list.
  static Arena *allocateNewArena(size_t Size, Arena *Prev = nullptr);
  static void freeArenaList(Arena *&A);

  uint64_t size() const { return Size; }

  // Allocate S bytes or return nullptr if we don't have that many available.
  char *tryBumpAllocate(size_t S) {
    if (Pos + S > Size)
      return nullptr;
    Pos += S;
    return start() + (Pos - S);
  }

  Arena *next() const { return Next; }

  // The beginning of allocatable memory.
  const char *start() const { return const_cast<Arena *>(this)->start(); }
  const char *pos() const { return start() + Pos; }

private:
  friend class ::ArenaTest_ZeroInit_Test;
  explicit Arena(uint32_t Size);
  ~Arena() = delete;

  char *start() { return reinterpret_cast<char *>(&this[1]); }

  Arena *Next = nullptr;
  uint64_t Pos = 0;
  const uint64_t Size;
};
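// Illustrative usage sketch (not part of this header's API): bump-allocating
// from an arena list and chaining a new Arena when the current one runs out of
// space. `NeededSize`, `FirstArena` and `CurrentArena` are placeholder names
// used only for this example; sizes are assumed to be multiples of
// ExpectedAlignment.
//
//   Arena *FirstArena = Arena::allocateNewArena(/*Size=*/1 << 16);
//   Arena *CurrentArena = FirstArena;
//   char *Mem = CurrentArena->tryBumpAllocate(NeededSize);
//   if (!Mem) {
//     // The current Arena is full: append a fresh one to the list and retry.
//     CurrentArena = Arena::allocateNewArena(1 << 16, CurrentArena);
//     Mem = CurrentArena->tryBumpAllocate(NeededSize);
//   }
//   // ... use Mem ...
//   Arena::freeArenaList(FirstArena); // releases the whole list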
// The memory available for allocation follows the Arena header, and we expect
// it to be thus aligned.
static_assert(alignof(Arena) == ExpectedAlignment);

// Verify maintenance to ContextNode doesn't change this invariant, which makes
// sure the inlined vectors are appropriately aligned.
static_assert(alignof(ContextNode) == ExpectedAlignment);

/// ContextRoots are allocated by LLVM for entrypoints. LLVM is only concerned
/// with allocating and zero-initializing the global value (as in, GlobalValue)
/// for it.
struct ContextRoot {
  ContextNode *FirstNode = nullptr;
  Arena *FirstMemBlock = nullptr;
  Arena *CurrentMem = nullptr;
  // This is init-ed by the static zero initializer in LLVM.
  // Taken is used to ensure only one thread traverses the contextual graph -
  // either to read it or to write it. On the server side, the same entrypoint
  // will be entered by numerous threads, but over time, the profile aggregated
  // by collecting sequentially on one thread at a time is expected to converge
  // to the aggregate profile that may have been observable on all the threads.
  // Note that this is node-by-node aggregation, i.e. summing counters of nodes
  // at the same position in the graph, not flattening.
  // Threads that cannot lock Taken (fail TryLock) are given a "scratch
  // context" - a buffer they can clobber, safely from a memory access
  // perspective.
  //
  // Note about "scratch"-ness: we currently ignore the data written in them
  // (which is anyway clobbered). The design allows for that not to be the case
  // - because "scratch"-ness is first and foremost about not trying to build
  // subcontexts, and is captured by tainting the pointer value (pointer to the
  // memory treated as context), but right now, we drop that info.
  //
  // We could consider relaxing the single-collector requirement when more than
  // one thread enters, by holding a few context trees per entrypoint and then
  // aggregating them (as explained above) at the end of the profile collection
  // - it's a tradeoff between collection time and memory use: higher precision
  // can be obtained with either fewer concurrent collections but more
  // collection time, or with more concurrent collections (==more memory) and
  // less collection time. Note that concurrent collection does happen for
  // different entrypoints, regardless.
  ::__sanitizer::StaticSpinMutex Taken;

  // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
  // instrumentation lowering side because it is responsible for allocating and
  // zero-initializing ContextRoots.
  static_assert(sizeof(Taken) == 1);
};

/// This API is exposed for testing. See the APIs below about the contract with
/// LLVM.
inline bool isScratch(const void *Ctx) {
  return (reinterpret_cast<uint64_t>(Ctx) & 1);
}

} // namespace __ctx_profile

extern "C" {

// LLVM fills these in when lowering a llvm.instrprof.callsite intrinsic.
// Position 0 is used when the current context isn't scratch, 1 when it is.
// They are volatile because of signal handlers - we mean to specifically
// control when the data is loaded.
//
/// TLS where LLVM stores the pointer of the called value, as part of lowering
/// a llvm.instrprof.callsite.
extern __thread void *volatile __llvm_ctx_profile_expected_callee[2];
/// TLS where LLVM stores the pointer inside a caller's subcontexts vector that
/// corresponds to the callsite being lowered.
extern __thread ContextNode **volatile __llvm_ctx_profile_callsite[2];

// __llvm_ctx_profile_current_context_root is exposed for unit testing,
// otherwise it's only used internally by compiler-rt/ctx_profile.
extern __thread __ctx_profile::ContextRoot
    *volatile __llvm_ctx_profile_current_context_root;

/// Called by LLVM in the entry BB of an "entry point" function. The returned
/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
ContextNode *__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
                                              GUID Guid, uint32_t Counters,
                                              uint32_t Callsites);

/// Paired with __llvm_ctx_profile_start_context, and called at the exit of the
/// entry point function.
void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);

/// Called for any function other than entry points, in the entry BB of such a
/// function. Same consideration about the LSB of the returned value as for
/// .._start_context.
ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
                                            uint32_t NrCounters,
                                            uint32_t NrCallsites);
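// Illustrative sketch (a hand-written approximation of what the instrumented
// code conceptually does, not the actual LLVM lowering; MyRoot, MyGuid,
// OtherGuid, NumCounters and NumCallsites are placeholder names):
//
//   void someEntryPoint() {
//     ContextNode *Ctx = __llvm_ctx_profile_start_context(
//         &MyRoot, MyGuid, NumCounters, NumCallsites);
//     // ... instrumented body; Ctx may be scratch (LSB-tainted) ...
//     __llvm_ctx_profile_release_context(&MyRoot);
//   }
//
//   void someOtherFunction() {
//     // Non-entry-point functions ask for a context in their entry BB,
//     // passing their own address so the runtime can relate it to the
//     // caller's expected-callee TLS slot.
//     ContextNode *Ctx = __llvm_ctx_profile_get_context(
//         reinterpret_cast<void *>(&someOtherFunction), OtherGuid,
//         NumCounters, NumCallsites);
//     // ...
//   }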
/// Prepares for collection. Currently this resets counter values but preserves
/// internal context tree structure.
void __llvm_ctx_profile_start_collection();

/// Completely free allocated memory.
void __llvm_ctx_profile_free();

/// Used to obtain the profile. The Writer is called for each root ContextNode,
/// with the ContextRoot::Taken taken. The Writer is responsible for traversing
/// the structure underneath.
/// The Writer's first parameter plays the role of closure for Writer, and is
/// what the caller of __llvm_ctx_profile_fetch passes as the Data parameter.
/// The second parameter is the root of a context tree.
bool __llvm_ctx_profile_fetch(void *Data,
                              bool (*Writer)(void *, const ContextNode &));
}
#endif // CTX_PROFILE_CTXINSTRPROFILING_H_
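// Usage sketch (illustrative only; `MyState` and `countRoots` are hypothetical
// caller-side names): a minimal Writer callback for __llvm_ctx_profile_fetch.
// The runtime passes the Data pointer back as the Writer's first parameter.
//
//   struct MyState {
//     uint64_t RootsSeen = 0;
//   };
//
//   static bool countRoots(void *Data, const ContextNode &Root) {
//     auto *State = reinterpret_cast<MyState *>(Data);
//     ++State->RootsSeen;
//     // A real Writer would serialize Root and walk its subcontexts here.
//     return true;
//   }
//
//   MyState State;
//   bool Ok = __llvm_ctx_profile_fetch(&State, countRoots);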