Path: blob/main/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingValue.c
35233 views
/*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\1|*2|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3|* See https://llvm.org/LICENSE.txt for license information.4|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5|*6\*===----------------------------------------------------------------------===*/78#include <assert.h>9#include <limits.h>10#include <stdio.h>11#include <stdlib.h>12#include <string.h>1314#include "InstrProfiling.h"15#include "InstrProfilingInternal.h"16#include "InstrProfilingUtil.h"1718#define INSTR_PROF_VALUE_PROF_DATA19#define INSTR_PROF_COMMON_API_IMPL20#define INSTR_PROF_VALUE_PROF_MEMOP_API21#include "profile/InstrProfData.inc"2223static int hasStaticCounters = 1;24static int OutOfNodesWarnings = 0;25static int hasNonDefaultValsPerSite = 0;26#define INSTR_PROF_MAX_VP_WARNS 1027#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 2428#define INSTR_PROF_VNODE_POOL_SIZE 10242930#ifndef _MSC_VER31/* A shared static pool in addition to the vnodes statically32* allocated by the compiler. */33COMPILER_RT_VISIBILITY ValueProfNode34lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(35COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);36#endif3738COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =39INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;4041COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {42const char *Str = 0;43Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");44if (Str && Str[0]) {45VPMaxNumValsPerSite = atoi(Str);46hasNonDefaultValsPerSite = 1;47}48if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)49VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;50}5152COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {53VPMaxNumValsPerSite = MaxVals;54hasNonDefaultValsPerSite = 1;55}5657/* This method is only used in value profiler mock testing. */58COMPILER_RT_VISIBILITY void59__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,60uint32_t ValueKind, uint16_t NumValueSites) {61#ifdef __GNUC__62#pragma GCC diagnostic push63#pragma GCC diagnostic ignored "-Wcast-qual"64#elif defined(__clang__)65#pragma clang diagnostic push66#pragma clang diagnostic ignored "-Wcast-qual"67#endif68*((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;69#ifdef __GNUC__70#pragma GCC diagnostic pop71#elif defined(__clang__)72#pragma clang diagnostic pop73#endif74}7576/* This method is only used in value profiler mock testing. */77COMPILER_RT_VISIBILITY const __llvm_profile_data *78__llvm_profile_iterate_data(const __llvm_profile_data *Data) {79return Data + 1;80}8182/* This method is only used in value profiler mock testing. */83COMPILER_RT_VISIBILITY void *84__llvm_get_function_addr(const __llvm_profile_data *Data) {85return Data->FunctionPointer;86}8788/* Allocate an array that holds the pointers to the linked lists of89* value profile counter nodes. The number of element of the array90* is the total number of value profile sites instrumented. Returns91* 0 if allocation fails.92*/9394static int allocateValueProfileCounters(__llvm_profile_data *Data) {95uint64_t NumVSites = 0;96uint32_t VKI;9798/* This function will never be called when value site array is allocated99statically at compile time. */100hasStaticCounters = 0;101/* When dynamic allocation is enabled, allow tracking the max number of102* values allowd. */103if (!hasNonDefaultValsPerSite)104VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;105106for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)107NumVSites += Data->NumValueSites[VKI];108109// If NumVSites = 0, calloc is allowed to return a non-null pointer.110assert(NumVSites > 0 && "NumVSites can't be zero");111ValueProfNode **Mem =112(ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));113if (!Mem)114return 0;115if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {116free(Mem);117return 0;118}119return 1;120}121122static ValueProfNode *allocateOneNode(void) {123ValueProfNode *Node;124125if (!hasStaticCounters)126return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));127128/* Early check to avoid value wrapping around. */129if (CurrentVNode + 1 > EndVNode) {130if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {131PROF_WARN("Unable to track new values: %s. "132" Consider using option -mllvm -vp-counters-per-site=<n> to "133"allocate more"134" value profile counters at compile time. \n",135"Running out of static counters");136}137return 0;138}139Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);140/* Due to section padding, EndVNode point to a byte which is one pass141* an incomplete VNode, so we need to skip the last incomplete node. */142if (Node + 1 > EndVNode)143return 0;144145return Node;146}147148static COMPILER_RT_ALWAYS_INLINE void149instrumentTargetValueImpl(uint64_t TargetValue, void *Data,150uint32_t CounterIndex, uint64_t CountValue) {151__llvm_profile_data *PData = (__llvm_profile_data *)Data;152if (!PData)153return;154if (!CountValue)155return;156if (!PData->Values) {157if (!allocateValueProfileCounters(PData))158return;159}160161ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;162ValueProfNode *PrevVNode = NULL;163ValueProfNode *MinCountVNode = NULL;164ValueProfNode *CurVNode = ValueCounters[CounterIndex];165uint64_t MinCount = UINT64_MAX;166167uint8_t VDataCount = 0;168while (CurVNode) {169if (TargetValue == CurVNode->Value) {170CurVNode->Count += CountValue;171return;172}173if (CurVNode->Count < MinCount) {174MinCount = CurVNode->Count;175MinCountVNode = CurVNode;176}177PrevVNode = CurVNode;178CurVNode = CurVNode->Next;179++VDataCount;180}181182if (VDataCount >= VPMaxNumValsPerSite) {183/* Bump down the min count node's count. If it reaches 0,184* evict it. This eviction/replacement policy makes hot185* targets more sticky while cold targets less so. In other186* words, it makes it less likely for the hot targets to be187* prematurally evicted during warmup/establishment period,188* when their counts are still low. In a special case when189* the number of values tracked is reduced to only one, this190* policy will guarantee that the dominating target with >50%191* total count will survive in the end. Note that this scheme192* allows the runtime to track the min count node in an adaptive193* manner. It can correct previous mistakes and eventually194* lock on a cold target that is alread in stable state.195*196* In very rare cases, this replacement scheme may still lead197* to target loss. For instance, out of \c N value slots, \c N-1198* slots are occupied by luke warm targets during the warmup199* period and the remaining one slot is competed by two or more200* very hot targets. If those hot targets occur in an interleaved201* way, none of them will survive (gain enough weight to throw out202* other established entries) due to the ping-pong effect.203* To handle this situation, user can choose to increase the max204* number of tracked values per value site. Alternatively, a more205* expensive eviction mechanism can be implemented. It requires206* the runtime to track the total number of evictions per-site.207* When the total number of evictions reaches certain threshold,208* the runtime can wipe out more than one lowest count entries209* to give space for hot targets.210*/211if (MinCountVNode->Count <= CountValue) {212CurVNode = MinCountVNode;213CurVNode->Value = TargetValue;214CurVNode->Count = CountValue;215} else216MinCountVNode->Count -= CountValue;217218return;219}220221CurVNode = allocateOneNode();222if (!CurVNode)223return;224CurVNode->Value = TargetValue;225CurVNode->Count += CountValue;226227uint32_t Success = 0;228if (!ValueCounters[CounterIndex])229Success =230COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);231else if (PrevVNode && !PrevVNode->Next)232Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);233234if (!Success && !hasStaticCounters) {235free(CurVNode);236return;237}238}239240COMPILER_RT_VISIBILITY void241__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,242uint32_t CounterIndex) {243instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);244}245COMPILER_RT_VISIBILITY void246__llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,247uint32_t CounterIndex,248uint64_t CountValue) {249instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);250}251252/*253* The target values are partitioned into multiple ranges. The range spec is254* defined in InstrProfData.inc.255*/256COMPILER_RT_VISIBILITY void257__llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,258uint32_t CounterIndex) {259// Map the target value to the representative value of its range.260uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);261__llvm_profile_instrument_target(RepValue, Data, CounterIndex);262}263264/*265* A wrapper struct that represents value profile runtime data.266* Like InstrProfRecord class which is used by profiling host tools,267* ValueProfRuntimeRecord also implements the abstract interfaces defined in268* ValueProfRecordClosure so that the runtime data can be serialized using269* shared C implementation.270*/271typedef struct ValueProfRuntimeRecord {272const __llvm_profile_data *Data;273ValueProfNode **NodesKind[IPVK_Last + 1];274uint8_t **SiteCountArray;275} ValueProfRuntimeRecord;276277/* ValueProfRecordClosure Interface implementation. */278279static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {280return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];281}282283static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {284uint32_t S = 0, I;285const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;286if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)287return 0;288for (I = 0; I < Record->Data->NumValueSites[VK]; I++)289S += Record->SiteCountArray[VK][I];290return S;291}292293static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,294uint32_t S) {295const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;296return Record->SiteCountArray[VK][S];297}298299static ValueProfRuntimeRecord RTRecord;300static ValueProfRecordClosure RTRecordClosure = {301&RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */302getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,303INSTR_PROF_NULLPTR, /* RemapValueData */304INSTR_PROF_NULLPTR, /* GetValueForSite, */305INSTR_PROF_NULLPTR /* AllocValueProfData */306};307308static uint32_t309initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,310uint8_t *SiteCountArray[]) {311unsigned I, J, S = 0, NumValueKinds = 0;312ValueProfNode **Nodes = (ValueProfNode **)Data->Values;313RTRecord.Data = Data;314RTRecord.SiteCountArray = SiteCountArray;315for (I = 0; I <= IPVK_Last; I++) {316uint16_t N = Data->NumValueSites[I];317if (!N)318continue;319320NumValueKinds++;321322RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;323for (J = 0; J < N; J++) {324/* Compute value count for each site. */325uint32_t C = 0;326ValueProfNode *Site =327Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;328while (Site) {329C++;330Site = Site->Next;331}332if (C > UCHAR_MAX)333C = UCHAR_MAX;334RTRecord.SiteCountArray[I][J] = C;335}336S += N;337}338return NumValueKinds;339}340341static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,342InstrProfValueData *Dst,343ValueProfNode *StartNode, uint32_t N) {344unsigned I;345ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];346for (I = 0; I < N; I++) {347Dst[I].Value = VNode->Value;348Dst[I].Count = VNode->Count;349VNode = VNode->Next;350}351return VNode;352}353354static uint32_t getValueProfDataSizeWrapper(void) {355return getValueProfDataSize(&RTRecordClosure);356}357358static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {359return getNumValueDataForSiteRT(&RTRecord, VK, S);360}361362static VPDataReaderType TheVPDataReader = {363initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,364getFirstValueProfRecord, getNumValueDataForSiteWrapper,365getValueProfDataSizeWrapper, getNextNValueData};366367COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {368return &TheVPDataReader;369}370371372