Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingValue.c
35233 views
1
/*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
2
|*
3
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
|* See https://llvm.org/LICENSE.txt for license information.
5
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
|*
7
\*===----------------------------------------------------------------------===*/
8
9
#include <assert.h>
10
#include <limits.h>
11
#include <stdio.h>
12
#include <stdlib.h>
13
#include <string.h>
14
15
#include "InstrProfiling.h"
16
#include "InstrProfilingInternal.h"
17
#include "InstrProfilingUtil.h"
18
19
#define INSTR_PROF_VALUE_PROF_DATA
20
#define INSTR_PROF_COMMON_API_IMPL
21
#define INSTR_PROF_VALUE_PROF_MEMOP_API
22
#include "profile/InstrProfData.inc"
23
24
static int hasStaticCounters = 1;
25
static int OutOfNodesWarnings = 0;
26
static int hasNonDefaultValsPerSite = 0;
27
#define INSTR_PROF_MAX_VP_WARNS 10
28
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
29
#define INSTR_PROF_VNODE_POOL_SIZE 1024
30
31
#ifndef _MSC_VER
32
/* A shared static pool in addition to the vnodes statically
33
* allocated by the compiler. */
34
COMPILER_RT_VISIBILITY ValueProfNode
35
lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
36
COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
37
#endif
38
39
COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
40
INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
41
42
COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {
43
const char *Str = 0;
44
Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
45
if (Str && Str[0]) {
46
VPMaxNumValsPerSite = atoi(Str);
47
hasNonDefaultValsPerSite = 1;
48
}
49
if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
50
VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
51
}
52
53
COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
54
VPMaxNumValsPerSite = MaxVals;
55
hasNonDefaultValsPerSite = 1;
56
}
57
58
/* This method is only used in value profiler mock testing. */
59
COMPILER_RT_VISIBILITY void
60
__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
61
uint32_t ValueKind, uint16_t NumValueSites) {
62
#ifdef __GNUC__
63
#pragma GCC diagnostic push
64
#pragma GCC diagnostic ignored "-Wcast-qual"
65
#elif defined(__clang__)
66
#pragma clang diagnostic push
67
#pragma clang diagnostic ignored "-Wcast-qual"
68
#endif
69
*((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
70
#ifdef __GNUC__
71
#pragma GCC diagnostic pop
72
#elif defined(__clang__)
73
#pragma clang diagnostic pop
74
#endif
75
}
76
77
/* This method is only used in value profiler mock testing. */
78
COMPILER_RT_VISIBILITY const __llvm_profile_data *
79
__llvm_profile_iterate_data(const __llvm_profile_data *Data) {
80
return Data + 1;
81
}
82
83
/* This method is only used in value profiler mock testing. */
84
COMPILER_RT_VISIBILITY void *
85
__llvm_get_function_addr(const __llvm_profile_data *Data) {
86
return Data->FunctionPointer;
87
}
88
89
/* Allocate an array that holds the pointers to the linked lists of
90
* value profile counter nodes. The number of element of the array
91
* is the total number of value profile sites instrumented. Returns
92
* 0 if allocation fails.
93
*/
94
95
static int allocateValueProfileCounters(__llvm_profile_data *Data) {
96
uint64_t NumVSites = 0;
97
uint32_t VKI;
98
99
/* This function will never be called when value site array is allocated
100
statically at compile time. */
101
hasStaticCounters = 0;
102
/* When dynamic allocation is enabled, allow tracking the max number of
103
* values allowd. */
104
if (!hasNonDefaultValsPerSite)
105
VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
106
107
for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
108
NumVSites += Data->NumValueSites[VKI];
109
110
// If NumVSites = 0, calloc is allowed to return a non-null pointer.
111
assert(NumVSites > 0 && "NumVSites can't be zero");
112
ValueProfNode **Mem =
113
(ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
114
if (!Mem)
115
return 0;
116
if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
117
free(Mem);
118
return 0;
119
}
120
return 1;
121
}
122
123
static ValueProfNode *allocateOneNode(void) {
124
ValueProfNode *Node;
125
126
if (!hasStaticCounters)
127
return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
128
129
/* Early check to avoid value wrapping around. */
130
if (CurrentVNode + 1 > EndVNode) {
131
if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
132
PROF_WARN("Unable to track new values: %s. "
133
" Consider using option -mllvm -vp-counters-per-site=<n> to "
134
"allocate more"
135
" value profile counters at compile time. \n",
136
"Running out of static counters");
137
}
138
return 0;
139
}
140
Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
141
/* Due to section padding, EndVNode point to a byte which is one pass
142
* an incomplete VNode, so we need to skip the last incomplete node. */
143
if (Node + 1 > EndVNode)
144
return 0;
145
146
return Node;
147
}
148
149
static COMPILER_RT_ALWAYS_INLINE void
150
instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
151
uint32_t CounterIndex, uint64_t CountValue) {
152
__llvm_profile_data *PData = (__llvm_profile_data *)Data;
153
if (!PData)
154
return;
155
if (!CountValue)
156
return;
157
if (!PData->Values) {
158
if (!allocateValueProfileCounters(PData))
159
return;
160
}
161
162
ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
163
ValueProfNode *PrevVNode = NULL;
164
ValueProfNode *MinCountVNode = NULL;
165
ValueProfNode *CurVNode = ValueCounters[CounterIndex];
166
uint64_t MinCount = UINT64_MAX;
167
168
uint8_t VDataCount = 0;
169
while (CurVNode) {
170
if (TargetValue == CurVNode->Value) {
171
CurVNode->Count += CountValue;
172
return;
173
}
174
if (CurVNode->Count < MinCount) {
175
MinCount = CurVNode->Count;
176
MinCountVNode = CurVNode;
177
}
178
PrevVNode = CurVNode;
179
CurVNode = CurVNode->Next;
180
++VDataCount;
181
}
182
183
if (VDataCount >= VPMaxNumValsPerSite) {
184
/* Bump down the min count node's count. If it reaches 0,
185
* evict it. This eviction/replacement policy makes hot
186
* targets more sticky while cold targets less so. In other
187
* words, it makes it less likely for the hot targets to be
188
* prematurally evicted during warmup/establishment period,
189
* when their counts are still low. In a special case when
190
* the number of values tracked is reduced to only one, this
191
* policy will guarantee that the dominating target with >50%
192
* total count will survive in the end. Note that this scheme
193
* allows the runtime to track the min count node in an adaptive
194
* manner. It can correct previous mistakes and eventually
195
* lock on a cold target that is alread in stable state.
196
*
197
* In very rare cases, this replacement scheme may still lead
198
* to target loss. For instance, out of \c N value slots, \c N-1
199
* slots are occupied by luke warm targets during the warmup
200
* period and the remaining one slot is competed by two or more
201
* very hot targets. If those hot targets occur in an interleaved
202
* way, none of them will survive (gain enough weight to throw out
203
* other established entries) due to the ping-pong effect.
204
* To handle this situation, user can choose to increase the max
205
* number of tracked values per value site. Alternatively, a more
206
* expensive eviction mechanism can be implemented. It requires
207
* the runtime to track the total number of evictions per-site.
208
* When the total number of evictions reaches certain threshold,
209
* the runtime can wipe out more than one lowest count entries
210
* to give space for hot targets.
211
*/
212
if (MinCountVNode->Count <= CountValue) {
213
CurVNode = MinCountVNode;
214
CurVNode->Value = TargetValue;
215
CurVNode->Count = CountValue;
216
} else
217
MinCountVNode->Count -= CountValue;
218
219
return;
220
}
221
222
CurVNode = allocateOneNode();
223
if (!CurVNode)
224
return;
225
CurVNode->Value = TargetValue;
226
CurVNode->Count += CountValue;
227
228
uint32_t Success = 0;
229
if (!ValueCounters[CounterIndex])
230
Success =
231
COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
232
else if (PrevVNode && !PrevVNode->Next)
233
Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
234
235
if (!Success && !hasStaticCounters) {
236
free(CurVNode);
237
return;
238
}
239
}
240
241
COMPILER_RT_VISIBILITY void
242
__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
243
uint32_t CounterIndex) {
244
instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
245
}
246
COMPILER_RT_VISIBILITY void
247
__llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
248
uint32_t CounterIndex,
249
uint64_t CountValue) {
250
instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
251
}
252
253
/*
254
* The target values are partitioned into multiple ranges. The range spec is
255
* defined in InstrProfData.inc.
256
*/
257
COMPILER_RT_VISIBILITY void
258
__llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
259
uint32_t CounterIndex) {
260
// Map the target value to the representative value of its range.
261
uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
262
__llvm_profile_instrument_target(RepValue, Data, CounterIndex);
263
}
264
265
/*
266
* A wrapper struct that represents value profile runtime data.
267
* Like InstrProfRecord class which is used by profiling host tools,
268
* ValueProfRuntimeRecord also implements the abstract interfaces defined in
269
* ValueProfRecordClosure so that the runtime data can be serialized using
270
* shared C implementation.
271
*/
272
typedef struct ValueProfRuntimeRecord {
273
const __llvm_profile_data *Data;
274
ValueProfNode **NodesKind[IPVK_Last + 1];
275
uint8_t **SiteCountArray;
276
} ValueProfRuntimeRecord;
277
278
/* ValueProfRecordClosure Interface implementation. */
279
280
static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
281
return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
282
}
283
284
static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
285
uint32_t S = 0, I;
286
const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
287
if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
288
return 0;
289
for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
290
S += Record->SiteCountArray[VK][I];
291
return S;
292
}
293
294
static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
295
uint32_t S) {
296
const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
297
return Record->SiteCountArray[VK][S];
298
}
299
300
static ValueProfRuntimeRecord RTRecord;
301
static ValueProfRecordClosure RTRecordClosure = {
302
&RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */
303
getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,
304
INSTR_PROF_NULLPTR, /* RemapValueData */
305
INSTR_PROF_NULLPTR, /* GetValueForSite, */
306
INSTR_PROF_NULLPTR /* AllocValueProfData */
307
};
308
309
static uint32_t
310
initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
311
uint8_t *SiteCountArray[]) {
312
unsigned I, J, S = 0, NumValueKinds = 0;
313
ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
314
RTRecord.Data = Data;
315
RTRecord.SiteCountArray = SiteCountArray;
316
for (I = 0; I <= IPVK_Last; I++) {
317
uint16_t N = Data->NumValueSites[I];
318
if (!N)
319
continue;
320
321
NumValueKinds++;
322
323
RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
324
for (J = 0; J < N; J++) {
325
/* Compute value count for each site. */
326
uint32_t C = 0;
327
ValueProfNode *Site =
328
Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
329
while (Site) {
330
C++;
331
Site = Site->Next;
332
}
333
if (C > UCHAR_MAX)
334
C = UCHAR_MAX;
335
RTRecord.SiteCountArray[I][J] = C;
336
}
337
S += N;
338
}
339
return NumValueKinds;
340
}
341
342
static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
343
InstrProfValueData *Dst,
344
ValueProfNode *StartNode, uint32_t N) {
345
unsigned I;
346
ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
347
for (I = 0; I < N; I++) {
348
Dst[I].Value = VNode->Value;
349
Dst[I].Count = VNode->Count;
350
VNode = VNode->Next;
351
}
352
return VNode;
353
}
354
355
static uint32_t getValueProfDataSizeWrapper(void) {
356
return getValueProfDataSize(&RTRecordClosure);
357
}
358
359
static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
360
return getNumValueDataForSiteRT(&RTRecord, VK, S);
361
}
362
363
static VPDataReaderType TheVPDataReader = {
364
initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
365
getFirstValueProfRecord, getNumValueDataForSiteWrapper,
366
getValueProfDataSizeWrapper, getNextNValueData};
367
368
COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {
369
return &TheVPDataReader;
370
}
371
372