// Source: GitHub repository folium-app/Folium
// Path: blob/a-new-beginning/SharedDependencies/Sources/microprofile/microprofile.cpp
1
#define MICROPROFILE_IMPL
2
#include "microprofile.h"
3
#if MICROPROFILE_ENABLED
4
5
#define BREAK_SKIP() __builtin_trap()
6
7
#ifdef _WIN32
8
#if !defined(WIN32_LEAN_AND_MEAN)
9
#define WIN32_LEAN_AND_MEAN
10
#endif
11
#include <malloc.h>
12
#endif
13
14
#ifdef _WIN32
15
#define MICROPROFILE_MAX_PATH MAX_PATH
16
#else
17
#define MICROPROFILE_MAX_PATH 1024
18
#endif
19
20
#include <atomic>
21
#include <ctype.h>
22
#include <mutex>
23
#include <stdarg.h>
24
#include <stdio.h>
25
#include <string.h>
26
#include <thread>
27
28
#if defined(MICROPROFILE_SYSTEM_STB)
29
#include <stb_sprintf.h>
30
#else
31
#define STB_SPRINTF_IMPLEMENTATION
32
#include "stb_sprintf.h"
33
#endif
34
35
#if defined(_WIN32) && _MSC_VER == 1700
36
#define PRIx64 "llx"
37
#define PRIu64 "llu"
38
#define PRId64 "lld"
39
#else
40
#include <inttypes.h>
41
#endif
42
43
#define MICROPROFILE_MAX_COUNTERS 512
44
#define MICROPROFILE_MAX_COUNTER_NAME_CHARS (MICROPROFILE_MAX_COUNTERS * 16)
45
#define MICROPROFILE_MAX_GROUP_INTS (MICROPROFILE_MAX_GROUPS / 32)
46
#define MICROPROFILE_MAX_CATEGORIES 16
47
#define MICROPROFILE_MAX_GRAPHS 5
48
#define MICROPROFILE_GRAPH_HISTORY 128
49
#define MICROPROFILE_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_BUFFER_SIZE) / sizeof(MicroProfileLogEntry))
50
#define MICROPROFILE_GPU_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_GPU_BUFFER_SIZE) / sizeof(MicroProfileLogEntry))
51
#define MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS 256
52
#define MICROPROFILE_WEBSOCKET_BUFFER_SIZE (64 << 10)
53
#define MICROPROFILE_INVALID_TICK ((uint64_t) - 1)
54
#define MICROPROFILE_DROPPED_TICK ((uint64_t) - 2)
55
#define MICROPROFILE_INVALID_FRAME ((uint32_t) - 1)
56
#define MICROPROFILE_GROUP_MASK_ALL 0xffffffff
57
#define MICROPROFILE_MAX_PATCH_ERRORS 32
58
#define MICROPROFILE_MAX_MODULE_EXEC_REGIONS 16
59
60
#define MP_LOG_TICK_MASK 0x0000ffffffffffff
61
#define MP_LOG_INDEX_MASK 0x3fff000000000000
62
#define MP_LOG_BEGIN_MASK 0xc000000000000000
63
#define MP_LOG_CSTR_MASK 0xe000000000000000
64
#define MP_LOG_CSTR_BIT 0x2000000000000000
65
#define MP_LOG_PAYLOAD_PTR_MASK (~(MP_LOG_BEGIN_MASK | MP_LOG_CSTR_BIT))
66
67
#define MP_LOG_ENTER_LEAVE_MASK 0x8000000000000000
68
69
#define MP_LOG_LEAVE 0x0
70
#define MP_LOG_ENTER 0x1
71
#define MP_LOG_EXTENDED 0x2
72
#define MP_LOG_EXTENDED_NO_DATA 0x3
73
74
#ifndef MICROPROFILE_SETTINGS_FILE
75
#define MICROPROFILE_SETTINGS_FILE "mppresets.cfg"
76
#endif
77
#ifndef MICROPROFILE_SETTINGS_FILE_BUILTIN
78
#define MICROPROFILE_SETTINGS_FILE_BUILTIN "mppresets.builtin.cfg"
79
#endif
80
#ifndef MICROPROFILE_SETTINGS_FILE_TEMP
81
#define MICROPROFILE_SETTINGS_FILE_TEMP ".tmp"
82
#endif
83
84
// #define MP_LOG_EXTRA_DATA 0x3
85
86
static_assert(0 == (MICROPROFILE_MAX_GROUPS % 32), "MICROPROFILE_MAX_GROUPS must be divisible by 32");
87
88
// Special values for the 14-bit index field of a log entry (see MP_LOG_INDEX_MASK).
// NOTE(review): values >= ETOKEN_MAX are presumably reserved and never used as
// ordinary timer indices - confirm against the code that allocates timers.
enum EMicroProfileTokenExtended
{
	ETOKEN_GPU_CPU_TIMESTAMP = 0x3fff,
	ETOKEN_GPU_CPU_SOURCE_THREAD = 0x3ffe,
	ETOKEN_META_MARKER = 0x3ffd,
	ETOKEN_CUSTOM_NAME = 0x3ffc,
	ETOKEN_CUSTOM_COLOR = 0x3ffb,
	ETOKEN_CUSTOM_ID = 0x3ffa,
	ETOKEN_CSTR_PTR = 0x2000, // note, matches MP_LOG_CSTR_BIT (0x2000 << 48 == 0x2000000000000000)
	ETOKEN_MAX = 0x2000,
};
99
100
// Identifiers for web socket "dirty" state; the enumerators take the values 0 and 1.
// NOTE(review): presumably used as bit positions or indices by the web socket code - confirm.
enum
{
	MICROPROFILE_WEBSOCKET_DIRTY_MENU,
	MICROPROFILE_WEBSOCKET_DIRTY_ENABLED,
};
105
106
// Default allocator hooks. A build that overrides MICROPROFILE_ALLOC is expected
// to provide all four macros.
#ifndef MICROPROFILE_ALLOC // redefine all if overriding
// No trailing semicolon, so the macro can also be used inside an expression.
#define MICROPROFILE_ALLOC(nSize, nAlign) MicroProfileAllocAligned(nSize, nAlign)
#define MICROPROFILE_REALLOC(p, s) realloc(p, s)
#define MICROPROFILE_FREE(p) MicroProfileFreeAligned(p)
#define MICROPROFILE_FREE_NON_ALIGNED(p) free(p)
#endif
112
113
#define MP_ALLOC(nSize, nAlign) MicroProfileAllocInternal(nSize, nAlign)
114
#define MP_REALLOC(p, s) MicroProfileReallocInternal(p, s)
115
#define MP_FREE(p) MicroProfileFreeInternal(p)
116
#define MP_ALLOC_OBJECT(T) (T*)MP_ALLOC(sizeof(T), alignof(T))
117
#define MP_ALLOC_OBJECT_ARRAY(T, Count) (T*)MP_ALLOC(sizeof(T) * Count, alignof(T))
118
119
#ifndef MICROPROFILE_DEBUG
120
#define MICROPROFILE_DEBUG 0
121
#endif
122
123
typedef uint64_t MicroProfileLogEntry;
124
125
void MicroProfileSleep(uint32_t nMs);
126
template <typename T>
127
T MicroProfileMin(T a, T b);
128
template <typename T>
129
T MicroProfileMax(T a, T b);
130
template <typename T>
131
T MicroProfileClamp(T a, T min_, T max_);
132
int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond);
133
float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond);
134
uint32_t MicroProfileLogGetType(MicroProfileLogEntry Index);
135
uint64_t MicroProfileLogGetTimerIndex(MicroProfileLogEntry Index);
136
MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick);
137
int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End);
138
int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick);
139
uint16_t MicroProfileGetTimerIndex(MicroProfileToken t);
140
uint32_t MicroProfileGetGroupMask(MicroProfileToken t);
141
MicroProfileToken MicroProfileMakeToken(uint64_t nGroupMask, uint32_t nGroupIndex, uint16_t nTimer);
142
bool MicroProfileAnyGroupActive();
143
void MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData);
144
145
// defer implementation
146
#define CONCAT_INTERNAL(x, y) x##y
147
#define CONCAT(x, y) CONCAT_INTERNAL(x, y)
148
void IntentionallyNotDefinedFunction__(); // DO NOT DEFINE THIS

// Scope guard: stores a callable and invokes it from the destructor.
// The converting constructor is deliberately not `explicit`, because
// MicroProfileExitScopeHelp::operator+ returns the callable and relies on the
// implicit conversion.
template <typename T>
struct MicroProfileExitScope
{
	T lambda; // callable executed when the guard is destroyed

	MicroProfileExitScope(T lambda)
		: lambda(lambda)
	{
	}
	~MicroProfileExitScope()
	{
		lambda();
	}

	// Copying a guard would run the callable twice. The call below references a
	// function that is declared but never defined, so any instantiated copy
	// fails at link time.
	MicroProfileExitScope(const MicroProfileExitScope& rhs)
		: lambda(rhs.lambda)
	{
		IntentionallyNotDefinedFunction__(); // this is here to ensure the compiler does not create duplicate copies
	}

private:
	// Declared private and left undefined: guards are not assignable.
	MicroProfileExitScope& operator=(const MicroProfileExitScope&);
};
171
172
class MicroProfileExitScopeHelp
173
{
174
public:
175
template <typename T>
176
MicroProfileExitScope<T> operator+(T t)
177
{
178
return t;
179
}
180
};
181
#define defer const auto& CONCAT(defer__, __LINE__) = MicroProfileExitScopeHelp() + [&]()
182
183
//////////////////////////////////////////////////////////////////////////
184
// platform IMPL
185
void* MicroProfileAllocInternal(size_t nSize, size_t nAlign);
186
void MicroProfileFreeInternal(void* pPtr);
187
void* MicroProfileReallocInternal(void* pPtr, size_t nSize);
188
189
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign);
190
void MicroProfileFreeAligned(void* pMem);
191
192
#if defined(__APPLE__)
193
#include <TargetConditionals.h>
194
#include <float.h>
195
#include <libkern/OSAtomic.h>
196
#include <mach/mach.h>
197
#include <mach/mach_time.h>
198
#include <unistd.h>
199
200
#if TARGET_OS_IPHONE
201
#define MICROPROFILE_IOS
202
#endif
203
204
#define MP_TICK() mach_absolute_time()
205
inline int64_t MicroProfileTicksPerSecondCpu_()
206
{
207
static int64_t nTicksPerSecond = 0;
208
if(nTicksPerSecond == 0)
209
{
210
mach_timebase_info_data_t sTimebaseInfo;
211
mach_timebase_info(&sTimebaseInfo);
212
nTicksPerSecond = 1000000000ll * sTimebaseInfo.denom / sTimebaseInfo.numer;
213
}
214
return nTicksPerSecond;
215
}
216
217
int64_t MicroProfileTicksPerSecondCpu()
218
{
219
return MicroProfileTicksPerSecondCpu_();
220
}
221
#define MicroProfileTicksPerSecondCpu MicroProfileTicksPerSecondCpu_
222
223
inline uint64_t MicroProfileGetCurrentThreadId()
224
{
225
uint64_t tid;
226
pthread_threadid_np(pthread_self(), &tid);
227
return tid;
228
}
229
230
#include <stdlib.h>
231
232
#define MP_BREAK() __builtin_trap()
233
#define MP_THREAD_LOCAL __thread
234
#define MP_STRCASECMP strcasecmp
235
#define MP_GETCURRENTTHREADID() MicroProfileGetCurrentThreadId()
236
#define MP_STRCASESTR strcasestr
237
#define MP_THREAD_LOCAL __thread
238
#define MP_NOINLINE __attribute__((noinline))
239
240
// Allocates nSize bytes aligned to nAlign using posix_memalign.
// Returns nullptr when the allocation fails.
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign)
{
	// posix_memalign rejects alignments smaller than sizeof(void*). Raising a
	// smaller power-of-two request still satisfies the caller's alignment.
	if(nAlign < sizeof(void*))
	{
		nAlign = sizeof(void*);
	}
	void* p = nullptr;
	if(posix_memalign(&p, nAlign, nSize) != 0)
	{
		return nullptr;
	}
	return p;
}

// Releases memory obtained from MicroProfileAllocAligned.
void MicroProfileFreeAligned(void* pMem)
{
	free(pMem);
}
255
256
#elif defined(_WIN32)
257
#include <Shlwapi.h>
258
#include <winsock2.h>
259
#include <ws2tcpip.h>
260
int64_t MicroProfileGetTick();
261
#define MP_TICK() MicroProfileGetTick()
262
#define MP_BREAK() __debugbreak()
263
#define MP_THREAD_LOCAL __declspec(thread)
264
#define MP_STRCASECMP _stricmp
265
#define MP_GETCURRENTTHREADID() GetCurrentThreadId()
266
#define MP_STRCASESTR StrStrI
267
#define MP_THREAD_LOCAL __declspec(thread)
268
#define MP_NOINLINE __declspec(noinline)
269
270
#ifndef MICROPROFILE_WIN32_TRAP_ALLOCATOR
271
#define MICROPROFILE_WIN32_TRAP_ALLOCATOR 0
272
#endif
273
274
#if MICROPROFILE_WIN32_TRAP_ALLOCATOR
275
// minimal trap allocator
276
#define PAGE_SIZE (4096)
277
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign)
278
{
279
(void)nAlign;
280
size_t nAlignedSize = (nSize + PAGE_SIZE - 1) & (~(PAGE_SIZE - 1));
281
size_t nDelta = nAlignedSize - nSize;
282
size_t nFullSize = nAlignedSize + 2 * PAGE_SIZE;
283
284
void* ptr = VirtualAlloc(0, nFullSize, MEM_RESERVE, PAGE_READWRITE);
285
intptr_t intptr = (intptr_t)ptr;
286
287
void* pResult = VirtualAlloc((void*)(intptr + PAGE_SIZE), nAlignedSize, MEM_COMMIT, PAGE_READWRITE);
288
memset(pResult, 0xf0, nAlignedSize);
289
290
intptr_t page = (intptr_t)pResult;
291
//((char*)page)[-1] = 0x70; //trap test
292
page += nDelta;
293
pResult = (void*)page;
294
memset(pResult, 0xfe, nSize);
295
//((char*)page)[nSize] = 0x70; //trap test
296
return (void*)page;
297
}
298
299
void MicroProfileFreeAligned(void* pMem)
300
{
301
intptr_t intptr = (intptr_t)pMem;
302
intptr = (intptr & (~(PAGE_SIZE - 1))) - PAGE_SIZE;
303
VirtualFree(pMem, 0, MEM_RELEASE);
304
}
305
#else
306
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign)
307
{
308
return _aligned_malloc(nSize, nAlign);
309
}
310
311
void MicroProfileFreeAligned(void* pMem)
312
{
313
_aligned_free(pMem);
314
}
315
#endif
316
317
#else
318
319
#ifndef MICROPROFILE_CUSTOM_PLATFORM
320
#include <float.h>
321
#include <malloc.h>
322
#include <stdlib.h>
323
#include <time.h>
324
#include <unistd.h>
325
326
// CPU tick rate for this platform: MicroProfileGetTick() reports nanoseconds,
// so there are 1e9 ticks per second.
inline int64_t MicroProfileTicksPerSecondCpu_()
{
	return 1000000000ll;
}

// Non-inline entry point with the public name; forwards to the inline version.
int64_t MicroProfileTicksPerSecondCpu()
{
	return MicroProfileTicksPerSecondCpu_();
}
// From here on, uses of the public name in this file resolve to the inline version.
#define MicroProfileTicksPerSecondCpu MicroProfileTicksPerSecondCpu_
336
337
// Returns the current CLOCK_REALTIME time as a nanosecond count.
// NOTE(review): CLOCK_REALTIME is the adjustable wall clock, so two readings are
// not guaranteed to be monotonic; CLOCK_MONOTONIC may be preferable - confirm
// that nothing else relies on wall-clock ticks before changing it.
inline int64_t MicroProfileGetTick()
{
	timespec ts;
	clock_gettime(CLOCK_REALTIME, &ts);
	return 1000000000ll * ts.tv_sec + ts.tv_nsec;
}
343
#define MP_TICK() MicroProfileGetTick()
344
#define MP_BREAK() __builtin_trap()
345
#define MP_THREAD_LOCAL __thread
346
#define MP_STRCASECMP strcasecmp
347
#define MP_GETCURRENTTHREADID() (uint64_t) pthread_self()
348
#define MP_STRCASESTR strcasestr
349
#define MP_THREAD_LOCAL __thread
350
#define MP_NOINLINE __attribute__((noinline))
351
352
// Allocates nSize bytes aligned to nAlign. Uses posix_memalign on Linux and
// memalign elsewhere. Returns nullptr when the allocation fails.
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign)
{
	// posix_memalign rejects alignments smaller than sizeof(void*). Raising a
	// smaller power-of-two request still satisfies the caller's alignment.
	if(nAlign < sizeof(void*))
	{
		nAlign = sizeof(void*);
	}
#if defined(__linux__)
	void* p = nullptr;
	if(posix_memalign(&p, nAlign, nSize) != 0)
	{
		return nullptr;
	}
	return p;
#else
	return memalign(nAlign, nSize);
#endif
}

// Releases memory obtained from MicroProfileAllocAligned.
void MicroProfileFreeAligned(void* pMem)
{
	free(pMem);
}
370
#endif
371
372
#endif
373
374
#ifdef MICROPROFILE_PS4
375
#define MICROPROFILE_PS4_DECL
376
#include "microprofile_ps4.h"
377
#endif
378
379
#ifdef MICROPROFILE_XBOXONE
380
#define MICROPROFILE_XBOXONE_DECL
381
#include "microprofile_xboxone.h"
382
#else
383
#ifdef _WIN32
384
#include <d3d11_1.h>
385
#endif
386
#endif
387
388
#ifdef _WIN32
389
typedef uint32_t MicroProfileThreadIdType;
390
#else
391
#ifdef MICROPROFILE_THREADID_SIZE_4BYTE
392
typedef uint32_t MicroProfileThreadIdType;
393
#elif MICROPROFILE_THREADID_SIZE_8BYTE
394
typedef uint64_t MicroProfileThreadIdType;
395
#else
396
typedef uint64_t MicroProfileThreadIdType;
397
#endif
398
#endif
399
400
#define MP_ASSERT(a) \
401
do \
402
{ \
403
if(!(a)) \
404
{ \
405
MP_BREAK(); \
406
} \
407
} while(0)
408
409
#ifdef _WIN32
410
#include <basetsd.h>
411
typedef UINT_PTR MpSocket;
412
#else
413
typedef int MpSocket;
414
#endif
415
416
// Native thread handle type: pthread_t everywhere except Windows, HANDLE on Windows.
// The former trailing #else (std::thread*) could never be selected, because the
// two preceding conditions already cover every case.
#ifndef _WIN32
typedef pthread_t MicroProfileThread;
#else
#if _MSC_VER == 1900
typedef void* HANDLE;
#endif

typedef HANDLE MicroProfileThread;
#endif
427
428
#if MICROPROFILE_DYNAMIC_INSTRUMENT
429
struct MicroProfileSymbolDesc;
430
431
// Capacity of the per-thread arrays in MicroProfileSuspendState (4096 entries).
#define MICROPROFILE_SUSPEND_MAX (4 << 10)
// Bookkeeping for suspended threads; both counters start at zero.
// The handle and instruction-pointer arrays exist only on Windows.
struct MicroProfileSuspendState
{
	uint32_t SuspendCounter = 0;
	uint32_t NumSuspended = 0;
#ifdef _WIN32
	HANDLE Suspended[MICROPROFILE_SUSPEND_MAX];
	intptr_t SuspendedIP[MICROPROFILE_SUSPEND_MAX];
#endif
};
441
442
void MicroProfileSymbolQueryFunctions(MpSocket Connection, const char* pFilter);
443
bool MicroProfileInstrumentFunction(void* pFunction, const char* pModuleName, const char* pFunctionName, uint32_t nColor);
444
bool MicroProfileSymbolInitialize(bool bStartLoad, const char* pModuleName = 0);
445
MicroProfileSymbolDesc* MicroProfileSymbolFindFuction(void* pAddress);
446
void MicroProfileInstrumentFunctionsCalled(void* pFunction, const char* pModuleName, const char* pFunctionName, int nMinBytes, int nMaxCalls);
447
void MicroProfileSymbolQuerySendResult(MpSocket Connection);
448
void MicroProfileSymbolSendFunctionNames(MpSocket Connection);
449
void MicroProfileSymbolSendErrors(MpSocket Connection);
450
const char* MicroProfileSymbolModuleGetString(uint32_t nIndex);
451
void MicroProfileInstrumentWithoutSymbols(const char** pModules, const char** pSymbols, uint32_t nNumSymbols);
452
void MicroProfileSymbolUpdateModuleList();
453
bool MicroProfileSymInit();
454
void MicroProfileSymCleanup();
455
#endif
456
457
struct MicroProfileFunctionQuery;
458
459
// hash table functions & declarations
460
struct MicroProfileHashTable;
461
struct MicroProfileHashTableIterator;
462
typedef bool (*MicroProfileHashCompareFunction)(uint64_t l, uint64_t r);
463
typedef uint64_t (*MicroProfileHashFunction)(uint64_t p);
464
uint64_t MicroProfileHashTableHashString(uint64_t pString);
465
bool MicroProfileHashTableCompareString(uint64_t L, uint64_t R);
466
uint64_t MicroProfileHashTableHashPtr(uint64_t pString);
467
bool MicroProfileHashTableComparePtr(uint64_t L, uint64_t R);
468
void MicroProfileHashTableInit(MicroProfileHashTable* pTable, uint32_t nInitialSize, uint32_t nSearchLimit, MicroProfileHashCompareFunction CompareFunc, MicroProfileHashFunction HashFunc);
469
void MicroProfileHashTableDestroy(MicroProfileHashTable* pTable);
470
uint64_t MicroProfileHashTableHash(MicroProfileHashTable* pTable, uint64_t K);
471
bool MicroProfileHashTableSet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t Value);
472
MicroProfileHashTableIterator MicroProfileGetHashTableIteratorBegin(MicroProfileHashTable* HashTable);
473
MicroProfileHashTableIterator MicroProfileGetHashTableIteratorEnd(MicroProfileHashTable* HashTable);
474
475
// Minimal growable array. A default-constructed array is empty (null Data,
// zero Size and Capacity). The accessors are only declared here; their
// definitions live elsewhere in the file.
template <typename T>
struct MicroProfileArray
{
	T* Data = nullptr;
	uint32_t Size = 0;
	uint32_t Capacity = 0;
	T& operator[](const uint32_t Index);
	const T& operator[](const uint32_t Index) const;
	T* begin();
	T* end();
};
486
487
template <typename T>
488
void MicroProfileArrayInit(MicroProfileArray<T>& Array, uint32_t InitialCapacity);
489
template <typename T>
490
void MicroProfileArrayDestroy(MicroProfileArray<T>& Array, uint32_t InitialCapacity);
491
template <typename T>
492
void MicroProfileArrayClear(MicroProfileArray<T>& Array);
493
template <typename T>
494
void MicroProfileArrayPushBack(MicroProfileArray<T>& Array, const T& v);
495
496
// A tick total paired with a count.
// NOTE(review): presumably accumulated time and call count for one timer - confirm.
struct MicroProfileTimer
{
	uint64_t nTicks;
	uint32_t nCount;
};
501
502
struct MicroProfileCategory
503
{
504
char pName[MICROPROFILE_NAME_MAX_LEN];
505
uint32_t nGroupMask[MICROPROFILE_MAX_GROUP_INTS];
506
};
507
508
struct MicroProfileGroupInfo
509
{
510
char pName[MICROPROFILE_NAME_MAX_LEN];
511
uint32_t nNameLen;
512
uint32_t nGroupIndex;
513
uint32_t nNumTimers;
514
uint32_t nMaxTimerNameLen;
515
uint32_t nColor;
516
uint32_t nCategory;
517
MicroProfileTokenType Type;
518
int nWSNext;
519
};
520
521
struct MicroProfileTimerInfo
522
{
523
MicroProfileToken nToken;
524
uint32_t nTimerIndex;
525
uint32_t nGroupIndex;
526
char pName[MICROPROFILE_NAME_MAX_LEN];
527
char pNameExt[MICROPROFILE_NAME_MAX_LEN];
528
uint32_t nNameLen;
529
uint32_t nColor;
530
int nWSNext;
531
bool bGraph;
532
MicroProfileTokenType Type;
533
uint32_t Flags;
534
};
535
536
struct MicroProfileCounterInfo
537
{
538
int nParent;
539
int nSibling;
540
int nFirstChild;
541
uint16_t nNameLen;
542
uint8_t nLevel;
543
const char* pName;
544
uint32_t nFlags;
545
int64_t nLimit;
546
double dLimit;
547
int nWSNext;
548
MicroProfileCounterFormat eFormat;
549
std::atomic<int64_t> ExternalAtomic;
550
};
551
552
struct MicroProfileCounterHistory
553
{
554
uint32_t nPut;
555
uint64_t nHistory[MICROPROFILE_GRAPH_HISTORY];
556
};
557
558
// Pointer to external counter storage together with its size.
// NOTE(review): nSourceSize is presumably in bytes - confirm.
struct MicroProfileCounterSource
{
	void* pSource;
	uint32_t nSourceSize;
};
563
564
struct MicroProfileGraphState
565
{
566
int64_t nHistory[MICROPROFILE_GRAPH_HISTORY];
567
MicroProfileToken nToken;
568
int32_t nKey;
569
};
570
571
struct MicroProfileContextSwitch
572
{
573
MicroProfileThreadIdType nThreadOut;
574
MicroProfileThreadIdType nThreadIn;
575
int64_t nCpu : 8;
576
int64_t nTicks : 56;
577
};
578
579
struct MicroProfileFrameState
580
{
581
uint64_t nFrameStartCpu;
582
uint64_t nFrameStartGpu;
583
uint64_t nFrameId;
584
uint32_t nGpuPending;
585
uint32_t nLogStart[MICROPROFILE_MAX_THREADS];
586
uint32_t nLogStartTimeline;
587
uint32_t nTimelineFrameMax;
588
int32_t nHistoryTimeline;
589
};
590
591
// All frame counter data stored. Used to store the time for all counters/groups for every frame.
592
// Must be enabled with MicroProfileEnableFrameCounterExtraData()
593
// Will allocate sizeof(MicroProfileFrameExtraCounterData) * MICROPROFILE_MAX_FRAME_HISTORY bytes
594
struct MicroProfileFrameExtraCounterData
595
{
596
uint16_t NumTimers;
597
uint16_t NumGroups;
598
uint64_t Timers[MICROPROFILE_MAX_TIMERS];
599
uint64_t Groups[MICROPROFILE_MAX_GROUPS];
600
};
601
602
// State for CSV export: a three-value state, current and maximum counts for
// timers / groups / counters, and index, name and frame-data arrays.
// NOTE(review): ownership and allocation of the pointer members are not
// visible in this struct - confirm against the code that fills them.
struct MicroProfileCsvConfig
{
	enum CsvConfigState
	{
		INACTIVE = 0,
		CONFIG,
		ACTIVE,
	};
	CsvConfigState State;
	uint32_t NumTimers;
	uint32_t NumGroups;
	uint32_t NumCounters;
	uint32_t MaxTimers;
	uint32_t MaxGroups;
	uint32_t MaxCounters;
	uint32_t TotalElements;
	uint16_t* TimerIndices;
	uint16_t* GroupIndices;
	uint16_t* CounterIndices;
	uint64_t* FrameData;
	const char** pTimerNames;
	const char** pGroupNames;
	const char** pCounterNames;
	uint32_t Flags;
};
627
628
#ifdef _WIN32
629
#pragma warning(push)
630
#pragma warning(disable : 4200) // zero-sized struct
631
#pragma warning(disable : 4201) // nameless struct/union
632
#pragma warning(disable : 4244) // possible loss of data
633
#pragma warning(disable : 4100) // unreferenced formal parameter
634
#pragma warning(disable : 4091)
635
#pragma warning(disable : 4189) // local variable is initialized but not referenced. (for defer local variables)
636
#pragma warning(disable : 4456)
637
#pragma warning(disable : 4702)
638
#endif
639
640
// Header of one block in a singly linked list of string storage blocks.
// The character data follows the header in the trailing unsized array
// (a compiler extension in C++; MSVC warning 4200 is disabled above for it).
struct MicroProfileStringBlock
{
	enum
	{
		DEFAULT_SIZE = 8192,
	};
	MicroProfileStringBlock* pNext;
	uint32_t nUsed;
	uint32_t nSize;
	char Memory[];
};
651
652
// One slot of MicroProfileHashTable. MicroProfileHashTableIterator::SkipInvalid
// treats a slot whose Hash is 0 as empty.
struct MicroProfileHashTableEntry
{
	uint64_t Key;
	uint64_t Hash;
	uintptr_t Value;
};
658
659
struct MicroProfileHashTable
660
{
661
MicroProfileHashTableEntry* pEntries;
662
uint32_t nUsed;
663
uint32_t nAllocated;
664
uint32_t nSearchLimit;
665
uint32_t nLim;
666
MicroProfileHashCompareFunction CompareFunc;
667
MicroProfileHashFunction HashFunc;
668
};
669
670
struct MicroProfileHashTableIterator
671
{
672
MicroProfileHashTableIterator(uint32_t nIndex, MicroProfileHashTable* pTable)
673
: nIndex(nIndex)
674
, pTable(pTable)
675
{
676
}
677
MicroProfileHashTableIterator(const MicroProfileHashTableIterator& other)
678
: nIndex(other.nIndex)
679
, pTable(other.pTable)
680
{
681
}
682
683
uint32_t nIndex;
684
MicroProfileHashTable* pTable;
685
686
void AssertValid()
687
{
688
MP_ASSERT(nIndex < pTable->nAllocated);
689
}
690
691
MicroProfileHashTableEntry& operator*()
692
{
693
AssertValid();
694
return pTable->pEntries[nIndex];
695
}
696
MicroProfileHashTableEntry* operator->()
697
{
698
AssertValid();
699
return &pTable->pEntries[nIndex];
700
}
701
bool operator==(const MicroProfileHashTableIterator& rhs)
702
{
703
return nIndex == rhs.nIndex && pTable == rhs.pTable;
704
}
705
bool operator!=(const MicroProfileHashTableIterator& rhs)
706
{
707
return nIndex != rhs.nIndex || pTable != rhs.pTable;
708
}
709
710
void SkipInvalid()
711
{
712
while(nIndex < pTable->nAllocated && pTable->pEntries[nIndex].Hash == 0)
713
nIndex++;
714
}
715
MicroProfileHashTableIterator operator++()
716
{
717
AssertValid();
718
nIndex++;
719
SkipInvalid();
720
return *this;
721
}
722
MicroProfileHashTableIterator operator++(int)
723
{
724
MicroProfileHashTableIterator tmp = *this;
725
++(*this);
726
return tmp;
727
}
728
};
729
730
struct MicroProfileStrings
731
{
732
MicroProfileHashTable HashTable;
733
MicroProfileStringBlock* pFirst;
734
MicroProfileStringBlock* pLast;
735
};
736
737
struct MicroProfileThreadLog
738
{
739
740
std::atomic<uint32_t> nPut;
741
std::atomic<uint32_t> nGet;
742
743
MicroProfileLogEntry Log[MICROPROFILE_BUFFER_SIZE];
744
745
uint32_t nStackPut;
746
uint32_t nStackScope;
747
#ifdef MICROPROFILE_VERIFY_BALANCED
748
uint64_t VerifyStack[MICROPROFILE_STACK_MAX];
749
#endif
750
MicroProfileScopeStateC ScopeState[MICROPROFILE_STACK_MAX];
751
752
uint32_t nActive;
753
uint32_t nGpu;
754
MicroProfileThreadIdType nThreadId;
755
uint32_t nLogIndex;
756
uint32_t nCustomId;
757
uint32_t nIdleFrames;
758
759
MicroProfileLogEntry nStackLogEntry[MICROPROFILE_STACK_MAX];
760
uint64_t nChildTickStack[MICROPROFILE_STACK_MAX + 1];
761
int32_t nStackPos;
762
763
uint8_t nGroupStackPos[MICROPROFILE_MAX_GROUPS];
764
uint64_t nGroupTicks[MICROPROFILE_MAX_GROUPS];
765
uint64_t nAggregateGroupTicks[MICROPROFILE_MAX_GROUPS];
766
enum
767
{
768
THREAD_MAX_LEN = 64,
769
};
770
char ThreadName[64];
771
int nFreeListNext;
772
};
773
774
struct MicroProfileWebSocketBuffer
775
{
776
char* pBufferAllocation;
777
char* pBuffer;
778
uint32_t nBufferSize;
779
uint32_t nPut;
780
MpSocket Socket;
781
782
char SendBuffer[MICROPROFILE_WEBSOCKET_BUFFER_SIZE];
783
std::atomic<uint32_t> nSendPut;
784
std::atomic<uint32_t> nSendGet;
785
};
786
787
typedef void (*MicroProfileHookFunc)(int x);
788
789
// Record of one failed patch attempt: up to 32 code bytes, a message of up to
// 256 characters, and two integer fields.
// NOTE(review): nCodeSize is presumably the number of valid bytes in Code - confirm.
struct MicroProfilePatchError
{
	unsigned char Code[32];
	char Message[256];
	int AlreadyInstrumented;
	int nCodeSize;
};
796
797
// linear, per-frame per-thread gpu log
798
struct MicroProfileThreadLogGpu
799
{
800
MicroProfileLogEntry Log[MICROPROFILE_GPU_BUFFER_SIZE];
801
uint32_t nPut;
802
uint32_t nStart;
803
uint32_t nId;
804
void* pContext;
805
uint32_t nAllocated;
806
807
uint32_t nStackScope;
808
MicroProfileScopeStateC ScopeState[MICROPROFILE_STACK_MAX];
809
};
810
811
#if MICROPROFILE_GPU_TIMERS
812
static MicroProfileGpuInsertTimeStamp_CB MicroProfileGpuInsertTimeStamp_Callback = 0;
813
static MicroProfileGpuGetTimeStamp_CB MicroProfileGpuGetTimeStamp_Callback = 0;
814
static MicroProfileTicksPerSecondGpu_CB MicroProfileTicksPerSecondGpu_Callback = 0;
815
static MicroProfileGetGpuTickReference_CB MicroProfileGetGpuTickReference_Callback = 0;
816
static MicroProfileGpuFlip_CB MicroProfileGpuFlip_Callback = 0;
817
static MicroProfileGpuShutdown_CB MicroProfileGpuShutdown_Callback = 0;
818
819
uint32_t MicroProfileGpuInsertTimeStamp(void* pContext)
820
{
821
return MicroProfileGpuInsertTimeStamp_Callback ? MicroProfileGpuInsertTimeStamp_Callback(pContext) : 0;
822
}
823
uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey)
824
{
825
return MicroProfileGpuGetTimeStamp_Callback ? MicroProfileGpuGetTimeStamp_Callback(nKey) : 1;
826
}
827
uint64_t MicroProfileTicksPerSecondGpu()
828
{
829
return MicroProfileTicksPerSecondGpu_Callback ? MicroProfileTicksPerSecondGpu_Callback() : 1;
830
}
831
int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu)
832
{
833
return MicroProfileGetGpuTickReference_Callback ? MicroProfileGetGpuTickReference_Callback(pOutCPU, pOutGpu) : 0;
834
}
835
uint32_t MicroProfileGpuFlip(void* p)
836
{
837
return MicroProfileGpuFlip_Callback ? MicroProfileGpuFlip_Callback(p) : 0;
838
}
839
void MicroProfileGpuShutdown()
840
{
841
if(MicroProfileGpuShutdown_Callback)
842
MicroProfileGpuShutdown_Callback();
843
}
844
845
#endif
846
847
#if MICROPROFILE_GPU_TIMERS_D3D11
848
//:'######:::'########::'##::::'##::::'########:::'#######::'########:::::'##::::::'##:::
849
//'##... ##:: ##.... ##: ##:::: ##:::: ##.... ##:'##.... ##: ##.... ##::'####::::'####:::
850
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##:..::::: ##: ##:::: ##::.. ##::::.. ##:::
851
// ##::'####: ########:: ##:::: ##:::: ##:::: ##::'#######:: ##:::: ##:::: ##:::::: ##:::
852
// ##::: ##:: ##.....::: ##:::: ##:::: ##:::: ##::...... ##: ##:::: ##:::: ##:::::: ##:::
853
// ##::: ##:: ##:::::::: ##:::: ##:::: ##:::: ##:'##:::: ##: ##:::: ##:::: ##:::::: ##:::
854
//. ######::: ##::::::::. #######::::: ########::. #######:: ########:::'######::'######:
855
//:......::::..::::::::::.......::::::........::::.......:::........::::......:::......::
856
857
// Query bookkeeping for one D3D11 frame: a start index, a maximum count, an
// atomic count, and an opaque rate query pointer with its started flag.
struct MicroProfileD3D11Frame
{
	uint32_t m_nQueryStart;
	uint32_t m_nQueryCountMax;
	std::atomic<uint32_t> m_nQueryCount;
	uint32_t m_nRateQueryStarted;
	void* m_pRateQuery;
};
865
866
struct MicroProfileGpuTimerStateD3D11 : public MicroProfileGpuTimerState
867
{
868
uint32_t bInitialized;
869
void* m_pDevice;
870
void* m_pImmediateContext;
871
void* m_pQueries[MICROPROFILE_D3D11_MAX_QUERIES];
872
int64_t m_nQueryResults[MICROPROFILE_D3D11_MAX_QUERIES];
873
874
uint32_t m_nQueryPut;
875
uint32_t m_nQueryGet;
876
uint32_t m_nQueryFrame;
877
int64_t m_nQueryFrequency;
878
void* pSyncQuery;
879
880
MicroProfileD3D11Frame m_QueryFrames[MICROPROFILE_GPU_FRAME_DELAY];
881
};
882
883
uint32_t MicroProfileGpuInsertTimeStampD3D11(void* pContext_);
884
uint64_t MicroProfileGpuGetTimeStampD3D11(uint32_t nIndex);
885
bool MicroProfileGpuGetDataD3D11(void* pQuery, void* pData, uint32_t nDataSize);
886
uint64_t MicroProfileTicksPerSecondGpuD3D11();
887
uint32_t MicroProfileGpuFlipD3D11(void* pDeviceContext_);
888
void MicroProfileGpuInitD3D11(void* pDevice_, void* pImmediateContext);
889
void MicroProfileGpuShutdownD3D11();
890
int MicroProfileGetGpuTickReferenceD3D11(int64_t* pOutCPU, int64_t* pOutGpu);
891
MicroProfileGpuTimerStateD3D11* MicroProfileGetGpuTimerStateD3D11();
892
#endif
893
894
#if MICROPROFILE_GPU_TIMERS_D3D12
895
//:'######:::'########::'##::::'##::::'########:::'#######::'########:::::'##::::'#######::
896
//'##... ##:: ##.... ##: ##:::: ##:::: ##.... ##:'##.... ##: ##.... ##::'####:::'##.... ##:
897
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##:..::::: ##: ##:::: ##::.. ##:::..::::: ##:
898
// ##::'####: ########:: ##:::: ##:::: ##:::: ##::'#######:: ##:::: ##:::: ##::::'#######::
899
// ##::: ##:: ##.....::: ##:::: ##:::: ##:::: ##::...... ##: ##:::: ##:::: ##:::'##::::::::
900
// ##::: ##:: ##:::::::: ##:::: ##:::: ##:::: ##:'##:::: ##: ##:::: ##:::: ##::: ##::::::::
901
//. ######::: ##::::::::. #######::::: ########::. #######:: ########:::'######: #########:
902
//:......::::..::::::::::.......::::::........::::.......:::........::::......::.........::
903
904
#include <d3d12.h>
905
906
#ifndef MICROPROFILE_D3D12_MAX_QUERIES
907
#define MICROPROFILE_D3D12_MAX_QUERIES (32 << 10)
908
#endif
909
910
#define MICROPROFILE_D3D_MAX_NODE_COUNT 4
911
#define MICROPROFILE_D3D_INTERNAL_DELAY 8
912
913
#define MP_NODE_MASK_ALL(n) ((1u << (n)) - 1u)
914
#define MP_NODE_MASK_ONE(n) (1u << (n))
915
916
struct MicroProfileGpuTimerStateD3D12;
917
918
int MicroProfileGetGpuTickReferenceD3D12(int64_t* pOutCPU, int64_t* pOutGpu);
919
uint32_t MicroProfileGpuInsertTimeStampD3D12(void* pContext);
920
uint64_t MicroProfileGpuGetTimeStampD3D12(uint32_t nIndex);
921
uint64_t MicroProfileTicksPerSecondGpuD3D12();
922
uint32_t MicroProfileGpuFlipD3D12(void* pContext);
923
void MicroProfileGpuInitD3D12(void* pDevice_, uint32_t nNodeCount, void** pCommandQueues_, void** pCommandQueuesCopy_);
924
void MicroProfileGpuShutdownD3D12();
925
void MicroProfileSetCurrentNodeD3D12(uint32_t nNode);
926
int MicroProfileGetGpuTickReferenceD3D12(int64_t* pOutCPU, int64_t* pOutGpu);
927
MicroProfileGpuTimerStateD3D12* MicroProfileGetGpuTimerStateD3D12();
928
929
struct MicroProfileFrameD3D12
930
{
931
uint32_t nTimeStampBegin;
932
uint32_t nTimeStampCount;
933
uint32_t nTimeStampBeginCopyQueue;
934
uint32_t nTimeStampCountCopyQueue;
935
uint32_t nNode;
936
ID3D12GraphicsCommandList* pCommandList[MICROPROFILE_D3D_MAX_NODE_COUNT];
937
ID3D12GraphicsCommandList* pCommandListCopy[MICROPROFILE_D3D_MAX_NODE_COUNT];
938
ID3D12CommandAllocator* pCommandAllocator;
939
ID3D12CommandAllocator* pCommandAllocatorCopy;
940
};
941
942
struct MicroProfileGpuTimerStateD3D12 : public MicroProfileGpuTimerState
943
{
944
ID3D12Device* pDevice;
945
uint32_t nNodeCount;
946
uint32_t nCurrentNode;
947
948
uint64_t nFrame;
949
uint64_t nPendingFrame;
950
951
uint32_t nFrameStartTimeStamps;
952
uint32_t nFrameStartCopyQueueTimeStamps;
953
std::atomic<uint32_t> nFrameCountTimeStamps;
954
std::atomic<uint32_t> nFrameCountCopyQueueTimeStamps;
955
956
int64_t nFrequency;
957
ID3D12Resource* pBuffer;
958
ID3D12Resource* pBufferCopy;
959
960
struct
961
{
962
ID3D12CommandQueue* pCommandQueue;
963
ID3D12CommandQueue* pCommandQueueCopy;
964
ID3D12QueryHeap* pHeap;
965
ID3D12QueryHeap* pCopyQueueHeap;
966
ID3D12Fence* pFence;
967
ID3D12Fence* pFenceCopy;
968
} NodeState[MICROPROFILE_D3D_MAX_NODE_COUNT];
969
970
uint16_t nQueryFrames[MICROPROFILE_D3D12_MAX_QUERIES];
971
int64_t nResults[MICROPROFILE_D3D12_MAX_QUERIES];
972
uint16_t nQueryFramesCopy[MICROPROFILE_D3D12_MAX_QUERIES];
973
int64_t nResultsCopy[MICROPROFILE_D3D12_MAX_QUERIES];
974
975
MicroProfileFrameD3D12 Frames[MICROPROFILE_D3D_INTERNAL_DELAY];
976
};
977
#endif
978
979
#if MICROPROFILE_GPU_TIMERS_GL
980
//:'######:::'########::'##::::'##:::::'######:::'##:::::::
981
//'##... ##:: ##.... ##: ##:::: ##::::'##... ##:: ##:::::::
982
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::..::: ##:::::::
983
// ##::'####: ########:: ##:::: ##:::: ##::'####: ##:::::::
984
// ##::: ##:: ##.....::: ##:::: ##:::: ##::: ##:: ##:::::::
985
// ##::: ##:: ##:::::::: ##:::: ##:::: ##::: ##:: ##:::::::
986
//. ######::: ##::::::::. #######:::::. ######::: ########:
987
//:......::::..::::::::::.......:::::::......::::........::
988
struct MicroProfileGpuTimerStateGL : public MicroProfileGpuTimerState
989
{
990
uint32_t GLTimers[MICROPROFILE_GL_MAX_QUERIES];
991
uint32_t GLTimerPos;
992
};
993
994
MicroProfileGpuTimerStateGL* MicroProfileGetGpuTimerStateGL();
995
uint32_t MicroProfileGpuInsertTimeStampGL(void* pContext);
996
uint64_t MicroProfileGpuGetTimeStampGL(uint32_t nKey);
997
uint64_t MicroProfileTicksPerSecondGpuGL();
998
int MicroProfileGetGpuTickReferenceGL(int64_t* pOutCpu, int64_t* pOutGpu);
999
uint32_t MicroProfileGpuFlipGL(void* pContext);
1000
void MicroProfileGpuShutdownGL();
1001
#endif
1002
1003
#if MICROPROFILE_GPU_TIMERS_VULKAN
1004
1005
//:'######:::'########::'##::::'##::::'##::::'##:'##::::'##:'##:::::::'##:::'##::::'###::::'##::: ##:
1006
//'##... ##:: ##.... ##: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: ##::'##::::'## ##::: ###:: ##:
1007
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: ##:'##::::'##:. ##:: ####: ##:
1008
// ##::'####: ########:: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: #####::::'##:::. ##: ## ## ##:
1009
// ##::: ##:: ##.....::: ##:::: ##::::. ##:: ##:: ##:::: ##: ##::::::: ##. ##::: #########: ##. ####:
1010
// ##::: ##:: ##:::::::: ##:::: ##:::::. ## ##::: ##:::: ##: ##::::::: ##:. ##:: ##.... ##: ##:. ###:
1011
//. ######::: ##::::::::. #######:::::::. ###::::. #######:: ########: ##::. ##: ##:::: ##: ##::. ##:
1012
//:......::::..::::::::::.......:::::::::...::::::.......:::........::..::::..::..:::::..::..::::..::
1013
1014
// Entry points of the Vulkan GPU-timer backend (only declared when MICROPROFILE_GPU_TIMERS_VULKAN is set).
// The state struct is only forward declared here.
struct MicroProfileGpuTimerStateVulkan;
MicroProfileGpuTimerStateVulkan* MicroProfileGetGpuTimerStateVulkan();
uint32_t MicroProfileGpuInsertTimeStampVulkan(void* pContext);
uint64_t MicroProfileGpuGetTimeStampVulkan(uint32_t nKey);
uint64_t MicroProfileTicksPerSecondGpuVulkan();
int MicroProfileGetGpuTickReferenceVulkan(int64_t* pOutCpu, int64_t* pOutGpu);
uint32_t MicroProfileGpuFlipVulkan(void* pContext);
void MicroProfileGpuShutdownVulkan();
1022
#endif
1023
1024
// Atomic progress counters for symbol loading (embedded in MicroProfile as SymbolState
// when MICROPROFILE_DYNAMIC_INSTRUMENT is enabled).
struct MicroProfileSymbolState
{
	std::atomic<int> nModuleLoadsFinished;
	std::atomic<int> nModuleLoadsRequested;
	std::atomic<int64_t> nSymbolsLoaded;
};
1030
1031
// Address range [nBegin, nEnd) of one region of a module.
// NOTE(review): whether nEnd is inclusive or exclusive is not visible here - confirm.
struct MicroProfileSymbolModuleRegion
{
	intptr_t nBegin;
	intptr_t nEnd;
};
1036
struct MicroProfileSymbolModule
1037
{
1038
uint64_t nModuleBase;
1039
uint32_t nMatchOffset;
1040
uint32_t nStringOffset;
1041
const char* pBaseString;
1042
const char* pTrimmedString;
1043
MicroProfileSymbolModuleRegion Regions[MICROPROFILE_MAX_MODULE_EXEC_REGIONS];
1044
int nNumExecutableRegions;
1045
1046
bool bDownloading;
1047
intptr_t nProgress;
1048
intptr_t nProgressTarget;
1049
struct MicroProfileSymbolBlock* pSymbolBlock;
1050
MicroProfileHashTable AddressToSymbol;
1051
1052
int64_t nSymbols;
1053
std::atomic<int64_t> nSymbolsLoaded;
1054
std::atomic<int> nModuleLoadRequested;
1055
std::atomic<int> nModuleLoadFinished;
1056
};
1057
1058
// One memory region tracked by the dynamic instrumentation code (stored in
// MicroProfile::MemoryRegions): start address, size and a protection value.
struct MicroProfileInstrumentMemoryRegion
{
	intptr_t Start;
	intptr_t Size;
	uint32_t Protect; // presumably the platform page-protection flags - confirm
};
1064
1065
struct MicroProfile
1066
{
1067
uint32_t nTotalTimers;
1068
uint32_t nGroupCount;
1069
uint32_t nCategoryCount;
1070
uint32_t nAggregateClear;
1071
uint32_t nAggregateFlip;
1072
uint32_t nAggregateFlipCount;
1073
uint32_t nAggregateFrames;
1074
1075
uint64_t nFlipStartTick;
1076
uint64_t nAggregateFlipTick;
1077
1078
uint32_t nDisplay;
1079
uint32_t nBars;
1080
uint32_t nActiveGroups[MICROPROFILE_MAX_GROUP_INTS];
1081
bool AnyActive;
1082
uint32_t nFrozen;
1083
uint32_t nWasFrozen;
1084
uint32_t nPlatformMarkersEnabled;
1085
1086
uint32_t nForceEnable;
1087
1088
uint32_t nForceGroups[MICROPROFILE_MAX_GROUP_INTS];
1089
uint32_t nActiveGroupsWanted[MICROPROFILE_MAX_GROUP_INTS];
1090
uint32_t nGroupMask[MICROPROFILE_MAX_GROUP_INTS];
1091
1092
uint32_t nStartEnabled;
1093
uint32_t nAllThreadsWanted;
1094
1095
uint32_t nOverflow;
1096
1097
uint32_t nMaxGroupSize;
1098
uint32_t nDumpFileNextFrame;
1099
uint32_t nDumpFileCountDown;
1100
uint32_t nDumpSpikeMask;
1101
uint32_t nAutoClearFrames;
1102
1103
float fDumpCpuSpike;
1104
float fDumpGpuSpike;
1105
char HtmlDumpPath[512];
1106
char CsvDumpPath[512];
1107
uint32_t DumpFrameCount;
1108
1109
int64_t nPauseTicks;
1110
std::atomic<int64_t> nContextSwitchStalledTick;
1111
int64_t nContextSwitchLastPushed;
1112
int64_t nContextSwitchLastIndexPushed;
1113
1114
float fReferenceTime;
1115
float fRcpReferenceTime;
1116
1117
MicroProfileCategory CategoryInfo[MICROPROFILE_MAX_CATEGORIES];
1118
MicroProfileGroupInfo GroupInfo[MICROPROFILE_MAX_GROUPS];
1119
MicroProfileTimerInfo TimerInfo[MICROPROFILE_MAX_TIMERS];
1120
uint32_t TimerToGroup[MICROPROFILE_MAX_TIMERS];
1121
1122
MicroProfileTimer AccumTimers[MICROPROFILE_MAX_TIMERS];
1123
uint64_t AccumMaxTimers[MICROPROFILE_MAX_TIMERS];
1124
uint64_t AccumMinTimers[MICROPROFILE_MAX_TIMERS];
1125
uint64_t AccumTimersExclusive[MICROPROFILE_MAX_TIMERS];
1126
uint64_t AccumMaxTimersExclusive[MICROPROFILE_MAX_TIMERS];
1127
1128
MicroProfileTimer Frame[MICROPROFILE_MAX_TIMERS];
1129
uint64_t FrameExclusive[MICROPROFILE_MAX_TIMERS];
1130
1131
MicroProfileTimer Aggregate[MICROPROFILE_MAX_TIMERS];
1132
uint64_t AggregateMax[MICROPROFILE_MAX_TIMERS];
1133
uint64_t AggregateMin[MICROPROFILE_MAX_TIMERS];
1134
uint64_t AggregateExclusive[MICROPROFILE_MAX_TIMERS];
1135
uint64_t AggregateMaxExclusive[MICROPROFILE_MAX_TIMERS];
1136
1137
uint32_t FrameGroupThreadValid[MICROPROFILE_MAX_THREADS / 32 + 1];
1138
struct GroupTime
1139
{
1140
uint64_t nTicks;
1141
uint64_t nTicksExclusive;
1142
uint32_t nCount;
1143
};
1144
1145
GroupTime FrameGroupThread[MICROPROFILE_MAX_THREADS][MICROPROFILE_MAX_GROUPS];
1146
GroupTime FrameGroup[MICROPROFILE_MAX_GROUPS];
1147
uint64_t AccumGroup[MICROPROFILE_MAX_GROUPS];
1148
uint64_t AccumGroupMax[MICROPROFILE_MAX_GROUPS];
1149
1150
uint64_t AggregateGroup[MICROPROFILE_MAX_GROUPS];
1151
uint64_t AggregateGroupMax[MICROPROFILE_MAX_GROUPS];
1152
1153
MicroProfileGraphState Graph[MICROPROFILE_MAX_GRAPHS];
1154
uint32_t nGraphPut;
1155
1156
uint32_t nThreadActive[MICROPROFILE_MAX_THREADS];
1157
MicroProfileThreadLog* Pool[MICROPROFILE_MAX_THREADS];
1158
MicroProfileThreadLogGpu* PoolGpu[MICROPROFILE_MAX_THREADS];
1159
1160
MicroProfileThreadLog TimelineLog;
1161
uint32_t TimelineTokenFrameEnter[MICROPROFILE_TIMELINE_MAX_TOKENS];
1162
uint32_t TimelineTokenFrameLeave[MICROPROFILE_TIMELINE_MAX_TOKENS];
1163
uint32_t TimelineToken[MICROPROFILE_TIMELINE_MAX_TOKENS];
1164
const char* TimelineTokenStaticString[MICROPROFILE_TIMELINE_MAX_TOKENS];
1165
1166
uint32_t nTimelineFrameMax;
1167
MicroProfileFrameExtraCounterData* FrameExtraCounterData;
1168
MicroProfileCsvConfig CsvConfig;
1169
const char* pSettings;
1170
const char* pSettingsReadOnly;
1171
const char* pSettingsTemp;
1172
1173
uint32_t nNumLogs;
1174
uint32_t nNumLogsGpu;
1175
uint32_t nMemUsage;
1176
int nFreeListHead;
1177
1178
uint32_t nFrameCurrent;
1179
uint32_t nFrameCurrentIndex;
1180
uint32_t nFramePut;
1181
uint32_t nFrameNext;
1182
uint64_t nFramePutIndex;
1183
1184
MicroProfileFrameState Frames[MICROPROFILE_MAX_FRAME_HISTORY];
1185
1186
uint64_t nFlipTicks;
1187
uint64_t nFlipAggregate;
1188
uint64_t nFlipMax;
1189
uint64_t nFlipAggregateDisplay;
1190
uint64_t nFlipMaxDisplay;
1191
1192
MicroProfileThread ContextSwitchThread;
1193
bool bContextSwitchRunning;
1194
bool bContextSwitchStop;
1195
bool bContextSwitchAllThreads;
1196
bool bContextSwitchNoBars;
1197
uint32_t nContextSwitchUsage;
1198
uint32_t nContextSwitchLastPut;
1199
1200
int64_t nContextSwitchHoverTickIn;
1201
int64_t nContextSwitchHoverTickOut;
1202
uint32_t nContextSwitchHoverThread;
1203
uint32_t nContextSwitchHoverThreadBefore;
1204
uint32_t nContextSwitchHoverThreadAfter;
1205
uint8_t nContextSwitchHoverCpu;
1206
uint8_t nContextSwitchHoverCpuNext;
1207
1208
uint32_t CoreCount;
1209
uint8_t CoreEfficiencyClass[MICROPROFILE_MAX_CPU_CORES];
1210
1211
uint32_t nContextSwitchPut;
1212
MicroProfileContextSwitch ContextSwitch[MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE];
1213
1214
MpSocket ListenerSocket;
1215
uint32_t nWebServerPort;
1216
1217
char WebServerBuffer[MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE];
1218
uint32_t WebServerPut;
1219
1220
uint64_t nWebServerDataSent;
1221
1222
int WebSocketTimers;
1223
int WebSocketCounters;
1224
int WebSocketGroups;
1225
uint32_t nWebSocketDirty;
1226
MpSocket WebSockets[1];
1227
int64_t WebSocketFrameLast[1];
1228
uint32_t nNumWebSockets;
1229
uint32_t nSocketFail; // for error propagation.
1230
1231
MicroProfileThread WebSocketSendThread;
1232
bool WebSocketThreadRunning;
1233
bool WebSocketThreadJoined;
1234
1235
uint32_t WSCategoriesSent;
1236
uint32_t WSGroupsSent;
1237
uint32_t WSTimersSent;
1238
uint32_t WSCountersSent;
1239
MicroProfileWebSocketBuffer WSBuf;
1240
char* pJsonSettings;
1241
const char* pJsonSettingsName;
1242
bool bJsonSettingsReadOnly;
1243
uint32_t nJsonSettingsPending;
1244
uint32_t nJsonSettingsBufferSize;
1245
uint32_t nWSWasConnected;
1246
uint32_t nMicroProfileShutdown;
1247
uint32_t nWSViewMode;
1248
1249
char CounterNames[MICROPROFILE_MAX_COUNTER_NAME_CHARS];
1250
MicroProfileCounterInfo CounterInfo[MICROPROFILE_MAX_COUNTERS];
1251
MicroProfileCounterSource CounterSource[MICROPROFILE_MAX_COUNTERS];
1252
uint32_t nNumCounters;
1253
uint32_t nCounterNamePos;
1254
std::atomic<int64_t> Counters[MICROPROFILE_MAX_COUNTERS];
1255
std::atomic<double>* CountersDouble;
1256
#if MICROPROFILE_COUNTER_HISTORY // uses 1kb per allocated counter. 512kb for default counter count
1257
uint32_t nCounterHistoryPut;
1258
int64_t nCounterHistory[MICROPROFILE_GRAPH_HISTORY][MICROPROFILE_MAX_COUNTERS]; // flipped to make swapping cheap, drawing more expensive.
1259
int64_t nCounterMax[MICROPROFILE_MAX_COUNTERS];
1260
int64_t nCounterMin[MICROPROFILE_MAX_COUNTERS];
1261
double* dCounterHistory;
1262
double* dCounterMax;
1263
double* dCounterMin;
1264
#endif
1265
1266
MicroProfileThread AutoFlipThread;
1267
std::atomic<uint32_t> nAutoFlipDelay;
1268
std::atomic<uint32_t> nAutoFlipStop;
1269
1270
MicroProfileStrings Strings;
1271
MicroProfileToken CounterToken_MicroProfile;
1272
MicroProfileToken CounterToken_StringBlock;
1273
MicroProfileToken CounterToken_StringBlock_Count;
1274
MicroProfileToken CounterToken_StringBlock_Waste;
1275
MicroProfileToken CounterToken_StringBlock_Strings;
1276
MicroProfileToken CounterToken_StringBlock_Memory;
1277
1278
MicroProfileToken CounterToken_Alloc;
1279
MicroProfileToken CounterToken_Alloc_Memory;
1280
MicroProfileToken CounterToken_Alloc_Count;
1281
1282
#if MICROPROFILE_DYNAMIC_INSTRUMENT
1283
uint32_t DynamicTokenIndex;
1284
MicroProfileToken DynamicTokens[MICROPROFILE_MAX_DYNAMIC_TOKENS];
1285
void* FunctionsInstrumented[MICROPROFILE_MAX_DYNAMIC_TOKENS];
1286
const char* FunctionsInstrumentedName[MICROPROFILE_MAX_DYNAMIC_TOKENS];
1287
const char* FunctionsInstrumentedModuleNames[MICROPROFILE_MAX_DYNAMIC_TOKENS];
1288
// const char* FunctionsInstrumentedUnmangled[MICROPROFILE_MAX_DYNAMIC_TOKENS];
1289
uint32_t WSFunctionsInstrumentedSent;
1290
MicroProfileSymbolState SymbolState;
1291
1292
MicroProfileSymbolModule SymbolModules[MICROPROFILE_INSTRUMENT_MAX_MODULES];
1293
char SymbolModuleNameBuffer[MICROPROFILE_INSTRUMENT_MAX_MODULE_CHARS];
1294
int SymbolModuleNameOffset;
1295
int SymbolNumModules;
1296
int WSSymbolModulesSent;
1297
std::atomic<int> nSymbolsDirty;
1298
1299
MicroProfileFunctionQuery* pPendingQuery;
1300
MicroProfileFunctionQuery* pFinishedQuery;
1301
MicroProfileFunctionQuery* pQueryFreeList;
1302
uint32_t nQueryProcessed;
1303
uint32_t nNumQueryFree;
1304
uint32_t nNumQueryAllocated;
1305
1306
int SymbolThreadRunning;
1307
int SymbolThreadFinished;
1308
MicroProfileThread SymbolThread;
1309
int nNumPatchErrors;
1310
MicroProfilePatchError PatchErrors[MICROPROFILE_MAX_PATCH_ERRORS];
1311
int nNumPatchErrorFunctions;
1312
const char* PatchErrorFunctionNames[MICROPROFILE_MAX_PATCH_ERRORS];
1313
MicroProfileSuspendState SuspendState;
1314
MicroProfileArray<MicroProfileInstrumentMemoryRegion> MemoryRegions;
1315
#endif
1316
1317
int GpuQueue;
1318
MicroProfileThreadLogGpu* pGpuGlobal;
1319
MicroProfileGpuTimerState* pGPU;
1320
};
1321
1322
// Returns the 2-bit entry type stored in bits 62..63 of a log entry.
inline uint32_t MicroProfileLogGetType(MicroProfileLogEntry Index)
{
	return (uint32_t)(((MP_LOG_BEGIN_MASK & Index) >> 62) & 0x3);
}
1326
1327
// Returns the 14-bit timer index stored in bits 48..61 of a log entry.
inline uint64_t MicroProfileLogGetTimerIndex(MicroProfileLogEntry Index)
{
	return (0x3fff & (Index >> 48));
}
1331
// Returns the 16-bit data-size field (bits 32..47) of an MP_LOG_EXTENDED entry,
// or 0 for every other entry type.
uint32_t MicroProfileLogGetDataSize(MicroProfileLogEntry Index)
{
	if(MicroProfileLogGetType(Index) != MP_LOG_EXTENDED)
	{
		return 0;
	}
	return (uint32_t)(0xffff & (Index >> 32));
}
1338
1339
// Returns the 14-bit extended token stored in bits 48..61 of an extended log entry.
inline EMicroProfileTokenExtended MicroProfileLogGetExtendedToken(MicroProfileLogEntry Index)
{
	return (EMicroProfileTokenExtended)(0x3fff & (Index >> 48));
}
1343
1344
// Returns the 16-bit data-size field (bits 32..47) without checking the entry type.
inline uint32_t MicroProfileLogGetExtendedDataSize(MicroProfileLogEntry Index)
{
	return (uint32_t)(0xffff & (Index >> 32));
}
1348
1349
// Returns the low 32 bits of an extended log entry (the payload written by MicroProfileMakeLogExtended).
inline uint32_t MicroProfileLogGetExtendedPayload(MicroProfileLogEntry Index)
{
	return (uint32_t)(0xffffffff & Index);
}
1353
1354
// Returns the payload of a "no data" extended entry: the bits selected by MP_LOG_TICK_MASK.
inline uint64_t MicroProfileLogGetExtendedPayloadNoData(MicroProfileLogEntry Index)
{
	return (uint64_t)(MP_LOG_TICK_MASK & Index);
}
1358
1359
// Returns the pointer payload of a "no data" extended entry: the bits selected by
// MP_LOG_PAYLOAD_PTR_MASK, reinterpreted as a pointer.
inline void* MicroProfileLogGetExtendedPayloadNoDataPtr(MicroProfileLogEntry Index)
{
	return (void*)(MP_LOG_PAYLOAD_PTR_MASK & Index);
}
1363
1364
// Forward declarations of the log-entry packers defined just below.
MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick);
MicroProfileLogEntry MicroProfileMakeLogExtended(EMicroProfileTokenExtended eTokenExt, uint32_t nDataSizeQWords, uint32_t nPayload);
MicroProfileLogEntry MicroProfileMakeLogExtendedNoData(EMicroProfileTokenExtended eTokenExt, uint64_t nTick);
1367
1368
// Packs a regular log entry: nBegin (entry type) in bits 62..63, the low 14 bits of
// nToken in bits 48..61 and the MP_LOG_TICK_MASK bits of nTick in the remainder.
// The asserts verify that the type and timer index round-trip through the accessors.
inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick)
{
	MicroProfileLogEntry Entry = (nBegin << 62) | ((0x3fff & nToken) << 48) | (MP_LOG_TICK_MASK & nTick);
	uint32_t nType = MicroProfileLogGetType(Entry);
	uint64_t nTimerIndex = MicroProfileLogGetTimerIndex(Entry);
	MP_ASSERT(nType == nBegin);
	MP_ASSERT(nTimerIndex == (nToken & 0x3fff));
	// only referenced by the asserts; keep builds where MP_ASSERT expands to nothing warning-free
	(void)nType;
	(void)nTimerIndex;
	return Entry;
}
1377
1378
// extended data, with the option to store 0xfffe * 8 bytes after
1379
inline MicroProfileLogEntry MicroProfileMakeLogExtended(EMicroProfileTokenExtended eTokenExt, uint32_t nDataSizeQWords, uint32_t nPayload)
1380
{
1381
MP_ASSERT(nDataSizeQWords < 0xffff);
1382
MicroProfileLogEntry Entry = (((uint64_t)MP_LOG_EXTENDED) << 62) | ((0x3fff & (uint64_t)eTokenExt) << 48) | ((0xffff & (uint64_t)nDataSizeQWords) << 32) | nPayload;
1383
1384
MP_ASSERT(MicroProfileLogGetExtendedToken(Entry) == eTokenExt);
1385
MP_ASSERT(MicroProfileLogGetExtendedDataSize(Entry) == nDataSizeQWords);
1386
MP_ASSERT(MicroProfileLogGetExtendedPayload(Entry) == nPayload);
1387
1388
return Entry;
1389
}
1390
// extended with no data, but instead 48 bits payload
1391
inline MicroProfileLogEntry MicroProfileMakeLogExtendedNoData(EMicroProfileTokenExtended eTokenExt, uint64_t nPayload)
1392
{
1393
MicroProfileLogEntry Entry = (((uint64_t)MP_LOG_EXTENDED_NO_DATA) << 62) | ((0x3fff & (uint64_t)eTokenExt) << 48) | (MP_LOG_TICK_MASK & nPayload);
1394
1395
MP_ASSERT(MicroProfileLogGetExtendedToken(Entry) == eTokenExt);
1396
MP_ASSERT(MicroProfileLogGetExtendedPayloadNoData(Entry) == nPayload);
1397
1398
return Entry;
1399
}
1400
1401
// extended with no data, but instead 61 bits payload. used to store a pointer.
1402
inline MicroProfileLogEntry MicroProfileMakeLogExtendedNoDataPtr(uint64_t nPayload)
1403
{
1404
uint64_t hest = ETOKEN_CSTR_PTR;
1405
MicroProfileLogEntry Entry = (((uint64_t)MP_LOG_EXTENDED_NO_DATA) << 62) | (hest << 48) | (MP_LOG_PAYLOAD_PTR_MASK & nPayload);
1406
uint64_t v0 = (MP_LOG_PAYLOAD_PTR_MASK & nPayload);
1407
uint64_t v1 = (uint64_t)MicroProfileLogGetExtendedPayloadNoDataPtr(Entry);
1408
1409
MP_ASSERT(v0 == v1);
1410
return Entry;
1411
}
1412
1413
inline uint32_t MicroProfileGetQWordSize(uint32_t nDataSize)
1414
{
1415
uint32_t nSize = (nDataSize + 7) / 8;
1416
MP_ASSERT(nSize < 0xffff); // won't pack...
1417
return nSize;
1418
}
1419
1420
namespace
{
// Overlays a 64-bit log entry with 7 message bytes plus one extra byte `h`.
// With the little-endian layout `h` is the last (8th) byte of the struct.
struct MicroProfilePayloadPack
{
	union
	{
		struct
		{
#if MICROPROFILE_BIG_ENDIAN /// NOT implemented.
			char h;
			char message[7];
#else
			char message[7];
			char h;
#endif
		};
		uint64_t LogEntry;
	};
};
} // namespace
1440
1441
// Returns End - Start computed on the low 48 bits of both entries, as a signed value.
// Shifting left by 16 discards the top 16 bits of each entry; shifting the difference
// back right by 16 sign-extends the 48-bit result.
inline int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End)
{
	// Shift and subtract in unsigned arithmetic: entries normally have their top bits set,
	// and left-shifting such a value as a signed integer (or overflowing the signed
	// subtraction) is undefined behaviour before C++20. The unsigned form wraps and gives
	// the same bit pattern.
	const uint64_t nStartShifted = (uint64_t)Start << 16;
	const uint64_t nEndShifted = (uint64_t)End << 16;
	const int64_t nDifference = (int64_t)(nEndShifted - nStartShifted);
	return nDifference >> 16;
}
1448
// Returns whichever of A and B has the later tick according to MicroProfileLogTickDifference
// (B when the ticks are equal). The full entry value is returned, not just the tick.
inline int64_t MicroProfileLogTickMax(MicroProfileLogEntry A, MicroProfileLogEntry B)
{
	const int64_t nDelta = MicroProfileLogTickDifference(A, B);
	return nDelta < 0 ? A : B;
}
1460
1461
// Returns whichever of A and B has the earlier tick according to MicroProfileLogTickDifference
// (A when the ticks are equal). The full entry value is returned, not just the tick.
inline int64_t MicroProfileLogTickMin(MicroProfileLogEntry A, MicroProfileLogEntry B)
{
	const int64_t nDelta = MicroProfileLogTickDifference(A, B);
	return nDelta < 0 ? B : A;
}
1473
inline int64_t MicroProfileLogTickClamp(uint64_t T, uint64_t min, uint64_t max)
1474
{
1475
return MicroProfileLogTickMin(MicroProfileLogTickMax(T, min), max);
1476
}
1477
1478
// Extracts the tick field (the MP_LOG_TICK_MASK bits) of a log entry.
inline int64_t MicroProfileLogGetTick(MicroProfileLogEntry e)
{
	return MP_LOG_TICK_MASK & e;
}
1482
1483
// Returns e with its tick field replaced by the MP_LOG_TICK_MASK bits of nTick;
// all bits outside the mask are kept from e.
inline int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick)
{
	return (MP_LOG_TICK_MASK & nTick) | (e & ~MP_LOG_TICK_MASK);
}
1487
1488
// Returns the timer index held in the low 16 bits of a token.
inline uint16_t MicroProfileGetTimerIndex(MicroProfileToken t)
{
	return (uint16_t)(t & 0xffff);
}
1492
// Returns the 32-bit group mask held in bits 16..47 of a token.
inline uint32_t MicroProfileGetGroupMask(MicroProfileToken t)
{
	return (uint32_t)((t >> 16) & MICROPROFILE_GROUP_MASK_ALL);
}
1496
// Returns the group index held in bits 48 and up of a token.
inline uint32_t MicroProfileGetGroupMaskIndex(MicroProfileToken t)
{
	return (uint32_t)(t >> 48);
}
1500
1501
// Builds a token: nGroupIndex in bits 48..63, nGroupMask in bits 16..47, nTimer in bits 0..15.
// Breaks into the debugger if the result collides with the MP_LOG_CSTR_MASK bits.
inline MicroProfileToken MicroProfileMakeToken(uint32_t nGroupMask, uint16_t nGroupIndex, uint16_t nTimer)
{
	const uint64_t nIndexBits = (uint64_t)nGroupIndex << 48llu;
	const uint64_t nMaskBits = (uint64_t)nGroupMask << 16llu;
	uint64_t token = nIndexBits | nMaskBits | nTimer;
	if(0 != (token & MP_LOG_CSTR_MASK))
	{
		MP_BREAK(); // should never happen
	}
	return token;
}
1510
1511
// Returns the smaller of a and b (b when they compare equal).
template <typename T>
T MicroProfileMin(T a, T b)
{
	return a < b ? a : b;
}
1516
1517
// Returns the larger of a and b (b when they compare equal).
template <typename T>
T MicroProfileMax(T a, T b)
{
	return a > b ? a : b;
}
1522
// Clamps a into [min_, max_]: raises it to at least min_, then caps it at max_.
template <typename T>
T MicroProfileClamp(T a, T min_, T max_)
{
	const T lowerBounded = MicroProfileMax(min_, a);
	return MicroProfileMin(max_, lowerBounded);
}
1527
1528
// Converts a duration in milliseconds to ticks for a clock running at nTicksPerSecond.
// The product is computed in float and truncated toward zero by the cast.
inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond)
{
	return (int64_t)(fMs * 0.001f * nTicksPerSecond);
}
1532
1533
// Returns the factor that converts ticks to milliseconds (1000 / ticks-per-second).
// A frequency of 0 is treated as 1 to avoid dividing by zero.
inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond)
{
	return 1000.f / (nTicksPerSecond ? nTicksPerSecond : 1);
}
1537
float MicroProfileTickToMsMultiplierCpu()
1538
{
1539
return MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
1540
}
1541
1542
float MicroProfileTickToMsMultiplierGpu()
1543
{
1544
return MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
1545
}
1546
// Looks up the group a timer token belongs to via the global TimerToGroup table.
uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
{
	const uint16_t nTimerIndex = MicroProfileGetTimerIndex(t);
	return (uint16_t)MicroProfileGet()->TimerToGroup[nTimerIndex];
}
1550
1551
uint64_t MicroProfileTick()
1552
{
1553
return MP_TICK();
1554
}
1555
1556
#ifdef _WIN32
1557
#include <windows.h>
1558
#define fopen microprofile_fopen_helper
1559
1560
FILE* microprofile_fopen_helper(const char* filename, const char* mode)
1561
{
1562
FILE* F = 0;
1563
if(0 == fopen_s(&F, filename, mode))
1564
{
1565
return F;
1566
}
1567
return 0;
1568
}
1569
1570
int64_t MicroProfileTicksPerSecondCpu()
1571
{
1572
static int64_t nTicksPerSecond = 0;
1573
if(nTicksPerSecond == 0)
1574
{
1575
QueryPerformanceFrequency((LARGE_INTEGER*)&nTicksPerSecond);
1576
}
1577
return nTicksPerSecond;
1578
}
1579
int64_t MicroProfileGetTick()
1580
{
1581
int64_t ticks;
1582
QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
1583
return ticks;
1584
}
1585
1586
#endif
1587
1588
#if 1
1589
1590
typedef void* (*MicroProfileThreadFunc)(void*);
1591
1592
#ifndef _WIN32
1593
typedef pthread_t MicroProfileThread;
1594
// pthread implementation: starts Func (called with a null argument) on a new thread
// created with default attributes and stores its handle in *pThread.
void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
{
	pthread_attr_t Attr;
	int r = pthread_attr_init(&Attr);
	MP_ASSERT(r == 0);
	r = pthread_create(pThread, &Attr, Func, 0);
	MP_ASSERT(r == 0); // previously the creation result was silently ignored
	// the attribute object is only needed for the create call; release it again
	pthread_attr_destroy(&Attr);
	(void)r;
}
1601
// pthread implementation: blocks until the thread stored in *pThread has finished.
void MicroProfileThreadJoin(MicroProfileThread* pThread)
{
	const int nResult = pthread_join(*pThread, 0);
	MP_ASSERT(nResult == 0);
	(void)nResult; // only referenced by the assert
}
1606
#elif defined(_WIN32)
1607
typedef HANDLE MicroProfileThread;
1608
// Win32 thread entry point: adapts the DWORD(void*) signature CreateThread expects to a
// MicroProfileThreadFunc. pFunc is the function to run; it is called with a null argument
// and its pointer result is truncated to 32 bits for the thread exit code.
DWORD __stdcall ThreadTrampoline(void* pFunc)
{
	MicroProfileThreadFunc Func = (MicroProfileThreadFunc)pFunc;
	void* pResult = Func(0);
	return (uint32_t)(uintptr_t)pResult;
}
1613
1614
// Win32 implementation: starts Func on a new thread through ThreadTrampoline and stores
// the thread handle in *pThread.
void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
{
	// A function pointer does not convert to void* implicitly in standard C++; cast it
	// explicitly (ThreadTrampoline casts it back).
	*pThread = CreateThread(0, 0, ThreadTrampoline, (void*)Func, 0, 0);
}
1618
// Win32 implementation: waits without timeout for the thread to exit, then closes its handle.
void MicroProfileThreadJoin(MicroProfileThread* pThread)
{
	HANDLE hThread = *pThread;
	WaitForSingleObject(hThread, INFINITE);
	CloseHandle(hThread);
}
1623
#else
1624
#include <thread>
1625
typedef std::thread* MicroProfileThread;
1626
// std::thread implementation: allocates storage with MP_ALLOC_OBJECT and constructs the
// thread in place, running Func with a null argument.
// NOTE(review): this sits in the #else of `#ifndef _WIN32 / #elif defined(_WIN32)`, so the
// preprocessor can never select it as the conditions are written.
inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
{
	*pThread = MP_ALLOC_OBJECT(std::thread);
	new(*pThread) std::thread(Func, nullptr);
}
1631
// std::thread implementation: joins the thread, destroys the object, frees its storage
// and resets *pThread to null.
// NOTE(review): like the matching MicroProfileThreadStart, this #else branch is unreachable.
inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
{
	std::thread* pStdThread = *pThread;
	pStdThread->join();
	pStdThread->~thread();
	MP_FREE(*pThread);
	*pThread = 0;
}
1638
#endif
1639
#endif
1640
1641
#if MICROPROFILE_WEBSERVER
1642
1643
#ifdef _WIN32
1644
// True when f is the Winsock invalid-socket sentinel. The argument is parenthesised so
// that expression arguments are compared as a whole.
#define MP_INVALID_SOCKET(f) ((f) == INVALID_SOCKET)
1645
#else
1646
#include <fcntl.h>
1647
#include <netinet/in.h>
1648
#include <sys/socket.h>
1649
// True when f is a negative (invalid) socket descriptor. The argument is parenthesised so
// that expression arguments are compared as a whole.
#define MP_INVALID_SOCKET(f) ((f) < 0)
1650
#endif
1651
1652
// Web server entry points (real implementations, MICROPROFILE_WEBSERVER enabled).
void MicroProfileWebServerStart();
void MicroProfileWebServerStop();
void MicroProfileWebServerJoin();
bool MicroProfileWebServerUpdate();
void MicroProfileDumpToFile();
1657
1658
#else
1659
1660
// Web server disabled: the entry points become statement-safe no-ops, and the update
// call evaluates to false.
#define MicroProfileWebServerStart() do { } while(0)
#define MicroProfileWebServerStop() do { } while(0)
#define MicroProfileWebServerJoin() do { } while(0)
#define MicroProfileWebServerUpdate() false
#define MicroProfileDumpToFile() do { } while(0)
1677
#endif
1678
1679
#include <algorithm>
1680
#include <math.h>
1681
#include <stdio.h>
1682
#include <stdlib.h>
1683
1684
#if MICROPROFILE_DEBUG
1685
#ifdef _WIN32
1686
void uprintf(const char* fmt, ...)
1687
{
1688
va_list args;
1689
va_start(args, fmt);
1690
char buffer[1024];
1691
stbsp_vsnprintf(buffer, sizeof(buffer) - 1, fmt, args);
1692
OutputDebugStringA(buffer);
1693
va_end(args);
1694
}
1695
#else
1696
#define uprintf(...) printf(__VA_ARGS__)
1697
#endif
1698
#else
1699
// Non-debug builds: uprintf compiles to nothing. The arguments are only named inside
// sizeof, so they are still type-checked but never evaluated.
#define uprintf(...)                                                                                                                                                                                   \
	do                                                                                                                                                                                                 \
	{                                                                                                                                                                                                  \
		(void)sizeof(__VA_ARGS__);                                                                                                                                                                     \
	} while(0)
1704
#endif
1705
1706
#define S g_MicroProfile
1707
1708
MicroProfile g_MicroProfile;
1709
#ifdef MICROPROFILE_IOS
1710
// iOS doesn't support __thread
1711
static pthread_key_t g_MicroProfileThreadLogKey;
1712
static pthread_once_t g_MicroProfileThreadLogKeyOnce = PTHREAD_ONCE_INIT;
1713
1714
static void MicroProfileCreateThreadLogKey()
1715
{
1716
pthread_key_create(&g_MicroProfileThreadLogKey, NULL);
1717
}
1718
#else
1719
MP_THREAD_LOCAL MicroProfileThreadLog* g_MicroProfileThreadLogThreadLocal = 0;
1720
#endif
1721
static bool g_bUseLock = false; /// This is used because windows does not support using mutexes under dll init(which is where global initialization is handled)
1722
1723
// Timers and counter the profiler uses to measure itself (all in the "MicroProfile" group).
MICROPROFILE_DEFINE(g_MicroProfileFlip, "MicroProfile", "MicroProfileFlip", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileThreadLoop, "MicroProfile", "ThreadLoop", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileClear, "MicroProfile", "Clear", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileAccumulate, "MicroProfile", "Accumulate", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileContextSwitchSearch, "MicroProfile", "ContextSwitchSearch", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileGpuSubmit, "MicroProfile", "MicroProfileGpuSubmit", MP_HOTPINK2);
MICROPROFILE_DEFINE(g_MicroProfileSendLoop, "MicroProfile", "MicroProfileSocketSendLoop", MP_GREEN4);
MICROPROFILE_DEFINE_LOCAL_ATOMIC_COUNTER(g_MicroProfileBytesPerFlip, "microprofile/bytesperflip");
1731
1732
// void MicroProfileHashTableInit(MicroProfileHashTable* pTable, uint32_t nInitialSize, MicroProfileHashCompareFunction CompareFunc, MicroProfileHashFunction HashFunc);
1733
void MicroProfileHashTableDestroy(MicroProfileHashTable* pTable);
1734
uint64_t MicroProfileHashTableHash(MicroProfileHashTable* pTable, uint64_t K);
1735
void MicroProfileHashTableGrow(MicroProfileHashTable* pTable);
1736
1737
bool MicroProfileHashTableSet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t Value, uint64_t H, bool bAllowGrow);
1738
bool MicroProfileHashTableGet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t* pValue);
1739
bool MicroProfileHashTableRemove(MicroProfileHashTable* pTable, uint64_t Key);
1740
1741
bool MicroProfileHashTableSetString(MicroProfileHashTable* pTable, const char* pKey, const char* pValue);
1742
bool MicroProfileHashTableGetString(MicroProfileHashTable* pTable, const char* pKey, const char** pValue);
1743
bool MicroProfileHashTableRemoveString(MicroProfileHashTable* pTable, const char* pKey);
1744
1745
bool MicroProfileHashTableSetPtr(MicroProfileHashTable* pTable, const void* pKey, void* pValue);
1746
template <typename T = void>
1747
bool MicroProfileHashTableGetPtr(MicroProfileHashTable* pTable, const void* pKey, T** pValue = nullptr);
1748
bool MicroProfileHashTableRemovePtr(MicroProfileHashTable* pTable, const void* pKey);
1749
1750
// Bit flags for string interning.
// Presumably passed as nInternalFlags to MicroProfileStringIntern - confirm at the call sites.
enum
{
	ESTRINGINTERN_LOWERCASE = 1,
	ESTRINGINTERN_FORCEFORWARDSLASH = 0x2,
};
1755
const char* MicroProfileStringIntern(const char* pStr);
1756
const char* MicroProfileStringInternLower(const char* pStr);
1757
const char* MicroProfileStringInternSlash(const char* pStr);
1758
const char* MicroProfileStringIntern(const char* pStr, uint32_t nLen, uint32_t nInternalFlags = 0);
1759
1760
void MicroProfileStringsInit(MicroProfileStrings* pStrings);
1761
void MicroProfileStringsDestroy(MicroProfileStrings* pStrings);
1762
1763
MicroProfileToken MicroProfileCounterTokenInit(int nParent, uint32_t nFlags);
1764
void MicroProfileCounterTokenInitName(MicroProfileToken nToken, const char* pName);
1765
void MicroProfileCounterConfigToken(MicroProfileToken, uint32_t eFormat, int64_t nLimit, uint32_t nFlags);
1766
uint16_t MicroProfileFindGroup(const char* pGroup);
1767
1768
// Returns the profiler's global recursive mutex. It is a function-local static, so it is
// constructed on first use rather than during global initialisation.
inline std::recursive_mutex& MicroProfileMutex()
{
	static std::recursive_mutex Mutex;
	return Mutex;
}
1773
std::recursive_mutex& MicroProfileGetMutex()
1774
{
1775
return MicroProfileMutex();
1776
}
1777
1778
// Returns a second recursive mutex, distinct from MicroProfileMutex(), constructed on
// first use. Presumably guards the timeline data - confirm at the call sites.
inline std::recursive_mutex& MicroProfileTimelineMutex()
{
	static std::recursive_mutex Mutex;
	return Mutex;
}
1783
// Returns a pointer to the global profiler state (g_MicroProfile).
MICROPROFILE_API MicroProfile* MicroProfileGet()
{
	return &g_MicroProfile;
}
1787
1788
MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName);
1789
MicroProfileThreadLogGpu* MicroProfileThreadLogGpuAllocInternal();
1790
void* MicroProfileSocketSenderThread(void*);
1791
1792
void MicroProfileInit()
1793
{
1794
static bool bOnce = true;
1795
if(!bOnce)
1796
{
1797
return;
1798
}
1799
1800
std::recursive_mutex& mutex = MicroProfileMutex();
1801
bool bUseLock = g_bUseLock;
1802
if(bUseLock)
1803
mutex.lock();
1804
if(bOnce)
1805
{
1806
bOnce = false;
1807
memset(&S, 0, sizeof(S));
1808
1809
MicroProfileStringsInit(&S.Strings);
1810
1811
// these strings are used for counter names inside the string
1812
S.CounterToken_MicroProfile = MicroProfileCounterTokenInit(-1, 0);
1813
S.CounterToken_StringBlock = MicroProfileCounterTokenInit(S.CounterToken_MicroProfile, 0);
1814
S.CounterToken_StringBlock_Count = MicroProfileCounterTokenInit(S.CounterToken_StringBlock, 0);
1815
S.CounterToken_StringBlock_Waste = MicroProfileCounterTokenInit(S.CounterToken_StringBlock, 0);
1816
S.CounterToken_StringBlock_Strings = MicroProfileCounterTokenInit(S.CounterToken_StringBlock, 0);
1817
S.CounterToken_StringBlock_Memory = MicroProfileCounterTokenInit(S.CounterToken_StringBlock, 0);
1818
1819
S.CounterToken_Alloc = MicroProfileCounterTokenInit(S.CounterToken_MicroProfile, 0);
1820
S.CounterToken_Alloc_Memory = MicroProfileCounterTokenInit(S.CounterToken_Alloc, 0);
1821
S.CounterToken_Alloc_Count = MicroProfileCounterTokenInit(S.CounterToken_Alloc, 0);
1822
1823
MicroProfileCounterTokenInitName(S.CounterToken_MicroProfile, "microprofile");
1824
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock, "stringblock");
1825
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock_Count, "count");
1826
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock_Waste, "waste");
1827
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock_Strings, "strings");
1828
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock_Memory, "memory");
1829
1830
MicroProfileCounterTokenInitName(S.CounterToken_Alloc, "alloc");
1831
MicroProfileCounterTokenInitName(S.CounterToken_Alloc_Memory, "memory");
1832
MicroProfileCounterTokenInitName(S.CounterToken_Alloc_Count, "count");
1833
1834
S.nMemUsage += sizeof(S);
1835
for(int i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1836
{
1837
S.GroupInfo[i].pName[0] = '\0';
1838
}
1839
for(int i = 0; i < MICROPROFILE_MAX_CATEGORIES; ++i)
1840
{
1841
S.CategoryInfo[i].pName[0] = '\0';
1842
memset(S.CategoryInfo[i].nGroupMask, 0, sizeof(S.CategoryInfo[i].nGroupMask));
1843
}
1844
memcpy(&S.CategoryInfo[0].pName[0], "default", sizeof("default"));
1845
S.nCategoryCount = 1;
1846
for(int i = 0; i < MICROPROFILE_MAX_TIMERS; ++i)
1847
{
1848
S.TimerInfo[i].pName[0] = '\0';
1849
}
1850
S.nGroupCount = 0;
1851
S.nFlipStartTick = MP_TICK();
1852
S.nContextSwitchStalledTick = MP_TICK();
1853
S.nAggregateFlipTick = MP_TICK();
1854
memset(S.nActiveGroups, 0, sizeof(S.nActiveGroups));
1855
S.nFrozen = 0;
1856
S.nWasFrozen = 0;
1857
memset(S.nForceGroups, 0, sizeof(S.nForceGroups));
1858
memset(S.nActiveGroupsWanted, 0, sizeof(S.nActiveGroupsWanted));
1859
S.nStartEnabled = 0;
1860
S.nAllThreadsWanted = 1;
1861
S.nAggregateFlip = 0;
1862
S.nTotalTimers = 0;
1863
for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
1864
{
1865
S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
1866
}
1867
S.fReferenceTime = 33.33f;
1868
S.fRcpReferenceTime = 1.f / S.fReferenceTime;
1869
S.nFreeListHead = -1;
1870
int64_t nTick = MP_TICK();
1871
for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
1872
{
1873
S.Frames[i].nFrameStartCpu = nTick;
1874
S.Frames[i].nFrameStartGpu = MICROPROFILE_INVALID_TICK;
1875
}
1876
S.nWebServerPort = MICROPROFILE_WEBSERVER_PORT; // Use defined value as default port
1877
S.nWebServerDataSent = (uint64_t)-1;
1878
S.WebSocketTimers = -1;
1879
S.WebSocketCounters = -1;
1880
S.WebSocketGroups = -1;
1881
S.nSocketFail = 0;
1882
1883
S.DumpFrameCount = MICROPROFILE_WEBSERVER_DEFAULT_FRAMES;
1884
1885
#if MICROPROFILE_COUNTER_HISTORY
1886
S.nCounterHistoryPut = 0;
1887
for(uint32_t i = 0; i < MICROPROFILE_MAX_COUNTERS; ++i)
1888
{
1889
S.nCounterMin[i] = 0x7fffffffffffffff;
1890
S.nCounterMax[i] = 0x8000000000000000;
1891
}
1892
#endif
1893
S.GpuQueue = MICROPROFILE_GPU_INIT_QUEUE("GPU");
1894
S.pGpuGlobal = MicroProfileThreadLogGpuAllocInternal();
1895
MicroProfileGpuBegin(0, S.pGpuGlobal);
1896
1897
S.pJsonSettings = 0;
1898
S.pJsonSettingsName = nullptr;
1899
S.nJsonSettingsPending = 0;
1900
S.nJsonSettingsBufferSize = 0;
1901
S.nWSWasConnected = 0;
1902
1903
for(uint32_t i = 0; i < MICROPROFILE_TIMELINE_MAX_TOKENS; ++i)
1904
{
1905
S.TimelineTokenFrameEnter[i] = MICROPROFILE_INVALID_FRAME;
1906
S.TimelineTokenFrameLeave[i] = MICROPROFILE_INVALID_FRAME;
1907
S.TimelineTokenStaticString[i] = nullptr;
1908
S.TimelineToken[i] = 0;
1909
}
1910
memset(&S.AccumMinTimers[0], 0xFF, sizeof(S.AccumMinTimers));
1911
S.CountersDouble = (std::atomic<double>*)&S.Counters;
1912
#if MICROPROFILE_COUNTER_HISTORY
1913
S.dCounterHistory = (double*)S.nCounterHistory;
1914
S.dCounterMax = (double*)S.nCounterMax;
1915
S.dCounterMin = (double*)S.nCounterMin;
1916
#endif
1917
}
1918
MicroProfileUpdateSettingsPath();
1919
1920
#if MICROPROFILE_FRAME_EXTRA_DATA
1921
S.FrameExtraCounterData = (MicroProfileFrameExtraCounterData*)1;
1922
#endif
1923
MicroProfileCounterConfigToken(S.CounterToken_Alloc_Memory, MICROPROFILE_COUNTER_FORMAT_BYTES, 0, MICROPROFILE_COUNTER_FLAG_DETAILED);
1924
MICROPROFILE_COUNTER_CONFIG("MicroProfile/ThreadLog/Memory", MICROPROFILE_COUNTER_FORMAT_BYTES, 0, MICROPROFILE_COUNTER_FLAG_DETAILED);
1925
1926
if(bUseLock)
1927
{
1928
mutex.unlock();
1929
}
1930
}
1931
void MicroProfileUpdateSettingsPath()
1932
{
1933
if(S.pSettings)
1934
{
1935
MicroProfileFreeInternal((void*)S.pSettings);
1936
S.pSettings = nullptr;
1937
}
1938
if(S.pSettingsReadOnly)
1939
{
1940
MicroProfileFreeInternal((void*)S.pSettingsReadOnly);
1941
S.pSettingsReadOnly = nullptr;
1942
}
1943
if(S.pSettingsTemp)
1944
{
1945
MicroProfileFreeInternal((void*)S.pSettingsTemp);
1946
S.pSettingsTemp = nullptr;
1947
}
1948
auto DupeString = [](const char* BasePath, const char* File) -> const char*
1949
{
1950
size_t BaseLen = strlen(BasePath);
1951
bool TrailingSlash = BaseLen > 1 && (BasePath[BaseLen - 1] == '\\' || BasePath[BaseLen - 1] == '/');
1952
size_t Len = BaseLen + strlen(File) + 2;
1953
char* Data = (char*)MicroProfileAllocInternal(Len + 1, 1);
1954
#ifdef _WIN32
1955
char Slash = '\\';
1956
#else
1957
char Slash = '/';
1958
#endif
1959
if(TrailingSlash)
1960
snprintf(Data, Len, "%s%s", BasePath, File);
1961
else
1962
snprintf(Data, Len, "%s%c%s", BasePath, Slash, File);
1963
1964
return Data;
1965
};
1966
const char* pBaseSettingsPath = MICROPROFILE_GET_SETTINGS_FILE_PATH;
1967
S.pSettings = DupeString(pBaseSettingsPath, MICROPROFILE_SETTINGS_FILE);
1968
S.pSettingsReadOnly = DupeString(pBaseSettingsPath, MICROPROFILE_SETTINGS_FILE_BUILTIN);
1969
S.pSettingsTemp = DupeString(pBaseSettingsPath, MICROPROFILE_SETTINGS_FILE MICROPROFILE_SETTINGS_FILE_TEMP);
1970
}
1971
1972
// Forward declaration: MicroProfileShutdown() below calls this before its definition is visible.
void MicroProfileJoinContextSwitchTrace();
1973
1974
void MicroProfileShutdown()
1975
{
1976
{
1977
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1978
S.nMicroProfileShutdown = 1;
1979
MicroProfileStopContextSwitchTrace();
1980
}
1981
MicroProfileWebServerJoin();
1982
MicroProfileJoinContextSwitchTrace();
1983
{
1984
1985
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1986
if(S.pJsonSettings)
1987
{
1988
MP_FREE(S.pJsonSettings);
1989
S.pJsonSettings = 0;
1990
S.pJsonSettingsName = 0;
1991
S.nJsonSettingsBufferSize = 0;
1992
}
1993
if(S.pGPU)
1994
{
1995
MicroProfileGpuShutdownPlatform();
1996
}
1997
MicroProfileHashTableDestroy(&S.Strings.HashTable);
1998
MicroProfileStringsDestroy(&S.Strings);
1999
MICROPROFILE_FREE_NON_ALIGNED(S.WSBuf.pBufferAllocation);
2000
2001
MicroProfileFreeGpuQueue(S.GpuQueue);
2002
MicroProfileThreadLogGpuFree(S.pGpuGlobal);
2003
2004
for(uint32_t i = 0; i < S.nNumLogs; ++i)
2005
{
2006
#if MICROPROFILE_ASSERT_LOG_FREED
2007
MP_ASSERT(S.Pool[i]->nActive != 1);
2008
#endif
2009
MP_FREE(S.Pool[i]);
2010
}
2011
2012
for(uint32_t i = 0; i < S.nNumLogsGpu; ++i)
2013
{
2014
#if MICROPROFILE_ASSERT_LOG_FREED
2015
MP_ASSERT(!S.PoolGpu[i]->nAllocated);
2016
#endif
2017
MP_FREE(S.PoolGpu[i]);
2018
}
2019
MicroProfileFreeInternal((void*)S.pSettings);
2020
S.pSettings = nullptr;
2021
MicroProfileFreeInternal((void*)S.pSettingsReadOnly);
2022
S.pSettingsReadOnly = nullptr;
2023
MicroProfileFreeInternal((void*)S.pSettingsTemp);
2024
S.pSettingsTemp = nullptr;
2025
}
2026
}
2027
2028
static void* MicroProfileAutoFlipThread(void*)
2029
{
2030
MicroProfileOnThreadCreate("AutoFlipThread");
2031
while(0 == S.nAutoFlipStop.load())
2032
{
2033
MICROPROFILE_SCOPEI("MICROPROFILE", "AutoFlipThread", 0);
2034
MicroProfileSleep(S.nAutoFlipDelay);
2035
MicroProfileFlip(0);
2036
}
2037
MicroProfileOnThreadExit();
2038
return 0;
2039
}
2040
2041
void MicroProfileStartAutoFlip(uint32_t nMsDelay)
2042
{
2043
S.nAutoFlipDelay = nMsDelay;
2044
S.nAutoFlipStop.store(0);
2045
MicroProfileThreadStart(&S.AutoFlipThread, MicroProfileAutoFlipThread);
2046
}
2047
void MicroProfileStopAutoFlip()
2048
{
2049
S.nAutoFlipStop.store(1);
2050
MicroProfileThreadJoin(&S.AutoFlipThread);
2051
}
2052
2053
void MicroProfileEnableFrameExtraCounterData()
2054
{
2055
// should not be called at the same time as MicroProfileFlip.
2056
if(!S.FrameExtraCounterData)
2057
{
2058
S.FrameExtraCounterData = (MicroProfileFrameExtraCounterData*)1;
2059
}
2060
}
2061
2062
void MicroProfileCsvConfigEnd()
2063
{
2064
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG);
2065
S.CsvConfig.State = MicroProfileCsvConfig::ACTIVE;
2066
}
2067
void MicroProfileCsvConfigBegin(uint32_t MaxTimers, uint32_t MaxGroups, uint32_t MaxCounters, uint32_t Flags)
2068
{
2069
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::INACTIVE); // right now, only support being configured once.
2070
uint32_t TotalElements = MaxTimers + MaxGroups + MaxCounters;
2071
uint32_t BaseSize = (sizeof(MicroProfileCsvConfig) + 7) & 7;
2072
uint32_t TimerIndexSize = sizeof(uint16_t) * MaxTimers;
2073
uint32_t GroupIndexSize = sizeof(uint16_t) * MaxGroups;
2074
uint32_t CounterIndexSize = sizeof(uint16_t) * MaxCounters;
2075
uint32_t FrameBlockSize = TotalElements * sizeof(uint64_t);
2076
uint32_t FrameDataSize = FrameBlockSize * MICROPROFILE_MAX_FRAME_HISTORY;
2077
S.CsvConfig.NumTimers = 0;
2078
S.CsvConfig.NumGroups = 0;
2079
S.CsvConfig.NumCounters = 0;
2080
S.CsvConfig.MaxTimers = MaxTimers;
2081
S.CsvConfig.MaxGroups = MaxGroups;
2082
S.CsvConfig.MaxCounters = MaxCounters;
2083
S.CsvConfig.TotalElements = TotalElements;
2084
S.CsvConfig.TimerIndices = (uint16_t*)MicroProfileAllocInternal(TimerIndexSize, alignof(uint16_t));
2085
S.CsvConfig.pTimerNames = (const char**)MicroProfileAllocInternal(MaxTimers * sizeof(const char*), alignof(const char*));
2086
memset(S.CsvConfig.pTimerNames, 0, MaxTimers * sizeof(const char*));
2087
for(uint32_t i = 0; i < MaxTimers; ++i)
2088
S.CsvConfig.TimerIndices[i] = UINT16_MAX;
2089
S.CsvConfig.pGroupNames = (const char**)MicroProfileAllocInternal(MaxGroups * sizeof(const char*), alignof(const char*));
2090
memset(S.CsvConfig.pGroupNames, 0, MaxGroups * sizeof(const char*));
2091
S.CsvConfig.GroupIndices = (uint16_t*)MicroProfileAllocInternal(GroupIndexSize, alignof(uint16_t));
2092
for(uint32_t i = 0; i < MaxGroups; ++i)
2093
S.CsvConfig.GroupIndices[i] = UINT16_MAX;
2094
S.CsvConfig.pCounterNames = (const char**)MicroProfileAllocInternal(MaxCounters * sizeof(const char*), alignof(const char*));
2095
memset(S.CsvConfig.pCounterNames, 0, MaxCounters * sizeof(const char*));
2096
S.CsvConfig.CounterIndices = (uint16_t*)MicroProfileAllocInternal(CounterIndexSize, alignof(uint16_t));
2097
for(uint32_t i = 0; i < MaxCounters; ++i)
2098
S.CsvConfig.CounterIndices[i] = UINT16_MAX;
2099
S.CsvConfig.FrameData = (uint64_t*)MicroProfileAllocInternal(FrameDataSize, alignof(uint64_t));
2100
memset(S.CsvConfig.FrameData, 0, FrameDataSize);
2101
S.CsvConfig.State = MicroProfileCsvConfig::CONFIG;
2102
S.CsvConfig.Flags = Flags;
2103
}
2104
void MicroProfileCsvConfigAddTimer(const char* Group, const char* Timer, const char* Name, MicroProfileTokenType Type)
2105
{
2106
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG);
2107
if(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG && S.CsvConfig.NumTimers < S.CsvConfig.MaxTimers)
2108
{
2109
MicroProfileToken ret = MicroProfileGetToken(Group, Timer, MP_AUTO, Type, MICROPROFILE_TIMER_FLAG_PLACEHOLDER);
2110
if(ret != MICROPROFILE_INVALID_TOKEN)
2111
{
2112
MP_ASSERT(S.CsvConfig.NumTimers < S.CsvConfig.MaxTimers);
2113
uint16_t TimerIndex = MicroProfileGetTimerIndex(ret);
2114
for(uint32_t i = 0; i < S.CsvConfig.NumTimers; ++i)
2115
{
2116
if(S.CsvConfig.TimerIndices[i] == TimerIndex)
2117
return;
2118
}
2119
S.CsvConfig.pTimerNames[S.CsvConfig.NumTimers] = Name;
2120
S.CsvConfig.TimerIndices[S.CsvConfig.NumTimers++] = TimerIndex;
2121
}
2122
}
2123
}
2124
void MicroProfileCsvConfigAddGroup(const char* Group, const char* Name)
2125
{
2126
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG);
2127
if(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG && S.CsvConfig.NumGroups < S.CsvConfig.MaxGroups)
2128
{
2129
uint16_t Index = MicroProfileFindGroup(Group);
2130
MP_ASSERT(UINT16_MAX != Index);
2131
if(UINT16_MAX != Index)
2132
{
2133
MP_ASSERT(S.CsvConfig.NumGroups < S.CsvConfig.MaxGroups);
2134
for(uint32_t i = 0; i < S.CsvConfig.NumGroups; ++i)
2135
{
2136
if(S.CsvConfig.GroupIndices[i] == Index)
2137
return;
2138
}
2139
S.CsvConfig.pGroupNames[S.CsvConfig.NumGroups] = Name;
2140
S.CsvConfig.GroupIndices[S.CsvConfig.NumGroups++] = Index;
2141
}
2142
}
2143
}
2144
void MicroProfileCsvConfigAddCounter(const char* CounterName, const char* Name)
2145
{
2146
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG);
2147
if(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG && S.CsvConfig.NumCounters < S.CsvConfig.MaxCounters)
2148
{
2149
MicroProfileToken Token = MicroProfileGetCounterToken(CounterName, 0);
2150
if(MICROPROFILE_INVALID_TOKEN != Token)
2151
{
2152
MP_ASSERT(Token < UINT16_MAX);
2153
MP_ASSERT(S.CsvConfig.NumCounters < S.CsvConfig.MaxCounters);
2154
for(uint32_t i = 0; i < S.CsvConfig.NumCounters; ++i)
2155
{
2156
if(S.CsvConfig.CounterIndices[i] == (uint16_t)Token)
2157
return;
2158
}
2159
S.CsvConfig.pCounterNames[S.CsvConfig.NumCounters] = Name;
2160
S.CsvConfig.CounterIndices[S.CsvConfig.NumCounters++] = (uint16_t)Token;
2161
}
2162
}
2163
}
2164
2165
#ifdef MICROPROFILE_IOS
2166
inline MicroProfileThreadLog* MicroProfileGetThreadLog()
2167
{
2168
pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
2169
return (MicroProfileThreadLog*)pthread_getspecific(g_MicroProfileThreadLogKey);
2170
}
2171
2172
inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
2173
{
2174
pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
2175
pthread_setspecific(g_MicroProfileThreadLogKey, pLog);
2176
}
2177
#else
2178
MicroProfileThreadLog* MicroProfileGetThreadLog()
2179
{
2180
return g_MicroProfileThreadLogThreadLocal;
2181
}
2182
void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
2183
{
2184
g_MicroProfileThreadLogThreadLocal = pLog;
2185
}
2186
#endif
2187
2188
// Returns the calling thread's log. When none is set yet,
// MicroProfileInitThreadLog() is called first and the log is fetched again.
MicroProfileThreadLog* MicroProfileGetThreadLog2()
{
	MicroProfileThreadLog* pExisting = MicroProfileGetThreadLog();
	if(pExisting)
	{
		return pExisting;
	}
	MicroProfileInitThreadLog();
	return MicroProfileGetThreadLog();
}
2198
2199
struct MicroProfileScopeLock
2200
{
2201
bool bUseLock;
2202
int nUnlock;
2203
std::recursive_mutex& m;
2204
MicroProfileScopeLock(std::recursive_mutex& m)
2205
: bUseLock(g_bUseLock)
2206
, nUnlock(0)
2207
, m(m)
2208
{
2209
if(bUseLock)
2210
m.lock();
2211
}
2212
~MicroProfileScopeLock()
2213
{
2214
MP_ASSERT(nUnlock == 0);
2215
if(bUseLock)
2216
m.unlock();
2217
}
2218
void Unlock()
2219
{
2220
MP_ASSERT(bUseLock);
2221
m.unlock();
2222
nUnlock++;
2223
}
2224
void Lock()
2225
{
2226
m.lock();
2227
nUnlock--;
2228
}
2229
};
2230
2231
// Forward declaration: used by MicroProfileCreateThreadLog() below when it recycles a log.
void MicroProfileLogReset(MicroProfileThreadLog* pLog);
2232
// Forward declaration: used by MicroProfileCreateThreadLog() below on freshly allocated logs.
void MicroProfileLogClearInternal(MicroProfileThreadLog* pLog);
2233
2234
// Returns a thread log for the calling thread, marked active (nActive == 1).
//
// The log is taken from the free list when one is available, otherwise a new
// one is allocated and appended to S.Pool. When the pool is full and the free
// list is empty, the exited log (nActive == 2) with the highest idle frame
// count is reset first so that it ends up on the free list.
//
// pName: thread name, truncated to fit ThreadName. When null, a name of the
//        form "TID:[<thread id>]" is generated.
MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName)
{
	MicroProfileScopeLock L(MicroProfileMutex());

	if(S.nNumLogs == MICROPROFILE_MAX_THREADS && S.nFreeListHead == -1)
	{
		uprintf("recycling thread logs\n");
		// reuse the oldest.
		MicroProfileThreadLog* pOldest = 0;
		uint32_t nIdleFrames = 0;
		for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
		{
			MicroProfileThreadLog* pLog = S.Pool[i];
			uprintf("tlactive %p, %d. idle:%d\n", pLog, pLog->nActive, pLog->nIdleFrames);
			if(pLog->nActive == 2)
			{
				if(pLog->nIdleFrames >= nIdleFrames)
				{
					nIdleFrames = pLog->nIdleFrames;
					pOldest = pLog;
				}
			}
		}
		MP_ASSERT(pOldest);
		MicroProfileLogReset(pOldest);
	}

	MicroProfileThreadLog* pLog = 0;
	if(S.nFreeListHead != -1)
	{
		// Pop the head of the free list.
		pLog = S.Pool[S.nFreeListHead];
		MP_ASSERT(pLog->nPut.load() == 0);
		MP_ASSERT(pLog->nGet.load() == 0);
		S.nFreeListHead = S.Pool[S.nFreeListHead]->nFreeListNext;
	}
	else
	{
		MICROPROFILE_COUNTER_ADD("MicroProfile/ThreadLog/Allocated", 1);
		MICROPROFILE_COUNTER_ADD("MicroProfile/ThreadLog/Memory", sizeof(MicroProfileThreadLog));
		pLog = MP_ALLOC_OBJECT(MicroProfileThreadLog);
		MicroProfileLogClearInternal(pLog);
		S.nMemUsage += sizeof(MicroProfileThreadLog);
		pLog->nLogIndex = S.nNumLogs;
		MP_ASSERT(S.nNumLogs < MICROPROFILE_MAX_THREADS);
		S.Pool[S.nNumLogs++] = pLog;
	}

	const int maxlen = (int)sizeof(pLog->ThreadName) - 1;
	int len = 0;
	if(pName)
	{
		len = (int)strlen(pName);
		len = len < maxlen ? len : maxlen;
		memcpy(&pLog->ThreadName[0], pName, len);
	}
	else
	{
		len = snprintf(&pLog->ThreadName[0], sizeof(pLog->ThreadName) - 1, "TID:[%" PRId64 "]", (int64_t)MP_GETCURRENTTHREADID());
		// snprintf may return a negative value on error, or the untruncated
		// length when the output did not fit; keep the terminator index
		// inside what was actually written.
		if(len < 0)
		{
			len = 0;
		}
		else if(len > maxlen - 1)
		{
			len = maxlen - 1;
		}
	}
	pLog->ThreadName[len] = '\0';
	pLog->nThreadId = MP_GETCURRENTTHREADID();
	pLog->nFreeListNext = -1;
	pLog->nActive = 1;
	return pLog;
}
2298
2299
void MicroProfileOnThreadCreate(const char* pThreadName)
2300
{
2301
char Buffer[64];
2302
g_bUseLock = true;
2303
MicroProfileInit();
2304
MP_ASSERT(MicroProfileGetThreadLog() == 0);
2305
MicroProfileThreadLog* pLog = MicroProfileCreateThreadLog(pThreadName ? pThreadName : MicroProfileGetThreadName(Buffer));
2306
(void)Buffer;
2307
MP_ASSERT(pLog);
2308
MicroProfileSetThreadLog(pLog);
2309
}
2310
2311
// Resets an allocated GPU thread log to its initial state: put index and
// stack scope are zeroed, context and start are set to their all-ones
// sentinel values.
void MicroProfileThreadLogGpuReset(MicroProfileThreadLogGpu* pLog)
{
	MP_ASSERT(pLog->nAllocated);
	pLog->nPut = 0;
	pLog->nStackScope = 0;
	pLog->nStart = (uint32_t)-1;
	pLog->pContext = (void*)-1;
}
2319
2320
// Hands out a GPU thread log: reuses the first pooled log that is not
// currently allocated, otherwise allocates a new one and appends it to
// S.PoolGpu. The returned log is marked allocated and reset.
// Takes no lock itself; MicroProfileThreadLogGpuAlloc() is the locking wrapper.
MicroProfileThreadLogGpu* MicroProfileThreadLogGpuAllocInternal()
{
	MicroProfileThreadLogGpu* pResult = 0;
	for(uint32_t i = 0; i < S.nNumLogsGpu && !pResult; ++i)
	{
		MicroProfileThreadLogGpu* pCandidate = S.PoolGpu[i];
		if(pCandidate && !pCandidate->nAllocated)
		{
			pResult = pCandidate;
		}
	}

	if(!pResult)
	{
		pResult = MP_ALLOC_OBJECT(MicroProfileThreadLogGpu);
		int nNewIndex = S.nNumLogsGpu++;
		MP_ASSERT(nNewIndex < MICROPROFILE_MAX_THREADS);
		pResult->nId = nNewIndex;
		S.PoolGpu[nNewIndex] = pResult;
	}

	// Must be flagged before the reset, which asserts nAllocated.
	pResult->nAllocated = 1;
	MicroProfileThreadLogGpuReset(pResult);
	return pResult;
}
2344
2345
// Locking wrapper around MicroProfileThreadLogGpuAllocInternal(): holds the
// profiler mutex for the duration of the allocation.
MicroProfileThreadLogGpu* MicroProfileThreadLogGpuAlloc()
{
	std::lock_guard<std::recursive_mutex> Guard(MicroProfileMutex());
	MicroProfileThreadLogGpu* pAllocated = MicroProfileThreadLogGpuAllocInternal();
	return pAllocated;
}
2350
2351
// Returns a GPU thread log to the pool by clearing its allocated flag.
// The log itself is not freed here. Asserts that it was allocated.
void MicroProfileThreadLogGpuFree(MicroProfileThreadLogGpu* pLog)
{
	MP_ASSERT(pLog->nAllocated);
	pLog->nAllocated = 0;
}
2356
2357
int MicroProfileGetGpuQueue(const char* pQueueName)
2358
{
2359
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; i++)
2360
{
2361
MicroProfileThreadLog* pLog = S.Pool[i];
2362
if(pLog && pLog->nGpu && pLog->nActive && 0 == MP_STRCASECMP(pQueueName, pLog->ThreadName))
2363
{
2364
return i;
2365
}
2366
}
2367
MP_ASSERT(0); // call MicroProfileInitGpuQueue
2368
return 0;
2369
}
2370
2371
// Returns the active GPU queue log whose name matches pQueueName (compared
// with MP_STRCASECMP).
// Asserts and returns null when no such queue exists.
MicroProfileThreadLog* MicroProfileGetGpuQueueLog(const char* pQueueName)
{
	for(uint32_t nSlot = 0; nSlot < MICROPROFILE_MAX_THREADS; nSlot++)
	{
		MicroProfileThreadLog* pCandidate = S.Pool[nSlot];
		if(!pCandidate || !pCandidate->nGpu || !pCandidate->nActive)
		{
			continue;
		}
		if(0 == MP_STRCASECMP(pQueueName, pCandidate->ThreadName))
		{
			return pCandidate;
		}
	}
	MP_ASSERT(0); // call MicroProfileInitGpuQueue
	return 0;
}
2384
2385
int MicroProfileInitGpuQueue(const char* pQueueName)
2386
{
2387
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
2388
{
2389
MicroProfileThreadLog* pLog = S.Pool[i];
2390
if(pLog && 0 == MP_STRCASECMP(pQueueName, pLog->ThreadName))
2391
{
2392
2393
MP_ASSERT(0); // call MicroProfileInitGpuQueue only once per CommandQueue. name must not clash with threadname
2394
}
2395
}
2396
MicroProfileThreadLog* pLog = MicroProfileCreateThreadLog(pQueueName);
2397
pLog->nGpu = 1;
2398
pLog->nThreadId = 0;
2399
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
2400
{
2401
if(S.Pool[i] == pLog)
2402
{
2403
return i;
2404
}
2405
}
2406
MP_BREAK();
2407
return 0;
2408
}
2409
2410
void MicroProfileFreeGpuQueue(int nQueue)
2411
{
2412
MicroProfileThreadLog* pLog = S.Pool[nQueue];
2413
if(pLog)
2414
{
2415
MP_ASSERT(pLog->nActive == 1);
2416
pLog->nActive = 2;
2417
}
2418
}
2419
2420
// Accessor for the global GPU thread log stored in S.pGpuGlobal.
MicroProfileThreadLogGpu* MicroProfileGetGlobalGpuThreadLog()
{
	MicroProfileThreadLogGpu* pGlobalLog = S.pGpuGlobal;
	return pGlobalLog;
}
2424
2425
// Accessor for the global GPU queue index stored in S.GpuQueue.
MICROPROFILE_API int MicroProfileGetGlobalGpuQueue()
{
	const int nGlobalQueue = S.GpuQueue;
	return nGlobalQueue;
}
2429
// Zeroes a thread log from its Log array up to the end of the object, then
// resets the nPut/nGet atomics through store().
void MicroProfileLogClearInternal(MicroProfileThreadLog* pLog)
{
	// can't clear atomics with memset, so the wipe begins at Log[0] and the
	// atomics are stored explicitly afterwards.
	char* pWipeBegin = (char*)&pLog->Log[0];
	char* pWipeEnd = (char*)(pLog + 1);
	memset(pWipeBegin, 0, (size_t)(pWipeEnd - pWipeBegin));
	pLog->nPut.store(0);
	pLog->nGet.store(0);
}
2438
// Clears pLog, pushes it onto the free list and resets its per-frame log
// start positions. Holds the profiler mutex while doing so.
//
// pLog must be one of the logs in S.Pool. If it is not, the function asserts
// and returns without touching any state.
void MicroProfileLogReset(MicroProfileThreadLog* pLog)
{
	std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());

	// Find the pool slot of this log.
	int32_t nLogIndex = -1;
	for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
	{
		if(pLog == S.Pool[i])
		{
			nLogIndex = i;
			break;
		}
	}
	// Slot 0 is a valid pool index (the first log ever created lives there).
	MP_ASSERT(nLogIndex >= 0 && nLogIndex < MICROPROFILE_MAX_THREADS);
	if(nLogIndex < 0)
	{
		// Unknown log: indexing nLogStart / the free list with -1 would
		// corrupt profiler state.
		return;
	}

	MicroProfileLogClearInternal(pLog);
	pLog->nFreeListNext = S.nFreeListHead;
	S.nFreeListHead = nLogIndex;
	for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
	{
		S.Frames[i].nLogStart[nLogIndex] = 0;
	}
}
2460
2461
void MicroProfileOnThreadExit()
2462
{
2463
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
2464
if(pLog)
2465
{
2466
MP_ASSERT(pLog->nActive == 1);
2467
pLog->nActive = 2;
2468
}
2469
}
2470
2471
void MicroProfileInitThreadLog()
2472
{
2473
MicroProfileOnThreadCreate(nullptr);
2474
}
2475
2476
// Searches the registered timers for one whose name matches pName and whose
// group name matches pGroup (both compared with MP_STRCASECMP).
// Returns its token, or MICROPROFILE_INVALID_TOKEN when there is none.
MicroProfileToken MicroProfileFindTokenInternal(const char* pGroup, const char* pName)
{
	MicroProfileInit();
	MicroProfileScopeLock L(MicroProfileMutex());
	for(uint32_t nTimer = 0; nTimer < S.nTotalTimers; ++nTimer)
	{
		if(0 != MP_STRCASECMP(pName, S.TimerInfo[nTimer].pName))
		{
			continue;
		}
		if(0 != MP_STRCASECMP(pGroup, S.GroupInfo[S.TimerToGroup[nTimer]].pName))
		{
			continue;
		}
		return S.TimerInfo[nTimer].nToken;
	}
	return MICROPROFILE_INVALID_TOKEN;
}
2489
// Returns the token for pGroup/pName. The lookup goes through
// MicroProfileGetToken() with the placeholder flag, as a CPU timer with
// automatic colour.
MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName)
{
	MicroProfileToken Token = MicroProfileGetToken(pGroup, pName, MP_AUTO, MicroProfileTokenTypeCpu, MICROPROFILE_TIMER_FLAG_PLACEHOLDER);
	return Token;
}
2493
2494
uint16_t MicroProfileFindGroup(const char* pGroup)
2495
{
2496
for(uint32_t i = 0; i < S.nGroupCount; ++i)
2497
{
2498
if(!MP_STRCASECMP(pGroup, S.GroupInfo[i].pName))
2499
{
2500
return i;
2501
}
2502
}
2503
return UINT16_MAX;
2504
}
2505
2506
uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
2507
{
2508
for(uint32_t i = 0; i < S.nGroupCount; ++i)
2509
{
2510
if(!MP_STRCASECMP(pGroup, S.GroupInfo[i].pName))
2511
{
2512
return i;
2513
}
2514
}
2515
uint16_t nGroupIndex = 0xffff;
2516
uint32_t nLen = (uint32_t)strlen(pGroup);
2517
if(nLen > MICROPROFILE_NAME_MAX_LEN - 1)
2518
nLen = MICROPROFILE_NAME_MAX_LEN - 1;
2519
memcpy(&S.GroupInfo[S.nGroupCount].pName[0], pGroup, nLen);
2520
S.GroupInfo[S.nGroupCount].pName[nLen] = '\0';
2521
S.GroupInfo[S.nGroupCount].nNameLen = nLen;
2522
S.GroupInfo[S.nGroupCount].nNumTimers = 0;
2523
S.GroupInfo[S.nGroupCount].nGroupIndex = S.nGroupCount;
2524
S.GroupInfo[S.nGroupCount].Type = Type;
2525
S.GroupInfo[S.nGroupCount].nMaxTimerNameLen = 0;
2526
S.GroupInfo[S.nGroupCount].nColor = 0x42;
2527
S.GroupInfo[S.nGroupCount].nCategory = 0;
2528
S.GroupInfo[S.nGroupCount].nWSNext = -2;
2529
2530
uint32_t nIndex = S.nGroupCount / 32;
2531
uint32_t nBit = S.nGroupCount % 32;
2532
{
2533
S.CategoryInfo[0].nGroupMask[nIndex] |= (1 << nBit);
2534
}
2535
if(S.nStartEnabled)
2536
{
2537
S.nActiveGroupsWanted[nIndex] |= (1ll << nBit);
2538
S.nActiveGroups[nIndex] |= (1ll << nBit);
2539
S.AnyActive = true;
2540
}
2541
nGroupIndex = S.nGroupCount++;
2542
S.nGroupMask[nIndex] |= (1 << nBit);
2543
MP_ASSERT(S.nGroupCount < MICROPROFILE_MAX_GROUPS);
2544
return nGroupIndex;
2545
}
2546
2547
void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
2548
{
2549
MicroProfileScopeLock L(MicroProfileMutex());
2550
2551
int nCategoryIndex = -1;
2552
for(uint32_t i = 0; i < S.nCategoryCount; ++i)
2553
{
2554
if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName))
2555
{
2556
nCategoryIndex = (int)i;
2557
break;
2558
}
2559
}
2560
if(-1 == nCategoryIndex && S.nCategoryCount < MICROPROFILE_MAX_CATEGORIES)
2561
{
2562
MP_ASSERT(S.CategoryInfo[S.nCategoryCount].pName[0] == '\0');
2563
nCategoryIndex = (int)S.nCategoryCount++;
2564
uint32_t nLen = (uint32_t)strlen(pCategory);
2565
if(nLen > MICROPROFILE_NAME_MAX_LEN - 1)
2566
nLen = MICROPROFILE_NAME_MAX_LEN - 1;
2567
memcpy(&S.CategoryInfo[nCategoryIndex].pName[0], pCategory, nLen);
2568
S.CategoryInfo[nCategoryIndex].pName[nLen] = '\0';
2569
}
2570
uint16_t nGroup = MicroProfileGetGroup(pGroup, 0 != MP_STRCASECMP(pGroup, "gpu") ? MicroProfileTokenTypeCpu : MicroProfileTokenTypeGpu);
2571
S.GroupInfo[nGroup].nColor = nColor;
2572
if(nCategoryIndex >= 0)
2573
{
2574
uint32_t nIndex = nGroup / 32;
2575
uint32_t nBit = nGroup % 32;
2576
nBit = (1 << nBit);
2577
uint32_t nOldCategory = S.GroupInfo[nGroup].nCategory;
2578
S.CategoryInfo[nOldCategory].nGroupMask[nIndex] &= ~nBit;
2579
S.CategoryInfo[nCategoryIndex].nGroupMask[nIndex] |= nBit;
2580
S.GroupInfo[nGroup].nCategory = nCategoryIndex;
2581
}
2582
}
2583
2584
// Returns the token of the timer pGroup/pName, creating the timer (and its
// group) when it does not exist yet.
//
// If the timer exists and is still flagged as a placeholder, its colour,
// flags and type are overwritten with the values passed in. Otherwise the
// existing flags must match Flags unless the caller itself passes the
// placeholder flag (asserted).
//
// nColor is masked to its low 24 bits. Type must match the type of the
// group the timer is added to (asserted).
MicroProfileToken MicroProfileGetToken(const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type, uint32_t Flags)
{
	MicroProfileInit();
	MicroProfileScopeLock L(MicroProfileMutex());

	MicroProfileToken nExisting = MicroProfileFindTokenInternal(pGroup, pName);
	if(nExisting != MICROPROFILE_INVALID_TOKEN)
	{
		int nExistingIndex = MicroProfileGetTimerIndex(nExisting);
		auto& Existing = S.TimerInfo[nExistingIndex];
		if(Existing.Flags & MICROPROFILE_TIMER_FLAG_PLACEHOLDER)
		{
			// Placeholder timers take on the properties of the real registration.
			Existing.nColor = nColor & 0xffffff;
			Existing.Flags = Flags;
			Existing.Type = Type;
		}
		MP_ASSERT(Existing.Flags == Flags || (Flags & MICROPROFILE_TIMER_FLAG_PLACEHOLDER));
		return nExisting;
	}

	// New timer: allocate the next timer slot in the (possibly new) group.
	uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
	uint16_t nTimerIndex = (uint16_t)(S.nTotalTimers++);
	MP_ASSERT(nTimerIndex < MICROPROFILE_MAX_TIMERS);

	// The token encodes the group's mask word, its bit and the timer index.
	uint32_t nMaskWord = nGroupIndex / 32;
	uint32_t nMaskBit = nGroupIndex % 32;
	uint32_t nGroupMask = 1ll << nMaskBit;
	MicroProfileToken nToken = MicroProfileMakeToken(nGroupMask, (uint16_t)nMaskWord, nTimerIndex);

	const uint32_t nFullNameLen = (uint32_t)strlen(pName);
	auto& Group = S.GroupInfo[nGroupIndex];
	Group.nNumTimers++;
	Group.nMaxTimerNameLen = MicroProfileMax(Group.nMaxTimerNameLen, nFullNameLen);
	MP_ASSERT(Group.Type == Type); // dont mix cpu & gpu timers in the same group
	S.nMaxGroupSize = MicroProfileMax(S.nMaxGroupSize, Group.nNumTimers);

	auto& Timer = S.TimerInfo[nTimerIndex];
	Timer.nToken = nToken;

	uint32_t nStoredLen = nFullNameLen;
	if(nStoredLen > MICROPROFILE_NAME_MAX_LEN - 1)
	{
		nStoredLen = MICROPROFILE_NAME_MAX_LEN - 1;
	}
	memcpy(&Timer.pName, pName, nStoredLen);
	Timer.pName[nStoredLen] = '\0';
	Timer.nNameLen = nStoredLen;
	// Extended name: "<group> <timer>".
	snprintf(&Timer.pNameExt[0], sizeof(Timer.pNameExt) - 1, "%s %s", Group.pName, pName);

	Timer.nColor = nColor & 0xffffff;
	Timer.nGroupIndex = nGroupIndex;
	Timer.nTimerIndex = nTimerIndex;
	Timer.nWSNext = -2;
	Timer.Type = Type;
	Timer.Flags = Flags;
	S.TimerToGroup[nTimerIndex] = nGroupIndex;
	return nToken;
}
2631
2632
// Fills *pToken with the token for pGroup/pName if it is still
// MICROPROFILE_INVALID_TOKEN; an already valid token is left untouched.
// The token is checked once without the lock and once more after taking it.
void MicroProfileGetTokenC(MicroProfileToken* pToken, const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type, uint32_t flags)
{
	if(*pToken != MICROPROFILE_INVALID_TOKEN)
	{
		return;
	}
	MicroProfileInit();
	MicroProfileScopeLock L(MicroProfileMutex());
	// Re-check now that the lock is held.
	if(*pToken != MICROPROFILE_INVALID_TOKEN)
	{
		return;
	}
	*pToken = MicroProfileGetToken(pGroup, pName, nColor, Type, flags);
}
2644
2645
const char* MicroProfileNextName(const char* pName, char* pNameOut, uint32_t* nSubNameLen)
2646
{
2647
int nMaxLen = MICROPROFILE_NAME_MAX_LEN - 1;
2648
const char* pRet = 0;
2649
bool bDone = false;
2650
uint32_t nChars = 0;
2651
for(int i = 0; i < nMaxLen && !bDone; ++i)
2652
{
2653
char c = *pName++;
2654
switch(c)
2655
{
2656
case 0:
2657
bDone = true;
2658
break;
2659
case '\\':
2660
case '/':
2661
if(nChars)
2662
{
2663
bDone = true;
2664
pRet = pName;
2665
}
2666
break;
2667
default:
2668
nChars++;
2669
*pNameOut++ = c;
2670
}
2671
}
2672
*nSubNameLen = nChars;
2673
*pNameOut = '\0';
2674
return pRet;
2675
}
2676
2677
const char* MicroProfileCounterFullName(int nCounter)
2678
{
2679
static char Buffer[1024];
2680
int nNodes[32];
2681
int nIndex = 0;
2682
do
2683
{
2684
nNodes[nIndex++] = nCounter;
2685
nCounter = S.CounterInfo[nCounter].nParent;
2686
} while(nCounter >= 0);
2687
int nOffset = 0;
2688
while(nIndex >= 0 && nOffset < (int)sizeof(Buffer) - 2)
2689
{
2690
uint32_t nLen = S.CounterInfo[nNodes[nIndex]].nNameLen + nOffset; // < sizeof(Buffer)-1
2691
nLen = MicroProfileMin((uint32_t)(sizeof(Buffer) - 2 - nOffset), nLen);
2692
memcpy(&Buffer[nOffset], S.CounterInfo[nNodes[nIndex]].pName, nLen);
2693
2694
nOffset += S.CounterInfo[nNodes[nIndex]].nNameLen + 1;
2695
if(nIndex)
2696
{
2697
Buffer[nOffset++] = '/';
2698
}
2699
nIndex--;
2700
}
2701
return &Buffer[0];
2702
}
2703
2704
// Allocates the next counter slot and initialises it as a child of nParent
// (a negative nParent creates a root counter at level 0).
// The new counter is linked in as the first child of its parent; the
// parent's previous first child becomes the new counter's sibling.
// nFlags may only contain bits of MICROPROFILE_COUNTER_FLAG_TYPE_MASK.
// The name is left unset; see MicroProfileCounterTokenInitName().
MicroProfileToken MicroProfileCounterTokenInit(int nParent, uint32_t nFlags)
{
	MP_ASSERT(0 == (nFlags & (~MICROPROFILE_COUNTER_FLAG_TYPE_MASK)));
	MicroProfileToken nResult = S.nNumCounters++;

	auto& Info = S.CounterInfo[nResult];
	Info.nParent = nParent;
	Info.nSibling = -1;
	Info.nFirstChild = -1;
	Info.nFlags = nFlags;
	Info.eFormat = MICROPROFILE_COUNTER_FORMAT_DEFAULT;
	Info.nLimit = 0;
	Info.ExternalAtomic = 0;
	Info.nNameLen = 0;
	Info.pName = nullptr;
	Info.nWSNext = -2;

	S.CounterSource[nResult].pSource = 0;
	S.CounterSource[nResult].nSourceSize = 0;

	if(nParent < 0)
	{
		Info.nLevel = 0;
		return nResult;
	}

	MP_ASSERT(nParent < (int)S.nNumCounters);
	// Push the new counter at the front of the parent's child list.
	Info.nSibling = S.CounterInfo[nParent].nFirstChild;
	Info.nLevel = S.CounterInfo[nParent].nLevel + 1;
	S.CounterInfo[nParent].nFirstChild = nResult;
	return nResult;
}
2733
// Assigns a name to a counter that does not have one yet (asserted).
// The stored name is the string returned by MicroProfileStringInternLower();
// the stored length is that of pName.
void MicroProfileCounterTokenInitName(MicroProfileToken nToken, const char* pName)
{
	MP_ASSERT(0 == S.CounterInfo[nToken].pName);
	const uint16_t nNameLen = (uint16_t)strlen(pName);
	S.CounterInfo[nToken].nNameLen = nNameLen;
	S.CounterInfo[nToken].pName = MicroProfileStringInternLower(pName);
}
2739
2740
// Returns the child of nParent whose name pointer equals pName, creating it
// when it does not exist.
// Names are compared by pointer, not by content, so pName must be the same
// pointer that was stored for the counter.
// With MICROPROFILE_COUNTER_FLAG_TOKEN_DONT_CREATE set in nFlags a missing
// counter is not created and MICROPROFILE_INVALID_TOKEN is returned instead.
MicroProfileToken MicroProfileGetCounterTokenByParent(int nParent, const char* pName, uint32_t nFlags)
{
	for(uint32_t nCandidate = 0; nCandidate < S.nNumCounters; ++nCandidate)
	{
		const bool bSameParent = nParent == S.CounterInfo[nCandidate].nParent;
		if(bSameParent && S.CounterInfo[nCandidate].pName == pName)
		{
			return nCandidate;
		}
	}

	const bool bMayCreate = 0 == (MICROPROFILE_COUNTER_FLAG_TOKEN_DONT_CREATE & nFlags);
	if(!bMayCreate)
	{
		return MICROPROFILE_INVALID_TOKEN;
	}

	MicroProfileToken nCreated = MicroProfileCounterTokenInit(nParent, nFlags);
	MicroProfileCounterTokenInitName(nCreated, pName);
	return nCreated;
}
2755
2756
// by passing in last token/parent, and a non-changing static string,
2757
// we can quickly return in case the parent is the same as before.
2758
// Note that this doesn't support paths, but instead must be called once per level in the tree
2759
// String must be preinterned.
2760
MicroProfileToken MicroProfileCounterTokenTree(MicroProfileToken* LastToken, MicroProfileToken CurrentParent, const char* pString)
2761
{
2762
MicroProfileToken Token = *LastToken;
2763
if(Token != MICROPROFILE_INVALID_TOKEN)
2764
{
2765
if(S.CounterInfo[Token].pName == pString && S.CounterInfo[Token].nParent == CurrentParent)
2766
{
2767
return Token;
2768
}
2769
}
2770
MicroProfileInit();
2771
MicroProfileScopeLock L(MicroProfileMutex());
2772
Token = MicroProfileGetCounterTokenByParent(CurrentParent, pString, 0);
2773
*LastToken = Token;
2774
return Token;
2775
}
2776
2777
const char* MicroProfileCounterString(const char* pString)
2778
{
2779
MicroProfileInit();
2780
MicroProfileScopeLock L(MicroProfileMutex());
2781
return MicroProfileStringInternLower(pString);
2782
}
2783
2784
// Same as above, but works with non-static strings. always takes a lock, and does a search, so expect this to be not cheap
2785
MicroProfileToken MicroProfileCounterTokenTreeDynamic(MicroProfileToken* LastToken, MicroProfileToken Parent, const char* pString)
2786
{
2787
(void)LastToken;
2788
MicroProfileInit();
2789
MicroProfileScopeLock L(MicroProfileMutex());
2790
const char* pSubNameLower = MicroProfileStringInternLower(pString);
2791
return MicroProfileGetCounterTokenByParent(Parent, pSubNameLower, 0);
2792
}
2793
2794
// Returns the token of the counter identified by the path pName, where
// components are separated by '/' or '\\'. Missing counters along the path
// are created. The final counter is flagged as a leaf.
//
// CounterFlag: when MICROPROFILE_COUNTER_HISTORY is enabled and
//              MICROPROFILE_COUNTER_FLAG_DOUBLE is set, the counter is
//              flagged as double and its min/max history is reset.
//
// Returns MICROPROFILE_INVALID_TOKEN when pName contains no component at
// all (empty string or separators only).
MicroProfileToken MicroProfileGetCounterToken(const char* pName, uint32_t CounterFlag)
{
	MicroProfileInit();
	MicroProfileScopeLock L(MicroProfileMutex());
	char SubName[MICROPROFILE_NAME_MAX_LEN];
	MicroProfileToken nResult = MICROPROFILE_INVALID_TOKEN;
	do
	{
		uint32_t nLen = 0;
		pName = MicroProfileNextName(pName, &SubName[0], &nLen);
		if(0 == nLen)
		{
			break;
		}
		// Walk (or extend) the tree one level: the previous result is the parent.
		const char* pSubNameLower = MicroProfileStringInternLower(SubName);
		nResult = MicroProfileGetCounterTokenByParent(nResult, pSubNameLower, 0);
		if(MICROPROFILE_INVALID_TOKEN == nResult)
			return nResult;

	} while(pName != 0);

	if(MICROPROFILE_INVALID_TOKEN == nResult)
	{
		// No component was found; indexing S.CounterInfo with the invalid
		// token below would be out of bounds.
		return nResult;
	}
	S.CounterInfo[nResult].nFlags |= MICROPROFILE_COUNTER_FLAG_LEAF;

#if MICROPROFILE_COUNTER_HISTORY
	if(CounterFlag & MICROPROFILE_COUNTER_FLAG_DOUBLE)
	{
		S.CounterInfo[nResult].nFlags |= MICROPROFILE_COUNTER_FLAG_DOUBLE;
		S.dCounterMax[nResult] = -DBL_MAX;
		S.dCounterMin[nResult] = DBL_MAX;
	}
#endif

	MP_ASSERT((int)nResult >= 0);
	return nResult;
}
2828
2829
// Returns the token of the child counter pName under Parent.
// pName must be a single component: path delimiters are not supported when the
// tree is built manually.
MicroProfileToken MicroProfileGetChildCounterToken(MicroProfileToken Parent, const char* pName)
{
	const char* pDelimiter = strpbrk(pName, "\\/");
	MP_ASSERT(NULL == pDelimiter);
	return MicroProfileCounterTokenTreeDynamic(nullptr, Parent, pName);
}
2834
2835
// Appends one entry to pLog's ring buffer.
// If fewer than nStackPut + 2 slots are free, the entry is dropped and S.nOverflow is
// set to 100. Otherwise the entry is written at nPut and nPut is published with
// release ordering.
inline void MicroProfileLogPut(MicroProfileLogEntry LE, MicroProfileThreadLog* pLog)
{
	MP_ASSERT(pLog != 0);		   // this assert is hit if MicroProfileOnCreateThread is not called
	MP_ASSERT(pLog->nActive == 1); // Dont put after calling thread exit
	uint32_t nPut = pLog->nPut.load(std::memory_order_relaxed);
	uint32_t nNextPos = (nPut + 1) % MICROPROFILE_BUFFER_SIZE;
	// acquire, like the other producers in this file that load nGet before writing into the buffer
	uint32_t nGet = pLog->nGet.load(std::memory_order_acquire);
	uint32_t nDistance = (nGet - nNextPos) % MICROPROFILE_BUFFER_SIZE;
	MP_ASSERT(nDistance < MICROPROFILE_BUFFER_SIZE);
	uint32_t nStackPut = pLog->nStackPut;
	if(nDistance < nStackPut + 2)
	{
		S.nOverflow = 100;
	}
	else
	{
		pLog->Log[nPut] = LE;
		pLog->nPut.store(nNextPos, std::memory_order_release);
	}
}
2855
2856
// Writes an MP_LOG_ENTER entry for nToken_ into pLog and bumps the log's stack depth.
// Returns:
//   nTick                      - the entry was written.
//   MICROPROFILE_INVALID_TICK  - too little room in the ring buffer; nothing written,
//                                stack depth unchanged.
//   MICROPROFILE_DROPPED_TICK  - stack depth already at MICROPROFILE_STACK_MAX; nothing
//                                written, but the depth is still incremented.
// Both failure paths set S.nOverflow to 100.
inline uint64_t MicroProfileLogPutEnter(MicroProfileToken nToken_, uint64_t nTick, MicroProfileThreadLog* pLog)
{
	MP_ASSERT(pLog != 0);		   // this assert is hit if MicroProfileOnCreateThread is not called
	MP_ASSERT(pLog->nActive == 1); // Dont put after calling thread exit
	const uint32_t nDepth = pLog->nStackPut;
	if(nDepth >= MICROPROFILE_STACK_MAX)
	{
		S.nOverflow = 100;
		pLog->nStackPut = nDepth + 1;
		return MICROPROFILE_DROPPED_TICK;
	}

	uint64_t LE = MicroProfileMakeLogIndex(MP_LOG_ENTER, nToken_, nTick);
	const uint32_t nWritePos = pLog->nPut.load(std::memory_order_relaxed);
	const uint32_t nNextPos = (nWritePos + 1) % MICROPROFILE_BUFFER_SIZE;
	const uint32_t nReadPos = pLog->nGet.load(std::memory_order_acquire);
	const uint32_t nFree = (nReadPos - nNextPos) % MICROPROFILE_BUFFER_SIZE;
	MP_ASSERT(nFree < MICROPROFILE_BUFFER_SIZE);
	if(nFree < nDepth + 4) // 2 for ring buffer, 2 for the actual entries
	{
		S.nOverflow = 100;
		return MICROPROFILE_INVALID_TICK;
	}

#ifdef MICROPROFILE_VERIFY_BALANCED
	pLog->VerifyStack[nDepth] = LE;
#endif
	pLog->nStackPut = nDepth + 1;
	pLog->Log[nWritePos] = LE;
	pLog->nPut.store(nNextPos, std::memory_order_release);
	return nTick;
}
2892
2893
inline uint64_t MicroProfileLogPutEnterCStr(const char* pStr, uint64_t nTick, MicroProfileThreadLog* pLog)
2894
{
2895
MP_ASSERT(pLog != 0); // this assert is hit if MicroProfileOnCreateThread is not called
2896
MP_ASSERT(pLog->nActive == 1); // Dont put after calling thread exit
2897
uint32_t nStackPut = pLog->nStackPut;
2898
if(nStackPut < MICROPROFILE_STACK_MAX)
2899
{
2900
uint64_t LE = MicroProfileMakeLogIndex(MP_LOG_ENTER, ETOKEN_CSTR_PTR, nTick);
2901
uint64_t LEStr = MicroProfileMakeLogExtendedNoDataPtr((uint64_t)pStr);
2902
2903
MP_ASSERT(ETOKEN_CSTR_PTR == MicroProfileLogGetTimerIndex(LE));
2904
2905
uint32_t nPut = pLog->nPut.load(std::memory_order_relaxed);
2906
uint32_t nNextPos = (nPut + 2) % MICROPROFILE_BUFFER_SIZE;
2907
uint32_t nGet = pLog->nGet.load(std::memory_order_acquire);
2908
uint32_t nDistance = (nGet - nNextPos) % MICROPROFILE_BUFFER_SIZE;
2909
MP_ASSERT(nDistance < MICROPROFILE_BUFFER_SIZE);
2910
if(nDistance < nStackPut + 6) // 2 for ring buffer, 4 for the actual entries
2911
{
2912
S.nOverflow = 100;
2913
return MICROPROFILE_INVALID_TICK;
2914
}
2915
else
2916
{
2917
pLog->nStackPut = nStackPut + 1;
2918
pLog->Log[nPut + 0] = LE;
2919
pLog->Log[(nPut + 1) % MICROPROFILE_BUFFER_SIZE] = LEStr;
2920
pLog->nPut.store(nNextPos, std::memory_order_release);
2921
return nTick;
2922
}
2923
}
2924
else
2925
{
2926
S.nOverflow = 100;
2927
pLog->nStackPut = nStackPut + 1;
2928
return MICROPROFILE_DROPPED_TICK;
2929
}
2930
}
2931
inline void MicroProfileLogPutLeaveCStr(const char* pStr, uint64_t nTick, MicroProfileThreadLog* pLog)
2932
{
2933
MP_ASSERT(pLog != 0); // this assert is hit if MicroProfileOnCreateThread is not called
2934
MP_ASSERT(pLog->nActive);
2935
MP_ASSERT(pLog->nStackPut != 0);
2936
uint32_t nStackPut = --(pLog->nStackPut);
2937
MP_ASSERT(nStackPut < 0xf0000000);
2938
if(nStackPut < MICROPROFILE_STACK_MAX)
2939
{
2940
uint64_t LE = MicroProfileMakeLogIndex(MP_LOG_LEAVE, ETOKEN_CSTR_PTR, nTick);
2941
uint64_t LEStr = MicroProfileMakeLogExtendedNoDataPtr((uint64_t)pStr);
2942
MP_ASSERT(ETOKEN_CSTR_PTR == MicroProfileLogGetTimerIndex(LE));
2943
2944
uint32_t nPos = pLog->nPut.load(std::memory_order_relaxed);
2945
uint32_t nNextPos = (nPos + 2) % MICROPROFILE_BUFFER_SIZE;
2946
2947
uint32_t nGet = pLog->nGet.load(std::memory_order_acquire);
2948
MP_ASSERT(nStackPut < MICROPROFILE_STACK_MAX);
2949
MP_ASSERT(nNextPos != nGet); // should never happen
2950
pLog->Log[nPos + 0] = LE;
2951
pLog->Log[(nPos + 1) % MICROPROFILE_BUFFER_SIZE] = LEStr;
2952
2953
pLog->nPut.store(nNextPos, std::memory_order_release);
2954
}
2955
}
2956
2957
// Pops one level off the log's stack depth and, if the popped depth is below
// MICROPROFILE_STACK_MAX, writes an MP_LOG_LEAVE entry for nToken_.
// With MICROPROFILE_VERIFY_BALANCED defined, the popped timer index is checked against
// the one recorded at enter time and a mismatch is reported and asserted.
inline void MicroProfileLogPutLeave(MicroProfileToken nToken_, uint64_t nTick, MicroProfileThreadLog* pLog)
{
	MP_ASSERT(pLog != 0); // this assert is hit if MicroProfileOnCreateThread is not called
	MP_ASSERT(pLog->nActive);
	MP_ASSERT(pLog->nStackPut != 0);
	const uint32_t nDepth = --(pLog->nStackPut);
	if(nDepth >= MICROPROFILE_STACK_MAX)
	{
		// the matching enter was dropped, so there is nothing to close
		return;
	}

	uint64_t LE = MicroProfileMakeLogIndex(MP_LOG_LEAVE, nToken_, nTick);
	const uint32_t nWritePos = pLog->nPut.load(std::memory_order_relaxed);
	const uint32_t nNextPos = (nWritePos + 1) % MICROPROFILE_BUFFER_SIZE;
	const uint32_t nReadPos = pLog->nGet.load(std::memory_order_acquire);
	MP_ASSERT(nNextPos != nReadPos); // should never happen

#ifdef MICROPROFILE_VERIFY_BALANCED
	// verify what we pop is what we push.
	uint64_t Pushed = pLog->VerifyStack[nDepth];
	uint64_t TimerPopped = MicroProfileLogGetTimerIndex(LE);
	uint64_t TimerOnStack = MicroProfileLogGetTimerIndex(Pushed);
	if(TimerPopped != TimerOnStack)
	{
		uprintf("Push/Pop Mismatch %s vs %s\n", S.TimerInfo[TimerPopped].pName, S.TimerInfo[TimerOnStack].pName);
		MP_ASSERT(0);
	}
#endif

	pLog->Log[nWritePos] = LE;
	pLog->nPut.store(nNextPos, std::memory_order_release);
}
2989
2990
// Convenience overload: builds a log entry from (nBegin, nToken_, nTick) with
// MicroProfileMakeLogIndex() and appends it to pLog.
inline void MicroProfileLogPut(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLog* pLog)
{
	const MicroProfileLogEntry LE = MicroProfileMakeLogIndex(nBegin, nToken_, nTick);
	MicroProfileLogPut(LE, pLog);
}
2994
2995
// Appends LE to the GPU log. The GPU log is linear, not a ring: once nPut reaches
// MICROPROFILE_GPU_BUFFER_SIZE further entries are silently dropped.
inline void MicroProfileLogPutGpu(MicroProfileLogEntry LE, MicroProfileThreadLogGpu* pLog)
{
	const uint32_t nWritePos = pLog->nPut;
	if(nWritePos >= MICROPROFILE_GPU_BUFFER_SIZE)
	{
		return;
	}
	pLog->Log[nWritePos] = LE;
	pLog->nPut = nWritePos + 1;
}
3004
3005
// Builds a timer entry from (nBegin, nToken_, nTick) and appends it to the GPU log.
inline void MicroProfileLogPutGpuTimer(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLogGpu* pLog)
{
	const MicroProfileLogEntry LE = MicroProfileMakeLogIndex(nBegin, nToken_, nTick);
	MicroProfileLogPutGpu(LE, pLog);
}
3009
3010
// Builds an extended entry via MicroProfileMakeLogExtended() and appends it to the GPU log.
inline void MicroProfileLogPutGpuExtended(EMicroProfileTokenExtended eTokenExt, uint32_t nDataSizeQWords, uint32_t nPayload, MicroProfileThreadLogGpu* pLog)
{
	MicroProfileLogPutGpu(MicroProfileMakeLogExtended(eTokenExt, nDataSizeQWords, nPayload), pLog);
}
3015
3016
// Builds an extended entry via MicroProfileMakeLogExtendedNoData() and appends it to the GPU log.
inline void MicroProfileLogPutGpuExtendedNoData(EMicroProfileTokenExtended eTokenExt, uint64_t nPayload, MicroProfileThreadLogGpu* pLog)
{
	MicroProfileLogPutGpu(MicroProfileMakeLogExtendedNoData(eTokenExt, nPayload), pLog);
}
3021
3022
// Returns 1 when any group bit of nToken_ is set in S.nActiveGroups, 0 otherwise.
uint32_t MicroProfileGroupTokenActive(MicroProfileToken nToken_)
{
	const uint32_t nBits = MicroProfileGetGroupMask(nToken_);
	const uint32_t nWord = MicroProfileGetGroupMaskIndex(nToken_);
	const uint32_t nActiveBits = S.nActiveGroups[nWord] & nBits;
	return nActiveBits != 0;
}
3028
3029
// Opens a timed scope for nToken_.
// Returns MICROPROFILE_INVALID_TICK when the token's group is not active. Otherwise it
// either emits a platform marker and returns the current tick, or records the enter in
// the calling thread's log and returns whatever MicroProfileLogPutEnter() returns.
uint64_t MicroProfileEnterInternal(MicroProfileToken nToken_)
{
	if(!MicroProfileGroupTokenActive(nToken_))
	{
		return MICROPROFILE_INVALID_TICK;
	}

	uint64_t nTick = MP_TICK();
	if(MICROPROFILE_PLATFORM_MARKERS_ENABLED)
	{
		uint32_t idx = MicroProfileGetTimerIndex(nToken_);
		MicroProfileTimerInfo& TI = S.TimerInfo[idx];
		MICROPROFILE_PLATFORM_MARKER_BEGIN(TI.nColor, TI.pNameExt);
		return nTick;
	}
	return MicroProfileLogPutEnter(nToken_, nTick, MicroProfileGetThreadLog2());
}
3048
3049
uint64_t MicroProfileEnterInternalCStr(const char* pStr)
3050
{
3051
if(S.AnyActive)
3052
{
3053
uint64_t nTick = MP_TICK();
3054
if(MICROPROFILE_PLATFORM_MARKERS_ENABLED)
3055
{
3056
MICROPROFILE_PLATFORM_MARKER_BEGIN(0, pStr);
3057
return nTick;
3058
}
3059
else
3060
{
3061
return MicroProfileLogPutEnterCStr(pStr, nTick, MicroProfileGetThreadLog2());
3062
}
3063
}
3064
return MICROPROFILE_INVALID_TICK;
3065
}
3066
3067
void MicroProfileTimelineLeave(uint32_t id)
3068
{
3069
if(!id)
3070
return;
3071
std::lock_guard<std::recursive_mutex> Lock(MicroProfileTimelineMutex());
3072
MicroProfileThreadLog* pLog = &S.TimelineLog;
3073
uint32_t nPut = pLog->nPut.load(std::memory_order_relaxed);
3074
uint32_t nNextPos = (nPut + 1) % MICROPROFILE_BUFFER_SIZE;
3075
uint32_t nGet = pLog->nGet.load(std::memory_order_acquire);
3076
uint32_t nDistance = (nGet - nNextPos) % MICROPROFILE_BUFFER_SIZE;
3077
3078
{
3079
uint32_t nFrameStart = S.TimelineTokenFrameEnter[id % MICROPROFILE_TIMELINE_MAX_TOKENS];
3080
uint32_t nFrameCurrent = S.nFrameCurrent;
3081
if(nFrameCurrent < nFrameStart)
3082
nFrameCurrent += MICROPROFILE_MAX_FRAME_HISTORY;
3083
uint32_t nFrameDistance = (nFrameCurrent - nFrameStart) % MICROPROFILE_MAX_FRAME_HISTORY;
3084
3085
S.TimelineTokenFrameEnter[id % MICROPROFILE_TIMELINE_MAX_TOKENS] = MICROPROFILE_INVALID_FRAME;
3086
S.TimelineTokenFrameLeave[id % MICROPROFILE_TIMELINE_MAX_TOKENS] = nFrameCurrent;
3087
3088
S.TimelineToken[id % MICROPROFILE_TIMELINE_MAX_TOKENS] = 0;
3089
S.nTimelineFrameMax = MicroProfileMax(S.nTimelineFrameMax, nFrameDistance);
3090
}
3091
3092
if(nDistance < 2 + 4)
3093
{
3094
S.nOverflow = 100;
3095
}
3096
else
3097
{
3098
uint64_t LEEnter = MicroProfileMakeLogIndex(MP_LOG_LEAVE, ETOKEN_CUSTOM_NAME, MP_TICK());
3099
uint64_t LEId = MicroProfileMakeLogExtended(ETOKEN_CUSTOM_ID, 0, id);
3100
3101
pLog->Log[nPut++] = LEEnter;
3102
nPut %= MICROPROFILE_BUFFER_SIZE;
3103
pLog->Log[nPut++] = LEId;
3104
nPut %= MICROPROFILE_BUFFER_SIZE;
3105
pLog->nPut.store(nPut);
3106
}
3107
}
3108
3109
void MicroProfileTimelineEnterStatic(uint32_t nColor, const char* pStr)
3110
{
3111
if(!S.AnyActive)
3112
return;
3113
uint32_t nToken = MicroProfileTimelineEnterInternal(nColor, pStr, (uint32_t)strlen(pStr), true);
3114
(void)nToken;
3115
}
3116
void MicroProfileTimelineLeaveStatic(const char* pStr)
3117
{
3118
if(!S.AnyActive)
3119
return;
3120
3121
for(uint32_t i = 0; i < MICROPROFILE_TIMELINE_MAX_TOKENS; ++i)
3122
{
3123
if(S.TimelineTokenStaticString[i] && 0 == MP_STRCASECMP(pStr, S.TimelineTokenStaticString[i]))
3124
{
3125
MicroProfileTimelineLeave(S.TimelineToken[i]);
3126
}
3127
}
3128
}
3129
3130
uint32_t MicroProfileTimelineEnterInternal(uint32_t nColor, const char* pStr, uint32_t nStrLen, int bIsStaticString)
3131
{
3132
if(!S.AnyActive)
3133
return 0;
3134
std::lock_guard<std::recursive_mutex> Lock(MicroProfileTimelineMutex());
3135
MicroProfileThreadLog* pLog = &S.TimelineLog;
3136
MP_ASSERT(pStr[nStrLen] == '\0');
3137
nStrLen += 1;
3138
uint32_t nStringQwords = MicroProfileGetQWordSize(nStrLen);
3139
uint32_t nNumMessages = nStringQwords;
3140
3141
uint32_t nPut = pLog->nPut.load(std::memory_order_relaxed);
3142
uint32_t nNextPos = (nPut + 1) % MICROPROFILE_BUFFER_SIZE;
3143
uint32_t nGet = pLog->nGet.load(std::memory_order_acquire);
3144
uint32_t nDistance = (nGet - nNextPos) % MICROPROFILE_BUFFER_SIZE;
3145
3146
if(nDistance < nNumMessages + 7)
3147
{
3148
S.nOverflow = 100;
3149
return 0;
3150
}
3151
else
3152
{
3153
3154
uint32_t token = pLog->nCustomId;
3155
uint32_t nFrameLeave = S.TimelineTokenFrameLeave[token % MICROPROFILE_TIMELINE_MAX_TOKENS];
3156
uint32_t nFrameEnter = S.TimelineTokenFrameEnter[token % MICROPROFILE_TIMELINE_MAX_TOKENS];
3157
uint32_t nCounter = 0;
3158
uint32_t nFrameCurrent = S.nFrameCurrent;
3159
{
3160
3161
/// dont reuse tokens until their leave command has been dead for the maximum amount of frames we can generate a capture for.
3162
while(token == 0 || nFrameEnter != MICROPROFILE_INVALID_FRAME || (nFrameCurrent - nFrameLeave < MICROPROFILE_MAX_FRAME_HISTORY + 3 && nFrameLeave != MICROPROFILE_INVALID_FRAME))
3163
{
3164
token = (uint32_t)pLog->nCustomId++;
3165
nFrameLeave = S.TimelineTokenFrameLeave[token % MICROPROFILE_TIMELINE_MAX_TOKENS];
3166
nFrameEnter = S.TimelineTokenFrameEnter[token % MICROPROFILE_TIMELINE_MAX_TOKENS];
3167
if(++nCounter == MICROPROFILE_TIMELINE_MAX_TOKENS)
3168
{
3169
// MP_BREAK();
3170
return 0;
3171
}
3172
}
3173
S.TimelineTokenFrameEnter[token % MICROPROFILE_TIMELINE_MAX_TOKENS] = S.nFrameCurrent;
3174
}
3175
if(bIsStaticString)
3176
{
3177
S.TimelineTokenStaticString[token % MICROPROFILE_TIMELINE_MAX_TOKENS] = pStr;
3178
}
3179
else
3180
{
3181
S.TimelineTokenStaticString[token % MICROPROFILE_TIMELINE_MAX_TOKENS] = nullptr;
3182
}
3183
S.TimelineToken[token % MICROPROFILE_TIMELINE_MAX_TOKENS] = token;
3184
3185
uint64_t LEEnter = MicroProfileMakeLogIndex(MP_LOG_ENTER, ETOKEN_CUSTOM_NAME, MP_TICK());
3186
uint64_t LEColor = MicroProfileMakeLogExtended(ETOKEN_CUSTOM_COLOR, 0, nColor);
3187
uint64_t LEId = MicroProfileMakeLogExtended(ETOKEN_CUSTOM_ID, nStringQwords, token);
3188
3189
pLog->Log[nPut++] = LEEnter;
3190
nPut %= MICROPROFILE_BUFFER_SIZE;
3191
pLog->Log[nPut++] = LEColor;
3192
nPut %= MICROPROFILE_BUFFER_SIZE;
3193
pLog->Log[nPut++] = LEId;
3194
nPut %= MICROPROFILE_BUFFER_SIZE;
3195
3196
// copy if we dont wrap
3197
if(nPut + nStringQwords <= MICROPROFILE_BUFFER_SIZE)
3198
{
3199
memcpy(&pLog->Log[nPut], pStr, nStrLen + 1);
3200
nPut += nStringQwords;
3201
}
3202
else
3203
{
3204
int nCharsLeft = (int)nStrLen;
3205
while(nCharsLeft > 0)
3206
{
3207
int nCount = MicroProfileMin(nCharsLeft, 8);
3208
memcpy(&pLog->Log[nPut++], pStr, nCount);
3209
// uint64_t LEPayload = MicroProfileMakeLogPayload(pStr, nCount);
3210
// pLog->Log[nPut++] = LEPayload; nPut %= MICROPROFILE_BUFFER_SIZE;
3211
pStr += nCount;
3212
nCharsLeft -= nCount;
3213
}
3214
}
3215
pLog->nPut.store(nPut);
3216
return token;
3217
}
3218
}
3219
3220
uint32_t MicroProfileTimelineEnter(uint32_t nColor, const char* pStr)
3221
{
3222
return MicroProfileTimelineEnterInternal(nColor, pStr, (uint32_t)strlen(pStr), false);
3223
}
3224
3225
uint32_t MicroProfileTimelineEnterf(uint32_t nColor, const char* pStr, ...)
3226
{
3227
if(!S.AnyActive)
3228
return 0;
3229
char buffer[MICROPROFILE_MAX_STRING + 1];
3230
va_list args;
3231
va_start(args, pStr);
3232
#ifdef _WIN32
3233
size_t size = vsprintf_s(buffer, pStr, args);
3234
#else
3235
size_t size = vsnprintf(buffer, sizeof(buffer) - 1, pStr, args);
3236
#endif
3237
va_end(args);
3238
MP_ASSERT(size < sizeof(buffer));
3239
buffer[size] = '\0';
3240
return MicroProfileTimelineEnterInternal(nColor, buffer, (uint32_t)size, false);
3241
}
3242
3243
// Adds nCount to the (non-atomic) counter pointed to by pCounter.
void MicroProfileLocalCounterAdd(int64_t* pCounter, int64_t nCount)
{
	const int64_t nUpdated = *pCounter + nCount;
	*pCounter = nUpdated;
}
3247
// Stores nCount into the (non-atomic) counter pointed to by pCounter and returns the
// value it held before.
int64_t MicroProfileLocalCounterSet(int64_t* pCounter, int64_t nCount)
{
	const int64_t nPrevious = *pCounter;
	*pCounter = nCount;
	return nPrevious;
}
3253
3254
// Atomically adds nCount to the ExternalAtomic value of counter nToken.
void MicroProfileLocalCounterAddAtomic(MicroProfileToken nToken, int64_t nCount)
{
	std::atomic<int64_t>& Counter = S.CounterInfo[nToken].ExternalAtomic;
	Counter.fetch_add(nCount);
}
3259
// Atomically replaces the ExternalAtomic value of counter nToken with nCount and
// returns the previous value.
int64_t MicroProfileLocalCounterSetAtomic(MicroProfileToken nToken, int64_t nCount)
{
	std::atomic<int64_t>& Counter = S.CounterInfo[nToken].ExternalAtomic;
	const int64_t nPrevious = Counter.exchange(nCount);
	return nPrevious;
}
3265
3266
// Atomically adds nCount to counter nToken. nToken must be below S.nNumCounters.
void MicroProfileCounterAdd(MicroProfileToken nToken, int64_t nCount)
{
	MP_ASSERT(nToken < S.nNumCounters);
	auto& Counter = S.Counters[nToken];
	Counter.fetch_add(nCount);
}
3271
// Atomically stores nCount into counter nToken. nToken must be below S.nNumCounters.
void MicroProfileCounterSet(MicroProfileToken nToken, int64_t nCount)
{
	MP_ASSERT(nToken < S.nNumCounters);
	auto& Counter = S.Counters[nToken];
	Counter.store(nCount);
}
3276
// Atomically reads counter nToken. nToken must be below S.nNumCounters.
int64_t MicroProfileCounterGet(MicroProfileToken nToken)
{
	MP_ASSERT(nToken < S.nNumCounters);
	const int64_t nValue = S.Counters[nToken].load();
	return nValue;
}
3281
3282
// Atomically stores nCount into the double-valued counter nToken.
// The counter must be in range and carry MICROPROFILE_COUNTER_FLAG_DOUBLE.
void MicroProfileCounterSetDouble(MicroProfileToken nToken, double nCount)
{
	MP_ASSERT(nToken < S.nNumCounters);
	MP_ASSERT((S.CounterInfo[nToken].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) == MICROPROFILE_COUNTER_FLAG_DOUBLE);
	auto& Counter = S.CountersDouble[nToken];
	Counter.store(nCount);
}
3288
// Atomically reads the double-valued counter nToken.
// The counter must be in range and carry MICROPROFILE_COUNTER_FLAG_DOUBLE.
double MicroProfileCounterGetDouble(MicroProfileToken nToken)
{
	MP_ASSERT(nToken < S.nNumCounters);
	MP_ASSERT((S.CounterInfo[nToken].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) == MICROPROFILE_COUNTER_FLAG_DOUBLE);
	const double dValue = S.CountersDouble[nToken].load();
	return dValue;
}
3294
// Sets the integer limit of counter nToken. nToken must be below S.nNumCounters.
void MicroProfileCounterSetLimit(MicroProfileToken nToken, int64_t nCount)
{
	MP_ASSERT(nToken < S.nNumCounters);
	auto& Info = S.CounterInfo[nToken];
	Info.nLimit = nCount;
}
3299
3300
// Sets the floating-point limit of the double-valued counter nToken.
// The counter must be in range and carry MICROPROFILE_COUNTER_FLAG_DOUBLE.
void MicroProfileCounterSetLimitDouble(MicroProfileToken nToken, double dCount)
{
	MP_ASSERT(nToken < S.nNumCounters);
	auto& Info = S.CounterInfo[nToken];
	MP_ASSERT((Info.nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) == MICROPROFILE_COUNTER_FLAG_DOUBLE);
	Info.dLimit = dCount;
}
3306
3307
// Configures counter nToken: sets its display format and limit, and ORs in nFlags
// with the bits of MICROPROFILE_COUNTER_FLAG_INTERNAL_MASK stripped.
void MicroProfileCounterConfigToken(MicroProfileToken nToken, uint32_t eFormat, int64_t nLimit, uint32_t nFlags)
{
	auto& Info = S.CounterInfo[nToken];
	Info.eFormat = (MicroProfileCounterFormat)eFormat;
	Info.nLimit = nLimit;
	Info.nFlags |= (nFlags & ~MICROPROFILE_COUNTER_FLAG_INTERNAL_MASK);
}
3313
3314
void MicroProfileCounterConfig(const char* pName, uint32_t eFormat, int64_t nLimit, uint32_t nFlags)
3315
{
3316
MicroProfileToken nToken = MicroProfileGetCounterToken(pName, 0);
3317
MicroProfileCounterConfigToken(nToken, eFormat, nLimit, nFlags);
3318
}
3319
3320
void MicroProfileCounterSetPtr(const char* pCounterName, void* pSource, uint32_t nSize)
3321
{
3322
MicroProfileToken nToken = MicroProfileGetCounterToken(pCounterName, 0);
3323
S.CounterSource[nToken].pSource = pSource;
3324
S.CounterSource[nToken].nSourceSize = nSize;
3325
}
3326
3327
inline void MicroProfileFetchCounter(uint32_t i)
3328
{
3329
MP_ASSERT(0 == S.CounterSource[i].nSourceSize || (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) == MICROPROFILE_COUNTER_FLAG_DOUBLE);
3330
switch(S.CounterSource[i].nSourceSize)
3331
{
3332
case sizeof(int32_t):
3333
S.Counters[i] = *(int32_t*)S.CounterSource[i].pSource;
3334
break;
3335
case sizeof(int64_t):
3336
S.Counters[i] = *(int64_t*)S.CounterSource[i].pSource;
3337
break;
3338
default:
3339
break;
3340
}
3341
}
3342
void MicroProfileCounterFetchCounters()
3343
{
3344
for(uint32_t i = 0; i < S.nNumCounters; ++i)
3345
{
3346
MicroProfileFetchCounter(i);
3347
}
3348
}
3349
3350
// Closes a scope opened by MicroProfileEnterInternal.
// nTickStart is the value returned by the enter call; MICROPROFILE_INVALID_TICK means
// nothing was opened, so nothing is closed. Otherwise either the platform marker is
// ended or a leave entry is written to the calling thread's log.
void MicroProfileLeaveInternal(MicroProfileToken nToken_, uint64_t nTickStart)
{
	if(MICROPROFILE_INVALID_TICK == nTickStart)
	{
		return;
	}
	if(MICROPROFILE_PLATFORM_MARKERS_ENABLED)
	{
		MICROPROFILE_PLATFORM_MARKER_END();
		return;
	}
	uint64_t nTick = MP_TICK();
	MicroProfileLogPutLeave(nToken_, nTick, MicroProfileGetThreadLog2());
}
3366
3367
void MicroProfileLeaveInternalCStr(const char* pStr, uint64_t nTickStart)
3368
{
3369
if(MICROPROFILE_INVALID_TICK != nTickStart)
3370
{
3371
if(MICROPROFILE_PLATFORM_MARKERS_ENABLED)
3372
{
3373
MICROPROFILE_PLATFORM_MARKER_END();
3374
}
3375
else
3376
{
3377
uint64_t nTick = MP_TICK();
3378
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
3379
MicroProfileLogPutLeaveCStr(pStr, nTick, pLog);
3380
}
3381
}
3382
}
3383
3384
// Opens a scope for nToken and remembers it on the calling thread's scope stack so that
// MicroProfileLeave() can close it without arguments.
// If the scope stack is already full, the depth is still incremented, nothing is
// recorded, and S.nOverflow is set to 100.
void MicroProfileEnter(MicroProfileToken nToken)
{
	MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
	MP_ASSERT(pLog->nStackScope < MICROPROFILE_STACK_MAX); // if youre hitting this assert you probably have mismatched _ENTER/_LEAVE markers
	const uint32_t nSlot = pLog->nStackScope++;
	if(nSlot >= MICROPROFILE_STACK_MAX)
	{
		S.nOverflow = 100;
		return;
	}
	MicroProfileScopeStateC& State = pLog->ScopeState[nSlot];
	State.Token = nToken;
	State.nTick = MicroProfileEnterInternal(nToken);
}
3400
void MicroProfileLeave()
3401
{
3402
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
3403
MP_ASSERT(pLog->nStackScope > 0); // if youre hitting this assert you probably have mismatched _ENTER/_LEAVE markers
3404
uint32_t nStackPos = --pLog->nStackScope;
3405
if(nStackPos < MICROPROFILE_STACK_MAX)
3406
{
3407
MicroProfileScopeStateC* pScopeState = &pLog->ScopeState[nStackPos];
3408
MicroProfileLeaveInternal(pScopeState->Token, pScopeState->nTick);
3409
}
3410
else
3411
{
3412
S.nOverflow = 100;
3413
}
3414
}
3415
3416
// GPU counterpart of MicroProfileEnter: opens a GPU scope for nToken and remembers it
// on pLog's scope stack. If the stack is already full, the depth is still incremented,
// nothing is recorded, and S.nOverflow is set to 100.
void MicroProfileEnterGpu(MicroProfileToken nToken, MicroProfileThreadLogGpu* pLog)
{
	const uint32_t nSlot = pLog->nStackScope++;
	if(nSlot >= MICROPROFILE_STACK_MAX)
	{
		S.nOverflow = 100;
		return;
	}
	MicroProfileScopeStateC& State = pLog->ScopeState[nSlot];
	State.Token = nToken;
	State.nTick = MicroProfileGpuEnterInternal(pLog, nToken);
}
3431
// GPU counterpart of MicroProfileLeave: pops pLog's scope stack and closes the popped
// scope. A popped depth beyond the scope stack is ignored.
void MicroProfileLeaveGpu(MicroProfileThreadLogGpu* pLog)
{
	const uint32_t nSlot = --pLog->nStackScope;
	if(nSlot >= MICROPROFILE_STACK_MAX)
	{
		return;
	}
	MicroProfileScopeStateC& State = pLog->ScopeState[nSlot];
	MicroProfileGpuLeaveInternal(pLog, State.Token, State.nTick);
}
3440
3441
void MicroProfileGpuBegin(void* pContext, MicroProfileThreadLogGpu* pLog)
3442
{
3443
MP_ASSERT(pLog->pContext == (void*)-1); // dont call begin without calling end
3444
MP_ASSERT(pLog->nStart == (uint32_t)-1);
3445
MP_ASSERT(pContext != (void*)-1);
3446
3447
pLog->pContext = pContext;
3448
pLog->nStart = pLog->nPut;
3449
MicroProfileLogPutGpu(0, pLog);
3450
}
3451
3452
void MicroProfileGpuSetContext(void* pContext, MicroProfileThreadLogGpu* pLog)
3453
{
3454
MP_ASSERT(pLog->pContext != (void*)-1); // dont call begin without calling end
3455
MP_ASSERT(pLog->nStart != (uint32_t)-1);
3456
pLog->pContext = pContext;
3457
}
3458
3459
// Finishes the GPU block started by MicroProfileGpuBegin().
// Writes the number of entries recorded since the block start into the reserved first
// slot (when that slot is inside the buffer), resets the context and start markers, and
// returns a handle: the block start in the low 32 bits and the log id in the high 32 bits.
uint64_t MicroProfileGpuEnd(MicroProfileThreadLogGpu* pLog)
{
	const uint64_t nStart = pLog->nStart;
	const uint32_t nEnd = pLog->nPut;
	const uint64_t nId = pLog->nId;
	if(nStart < MICROPROFILE_GPU_BUFFER_SIZE)
	{
		pLog->Log[nStart] = nEnd - nStart - 1;
	}
	pLog->nStart = (uint32_t)-1;
	pLog->pContext = (void*)-1;
	return (nId << 32) | nStart;
}
3472
3473
void MicroProfileGpuSubmit(int nQueue, uint64_t nWork)
3474
{
3475
MP_ASSERT(nQueue >= 0 && nQueue < MICROPROFILE_MAX_THREADS);
3476
MICROPROFILE_SCOPE(g_MicroProfileGpuSubmit);
3477
uint32_t nStart = (uint32_t)nWork;
3478
uint32_t nThreadLog = uint32_t(nWork >> 32);
3479
3480
MicroProfileThreadLog* pQueueLog = S.Pool[nQueue];
3481
MP_ASSERT(nQueue < MICROPROFILE_MAX_THREADS);
3482
MicroProfileThreadLogGpu* pGpuLog = S.PoolGpu[nThreadLog];
3483
MP_ASSERT(pGpuLog);
3484
3485
int64_t nCount = 0;
3486
if(nStart < MICROPROFILE_GPU_BUFFER_SIZE)
3487
{
3488
nCount = pGpuLog->Log[nStart];
3489
}
3490
MP_ASSERT(nCount < (int64_t)MICROPROFILE_GPU_BUFFER_SIZE);
3491
nStart++;
3492
for(int32_t i = 0; i < nCount; ++i)
3493
{
3494
MP_ASSERT(nStart < MICROPROFILE_GPU_BUFFER_SIZE);
3495
MicroProfileLogEntry LE = pGpuLog->Log[nStart++];
3496
MicroProfileLogPut(LE, pQueueLog);
3497
}
3498
}
3499
3500
// Opens a GPU scope for nToken_ in pGpuLog.
// Returns 0 when the token's group is inactive (nothing recorded), 1 otherwise.
// Records three entries: the GPU timestamp for the enter, the CPU tick, and the
// log index of the calling CPU thread. Must be called between GpuBegin and GpuEnd.
uint64_t MicroProfileGpuEnterInternal(MicroProfileThreadLogGpu* pGpuLog, MicroProfileToken nToken_)
{
	if(!MicroProfileGroupTokenActive(nToken_))
	{
		return 0;
	}
	if(!MicroProfileGetThreadLog())
	{
		MicroProfileInitThreadLog();
	}

	MP_ASSERT(pGpuLog->pContext != (void*)-1); // must be called between GpuBegin/GpuEnd
	uint64_t nTimer = MicroProfileGpuInsertTimeStamp(pGpuLog->pContext);
	MicroProfileLogPutGpuTimer(nToken_, nTimer, MP_LOG_ENTER, pGpuLog);

	MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
	MicroProfileLogPutGpuExtendedNoData(ETOKEN_GPU_CPU_TIMESTAMP, MP_TICK(), pGpuLog);
	MicroProfileLogPutGpuExtendedNoData(ETOKEN_GPU_CPU_SOURCE_THREAD, pLog->nLogIndex, pGpuLog);
	return 1;
}
3527
3528
// String-named GPU scopes are not implemented: this breaks via MP_BREAK() and returns 0.
uint64_t MicroProfileGpuEnterInternalCStr(MicroProfileThreadLogGpu* pGpuLog, const char* pStr)
{
	MP_BREAK(); // not implemented
	return 0;
}
3557
3558
// Closes a GPU scope opened by MicroProfileGpuEnterInternal.
// nTickStart is that function's return value; 0 means nothing was opened.
// Records three entries: the GPU timestamp for the leave, the CPU tick, and the
// log index of the calling CPU thread. Must be called between GpuBegin and GpuEnd.
void MicroProfileGpuLeaveInternal(MicroProfileThreadLogGpu* pGpuLog, MicroProfileToken nToken_, uint64_t nTickStart)
{
	if(!nTickStart)
	{
		return;
	}
	if(!MicroProfileGetThreadLog())
	{
		MicroProfileInitThreadLog();
	}

	MP_ASSERT(pGpuLog->pContext != (void*)-1); // must be called between GpuBegin/GpuEnd
	uint64_t nTimer = MicroProfileGpuInsertTimeStamp(pGpuLog->pContext);
	MicroProfileLogPutGpuTimer(nToken_, nTimer, MP_LOG_LEAVE, pGpuLog);

	MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
	MicroProfileLogPutGpuExtendedNoData(ETOKEN_GPU_CPU_TIMESTAMP, MP_TICK(), pGpuLog);
	MicroProfileLogPutGpuExtendedNoData(ETOKEN_GPU_CPU_SOURCE_THREAD, pLog->nLogIndex, pGpuLog);
}
3575
3576
// String-named GPU scopes are not implemented: this breaks via MP_BREAK() and returns.
void MicroProfileGpuLeaveInternalCStr(MicroProfileThreadLogGpu* pGpuLog, uint64_t nTickStart)
{
	MP_BREAK(); // not implemented
}
3595
3596
// Appends one context-switch record to the S.ContextSwitch ring buffer.
// A record is accepted when S.nPauseTicks is 0 or (S.nPauseTicks - record tick) > 0;
// accepted records also update S.nContextSwitchLastPushed. Otherwise the record is
// dropped and the current tick is stored in S.nContextSwitchStalledTick.
void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
{
	const bool bAccept = 0 == S.nPauseTicks || (S.nPauseTicks - pContextSwitch->nTicks) > 0;
	if(!bAccept)
	{
		S.nContextSwitchStalledTick = MP_TICK();
		return;
	}

	uint32_t nPut = S.nContextSwitchPut;
	S.ContextSwitch[nPut] = *pContextSwitch;
	S.nContextSwitchPut = (S.nContextSwitchPut + 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
	S.nContextSwitchLastPushed = pContextSwitch->nTicks;
}
3620
3621
void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2])
3622
{
3623
if(nPut > nGet)
3624
{
3625
nRange[0][0] = nGet;
3626
nRange[0][1] = nPut;
3627
nRange[1][0] = nRange[1][1] = 0;
3628
}
3629
else if(nPut != nGet)
3630
{
3631
MP_ASSERT(nGet != MICROPROFILE_BUFFER_SIZE);
3632
uint32_t nCountEnd = MICROPROFILE_BUFFER_SIZE - nGet;
3633
nRange[0][0] = nGet;
3634
nRange[0][1] = nGet + nCountEnd;
3635
nRange[1][0] = 0;
3636
nRange[1][1] = nPut;
3637
}
3638
}
3639
3640
void MicroProfileToggleFrozen()
3641
{
3642
S.nFrozen = !S.nFrozen;
3643
}
3644
3645
int MicroProfileIsFrozen()
3646
{
3647
return S.nFrozen != 0 ? 1 : 0;
3648
}
3649
int MicroProfileEnabled()
3650
{
3651
return MicroProfileAnyGroupActive();
3652
}
3653
void* MicroProfileAllocInternal(size_t nSize, size_t nAlign)
3654
{
3655
nAlign = MicroProfileMax(4 * sizeof(uint32_t), nAlign);
3656
nSize += nAlign;
3657
intptr_t nPtr = (intptr_t)MICROPROFILE_ALLOC(nSize, nAlign);
3658
nPtr += nAlign;
3659
uint32_t* pVal = (uint32_t*)nPtr;
3660
MP_ASSERT(nSize < 0xffffffff);
3661
MP_ASSERT(nAlign < 0xffffffff);
3662
pVal[-1] = (uint32_t)nSize;
3663
pVal[-2] = (uint32_t)nAlign;
3664
pVal[-3] = (uint32_t)0x28586813;
3665
MicroProfileCounterAdd(S.CounterToken_Alloc_Memory, nSize);
3666
MicroProfileCounterAdd(S.CounterToken_Alloc_Count, 1);
3667
return (void*)nPtr;
3668
}
3669
void MicroProfileFreeInternal(void* pPtr)
3670
{
3671
intptr_t p = (intptr_t)pPtr;
3672
uint32_t* p4 = (uint32_t*)pPtr;
3673
uint32_t nSize = p4[-1];
3674
uint32_t nAlign = p4[-2];
3675
uint32_t nMagic = p4[-3];
3676
MP_ASSERT(nMagic == 0x28586813);
3677
MICROPROFILE_FREE((void*)(p - nAlign));
3678
MicroProfileCounterAdd(S.CounterToken_Alloc_Memory, -(int)nSize);
3679
MicroProfileCounterAdd(S.CounterToken_Alloc_Count, -1);
3680
}
3681
void* MicroProfileReallocInternal(void* pPtr, size_t nSize)
3682
{
3683
intptr_t p = (intptr_t)pPtr;
3684
uint32_t nAlignBase;
3685
3686
if(p)
3687
{
3688
uint32_t* p4 = (uint32_t*)pPtr;
3689
uint32_t nSizeBase = p4[-1];
3690
nAlignBase = p4[-2];
3691
uint32_t nMagicBase = p4[-3];
3692
MP_ASSERT(nMagicBase == 0x28586813);
3693
3694
MicroProfileCounterAdd(S.CounterToken_Alloc_Memory, nSize - nSizeBase);
3695
}
3696
else
3697
{
3698
nAlignBase = 4 * sizeof(uint32_t);
3699
MicroProfileCounterAdd(S.CounterToken_Alloc_Memory, nSize + nAlignBase);
3700
MicroProfileCounterAdd(S.CounterToken_Alloc_Count, 1);
3701
}
3702
3703
nSize += nAlignBase;
3704
MP_ASSERT(nAlignBase >= 4 * sizeof(uint32_t));
3705
if(p)
3706
{
3707
p = (intptr_t)MICROPROFILE_REALLOC((void*)(p - nAlignBase), nSize);
3708
}
3709
else
3710
{
3711
p = (intptr_t)MICROPROFILE_REALLOC((void*)(p), nSize);
3712
}
3713
p += nAlignBase;
3714
uint32_t* pVal = (uint32_t*)p;
3715
MP_ASSERT(nSize < 0xffffffff);
3716
MP_ASSERT(nAlignBase < 0xffffffff);
3717
pVal[-1] = (uint32_t)nSize;
3718
pVal[-2] = (uint32_t)nAlignBase;
3719
pVal[-3] = (uint32_t)0x28586813;
3720
return (void*)p;
3721
}
3722
3723
static void MicroProfileFlipEnabled()
3724
{
3725
if(S.nFrozen)
3726
{
3727
memset(S.nActiveGroups, 0, sizeof(S.nActiveGroups));
3728
S.AnyActive = false;
3729
}
3730
else
3731
{
3732
bool AnyActive = false;
3733
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
3734
{
3735
uint32_t nNew = S.nActiveGroupsWanted[i];
3736
nNew |= S.nForceGroups[i];
3737
if(nNew)
3738
AnyActive = true;
3739
if(S.nActiveGroups[i] != nNew)
3740
{
3741
S.nActiveGroups[i] = nNew;
3742
}
3743
}
3744
S.AnyActive = AnyActive;
3745
}
3746
}
3747
3748
void MicroProfileFlip(void* pContext, uint32_t FlipFlag)
3749
{
3750
MicroProfileFlip_CB(pContext, nullptr, FlipFlag);
3751
}
3752
3753
#define MICROPROFILE_TICK_VALIDATE_FRAME_TIME 0
3754
3755
void MicroProfileFlip_CB(void* pContext, MicroProfileOnFreeze FreezeCB, uint32_t FlipFlag)
3756
{
3757
MICROPROFILE_COUNTER_LOCAL_UPDATE_SET_ATOMIC(g_MicroProfileBytesPerFlip);
3758
#if 0
3759
//verify LogEntry wraps correctly
3760
MicroProfileLogEntry c = MP_LOG_TICK_MASK-5000;
3761
for(int i = 0; i < 10000; ++i, c += 1)
3762
{
3763
MicroProfileLogEntry l2 = (c+2500) & MP_LOG_TICK_MASK;
3764
MP_ASSERT(2500 == MicroProfileLogTickDifference(c, l2));
3765
}
3766
#endif
3767
MICROPROFILE_SCOPE(g_MicroProfileFlip);
3768
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
3769
3770
if(S.nDumpFileNextFrame)
3771
{
3772
if(0 == S.nDumpFileCountDown)
3773
{
3774
MicroProfileDumpToFile();
3775
S.nDumpFileNextFrame = 0;
3776
S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; // hide spike from dumping webpage
3777
}
3778
else
3779
{
3780
S.nDumpFileCountDown--;
3781
}
3782
}
3783
#if MICROPROFILE_WEBSERVER
3784
if(MICROPROFILE_FLIP_FLAG_START_WEBSERVER == (MICROPROFILE_FLIP_FLAG_START_WEBSERVER & FlipFlag) && S.nWebServerDataSent == (uint64_t)-1)
3785
{
3786
MicroProfileWebServerStart();
3787
S.nWebServerDataSent = 0;
3788
if(!S.WebSocketThreadRunning)
3789
{
3790
S.WebSocketThreadRunning = 1;
3791
MicroProfileThreadStart(&S.WebSocketSendThread, MicroProfileSocketSenderThread);
3792
}
3793
}
3794
#endif
3795
3796
int nLoop = 0;
3797
do
3798
{
3799
#if MICROPROFILE_WEBSERVER
3800
if(MicroProfileWebServerUpdate())
3801
{
3802
S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; // hide spike from dumping webpage
3803
}
3804
#endif
3805
if(nLoop++)
3806
{
3807
MicroProfileSleep(100);
3808
if((nLoop % 10) == 0)
3809
{
3810
uprintf("microprofile frozen %d\n", nLoop);
3811
}
3812
}
3813
} while(S.nFrozen);
3814
3815
uint32_t nAggregateClear = S.nAggregateClear || S.nAutoClearFrames, nAggregateFlip = 0;
3816
3817
if(S.nAutoClearFrames)
3818
{
3819
nAggregateClear = 1;
3820
nAggregateFlip = 1;
3821
S.nAutoClearFrames -= 1;
3822
}
3823
3824
bool nRunning = MicroProfileAnyGroupActive();
3825
if(nRunning)
3826
{
3827
S.nFlipStartTick = MP_TICK();
3828
int64_t nGpuWork = MicroProfileGpuEnd(S.pGpuGlobal);
3829
MicroProfileGpuSubmit(S.GpuQueue, nGpuWork);
3830
MicroProfileThreadLogGpuReset(S.pGpuGlobal);
3831
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
3832
{
3833
if(S.PoolGpu[i])
3834
{
3835
S.PoolGpu[i]->nPut = 0;
3836
}
3837
}
3838
3839
MicroProfileGpuBegin(pContext, S.pGpuGlobal);
3840
3841
uint32_t nGpuTimeStamp = MicroProfileGpuFlip(pContext);
3842
3843
uint64_t nFrameIdx = S.nFramePutIndex++;
3844
S.nFramePut = (S.nFramePut + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
3845
S.Frames[S.nFramePut].nFrameId = nFrameIdx;
3846
MP_ASSERT((S.nFramePutIndex % MICROPROFILE_MAX_FRAME_HISTORY) == S.nFramePut);
3847
S.nFrameCurrent = (S.nFramePut + MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 1) % MICROPROFILE_MAX_FRAME_HISTORY;
3848
S.nFrameCurrentIndex++;
3849
uint32_t nFrameNext = (S.nFrameCurrent + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
3850
S.nFrameNext = nFrameNext;
3851
3852
uint32_t nContextSwitchPut = S.nContextSwitchPut;
3853
if(S.nContextSwitchLastPut < nContextSwitchPut)
3854
{
3855
S.nContextSwitchUsage = (nContextSwitchPut - S.nContextSwitchLastPut);
3856
}
3857
else
3858
{
3859
S.nContextSwitchUsage = MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - S.nContextSwitchLastPut + nContextSwitchPut;
3860
}
3861
S.nContextSwitchLastPut = nContextSwitchPut;
3862
3863
MicroProfileFrameState* pFramePut = &S.Frames[S.nFramePut];
3864
MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent];
3865
MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext];
3866
const int64_t nTickStartFrame = pFrameCurrent->nFrameStartCpu;
3867
const int64_t nTickEndFrame = pFrameNext->nFrameStartCpu;
3868
3869
pFrameCurrent->nGpuPending = 0;
3870
pFramePut->nGpuPending = 1;
3871
3872
pFramePut->nFrameStartCpu = MP_TICK();
3873
3874
pFramePut->nFrameStartGpu = nGpuTimeStamp;
3875
{
3876
const float fDumpTimeThreshold = 1000.f * 60 * 60 * 24 * 365.f; // if time above this, then we're handling uninitialized counters
3877
int nDumpNextFrame = 0;
3878
float fTimeGpu = 0.f;
3879
if(pFrameNext->nFrameStartGpu != MICROPROFILE_INVALID_TICK)
3880
{
3881
3882
uint64_t nTickCurrent = pFrameCurrent->nFrameStartGpu;
3883
uint64_t nTickNext = pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu);
3884
nTickCurrent = MicroProfileLogTickMin(nTickCurrent, nTickNext);
3885
float fTime = 1000.f * (nTickNext - nTickCurrent) / (MicroProfileTicksPerSecondGpu());
3886
fTime = fTimeGpu;
3887
if(S.fDumpGpuSpike > 0.f && fTime > S.fDumpGpuSpike && fTime < fDumpTimeThreshold)
3888
{
3889
nDumpNextFrame = 1;
3890
}
3891
}
3892
float fTimeCpu = 1000.f * (pFrameNext->nFrameStartCpu - pFrameCurrent->nFrameStartCpu) / MicroProfileTicksPerSecondCpu();
3893
if(S.fDumpCpuSpike > 0.f && fTimeCpu > S.fDumpCpuSpike && fTimeCpu < fDumpTimeThreshold)
3894
{
3895
nDumpNextFrame = 1;
3896
}
3897
if(nDumpNextFrame)
3898
{
3899
S.nDumpFileNextFrame = S.nDumpSpikeMask;
3900
S.nDumpSpikeMask = 0;
3901
S.nDumpFileCountDown = 5;
3902
}
3903
}
3904
3905
const uint64_t nTickEndFrameGpu_ = pFrameNext->nFrameStartGpu;
3906
const uint64_t nTickStartFrameGpu_ = pFrameCurrent->nFrameStartGpu;
3907
const bool bGpuFrameInvalid = nTickEndFrameGpu_ == MICROPROFILE_INVALID_TICK || nTickStartFrameGpu_ == MICROPROFILE_INVALID_TICK;
3908
const uint64_t nTickEndFrameGpu = bGpuFrameInvalid ? 1 : nTickEndFrameGpu_;
3909
const uint64_t nTickStartFrameGpu = bGpuFrameInvalid ? 2 : nTickStartFrameGpu_;
3910
3911
MicroProfileFrameExtraCounterData* ExtraData = S.FrameExtraCounterData;
3912
bool UsingExtraData = false;
3913
if(ExtraData)
3914
{
3915
if((intptr_t)ExtraData == 1)
3916
{
3917
size_t Bytes = sizeof(MicroProfileFrameExtraCounterData) * MICROPROFILE_MAX_FRAME_HISTORY;
3918
printf(" allocating %d bytes %f\n", (int)Bytes, Bytes / (1024.0 * 1024.0));
3919
ExtraData = S.FrameExtraCounterData = (MicroProfileFrameExtraCounterData*)MicroProfileAllocInternal(Bytes, alignof(uint64_t));
3920
memset(ExtraData, 0, Bytes);
3921
}
3922
ExtraData = ExtraData + S.nFrameCurrent;
3923
UsingExtraData = true;
3924
}
3925
#define MP_ASSERT_LE_WRAP(l, g) MP_ASSERT(uint64_t(g - l) < 0x8000000000000000)
3926
3927
{
3928
MP_ASSERT_LE_WRAP(nTickStartFrame, nTickEndFrame);
3929
uint64_t nTick = nTickEndFrame - nTickStartFrame;
3930
S.nFlipTicks = nTick;
3931
S.nFlipAggregate += nTick;
3932
S.nFlipMax = MicroProfileMax(S.nFlipMax, nTick);
3933
}
3934
3935
uint32_t* pTimerToGroup = &S.TimerToGroup[0];
3936
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
3937
{
3938
MicroProfileThreadLog* pLog = S.Pool[i];
3939
if(!pLog)
3940
{
3941
pFramePut->nLogStart[i] = 0;
3942
}
3943
else
3944
{
3945
uint32_t nPut = pLog->nPut.load(std::memory_order_acquire);
3946
pFramePut->nLogStart[i] = nPut;
3947
if(!pLog->nGpu)
3948
{
3949
uint32_t nStart = pFrameCurrent->nLogStart[i];
3950
while(nStart != nPut)
3951
{
3952
int64_t LE = pLog->Log[nStart];
3953
int64_t nDifference = MicroProfileLogTickDifference(LE, nTickEndFrame);
3954
uint32_t Ext = MicroProfileLogGetType(LE);
3955
if(nDifference > 0 || 0 != (0x2 & Ext))
3956
{
3957
nStart = (nStart + 1) % MICROPROFILE_BUFFER_SIZE;
3958
}
3959
else
3960
{
3961
break;
3962
}
3963
}
3964
pFrameNext->nLogStart[i] = nStart;
3965
}
3966
}
3967
}
3968
{
3969
pFramePut->nLogStartTimeline = S.TimelineLog.nPut.load(std::memory_order_acquire);
3970
3971
uint32_t nFrameCurrent = S.nFrameCurrent;
3972
uint32_t nTimelineFrameDeltaMax = S.nTimelineFrameMax;
3973
for(uint32_t i = 0; i != MICROPROFILE_TIMELINE_MAX_TOKENS; ++i)
3974
{
3975
uint32_t nFrameStart = S.TimelineTokenFrameEnter[i];
3976
if(nFrameStart != MICROPROFILE_INVALID_FRAME)
3977
{
3978
uint32_t nCur = nFrameCurrent;
3979
if(nCur < nFrameStart)
3980
nCur += MICROPROFILE_MAX_FRAME_HISTORY;
3981
if(nCur >= nFrameStart)
3982
{
3983
uint32_t D = nCur - nFrameStart;
3984
nTimelineFrameDeltaMax = MicroProfileMax(nTimelineFrameDeltaMax, D);
3985
}
3986
}
3987
}
3988
pFramePut->nTimelineFrameMax = nTimelineFrameDeltaMax;
3989
S.nTimelineFrameMax = 0;
3990
}
3991
{
3992
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
3993
{
3994
MicroProfileThreadLog* pLog = S.Pool[i];
3995
if(!pLog)
3996
continue;
3997
if(pLog->nGpu)
3998
{
3999
uint32_t nPut = pFrameNext->nLogStart[i];
4000
uint32_t nGet = pFrameCurrent->nLogStart[i];
4001
uint32_t nRange[2][2] = {
4002
{ 0, 0 },
4003
{ 0, 0 },
4004
};
4005
MicroProfileGetRange(nPut, nGet, nRange);
4006
for(uint32_t j = 0; j < 2; ++j)
4007
{
4008
uint32_t nStart = nRange[j][0];
4009
uint32_t nEnd = nRange[j][1];
4010
for(uint32_t k = nStart; k < nEnd; ++k)
4011
{
4012
MicroProfileLogEntry L = pLog->Log[k];
4013
if(MicroProfileLogGetType(L) < MP_LOG_EXTENDED)
4014
{
4015
pLog->Log[k] = MicroProfileLogSetTick(L, MicroProfileGpuGetTimeStamp((uint32_t)MicroProfileLogGetTick(L)));
4016
}
4017
k += MicroProfileLogGetDataSize(L);
4018
}
4019
}
4020
}
4021
}
4022
}
4023
4024
{
4025
MicroProfile::GroupTime* pFrameGroup = &S.FrameGroup[0];
4026
{
4027
MICROPROFILE_SCOPE(g_MicroProfileClear);
4028
for(uint32_t i = 0; i < S.nTotalTimers; ++i)
4029
{
4030
S.Frame[i].nTicks = 0;
4031
S.Frame[i].nCount = 0;
4032
S.FrameExclusive[i] = 0;
4033
}
4034
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
4035
{
4036
pFrameGroup[i].nTicks = 0;
4037
pFrameGroup[i].nTicksExclusive = 0;
4038
pFrameGroup[i].nCount = 0;
4039
}
4040
}
4041
{
4042
MICROPROFILE_SCOPE(g_MicroProfileThreadLoop);
4043
memset(S.FrameGroupThreadValid, 0, sizeof(S.FrameGroupThreadValid));
4044
4045
for(uint32_t idx_thread = 0; idx_thread < MICROPROFILE_MAX_THREADS; ++idx_thread)
4046
{
4047
MicroProfileThreadLog* pLog = S.Pool[idx_thread];
4048
if(!pLog)
4049
continue;
4050
bool bGpu = pLog->nGpu != 0;
4051
int64_t nTickStartLog = bGpu ? nTickStartFrameGpu : nTickStartFrame;
4052
int64_t nTickEndLog = bGpu ? nTickEndFrameGpu : nTickEndFrame;
4053
4054
float fToMs = bGpu ? MicroProfileTickToMsMultiplierGpu() : MicroProfileTickToMsMultiplierCpu();
4055
float fFrameTime = fToMs * (nTickEndLog - nTickStartLog);
4056
4057
MicroProfile::GroupTime* pFrameGroupThread = &S.FrameGroupThread[idx_thread][0];
4058
4059
uint32_t nPut = pFrameNext->nLogStart[idx_thread];
4060
uint32_t nGet = pFrameCurrent->nLogStart[idx_thread];
4061
uint32_t nRange[2][2] = {
4062
{ 0, 0 },
4063
{ 0, 0 },
4064
};
4065
MicroProfileGetRange(nPut, nGet, nRange);
4066
if(nPut != nGet)
4067
{
4068
S.FrameGroupThreadValid[idx_thread / 32] |= 1 << (idx_thread % 32);
4069
memset(pFrameGroupThread, 0, sizeof(S.FrameGroupThread[idx_thread]));
4070
}
4071
4072
uint64_t* pStackLog = &pLog->nStackLogEntry[0];
4073
uint64_t* pChildTickStack = &pLog->nChildTickStack[1];
4074
int32_t nStackPos = pLog->nStackPos;
4075
uint8_t TimerStackPos[MICROPROFILE_MAX_TIMERS];
4076
uint8_t GroupStackPos[MICROPROFILE_MAX_GROUPS];
4077
memset(TimerStackPos, 0, sizeof(TimerStackPos));
4078
memset(GroupStackPos, 0, sizeof(GroupStackPos));
4079
4080
// restore group and timer stack pos.
4081
for(int32_t i = 0; i < nStackPos; ++i)
4082
{
4083
uint64_t nTimer = MicroProfileLogGetTimerIndex(pStackLog[i]);
4084
uint32_t nGroup = pTimerToGroup[nTimer];
4085
MP_ASSERT(nTimer < MICROPROFILE_MAX_TIMERS);
4086
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
4087
TimerStackPos[nTimer]++;
4088
GroupStackPos[nGroup]++;
4089
}
4090
4091
for(uint32_t j = 0; j < 2; ++j)
4092
{
4093
uint32_t nStart = nRange[j][0];
4094
uint32_t nEnd = nRange[j][1];
4095
for(uint32_t k = nStart; k < nEnd; ++k)
4096
{
4097
MicroProfileLogEntry LE = pLog->Log[k];
4098
uint32_t nType = MicroProfileLogGetType(LE);
4099
4100
switch(nType)
4101
{
4102
case MP_LOG_ENTER:
4103
{
4104
uint64_t nTimer = MicroProfileLogGetTimerIndex(LE);
4105
if(nTimer != ETOKEN_CSTR_PTR)
4106
{
4107
MP_ASSERT(nTimer < S.nTotalTimers);
4108
uint32_t nGroup = pTimerToGroup[nTimer];
4109
MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
4110
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
4111
4112
// When we aggretate the total time, we have to count if the timers & groups are layered, to avoid summing them twice when calculating the total time.
4113
// Averages become nonsense regardless.
4114
TimerStackPos[nTimer]++;
4115
GroupStackPos[nGroup]++;
4116
4117
pStackLog[nStackPos] = LE;
4118
4119
pChildTickStack[nStackPos] = 0;
4120
nStackPos++;
4121
}
4122
break;
4123
}
4124
case MP_LOG_LEAVE:
4125
{
4126
uint64_t nTimer = MicroProfileLogGetTimerIndex(LE);
4127
if(nTimer != ETOKEN_CSTR_PTR)
4128
{
4129
MP_ASSERT(nTimer < S.nTotalTimers);
4130
uint32_t nGroup = pTimerToGroup[nTimer];
4131
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
4132
MP_ASSERT(nStackPos);
4133
uint64_t nTicks;
4134
bool bGroupRoot = 0 == GroupStackPos[nGroup] || 0 == --GroupStackPos[nGroup];
4135
bool bTimerRoot = 0 == TimerStackPos[nTimer] || 0 == --TimerStackPos[nTimer];
4136
{
4137
nStackPos--;
4138
MicroProfileLogEntry LEStack = pStackLog[nStackPos];
4139
MP_ASSERT(MicroProfileLogGetTimerIndex(LEStack) == nTimer); // unbalanced timers are not supported
4140
uint64_t nTickStart = MicroProfileLogTickClamp(LEStack, nTickStartLog, nTickEndLog);
4141
uint64_t nClamped = MicroProfileLogTickClamp(LE, nTickStartLog, nTickEndLog);
4142
nTicks = MicroProfileLogTickDifference(nTickStart, nClamped);
4143
MP_ASSERT(nTicks < 0x8000000000000000);
4144
4145
uint64_t nChildTicks = pChildTickStack[nStackPos];
4146
4147
MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
4148
if(nStackPos)
4149
{
4150
pChildTickStack[nStackPos - 1] += nTicks;
4151
}
4152
MP_ASSERT(nTicks >= nChildTicks);
4153
uint64_t nTicksExclusive = (nTicks - nChildTicks);
4154
S.FrameExclusive[nTimer] += nTicksExclusive;
4155
pFrameGroupThread[nGroup].nTicksExclusive += nTicksExclusive;
4156
if(bTimerRoot) // dont count this if its below another instance of the same timer.
4157
{
4158
S.Frame[nTimer].nTicks += nTicks;
4159
S.Frame[nTimer].nCount += 1;
4160
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
4161
if(bGroupRoot)
4162
{
4163
pFrameGroupThread[nGroup].nTicks += nTicks;
4164
pFrameGroupThread[nGroup].nCount += 1;
4165
}
4166
}
4167
}
4168
}
4169
break;
4170
}
4171
case MP_LOG_EXTENDED:
4172
{
4173
k += MicroProfileLogGetDataSize(LE);
4174
break;
4175
}
4176
case MP_LOG_EXTENDED_NO_DATA:
4177
break;
4178
}
4179
}
4180
}
4181
4182
for(int32_t i = nStackPos - 1; i >= 0; --i)
4183
{
4184
4185
MicroProfileLogEntry LE = pStackLog[i];
4186
uint64_t nTickStart = MicroProfileLogTickClamp(LE, nTickStartLog, nTickEndLog);
4187
uint64_t nTicks = MicroProfileLogTickDifference(nTickStart, nTickEndLog);
4188
int64_t nChildTicks = pChildTickStack[i];
4189
pChildTickStack[i] = 0; // consume..
4190
4191
MP_ASSERT(i < MICROPROFILE_STACK_MAX && i >= 0);
4192
if(i)
4193
{
4194
pChildTickStack[i - 1] += nTicks;
4195
}
4196
MP_ASSERT(nTicks >= (uint64_t)nChildTicks);
4197
4198
uint32_t nTimer = (uint32_t)MicroProfileLogGetTimerIndex(LE);
4199
uint32_t nGroup = pTimerToGroup[nTimer];
4200
4201
bool bGroupRoot = 0 == GroupStackPos[nGroup] || 0 == --GroupStackPos[nGroup];
4202
bool bTimerRoot = 0 == TimerStackPos[nTimer] || 0 == --TimerStackPos[nTimer];
4203
4204
uint64_t nTicksExclusive = (nTicks - nChildTicks);
4205
S.FrameExclusive[nTimer] += nTicksExclusive;
4206
pFrameGroupThread[nGroup].nTicksExclusive += nTicksExclusive;
4207
if(bTimerRoot)
4208
{
4209
S.Frame[nTimer].nTicks += nTicks;
4210
S.Frame[nTimer].nCount += 1;
4211
4212
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
4213
if(bGroupRoot)
4214
{
4215
pFrameGroupThread[nGroup].nTicks += nTicks;
4216
pFrameGroupThread[nGroup].nCount += 1;
4217
}
4218
}
4219
}
4220
#ifdef MP_ASSERT
4221
for(uint8_t& g : GroupStackPos)
4222
{
4223
MP_ASSERT(g == 0);
4224
}
4225
for(uint8_t& g : TimerStackPos)
4226
{
4227
MP_ASSERT(g == 0);
4228
}
4229
#endif
4230
4231
pLog->nStackPos = nStackPos;
4232
for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
4233
{
4234
pLog->nGroupTicks[j] += pFrameGroupThread[j].nTicks;
4235
4236
if((S.FrameGroupThreadValid[idx_thread / 32] & (1 << (idx_thread % 32))) != 0)
4237
{
4238
pFrameGroup[j].nTicks += pFrameGroupThread[j].nTicks;
4239
pFrameGroup[j].nTicksExclusive += pFrameGroupThread[j].nTicksExclusive;
4240
pFrameGroup[j].nCount += pFrameGroupThread[j].nCount;
4241
}
4242
}
4243
4244
if(pLog->nPut == pLog->nGet && pLog->nActive == 2)
4245
{
4246
pLog->nIdleFrames++;
4247
}
4248
else
4249
{
4250
pLog->nIdleFrames = 0;
4251
}
4252
if(pLog->nActive == 2 && pLog->nIdleFrames > MICROPROFILE_THREAD_LOG_FRAMES_REUSE)
4253
{
4254
MicroProfileLogReset(pLog);
4255
}
4256
}
4257
}
4258
{
4259
MICROPROFILE_SCOPE(g_MicroProfileAccumulate);
4260
uint64_t* ExtraPut = nullptr;
4261
if(UsingExtraData)
4262
{
4263
ExtraPut = &ExtraData->Timers[0];
4264
ExtraData->NumTimers = S.nTotalTimers;
4265
}
4266
4267
for(uint32_t i = 0; i < S.nTotalTimers; ++i)
4268
{
4269
S.AccumTimers[i].nTicks += S.Frame[i].nTicks;
4270
S.AccumTimers[i].nCount += S.Frame[i].nCount;
4271
S.AccumMaxTimers[i] = MicroProfileMax(S.AccumMaxTimers[i], S.Frame[i].nTicks);
4272
S.AccumMinTimers[i] = MicroProfileMin(S.AccumMinTimers[i], S.Frame[i].nTicks);
4273
S.AccumTimersExclusive[i] += S.FrameExclusive[i];
4274
S.AccumMaxTimersExclusive[i] = MicroProfileMax(S.AccumMaxTimersExclusive[i], S.FrameExclusive[i]);
4275
if(ExtraPut)
4276
*ExtraPut++ = S.Frame[i].nTicks;
4277
}
4278
ExtraPut = nullptr;
4279
if(UsingExtraData)
4280
{
4281
ExtraPut = &ExtraData->Groups[0];
4282
ExtraData->NumGroups = S.nGroupCount;
4283
}
4284
4285
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
4286
{
4287
S.AccumGroup[i] += pFrameGroup[i].nTicks;
4288
S.AccumGroupMax[i] = MicroProfileMax(S.AccumGroupMax[i], pFrameGroup[i].nTicks);
4289
if(ExtraPut)
4290
*ExtraPut++ = pFrameGroup[i].nTicks;
4291
}
4292
#if MICROPROFILE_IMGUI
4293
void MicroProfileImguiGather();
4294
MicroProfileImguiGather();
4295
#endif
4296
if(S.CsvConfig.State == MicroProfileCsvConfig::ACTIVE)
4297
{
4298
uint32_t FrameIndex = S.nFrameCurrent % MICROPROFILE_MAX_FRAME_HISTORY;
4299
uint64_t* FrameData = S.CsvConfig.FrameData + S.CsvConfig.TotalElements * FrameIndex;
4300
{
4301
uint16_t* TimerIndices = S.CsvConfig.TimerIndices;
4302
for(uint32_t i = 0; i < S.CsvConfig.NumTimers; ++i)
4303
{
4304
uint16_t Index = TimerIndices[i];
4305
if(Index != UINT16_MAX)
4306
{
4307
*FrameData = S.Frame[Index].nTicks;
4308
}
4309
else
4310
{
4311
*FrameData = 0;
4312
}
4313
FrameData++;
4314
}
4315
}
4316
{
4317
uint16_t* GroupIndices = S.CsvConfig.GroupIndices;
4318
for(uint32_t i = 0; i < S.CsvConfig.NumGroups; ++i)
4319
{
4320
uint16_t Index = GroupIndices[i];
4321
if(Index != UINT16_MAX)
4322
{
4323
*FrameData = pFrameGroup[Index].nTicks;
4324
}
4325
else
4326
{
4327
*FrameData = 0;
4328
}
4329
FrameData++;
4330
}
4331
}
4332
{
4333
uint16_t* CounterIndices = S.CsvConfig.CounterIndices;
4334
for(uint32_t i = 0; i < S.CsvConfig.NumCounters; ++i)
4335
{
4336
uint16_t Index = CounterIndices[i];
4337
if(Index != UINT16_MAX)
4338
{
4339
if(S.CounterInfo[Index].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE)
4340
{
4341
double d = S.CountersDouble[Index].load();
4342
memcpy(FrameData, &d, sizeof(d));
4343
}
4344
else
4345
{
4346
*FrameData = S.Counters[Index].load();
4347
}
4348
}
4349
else
4350
{
4351
*FrameData = 0;
4352
}
4353
FrameData++;
4354
}
4355
}
4356
}
4357
}
4358
for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
4359
{
4360
if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN)
4361
{
4362
MicroProfileToken nToken = S.Graph[i].nToken;
4363
S.Graph[i].nHistory[S.nGraphPut] = S.Frame[MicroProfileGetTimerIndex(nToken)].nTicks;
4364
}
4365
}
4366
S.nGraphPut = (S.nGraphPut + 1) % MICROPROFILE_GRAPH_HISTORY;
4367
}
4368
4369
if(S.nAggregateFlip <= ++S.nAggregateFlipCount)
4370
{
4371
nAggregateFlip = 1;
4372
if(S.nAggregateFlip) // if 0 accumulate indefinitely
4373
{
4374
nAggregateClear = 1;
4375
}
4376
}
4377
4378
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
4379
{
4380
MicroProfileThreadLog* pLog = S.Pool[i];
4381
uint32_t nNewGet = pFrameNext->nLogStart[i];
4382
4383
if(pLog && nNewGet != (uint32_t)-1)
4384
{
4385
pLog->nGet.store(nNewGet);
4386
}
4387
}
4388
if(pFrameNext->nLogStartTimeline != (uint32_t)-1)
4389
{
4390
S.TimelineLog.nGet.store(pFrameNext->nLogStartTimeline);
4391
}
4392
}
4393
if(nAggregateFlip)
4394
{
4395
memcpy(&S.Aggregate[0], &S.AccumTimers[0], sizeof(S.Aggregate[0]) * S.nTotalTimers);
4396
memcpy(&S.AggregateMax[0], &S.AccumMaxTimers[0], sizeof(S.AggregateMax[0]) * S.nTotalTimers);
4397
memcpy(&S.AggregateMin[0], &S.AccumMinTimers[0], sizeof(S.AggregateMin[0]) * S.nTotalTimers);
4398
memcpy(&S.AggregateExclusive[0], &S.AccumTimersExclusive[0], sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
4399
memcpy(&S.AggregateMaxExclusive[0], &S.AccumMaxTimersExclusive[0], sizeof(S.AggregateMaxExclusive[0]) * S.nTotalTimers);
4400
4401
memcpy(&S.AggregateGroup[0], &S.AccumGroup[0], sizeof(S.AggregateGroup));
4402
memcpy(&S.AggregateGroupMax[0], &S.AccumGroupMax[0], sizeof(S.AggregateGroup));
4403
4404
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
4405
{
4406
MicroProfileThreadLog* pLog = S.Pool[i];
4407
if(!pLog)
4408
continue;
4409
4410
memcpy(&pLog->nAggregateGroupTicks[0], &pLog->nGroupTicks[0], sizeof(pLog->nAggregateGroupTicks));
4411
4412
if(nAggregateClear)
4413
{
4414
memset(&pLog->nGroupTicks[0], 0, sizeof(pLog->nGroupTicks));
4415
}
4416
}
4417
4418
S.nAggregateFrames = S.nAggregateFlipCount;
4419
S.nFlipAggregateDisplay = S.nFlipAggregate;
4420
S.nFlipMaxDisplay = S.nFlipMax;
4421
if(nAggregateClear)
4422
{
4423
memset(&S.AccumTimers[0], 0, sizeof(S.Aggregate[0]) * S.nTotalTimers);
4424
memset(&S.AccumMaxTimers[0], 0, sizeof(S.AccumMaxTimers[0]) * S.nTotalTimers);
4425
memset(&S.AccumMinTimers[0], 0xFF, sizeof(S.AccumMinTimers[0]) * S.nTotalTimers);
4426
memset(&S.AccumTimersExclusive[0], 0, sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
4427
memset(&S.AccumMaxTimersExclusive[0], 0, sizeof(S.AccumMaxTimersExclusive[0]) * S.nTotalTimers);
4428
memset(&S.AccumGroup[0], 0, sizeof(S.AggregateGroup));
4429
memset(&S.AccumGroupMax[0], 0, sizeof(S.AggregateGroup));
4430
4431
S.nAggregateFlipCount = 0;
4432
S.nFlipAggregate = 0;
4433
S.nFlipMax = 0;
4434
4435
S.nAggregateFlipTick = MP_TICK();
4436
}
4437
4438
#if MICROPROFILE_COUNTER_HISTORY
4439
int64_t* pDest = &S.nCounterHistory[S.nCounterHistoryPut][0];
4440
S.nCounterHistoryPut = (S.nCounterHistoryPut + 1) % MICROPROFILE_GRAPH_HISTORY;
4441
for(uint32_t i = 0; i < S.nNumCounters; ++i)
4442
{
4443
if(0 != (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DETAILED))
4444
{
4445
MicroProfileFetchCounter(i);
4446
bool IsDouble = (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) != 0;
4447
if(IsDouble)
4448
{
4449
double dValue = S.CountersDouble[i].load(std::memory_order_relaxed);
4450
memcpy(&pDest[i], &dValue, sizeof(dValue));
4451
S.dCounterMin[i] = MicroProfileMin(S.dCounterMin[i], dValue);
4452
S.dCounterMax[i] = MicroProfileMax(S.dCounterMax[i], dValue);
4453
}
4454
else
4455
{
4456
uint64_t nValue = S.Counters[i].load(std::memory_order_relaxed);
4457
pDest[i] = nValue;
4458
S.nCounterMin[i] = MicroProfileMin(S.nCounterMin[i], (int64_t)nValue);
4459
S.nCounterMax[i] = MicroProfileMax(S.nCounterMax[i], (int64_t)nValue);
4460
}
4461
}
4462
}
4463
#endif
4464
}
4465
S.nAggregateClear = 0;
4466
4467
MicroProfileFlipEnabled();
4468
}
4469
4470
void MicroProfileSetEnableAllGroups(int bEnable)
4471
{
4472
if(bEnable)
4473
{
4474
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
4475
{
4476
S.nActiveGroupsWanted[i] = S.nGroupMask[i];
4477
}
4478
S.nStartEnabled = 1;
4479
MicroProfileFlipEnabled();
4480
}
4481
else
4482
{
4483
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
4484
{
4485
S.nActiveGroupsWanted[i] = 0;
4486
}
4487
S.nStartEnabled = 0;
4488
MicroProfileFlipEnabled();
4489
}
4490
}
4491
void MicroProfileEnableCategory(const char* pCategory, int bEnabled)
4492
{
4493
int nCategoryIndex = -1;
4494
for(uint32_t i = 0; i < S.nCategoryCount; ++i)
4495
{
4496
if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName))
4497
{
4498
nCategoryIndex = (int)i;
4499
break;
4500
}
4501
}
4502
if(nCategoryIndex >= 0)
4503
{
4504
if(bEnabled)
4505
{
4506
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
4507
{
4508
S.nActiveGroupsWanted[i] |= S.CategoryInfo[nCategoryIndex].nGroupMask[i];
4509
}
4510
}
4511
else
4512
{
4513
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
4514
{
4515
S.nActiveGroupsWanted[i] &= ~S.CategoryInfo[nCategoryIndex].nGroupMask[i];
4516
}
4517
}
4518
}
4519
}
4520
4521
void MicroProfileEnableCategory(const char* pCategory)
4522
{
4523
MicroProfileEnableCategory(pCategory, true);
4524
}
4525
void MicroProfileDisableCategory(const char* pCategory)
4526
{
4527
MicroProfileEnableCategory(pCategory, false);
4528
}
4529
4530
int MicroProfileGetEnableAllGroups()
4531
{
4532
return 0 == memcmp(S.nGroupMask, S.nActiveGroupsWanted, sizeof(S.nGroupMask));
4533
}
4534
4535
// Intentionally a no-op: the argument is ignored.
void MicroProfileSetForceMetaCounters(int bForce)
{
	(void)bForce;
}
4538
4539
// Always reports 0.
int MicroProfileGetForceMetaCounters()
{
	const int nForced = 0;
	return nForced;
}
4544
4545
// Intentionally a no-op: the name is ignored.
void MicroProfileEnableMetaCounter(const char* pMeta)
{
	(void)pMeta;
}
4548
4549
// Intentionally a no-op: the name is ignored.
void MicroProfileDisableMetaCounter(const char* pMeta)
{
	(void)pMeta;
}
4552
4553
void MicroProfileSetAggregateFrames(int nFrames)
4554
{
4555
S.nAggregateFlip = (uint32_t)nFrames;
4556
if(0 == nFrames)
4557
{
4558
S.nAggregateClear = 1;
4559
}
4560
}
4561
4562
int MicroProfileGetAggregateFrames()
4563
{
4564
return S.nAggregateFlip;
4565
}
4566
4567
int MicroProfileGetCurrentAggregateFrames()
4568
{
4569
return int(S.nAggregateFlip ? S.nAggregateFlip : S.nAggregateFlipCount);
4570
}
4571
4572
void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type)
4573
{
4574
MicroProfileInit();
4575
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
4576
uint16_t nGroup = MicroProfileGetGroup(pGroup, Type);
4577
uint32_t nIndex = nGroup / 32;
4578
uint32_t nBit = nGroup % 32;
4579
S.nForceGroups[nIndex] |= (1ll << nBit);
4580
}
4581
4582
void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type)
4583
{
4584
MicroProfileInit();
4585
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
4586
uint16_t nGroup = MicroProfileGetGroup(pGroup, Type);
4587
uint32_t nIndex = nGroup / 32;
4588
uint32_t nBit = nGroup % 32;
4589
4590
S.nForceGroups[nIndex] &= ~(1ll << nBit);
4591
}
4592
4593
// Per-timer values produced by MicroProfileCalcTimers. All *Ms members are in milliseconds.
struct MicroProfileTimerValues
{
	float TimeMs;             // ticks of the most recent frame
	float AverageMs;          // aggregate ticks divided by the number of aggregated frames
	float MaxMs;              // aggregate maximum
	float MinMs;              // aggregate minimum (0 when no minimum was recorded)
	float CallAverageMs;      // aggregate ticks divided by the aggregate call count
	float ExclusiveMs;        // exclusive ticks of the most recent frame
	float AverageExclusiveMs; // aggregate exclusive ticks divided by the number of aggregated frames
	float MaxExclusiveMs;     // aggregate exclusive maximum
	float TotalMs;            // aggregate ticks, undivided
	uint32_t nCount;          // aggregate call count
};
4606
4607
void MicroProfileCalcTimers(int nTimer, MicroProfileTimerValues& Out)
4608
{
4609
const uint32_t nGroupId = S.TimerInfo[nTimer].nGroupIndex;
4610
const float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
4611
uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
4612
uint32_t nAggregateCount = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1;
4613
Out.nCount = S.Aggregate[nTimer].nCount;
4614
4615
float fToPrc = S.fRcpReferenceTime;
4616
float fMs = fToMs * (S.Frame[nTimer].nTicks);
4617
4618
float fAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateFrames);
4619
float fMaxMs = fToMs * (S.AggregateMax[nTimer]);
4620
float fMinMs = fToMs * (S.AggregateMin[nTimer] != uint64_t(-1) ? S.AggregateMin[nTimer] : 0);
4621
float fCallAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateCount);
4622
float fMsExclusive = fToMs * (S.FrameExclusive[nTimer]);
4623
float fAverageMsExclusive = fToMs * (S.AggregateExclusive[nTimer] / nAggregateFrames);
4624
float fMaxMsExclusive = fToMs * (S.AggregateMaxExclusive[nTimer]);
4625
float fTotalMs = fToMs * S.Aggregate[nTimer].nTicks;
4626
4627
Out.TimeMs = fMs;
4628
Out.AverageMs = fAverageMs;
4629
Out.MaxMs = fMaxMs;
4630
Out.MinMs = fMinMs;
4631
Out.CallAverageMs = fCallAverageMs;
4632
Out.ExclusiveMs = fMsExclusive;
4633
Out.AverageExclusiveMs = fAverageMsExclusive;
4634
Out.MaxExclusiveMs = fMaxMsExclusive;
4635
Out.TotalMs = fTotalMs;
4636
}
4637
4638
void MicroProfileCalcAllTimers(
4639
float* pTimers, float* pAverage, float* pMax, float* pMin, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
4640
{
4641
for(uint32_t i = 0; i < S.nTotalTimers && i < nSize; ++i)
4642
{
4643
const uint32_t nGroupId = S.TimerInfo[i].nGroupIndex;
4644
const float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
4645
uint32_t nTimer = i;
4646
uint32_t nIdx = i * 2;
4647
uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
4648
uint32_t nAggregateCount = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1;
4649
float fToPrc = S.fRcpReferenceTime;
4650
float fMs = fToMs * (S.Frame[nTimer].nTicks);
4651
float fPrc = MicroProfileMin(fMs * fToPrc, 1.f);
4652
float fAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateFrames);
4653
float fAveragePrc = MicroProfileMin(fAverageMs * fToPrc, 1.f);
4654
float fMaxMs = fToMs * (S.AggregateMax[nTimer]);
4655
float fMaxPrc = MicroProfileMin(fMaxMs * fToPrc, 1.f);
4656
float fMinMs = fToMs * (S.AggregateMin[nTimer] != uint64_t(-1) ? S.AggregateMin[nTimer] : 0);
4657
float fMinPrc = MicroProfileMin(fMinMs * fToPrc, 1.f);
4658
float fCallAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateCount);
4659
float fCallAveragePrc = MicroProfileMin(fCallAverageMs * fToPrc, 1.f);
4660
float fMsExclusive = fToMs * (S.FrameExclusive[nTimer]);
4661
float fPrcExclusive = MicroProfileMin(fMsExclusive * fToPrc, 1.f);
4662
float fAverageMsExclusive = fToMs * (S.AggregateExclusive[nTimer] / nAggregateFrames);
4663
float fAveragePrcExclusive = MicroProfileMin(fAverageMsExclusive * fToPrc, 1.f);
4664
float fMaxMsExclusive = fToMs * (S.AggregateMaxExclusive[nTimer]);
4665
float fMaxPrcExclusive = MicroProfileMin(fMaxMsExclusive * fToPrc, 1.f);
4666
float fTotalMs = fToMs * S.Aggregate[nTimer].nTicks;
4667
pTimers[nIdx] = fMs;
4668
pTimers[nIdx + 1] = fPrc;
4669
pAverage[nIdx] = fAverageMs;
4670
pAverage[nIdx + 1] = fAveragePrc;
4671
pMax[nIdx] = fMaxMs;
4672
pMax[nIdx + 1] = fMaxPrc;
4673
pMin[nIdx] = fMinMs;
4674
pMin[nIdx + 1] = fMinPrc;
4675
pCallAverage[nIdx] = fCallAverageMs;
4676
pCallAverage[nIdx + 1] = fCallAveragePrc;
4677
pExclusive[nIdx] = fMsExclusive;
4678
pExclusive[nIdx + 1] = fPrcExclusive;
4679
pAverageExclusive[nIdx] = fAverageMsExclusive;
4680
pAverageExclusive[nIdx + 1] = fAveragePrcExclusive;
4681
pMaxExclusive[nIdx] = fMaxMsExclusive;
4682
pMaxExclusive[nIdx + 1] = fMaxPrcExclusive;
4683
pTotal[nIdx] = fTotalMs;
4684
pTotal[nIdx + 1] = 0.f;
4685
}
4686
}
4687
4688
float MicroProfileGetTime(const char* pGroup, const char* pName)
4689
{
4690
MicroProfileToken nToken = MicroProfileFindTokenInternal(pGroup, pName);
4691
if(nToken == MICROPROFILE_INVALID_TOKEN)
4692
{
4693
return 0.f;
4694
}
4695
uint32_t nTimerIndex = MicroProfileGetTimerIndex(nToken);
4696
uint32_t nGroupIndex = MicroProfileGetGroupIndex(nToken);
4697
float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
4698
return S.Frame[nTimerIndex].nTicks * fToMs;
4699
}
4700
4701
int MicroProfilePlatformMarkersGetEnabled()
4702
{
4703
return S.nPlatformMarkersEnabled != 0 ? 1 : 0;
4704
}
4705
void MicroProfilePlatformMarkersSetEnabled(int bEnabled)
4706
{
4707
S.nPlatformMarkersEnabled = bEnabled ? 1 : 0;
4708
}
4709
4710
#define MICROPROFILE_CONTEXT_SWITCH_SEARCH_DEBUG MICROPROFILE_DEBUG
4711
4712
void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu)
4713
{
4714
MICROPROFILE_SCOPE(g_MicroProfileContextSwitchSearch);
4715
uint32_t nContextSwitchPut = S.nContextSwitchPut;
4716
uint64_t nContextSwitchStart, nContextSwitchEnd;
4717
nContextSwitchStart = nContextSwitchEnd = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
4718
int64_t nSearchEnd = nBaseTicksEndCpu + MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
4719
int64_t nSearchBegin = nBaseTicksCpu - MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
4720
4721
#if MICROPROFILE_CONTEXT_SWITCH_SEARCH_DEBUG
4722
int64_t lp = S.nContextSwitchLastPushed;
4723
uprintf("cswitch-search\n");
4724
uprintf("Begin %" PRId64 " End %" PRId64 " Last %" PRId64 "\n", nSearchBegin, nSearchEnd, lp);
4725
4726
float fToMs = MicroProfileTickToMsMultiplierCpu();
4727
uprintf("E %6.2fms\n", fToMs * (nSearchEnd - nSearchBegin));
4728
uprintf("LAST %6.2fms\n", fToMs * (lp - nSearchBegin));
4729
#endif
4730
4731
int64_t nMax = INT64_MIN;
4732
int64_t nMin = INT64_MAX;
4733
for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++i)
4734
{
4735
uint32_t nIndex = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - (i + 1)) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
4736
MicroProfileContextSwitch& CS = S.ContextSwitch[nIndex];
4737
if(nMax < CS.nTicks)
4738
nMax = CS.nTicks;
4739
if(nMin > CS.nTicks && CS.nTicks != 0)
4740
nMin = CS.nTicks;
4741
if(CS.nTicks > nSearchEnd)
4742
{
4743
nContextSwitchEnd = nIndex;
4744
}
4745
if(CS.nTicks > nSearchBegin)
4746
{
4747
nContextSwitchStart = nIndex;
4748
}
4749
}
4750
*pContextSwitchStart = nContextSwitchStart;
4751
*pContextSwitchEnd = nContextSwitchEnd;
4752
4753
#if MICROPROFILE_CONTEXT_SWITCH_SEARCH_DEBUG
4754
{
4755
uprintf("contextswitch start %" PRId64 " %" PRId64 "\n", nContextSwitchStart, nContextSwitchEnd);
4756
4757
MicroProfileContextSwitch& CS0 = S.ContextSwitch[0];
4758
int64_t nMax = CS0.nTicks;
4759
int64_t nMin = CS0.nTicks;
4760
int64_t nBegin = 0;
4761
int64_t nEnd = 0;
4762
int nRanges = 0;
4763
for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; i += 1024)
4764
{
4765
int64_t MinTick = INT64_MAX;
4766
int64_t MaxTick = INT64_MIN;
4767
for(int j = 0; j < 1024; ++j)
4768
{
4769
MicroProfileContextSwitch& CS = S.ContextSwitch[i + j];
4770
int64_t nTicks = CS.nTicks;
4771
MinTick = MicroProfileMin(nTicks, MinTick);
4772
MaxTick = MicroProfileMax(nTicks, MaxTick);
4773
}
4774
4775
uprintf("XX range [%5" PRIx64 ":%5" PRIx64 "] :: [%6.2f:%6.2f] [%p :: %p] .. ref %p\n",
4776
i,
4777
i + 1024,
4778
fToMs * (MinTick - nSearchBegin),
4779
fToMs * (MaxTick - nSearchBegin),
4780
(void*)MinTick,
4781
(void*)MaxTick,
4782
(void*)nSearchBegin
4783
4784
);
4785
}
4786
uprintf("\n\n");
4787
4788
for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++i)
4789
{
4790
MicroProfileContextSwitch& CS = S.ContextSwitch[i];
4791
int64_t nTicks = CS.nTicks;
4792
float fMs = (nTicks - nMax) * fToMs;
4793
if(fMs < 0 || fMs > 50)
4794
{
4795
// dump range here
4796
uprintf("range [%5" PRId64 ":%5" PRId64 "] :: [%6.2f:%6.2f] [%p :: %p] .. ref %p\n",
4797
nBegin,
4798
nEnd,
4799
fToMs * (nMin - nSearchBegin),
4800
fToMs * (nMax - nSearchBegin),
4801
(void*)nMin,
4802
(void*)nMax,
4803
(void*)nSearchBegin
4804
4805
);
4806
4807
nEnd = nBegin = i;
4808
nMax = nMin = CS.nTicks;
4809
nRanges++;
4810
}
4811
else
4812
{
4813
nEnd = i;
4814
nMax = MicroProfileMax(nTicks, nMax);
4815
}
4816
}
4817
}
4818
4819
lp = S.nContextSwitchLastPushed;
4820
uprintf("E %6.2fms\n", fToMs * (nSearchEnd - nSearchBegin));
4821
uprintf("LP2 %6.2fms\n", fToMs * (lp - nSearchBegin));
4822
#endif
4823
}
4824
4825
int MicroProfileFormatCounter(int eFormat, int64_t nCounter, char* pOut, uint32_t nBufferSize)
4826
{
4827
if(!nCounter)
4828
{
4829
pOut[0] = '0';
4830
pOut[1] = '\0';
4831
return 1;
4832
}
4833
int nLen = 0;
4834
char* pBase = pOut;
4835
char* pTmp = pOut;
4836
char* pEnd = pOut + nBufferSize;
4837
int nNegative = 0;
4838
if(nCounter < 0)
4839
{
4840
nCounter = -nCounter;
4841
nNegative = 1;
4842
if(nCounter < 0) // handle INT_MIN
4843
{
4844
nCounter = -(nCounter + 1);
4845
}
4846
}
4847
4848
switch(eFormat)
4849
{
4850
case MICROPROFILE_COUNTER_FORMAT_DEFAULT:
4851
{
4852
int nSeperate = 0;
4853
while(nCounter)
4854
{
4855
if(nSeperate)
4856
{
4857
*pTmp++ = '.';
4858
}
4859
nSeperate = 1;
4860
for(uint32_t i = 0; nCounter && i < 3; ++i)
4861
{
4862
int nDigit = nCounter % 10;
4863
nCounter /= 10;
4864
*pTmp++ = '0' + nDigit;
4865
}
4866
}
4867
if(nNegative)
4868
{
4869
*pTmp++ = '-';
4870
}
4871
nLen = pTmp - pOut;
4872
--pTmp;
4873
MP_ASSERT(pTmp <= pEnd);
4874
while(pTmp > pOut) // reverse string
4875
{
4876
char c = *pTmp;
4877
*pTmp = *pOut;
4878
*pOut = c;
4879
pTmp--;
4880
pOut++;
4881
}
4882
}
4883
break;
4884
case MICROPROFILE_COUNTER_FORMAT_BYTES:
4885
{
4886
const char* pExt[] = { "b", "kb", "mb", "gb", "tb", "pb", "eb", "zb", "yb" };
4887
size_t nNumExt = sizeof(pExt) / sizeof(pExt[0]);
4888
int64_t nShift = 0;
4889
int64_t nDivisor = 1;
4890
int64_t nCountShifted = nCounter >> 10;
4891
while(nCountShifted)
4892
{
4893
nDivisor <<= 10;
4894
nCountShifted >>= 10;
4895
nShift++;
4896
}
4897
MP_ASSERT(nShift < (int64_t)nNumExt);
4898
if(nShift)
4899
{
4900
nLen = snprintf(pOut, nBufferSize - 1, "%c%3.2f%s", nNegative ? '-' : ' ', (double)nCounter / nDivisor, pExt[nShift]);
4901
}
4902
else
4903
{
4904
nLen = snprintf(pOut, nBufferSize - 1, "%c%" PRId64 "%s", nNegative ? '-' : ' ', nCounter, pExt[nShift]);
4905
}
4906
}
4907
break;
4908
}
4909
pBase[nLen] = '\0';
4910
4911
return nLen;
4912
}
4913
4914
int MicroProfileFormatCounterDouble(int eFormat, double dCounter, char* pOut, uint32_t nBufferSize)
4915
{
4916
int nLen = 0;
4917
switch(eFormat)
4918
{
4919
case MICROPROFILE_COUNTER_FORMAT_DEFAULT:
4920
{
4921
nLen = stbsp_snprintf(pOut, nBufferSize - 1, "%f", dCounter);
4922
}
4923
break;
4924
case MICROPROFILE_COUNTER_FORMAT_BYTES:
4925
{
4926
const char* pExt[] = { "b", "kb", "mb", "gb", "tb", "pb", "eb", "zb", "yb" };
4927
double scale = 1.f;
4928
int offset = 0;
4929
int end = sizeof(pExt) / sizeof(pExt[0]);
4930
double d = dCounter;
4931
while(d / scale > 1024.f && offset + 1 < end)
4932
{
4933
scale *= 1024.f;
4934
offset += 1;
4935
}
4936
nLen = stbsp_snprintf(pOut, nBufferSize - 1, "%.3f%s", d / scale, pExt[offset]);
4937
}
4938
break;
4939
}
4940
pOut[nLen] = '\0';
4941
4942
return nLen;
4943
}
4944
4945
bool MicroProfileAnyGroupActive()
4946
{
4947
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
4948
{
4949
if(S.nActiveGroups[i] != 0)
4950
return true;
4951
}
4952
return false;
4953
}
4954
bool MicroProfileGroupActive(uint32_t nGroupIndex)
4955
{
4956
MP_ASSERT(nGroupIndex < MICROPROFILE_MAX_GROUPS);
4957
uint32_t nIndex = nGroupIndex / 32;
4958
uint32_t nBit = nGroupIndex % 32;
4959
return ((S.nActiveGroups[nIndex] >> nBit) & 1) == 1;
4960
}
4961
4962
void MicroProfileToggleGroup(uint32_t nGroup)
4963
{
4964
if(nGroup < S.nGroupCount)
4965
{
4966
uint32_t nIndex = nGroup / 32;
4967
uint32_t nBit = nGroup % 32;
4968
S.nActiveGroupsWanted[nIndex] ^= (1ll << nBit);
4969
S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
4970
}
4971
}
4972
void MicroProfileGroupSetEnabled(uint32_t nGroup)
4973
{
4974
if(nGroup < S.nGroupCount)
4975
{
4976
uint32_t nIndex = nGroup / 32;
4977
uint32_t nBit = nGroup % 32;
4978
S.nActiveGroupsWanted[nIndex] |= (1ll << nBit);
4979
S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
4980
}
4981
}
4982
bool MicroProfileGroupEnabled(uint32_t nGroup)
4983
{
4984
if(nGroup < S.nGroupCount)
4985
{
4986
uint32_t nIndex = nGroup / 32;
4987
uint32_t nBit = nGroup % 32;
4988
return 0 != (S.nActiveGroupsWanted[nIndex] & (1ll << nBit));
4989
}
4990
return false;
4991
}
4992
bool MicroProfileCategoryEnabled(uint32_t nCategory)
4993
{
4994
if(nCategory < S.nCategoryCount)
4995
{
4996
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
4997
{
4998
if(S.CategoryInfo[nCategory].nGroupMask[i] != (S.CategoryInfo[nCategory].nGroupMask[i] & S.nActiveGroupsWanted[i]))
4999
{
5000
return false;
5001
}
5002
}
5003
return true;
5004
}
5005
return false;
5006
}
5007
5008
bool MicroProfileCategoryDisabled(uint32_t nCategory)
5009
{
5010
if(nCategory < S.nCategoryCount)
5011
{
5012
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
5013
{
5014
uint32_t ActiveMask = S.nActiveGroupsWanted[i];
5015
uint32_t CategoryMask = S.CategoryInfo[nCategory].nGroupMask[i];
5016
5017
if(0 != (ActiveMask & CategoryMask))
5018
{
5019
return false;
5020
}
5021
}
5022
return true;
5023
}
5024
return false;
5025
}
5026
5027
void MicroProfileToggleCategory(uint32_t nCategory)
5028
{
5029
if(nCategory < S.nCategoryCount)
5030
{
5031
bool bAllSet = true;
5032
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
5033
{
5034
bAllSet = bAllSet && S.CategoryInfo[nCategory].nGroupMask[i] == (S.CategoryInfo[nCategory].nGroupMask[i] & S.nActiveGroupsWanted[i]);
5035
}
5036
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUP_INTS; ++i)
5037
{
5038
if(bAllSet)
5039
{
5040
S.nActiveGroupsWanted[i] &= ~S.CategoryInfo[nCategory].nGroupMask[i];
5041
}
5042
else
5043
{
5044
S.nActiveGroupsWanted[i] |= S.CategoryInfo[nCategory].nGroupMask[i];
5045
}
5046
}
5047
S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
5048
}
5049
}
5050
5051
void MicroProfileSleep(uint32_t nMs)
5052
{
5053
#ifdef _WIN32
5054
Sleep(nMs);
5055
#else
5056
usleep(nMs * 1000);
5057
#endif
5058
}
5059
5060
#if MICROPROFILE_WEBSERVER
5061
5062
#define MICROPROFILE_EMBED_HTML
5063
5064
extern const char* g_MicroProfileHtml_begin[];
5065
extern size_t g_MicroProfileHtml_begin_sizes[];
5066
extern size_t g_MicroProfileHtml_begin_count;
5067
extern const char* g_MicroProfileHtml_end[];
5068
extern size_t g_MicroProfileHtml_end_sizes[];
5069
extern size_t g_MicroProfileHtml_end_count;
5070
5071
extern const char* g_MicroProfileHtmlLive_begin[];
5072
extern size_t g_MicroProfileHtmlLive_begin_sizes[];
5073
extern size_t g_MicroProfileHtmlLive_begin_count;
5074
extern const char* g_MicroProfileHtmlLive_end[];
5075
extern size_t g_MicroProfileHtmlLive_end_sizes[];
5076
extern size_t g_MicroProfileHtmlLive_end_count;
5077
5078
extern const uint32_t uprof_16[];
5079
extern const uint32_t uprof_16_len;
5080
extern const uint32_t uprof_32[];
5081
extern const uint32_t uprof_32_len;
5082
extern const uint32_t uprof_192[];
5083
extern const uint32_t uprof_192_len;
5084
extern const uint32_t uprof_512[];
5085
extern const uint32_t uprof_512_len;
5086
5087
typedef void (*MicroProfileWriteCallback)(void* Handle, size_t size, const char* pData);
5088
5089
uint32_t MicroProfileWebServerPort()
5090
{
5091
return S.nWebServerPort;
5092
}
5093
5094
void MicroProfileSetWebServerPort(uint32_t nPort)
5095
{
5096
if(S.nWebServerPort != nPort)
5097
{
5098
MicroProfileWebServerJoin();
5099
MicroProfileWebServerStop();
5100
S.nWebServerPort = nPort;
5101
S.nWebServerDataSent = (uint64_t)-1; // Will cause the web server and its thread to be restarted next time MicroProfileFlip() is called.
5102
}
5103
}
5104
5105
void MicroProfileDumpFileImmediately(const char* pHtml, const char* pCsv, void* pGpuContext, uint32_t FrameCount)
5106
{
5107
for(uint32_t i = 0; i < 2; ++i)
5108
{
5109
MicroProfileFlip(pGpuContext);
5110
}
5111
for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY + 1; ++i)
5112
{
5113
MicroProfileFlip(pGpuContext);
5114
}
5115
5116
uint32_t nDumpMask = 0;
5117
if(pHtml)
5118
{
5119
5120
size_t nLen = strlen(pHtml);
5121
if(nLen > sizeof(S.HtmlDumpPath) - 1)
5122
{
5123
return;
5124
}
5125
const size_t ExtSize = sizeof(".html") - 1;
5126
if(nLen > ExtSize && 0 == memcmp(".html", pHtml + nLen - ExtSize, ExtSize))
5127
nLen -= ExtSize;
5128
memcpy(S.HtmlDumpPath, pHtml, nLen);
5129
S.HtmlDumpPath[nLen] = '\0';
5130
5131
nDumpMask |= 1;
5132
}
5133
if(pCsv)
5134
{
5135
size_t nLen = strlen(pCsv);
5136
if(nLen > sizeof(S.CsvDumpPath) - 1)
5137
{
5138
return;
5139
}
5140
const size_t ExtSize = sizeof(".csv") - 1;
5141
if(nLen > ExtSize && 0 == memcmp(".csv", pCsv + nLen - ExtSize, ExtSize))
5142
nLen -= ExtSize;
5143
memcpy(S.CsvDumpPath, pCsv, nLen + 1);
5144
S.CsvDumpPath[nLen] = '\0';
5145
5146
nDumpMask |= 2;
5147
}
5148
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
5149
S.nDumpFileNextFrame = nDumpMask;
5150
S.nDumpSpikeMask = 0;
5151
S.nDumpFileCountDown = 0;
5152
S.DumpFrameCount = FrameCount;
5153
5154
MicroProfileDumpToFile();
5155
}
5156
void MicroProfileDumpFile(const char* pHtml, const char* pCsv, float fCpuSpike, float fGpuSpike, uint32_t FrameCount)
5157
{
5158
S.fDumpCpuSpike = fCpuSpike;
5159
S.fDumpGpuSpike = fGpuSpike;
5160
S.DumpFrameCount = FrameCount;
5161
uint32_t nDumpMask = 0;
5162
if(pHtml)
5163
{
5164
size_t nLen = strlen(pHtml);
5165
if(nLen > sizeof(S.HtmlDumpPath) - 1)
5166
{
5167
return;
5168
}
5169
const size_t ExtSize = sizeof(".html") - 1;
5170
if(nLen > ExtSize && 0 == memcmp(".html", pHtml + nLen - ExtSize, ExtSize))
5171
nLen -= ExtSize;
5172
memcpy(S.HtmlDumpPath, pHtml, nLen);
5173
S.HtmlDumpPath[nLen] = '\0';
5174
5175
nDumpMask |= 1;
5176
}
5177
if(pCsv)
5178
{
5179
size_t nLen = strlen(pCsv);
5180
if(nLen > sizeof(S.CsvDumpPath) - 1)
5181
{
5182
return;
5183
}
5184
const size_t ExtSize = sizeof(".csv") - 1;
5185
if(nLen > ExtSize && 0 == memcmp(".csv", pCsv + nLen - ExtSize, ExtSize))
5186
nLen -= ExtSize;
5187
memcpy(S.CsvDumpPath, pCsv, nLen);
5188
S.CsvDumpPath[nLen] = '\0';
5189
5190
nDumpMask |= 2;
5191
}
5192
if(fCpuSpike > 0.f || fGpuSpike > 0.f)
5193
{
5194
S.nDumpFileNextFrame = 0;
5195
S.nDumpSpikeMask = nDumpMask;
5196
}
5197
else
5198
{
5199
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
5200
S.nDumpFileNextFrame = nDumpMask;
5201
S.nDumpSpikeMask = 0;
5202
S.nDumpFileCountDown = 0;
5203
5204
MicroProfileDumpToFile();
5205
}
5206
}
5207
5208
struct MicroProfilePrintfArgs
5209
{
5210
MicroProfileWriteCallback CB;
5211
void* Handle;
5212
};
5213
5214
char* MicroProfilePrintfCallback(const char* buf, void* user, int len)
5215
{
5216
MicroProfilePrintfArgs* A = (MicroProfilePrintfArgs*)user;
5217
(A->CB)(A->Handle, len, buf);
5218
return const_cast<char*>(buf);
5219
};
5220
5221
// printf-style formatting whose output is streamed to CB(Handle, size, data)
// in chunks, using a stack buffer of STB_SPRINTF_MIN bytes.
void MicroProfilePrintf(MicroProfileWriteCallback CB, void* Handle, const char* pFmt, ...)
{
	MicroProfilePrintfArgs Sink;
	Sink.CB = CB;
	Sink.Handle = Handle;

	char Scratch[STB_SPRINTF_MIN];

	va_list ArgList;
	va_start(ArgList, pFmt);
	// The total length returned by the formatter is not needed here.
	(void)stbsp_vsprintfcb(MicroProfilePrintfCallback, (void*)&Sink, Scratch, pFmt, ArgList);
	va_end(ArgList);
}
5233
5234
void MicroProfileGetFramesToDump(uint64_t nStartFrameId, uint32_t nMaxFrames, uint32_t& nFirstFrame, uint32_t& nLastFrame, uint32_t& nNumFrames)
5235
{
5236
nFirstFrame = (uint32_t)-1;
5237
nNumFrames = 0;
5238
5239
if(nStartFrameId != (uint64_t)-1)
5240
{
5241
// search for the frane
5242
for(uint32_t i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
5243
{
5244
if(S.Frames[i].nFrameId == nStartFrameId)
5245
{
5246
nFirstFrame = i;
5247
break;
5248
}
5249
}
5250
if(nFirstFrame != (uint32_t)-1)
5251
{
5252
nLastFrame = S.nFrameCurrent;
5253
uint32_t nDistance = (MICROPROFILE_MAX_FRAME_HISTORY + nFirstFrame - nLastFrame) % MICROPROFILE_MAX_FRAME_HISTORY;
5254
nNumFrames = MicroProfileMin(nDistance, (uint32_t)nMaxFrames);
5255
}
5256
}
5257
5258
if(nNumFrames == 0)
5259
{
5260
nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3); // leave a few to not overwrite
5261
nNumFrames = MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames);
5262
nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
5263
}
5264
5265
nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
5266
}
5267
5268
#define printf(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__)
5269
5270
// Writes the user-configured CSV (S.CsvConfig) through CB/Handle.
//
// Layout: one header row, then one row per frame. The first column is either
// the time since the first dumped frame ("Time") or the row number
// ("FrameNumber"), followed by the configured timers, groups and counters.
// Timer and group values are converted to milliseconds with the CPU or GPU
// tick rate depending on their type; counters are printed raw, as double when
// flagged MICROPROFILE_COUNTER_FLAG_DOUBLE and as a signed 64-bit integer
// otherwise.
//
// nFirstFrame: history slot of the first frame to write.
// nLastFrame:  unused.
// nNumFrames:  number of rows to write.
void MicroProfileDumpCsvWithConfig(MicroProfileWriteCallback CB, void* Handle, uint32_t nFirstFrame, uint32_t nLastFrame, uint32_t nNumFrames)
{
	(void)nLastFrame;
	uint32_t NumTimers = S.CsvConfig.NumTimers;
	uint32_t NumGroups = S.CsvConfig.NumGroups;
	uint32_t NumCounters = S.CsvConfig.NumCounters;
	uint16_t* TimerIndices = S.CsvConfig.TimerIndices;
	uint16_t* GroupIndices = S.CsvConfig.GroupIndices;
	uint64_t* FrameData = S.CsvConfig.FrameData;
	uint16_t* CounterIndices = S.CsvConfig.CounterIndices;
	uint32_t TotalElements = S.CsvConfig.TotalElements;
	bool UseFrameTime = 0 != (MICROPROFILE_CSV_FLAG_FRAME_TIME & S.CsvConfig.Flags);
	const char** pTimerNames = S.CsvConfig.pTimerNames;
	const char** pGroupNames = S.CsvConfig.pGroupNames;
	const char** pCounterNames = S.CsvConfig.pCounterNames;

	// Header row. A configured display name wins over the registered name.
	if(UseFrameTime)
		MicroProfilePrintf(CB, Handle, "Time");
	else
		MicroProfilePrintf(CB, Handle, "FrameNumber");
	for(uint32_t i = 0; i < NumTimers; ++i)
		MicroProfilePrintf(CB, Handle, ", %s", pTimerNames[i] ? pTimerNames[i] : S.TimerInfo[TimerIndices[i]].pName);
	for(uint32_t i = 0; i < NumGroups; ++i)
		MicroProfilePrintf(CB, Handle, ", %s", pGroupNames[i] ? pGroupNames[i] : S.GroupInfo[GroupIndices[i]].pName);
	for(uint32_t i = 0; i < NumCounters; ++i)
		MicroProfilePrintf(CB, Handle, ", %s", pCounterNames[i] ? pCounterNames[i] : S.CounterInfo[CounterIndices[i]].pName);
	MicroProfilePrintf(CB, Handle, "\n");

	// Per-column tick-to-millisecond factors.
	float* fToMsTimer = (float*)alloca(sizeof(float) * NumTimers);
	float* fToMsGroup = (float*)alloca(sizeof(float) * NumGroups);
	float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
	float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());

	for(uint32_t i = 0; i < NumTimers; ++i)
		fToMsTimer[i] = S.TimerInfo[TimerIndices[i]].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU;
	for(uint32_t i = 0; i < NumGroups; ++i)
		fToMsGroup[i] = S.GroupInfo[GroupIndices[i]].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU;

	uint64_t TickStart = S.Frames[nFirstFrame % MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartCpu;
	for(uint32_t i = 0; i < nNumFrames; ++i)
	{
		uint32_t FrameIndex = ((nFirstFrame + i) % MICROPROFILE_MAX_FRAME_HISTORY);
		uint64_t TickFrame = S.Frames[FrameIndex].nFrameStartCpu;
		// Each frame slot owns TotalElements consecutive 64-bit values:
		// timers first, then groups, then counters.
		const uint64_t* Data = FrameData + TotalElements * FrameIndex;
		if(UseFrameTime)
			MicroProfilePrintf(CB, Handle, "%f", (TickFrame - TickStart) * fToMsCPU);
		else
			MicroProfilePrintf(CB, Handle, "%d", i);

		uint32_t Offset = 0;
		for(uint32_t j = 0; j < NumTimers; ++j)
			MicroProfilePrintf(CB, Handle, ", %f", Data[Offset++] * fToMsTimer[j]);
		for(uint32_t j = 0; j < NumGroups; ++j)
			MicroProfilePrintf(CB, Handle, ", %f", Data[Offset++] * fToMsGroup[j]);
		for(uint32_t j = 0; j < NumCounters; ++j)
		{
			const uint64_t nRaw = Data[Offset++];
			if(S.CounterInfo[CounterIndices[j]].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE)
			{
				// Reinterpret the stored bits as a double via memcpy; reading
				// the uint64_t storage through a double* breaks strict aliasing.
				static_assert(sizeof(double) == sizeof(uint64_t), "double counters are stored in 64-bit slots");
				double dValue;
				memcpy(&dValue, &nRaw, sizeof(dValue));
				MicroProfilePrintf(CB, Handle, ", %f", dValue);
			}
			else
			{
				// Cast so the argument type matches the %lld conversion.
				MicroProfilePrintf(CB, Handle, ", %lld", (long long)nRaw);
			}
		}
		MicroProfilePrintf(CB, Handle, "\n");
	}
}
5337
// Writes a CSV with one column per registered timer and one row per frame,
// taken from S.FrameExtraCounterData. Values are converted to milliseconds
// with the CPU or GPU tick rate depending on the timer type. Timers that a
// frame has no data for are written as 0.
//
// nFirstFrame: history slot of the first frame; nNumFrames: number of rows.
// nLastFrame is unused.
void MicroProfileDumpCsvTimerFrames(MicroProfileWriteCallback CB, void* Handle, uint32_t nFirstFrame, uint32_t nLastFrame, uint32_t nNumFrames)
{
	MP_ASSERT(S.FrameExtraCounterData);
	uint32_t nTimerTotal = S.nTotalTimers;
	float* pTickToMs = (float*)alloca(sizeof(float) * nTimerTotal);
	const float fCpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
	const float fGpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());

	// Conversion factor and header cell for every timer column.
	for(uint32_t t = 0; t < nTimerTotal; ++t)
	{
		pTickToMs[t] = S.TimerInfo[t].Type == MicroProfileTokenTypeGpu ? fGpuTickToMs : fCpuTickToMs;
	}
	for(uint32_t t = 0; t < nTimerTotal; ++t)
	{
		// The first cell also carries the leading "FrameNumber" column title.
		const char* pCellFmt = (t == 0) ? "FrameNumber, \"%s\"" : ",\"%s\"";
		MicroProfilePrintf(CB, Handle, pCellFmt, S.TimerInfo[t].pName);
	}
	MicroProfilePrintf(CB, Handle, "\n");

	for(uint32_t nRow = 0; nRow < nNumFrames; ++nRow)
	{
		MicroProfileFrameExtraCounterData* pFrame = S.FrameExtraCounterData + ((nRow + nFirstFrame) % MICROPROFILE_MAX_FRAME_HISTORY);
		MicroProfilePrintf(CB, Handle, "%d", nRow);
		// Only the first NumTimers entries of the frame hold data.
		uint32_t nValid = MicroProfileMin(nTimerTotal, (uint32_t)pFrame->NumTimers);
		for(uint32_t t = 0; t < nTimerTotal; ++t)
		{
			if(t < nValid)
				MicroProfilePrintf(CB, Handle, ",%f", pFrame->Timers[t] * pTickToMs[t]);
			else
				MicroProfilePrintf(CB, Handle, ",0");
		}
		MicroProfilePrintf(CB, Handle, "\n");
	}
}
5372
5373
// Writes a CSV with one column per group and one row per frame, taken from
// S.FrameExtraCounterData. Values are converted to milliseconds with the CPU
// or GPU tick rate depending on the group type. Groups that a frame has no
// data for are written as 0.
//
// nFirstFrame: history slot of the first frame; nNumFrames: number of rows.
// nLastFrame is unused.
void MicroProfileDumpCsvGroupFrames(MicroProfileWriteCallback CB, void* Handle, uint32_t nFirstFrame, uint32_t nLastFrame, uint32_t nNumFrames)
{
	MP_ASSERT(S.FrameExtraCounterData);
	const float fCpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
	const float fGpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());

	uint32_t nGroupTotal = S.nGroupCount;
	float* pTickToMs = (float*)alloca(sizeof(float) * nGroupTotal);

	// Conversion factor and header cell for every group column.
	for(uint32_t g = 0; g < nGroupTotal; ++g)
	{
		pTickToMs[g] = S.GroupInfo[g].Type == MicroProfileTokenTypeGpu ? fGpuTickToMs : fCpuTickToMs;
	}
	for(uint32_t g = 0; g < nGroupTotal; ++g)
	{
		// The first cell also carries the leading "FrameNumber" column title.
		const char* pCellFmt = (g == 0) ? "FrameNumber, \"%s\"" : ",\"%s\"";
		MicroProfilePrintf(CB, Handle, pCellFmt, S.GroupInfo[g].pName);
	}
	MicroProfilePrintf(CB, Handle, "\n");

	for(uint32_t nRow = 0; nRow < nNumFrames; ++nRow)
	{
		MicroProfileFrameExtraCounterData* pFrame = S.FrameExtraCounterData + ((nRow + nFirstFrame) % MICROPROFILE_MAX_FRAME_HISTORY);
		MicroProfilePrintf(CB, Handle, "%d", nRow);
		// Only the first NumGroups entries of the frame hold data.
		uint32_t nValid = MicroProfileMin(nGroupTotal, (uint32_t)pFrame->NumGroups);
		for(uint32_t g = 0; g < nGroupTotal; ++g)
		{
			if(g < nValid)
				MicroProfilePrintf(CB, Handle, ",%f", pFrame->Groups[g] * pTickToMs[g]);
			else
				MicroProfilePrintf(CB, Handle, ",0");
		}
		MicroProfilePrintf(CB, Handle, "\n");
	}
}
5408
5409
void MicroProfileDumpCsv(uint32_t nDumpFrameCount)
5410
{
5411
uint32_t nNumFrames, nFirstFrame, nLastFrame;
5412
MicroProfileGetFramesToDump((uint64_t)-1, nDumpFrameCount, nFirstFrame, nLastFrame, nNumFrames);
5413
5414
char Path[MICROPROFILE_MAX_PATH];
5415
int Length;
5416
if(S.FrameExtraCounterData)
5417
{
5418
Length = snprintf(Path, sizeof(S.CsvDumpPath), "%s_timer_frames.csv", S.CsvDumpPath);
5419
if(Length > 0 && Length < MICROPROFILE_MAX_PATH)
5420
{
5421
FILE* F = fopen(Path, "w");
5422
if(F)
5423
{
5424
MicroProfileDumpCsvTimerFrames(MicroProfileWriteFile, F, nFirstFrame, nLastFrame, nNumFrames);
5425
fclose(F);
5426
}
5427
}
5428
Length = snprintf(Path, sizeof(S.CsvDumpPath), "%s_group_frames.csv", S.CsvDumpPath);
5429
if(Length > 0 && Length < MICROPROFILE_MAX_PATH)
5430
{
5431
FILE* F = fopen(Path, "w");
5432
if(F)
5433
{
5434
MicroProfileDumpCsvGroupFrames(MicroProfileWriteFile, F, nFirstFrame, nLastFrame, nNumFrames);
5435
fclose(F);
5436
}
5437
}
5438
}
5439
if(S.CsvConfig.State == MicroProfileCsvConfig::ACTIVE)
5440
{
5441
Length = snprintf(Path, sizeof(S.CsvDumpPath), "%s_custom.csv", S.CsvDumpPath);
5442
if(Length > 0 && Length < MICROPROFILE_MAX_PATH)
5443
{
5444
FILE* F = fopen(Path, "w");
5445
if(F)
5446
{
5447
MicroProfileDumpCsvWithConfig(MicroProfileWriteFile, F, nFirstFrame, nLastFrame, nNumFrames);
5448
fclose(F);
5449
}
5450
}
5451
}
5452
}
5453
5454
// Writes the legacy aggregate CSV through CB/Handle. Sections, in order:
//   1. per-timer average / max / call-average
//   2. per-group average / max / total
//   3. per-group, per-thread average / total
//   4. CPU frame times, then GPU frame times, for the recent frame history
//
// `printf` in this function is the file-local macro that forwards to
// MicroProfilePrintf(CB, Handle, ...), not the C library function.
void MicroProfileDumpCsvLegacy(MicroProfileWriteCallback CB, void* Handle)
{
	uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; // avoid dividing by zero
	float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
	float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());

	printf("frames,%d\n", nAggregateFrames);
	// NOTE(review): the header says "group,name" but the rows below print the
	// timer name first and the group name second. Left unchanged because
	// either fix alters the file format - confirm which order consumers expect.
	printf("group,name,average,max,callaverage\n");

	// One stack allocation split into nine result arrays of two floats per
	// timer (value at nIdx, percentage at nIdx + 1).
	uint32_t nNumTimers = S.nTotalTimers;
	uint32_t nBlockSize = 2 * nNumTimers;
	float* pTimers = (float*)alloca(nBlockSize * 9 * sizeof(float));
	float* pAverage = pTimers + nBlockSize;
	float* pMax = pTimers + 2 * nBlockSize;
	float* pMin = pTimers + 3 * nBlockSize;
	float* pCallAverage = pTimers + 4 * nBlockSize;
	float* pTimersExclusive = pTimers + 5 * nBlockSize;
	float* pAverageExclusive = pTimers + 6 * nBlockSize;
	float* pMaxExclusive = pTimers + 7 * nBlockSize;
	float* pTotal = pTimers + 8 * nBlockSize;

	MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pMin, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);

	for(uint32_t i = 0; i < S.nTotalTimers; ++i)
	{
		uint32_t nIdx = i * 2;
		printf("\"%s\",\"%s\",%f,%f,%f\n", S.TimerInfo[i].pName, S.GroupInfo[S.TimerInfo[i].nGroupIndex].pName, pAverage[nIdx], pMax[nIdx], pCallAverage[nIdx]);
	}

	printf("\n\n");

	printf("group,average,max,total\n");
	for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
	{
		const char* pGroupName = S.GroupInfo[j].pName;
		float fToMs = S.GroupInfo[j].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU;
		if(pGroupName[0] != '\0')
		{
			// The "max" column previously repeated the average; it now reports
			// S.AggregateGroupMax, the value the HTML dump uses for group max.
			printf("\"%s\",%.3f,%.3f,%.3f\n", pGroupName, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroupMax[j], fToMs * S.AggregateGroup[j]);
		}
	}

	printf("\n\n");
	printf("group,thread,average,total\n");
	for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
	{
		for(uint32_t i = 0; i < S.nNumLogs; ++i)
		{
			if(S.Pool[i])
			{
				const char* pThreadName = &S.Pool[i]->ThreadName[0];
				float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
				{
					uint64_t nTicks = S.Pool[i]->nAggregateGroupTicks[j];
					float fTime = nTicks / nAggregateFrames * fToMs;
					float fTimeTotal = nTicks * fToMs;
					// Skip group/thread pairs with a negligible total.
					if(fTimeTotal > 0.01f)
					{
						const char* pGroupName = S.GroupInfo[j].pName;
						printf("\"%s\",\"%s\",%.3f,%.3f\n", pGroupName, pThreadName, fTime, fTimeTotal);
					}
				}
			}
		}
	}

	printf("\n\n");
	printf("frametimecpu\n");

	// Frame time = start of the next frame minus start of this frame, walking
	// from the oldest retained frame up to the current one.
	const uint32_t nCount = MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3;
	const uint32_t nStart = S.nFrameCurrent;
	for(uint32_t i = nCount; i > 0; i--)
	{
		uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
		uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
		uint64_t nTicks = S.Frames[nFrameNext].nFrameStartCpu - S.Frames[nFrame].nFrameStartCpu;
		printf("%f,", nTicks * fToMsCPU);
	}
	printf("\n");

	printf("\n\n");
	printf("frametimegpu\n");

	for(uint32_t i = nCount; i > 0; i--)
	{
		uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
		uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
		uint64_t nTicks = S.Frames[nFrameNext].nFrameStartGpu - S.Frames[nFrame].nFrameStartGpu;
		printf("%f,", nTicks * fToMsGPU);
	}
	printf("\n\n");
}
5547
#undef printf
5548
5549
void MicroProfileDumpCsvLegacy()
5550
{
5551
char Path[MICROPROFILE_MAX_PATH];
5552
int Length = snprintf(Path, sizeof(S.CsvDumpPath), "%s.csv", S.CsvDumpPath);
5553
if(Length > 0 && Length < MICROPROFILE_MAX_PATH)
5554
{
5555
FILE* F = fopen(Path, "w");
5556
if(F)
5557
{
5558
MicroProfileDumpCsvLegacy(MicroProfileWriteFile, F);
5559
fclose(F);
5560
}
5561
}
5562
}
5563
5564
// Streams the embedded "live" HTML page to CB: first every chunk of the
// begin table, then every chunk of the end table.
void MicroProfileDumpHtmlLive(MicroProfileWriteCallback CB, void* Handle)
{
	// Each chunk is sent with one byte less than its recorded size
	// (presumably to drop the string terminator - TODO confirm).
	auto EmitChunks = [&](const char** ppChunks, const size_t* pSizes, size_t nChunks)
	{
		for(size_t nChunk = 0; nChunk < nChunks; ++nChunk)
		{
			CB(Handle, pSizes[nChunk] - 1, ppChunks[nChunk]);
		}
	};

	EmitChunks(g_MicroProfileHtmlLive_begin, g_MicroProfileHtmlLive_begin_sizes, g_MicroProfileHtmlLive_begin_count);
	EmitChunks(g_MicroProfileHtmlLive_end, g_MicroProfileHtmlLive_end_sizes, g_MicroProfileHtmlLive_end_count);
}
5575
// Windows only: records the efficiency class of every logical processor in
// S.CoreEfficiencyClass, using GetSystemCpuSetInformation. A no-op on other
// platforms and when the query fails.
void MicroProfileGetCoreInformation()
{
#ifdef _WIN32
	// Initialised so a failed size query cannot leave an indeterminate size.
	unsigned long BufferSize = 0;
	HANDLE Process = GetCurrentProcess();
	// First call with no buffer only reports the required size.
	GetSystemCpuSetInformation(nullptr, 0, &BufferSize, Process, 0);
	if(BufferSize == 0)
	{
		return;
	}
	char* Buffer = (char*)alloca(BufferSize);
	if(!GetSystemCpuSetInformation((PSYSTEM_CPU_SET_INFORMATION)Buffer, BufferSize, &BufferSize, Process, 0))
	{
		return;
	}
	// The buffer holds variable-sized records; each one states its own size.
	for(ULONG Size = 0; Size < BufferSize;)
	{
		PSYSTEM_CPU_SET_INFORMATION CpuSet = reinterpret_cast<PSYSTEM_CPU_SET_INFORMATION>(Buffer);
		if(CpuSet->Size == 0)
		{
			break; // malformed record; stop instead of looping forever
		}
		if(CpuSet->Type == CPU_SET_INFORMATION_TYPE::CpuSetInformation)
		{
			// Bounds-check the index that is actually used for the write. The
			// previous code tested CoreIndex but indexed by
			// LogicalProcessorIndex, which can be larger.
			// Assumes S.CoreEfficiencyClass has MICROPROFILE_MAX_CPU_CORES
			// entries - TODO confirm against its declaration.
			if(CpuSet->CpuSet.LogicalProcessorIndex < MICROPROFILE_MAX_CPU_CORES)
			{
				S.CoreEfficiencyClass[CpuSet->CpuSet.LogicalProcessorIndex] = CpuSet->CpuSet.EfficiencyClass;
			}
		}
		Buffer += CpuSet->Size;
		Size += CpuSet->Size;
	}
#endif
}
5601
5602
void MicroProfileDumpHtml(MicroProfileWriteCallback CB, void* Handle, uint64_t nMaxFrames, const char* pHost, uint64_t nStartFrameId = (uint64_t)-1)
5603
{
5604
// Stall pushing of timers
5605
uint64_t nActiveGroup[MICROPROFILE_MAX_GROUP_INTS];
5606
memcpy(nActiveGroup, S.nActiveGroups, sizeof(S.nActiveGroups));
5607
memset(S.nActiveGroups, 0, sizeof(S.nActiveGroups));
5608
bool AnyActive = S.AnyActive;
5609
S.AnyActive = false;
5610
5611
S.nPauseTicks = MP_TICK();
5612
5613
MicroProfileGetCoreInformation();
5614
5615
if(S.bContextSwitchRunning)
5616
{
5617
auto StallForContextSwitchThread = []()
5618
{
5619
int64_t nPauseTicks = S.nPauseTicks;
5620
int64_t nContextSwitchStalledTick = S.nContextSwitchStalledTick;
5621
return (nPauseTicks - nContextSwitchStalledTick) > 0;
5622
};
5623
int SleepMs = 1;
5624
while(S.bContextSwitchRunning && !S.bContextSwitchStop && StallForContextSwitchThread())
5625
{
5626
MicroProfileSleep(SleepMs);
5627
SleepMs = SleepMs * 2 / 3;
5628
SleepMs = MicroProfileMin(128, SleepMs);
5629
}
5630
int64_t TicksAfterStall = MP_TICK();
5631
uprintf("Stalled %7.2fms for context switch data\n", MicroProfileTickToMsMultiplierCpu() * (TicksAfterStall - S.nPauseTicks));
5632
}
5633
5634
MicroProfileHashTable StringsHashTable;
5635
MicroProfileHashTableInit(&StringsHashTable, 50, 25, MicroProfileHashTableCompareString, MicroProfileHashTableHashString);
5636
5637
defer
5638
{
5639
MicroProfileHashTableDestroy(&StringsHashTable);
5640
};
5641
5642
MicroProfileCounterFetchCounters();
5643
for(size_t i = 0; i < g_MicroProfileHtml_begin_count; ++i)
5644
{
5645
CB(Handle, g_MicroProfileHtml_begin_sizes[i] - 1, g_MicroProfileHtml_begin[i]);
5646
}
5647
// dump info
5648
uint64_t nTicks = MP_TICK();
5649
5650
float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
5651
float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
5652
float fAggregateMs = fToMsCPU * (nTicks - S.nAggregateFlipTick);
5653
5654
uint32_t nNumFrames = 0;
5655
uint32_t nFirstFrame = (uint32_t)-1;
5656
if(nStartFrameId != (uint64_t)-1)
5657
{
5658
// search for the frame
5659
for(uint32_t i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
5660
{
5661
if(S.Frames[i].nFrameId == nStartFrameId)
5662
{
5663
nFirstFrame = i;
5664
break;
5665
}
5666
}
5667
if(nFirstFrame != (uint32_t)-1)
5668
{
5669
uint32_t nLastFrame = S.nFrameCurrent;
5670
uint32_t nDistance = (MICROPROFILE_MAX_FRAME_HISTORY + nFirstFrame - nLastFrame) % MICROPROFILE_MAX_FRAME_HISTORY;
5671
nNumFrames = MicroProfileMin(nDistance, (uint32_t)nMaxFrames);
5672
}
5673
}
5674
5675
if(nNumFrames == 0)
5676
{
5677
nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3); // leave a few to not overwrite
5678
nNumFrames = MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames);
5679
nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
5680
}
5681
5682
uint32_t nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
5683
MP_ASSERT(nFirstFrame < MICROPROFILE_MAX_FRAME_HISTORY);
5684
MP_ASSERT(nLastFrame < MICROPROFILE_MAX_FRAME_HISTORY);
5685
5686
MicroProfilePrintf(CB, Handle, "S.DumpHost = '%s';\n", pHost ? pHost : "");
5687
time_t CaptureTime;
5688
time(&CaptureTime);
5689
MicroProfilePrintf(CB, Handle, "S.DumpUtcCaptureTime = %ld;\n", CaptureTime);
5690
MicroProfilePrintf(CB, Handle, "S.AggregateInfo = {'Frames':%d, 'Time':%f};\n", S.nAggregateFrames, fAggregateMs);
5691
5692
// categories
5693
MicroProfilePrintf(CB, Handle, "S.CategoryInfo = Array(%d);\n", S.nCategoryCount);
5694
for(uint32_t i = 0; i < S.nCategoryCount; ++i)
5695
{
5696
MicroProfilePrintf(CB, Handle, "S.CategoryInfo[%d] = \"%s\";\n", i, S.CategoryInfo[i].pName);
5697
}
5698
5699
// groups
5700
MicroProfilePrintf(CB, Handle, "S.GroupInfo = Array(%d);\n\n", S.nGroupCount + 1);
5701
uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
5702
float fRcpAggregateFrames = 1.f / nAggregateFrames;
5703
(void)fRcpAggregateFrames;
5704
char ColorString[32];
5705
for(uint32_t i = 0; i < S.nGroupCount; ++i)
5706
{
5707
MP_ASSERT(i == S.GroupInfo[i].nGroupIndex);
5708
float fToMs = S.GroupInfo[i].Type == MicroProfileTokenTypeCpu ? fToMsCPU : fToMsGPU;
5709
const char* pColorStr = "";
5710
if(S.GroupInfo[i].nColor != 0x42)
5711
{
5712
stbsp_snprintf(ColorString,
5713
sizeof(ColorString) - 1,
5714
"#%02x%02x%02x",
5715
MICROPROFILE_UNPACK_RED(S.GroupInfo[i].nColor) & 0xff,
5716
MICROPROFILE_UNPACK_GREEN(S.GroupInfo[i].nColor) & 0xff,
5717
MICROPROFILE_UNPACK_BLUE(S.GroupInfo[i].nColor) & 0xff);
5718
pColorStr = &ColorString[0];
5719
}
5720
MicroProfilePrintf(CB,
5721
Handle,
5722
"S.GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, '%s');\n",
5723
S.GroupInfo[i].nGroupIndex,
5724
S.GroupInfo[i].nGroupIndex,
5725
S.GroupInfo[i].pName,
5726
S.GroupInfo[i].nCategory,
5727
S.GroupInfo[i].nNumTimers,
5728
S.GroupInfo[i].Type == MicroProfileTokenTypeGpu ? 1 : 0,
5729
fToMs * S.AggregateGroup[i],
5730
fToMs * S.AggregateGroup[i] / nAggregateFrames,
5731
fToMs * S.AggregateGroupMax[i],
5732
pColorStr);
5733
}
5734
uint32_t nUncategorized = S.nGroupCount;
5735
5736
MicroProfilePrintf(CB,
5737
Handle,
5738
"S.GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, 'grey');\n",
5739
nUncategorized,
5740
nUncategorized,
5741
"Uncategorized",
5742
-1,
5743
1,
5744
// S.GroupInfo[i].Type == MicroProfileTokenTypeGpu ? 1 :
5745
0,
5746
0,
5747
0,
5748
0);
5749
5750
// timers
5751
5752
uint32_t nNumTimers = S.nTotalTimers;
5753
uint32_t nBlockSize = 2 * nNumTimers;
5754
float* pTimers = (float*)alloca(nBlockSize * 9 * sizeof(float));
5755
float* pAverage = pTimers + nBlockSize;
5756
float* pMax = pTimers + 2 * nBlockSize;
5757
float* pMin = pTimers + 3 * nBlockSize;
5758
float* pCallAverage = pTimers + 4 * nBlockSize;
5759
float* pTimersExclusive = pTimers + 5 * nBlockSize;
5760
float* pAverageExclusive = pTimers + 6 * nBlockSize;
5761
float* pMaxExclusive = pTimers + 7 * nBlockSize;
5762
float* pTotal = pTimers + 8 * nBlockSize;
5763
5764
MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pMin, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
5765
5766
MicroProfilePrintf(CB, Handle, "\nS.TimerInfo = Array(%d);\n\n", S.nTotalTimers);
5767
for(uint32_t i = 0; i < S.nTotalTimers; ++i)
5768
{
5769
uint32_t nIdx = i * 2;
5770
MP_ASSERT(i == S.TimerInfo[i].nTimerIndex);
5771
MicroProfilePrintf(CB, Handle, "S.Meta%d = [];\n", i);
5772
MicroProfilePrintf(CB, Handle, "S.MetaAvg%d = [];\n", i);
5773
MicroProfilePrintf(CB, Handle, "S.MetaMax%d = [];\n", i);
5774
5775
uint32_t nColor = S.TimerInfo[i].nColor;
5776
uint32_t nColorDark = (nColor >> 1) & ~0x80808080;
5777
MicroProfilePrintf(CB,
5778
Handle,
5779
"S.TimerInfo[%d] = MakeTimer(%d, \"%s\", %d, '#%02x%02x%02x','#%02x%02x%02x', %f, %f, %f, %f, %f, %f, %d, %f, S.Meta%d, S.MetaAvg%d, S.MetaMax%d, %d);\n",
5780
S.TimerInfo[i].nTimerIndex,
5781
S.TimerInfo[i].nTimerIndex,
5782
S.TimerInfo[i].pName,
5783
S.TimerInfo[i].nGroupIndex,
5784
MICROPROFILE_UNPACK_RED(nColor) & 0xff,
5785
MICROPROFILE_UNPACK_GREEN(nColor) & 0xff,
5786
MICROPROFILE_UNPACK_BLUE(nColor) & 0xff,
5787
MICROPROFILE_UNPACK_RED(nColorDark) & 0xff,
5788
MICROPROFILE_UNPACK_GREEN(nColorDark) & 0xff,
5789
MICROPROFILE_UNPACK_BLUE(nColorDark) & 0xff,
5790
pAverage[nIdx],
5791
pMax[nIdx],
5792
pMin[nIdx],
5793
pAverageExclusive[nIdx],
5794
pMaxExclusive[nIdx],
5795
pCallAverage[nIdx],
5796
S.Aggregate[i].nCount,
5797
pTotal[nIdx],
5798
i,
5799
i,
5800
i,
5801
S.TimerInfo[i].Flags);
5802
}
5803
5804
uint32_t nTotalTimersExt = S.nTotalTimers;
5805
{
5806
for(uint32_t j = 0; j < S.nNumLogs; ++j)
5807
{
5808
MicroProfileThreadLog* pLog = S.Pool[j];
5809
uint32_t nLogStart = S.Frames[nFirstFrame].nLogStart[j];
5810
uint32_t nLogEnd = S.Frames[nLastFrame].nLogStart[j];
5811
uint64_t nLogType;
5812
if(nLogStart != nLogEnd)
5813
{
5814
for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
5815
{
5816
uint64_t v = pLog->Log[k];
5817
nLogType = MicroProfileLogGetType(v);
5818
uint32_t tidx = MicroProfileLogGetTimerIndex(v);
5819
if((nLogType == MP_LOG_ENTER || nLogType == MP_LOG_LEAVE) && tidx == ETOKEN_CSTR_PTR)
5820
{
5821
MP_ASSERT(k + 1 != nLogEnd);
5822
uint64_t v1 = pLog->Log[(k + 1) % MICROPROFILE_BUFFER_SIZE];
5823
const char* pString = (const char*)MicroProfileLogGetExtendedPayloadNoDataPtr(v1);
5824
uintptr_t value;
5825
if(!MicroProfileHashTableGet(&StringsHashTable, (uint64_t)pString, &value))
5826
{
5827
uintptr_t nTimerIndex = nTotalTimersExt++;
5828
MicroProfileHashTableSet(&StringsHashTable, (uint64_t)pString, nTimerIndex);
5829
MicroProfilePrintf(
5830
CB, Handle, "S.TimerInfo.push(MakeTimer(%d, \"%s\", %d, '#000000','#000000', 0, 0, 0, 0, 0, 0, 0, 0, null, null, null, 0));\n", nTimerIndex, pString, nUncategorized);
5831
}
5832
}
5833
}
5834
}
5835
}
5836
}
5837
5838
MicroProfilePrintf(CB, Handle, "\nS.ThreadNames = [");
5839
for(uint32_t i = 0; i < S.nNumLogs; ++i)
5840
{
5841
if(S.Pool[i])
5842
{
5843
MicroProfilePrintf(CB, Handle, "'%s',", S.Pool[i]->ThreadName);
5844
}
5845
else
5846
{
5847
MicroProfilePrintf(CB, Handle, "'Thread %d',", i);
5848
}
5849
}
5850
MicroProfilePrintf(CB, Handle, "];\n\n");
5851
MicroProfilePrintf(CB, Handle, "\nS.ISGPU = [");
5852
for(uint32_t i = 0; i < S.nNumLogs; ++i)
5853
{
5854
MicroProfilePrintf(CB, Handle, "%d,", (S.Pool[i] && S.Pool[i]->nGpu) ? 1 : 0);
5855
}
5856
MicroProfilePrintf(CB, Handle, "];\n\n");
5857
5858
for(uint32_t i = 0; i < S.nNumLogs; ++i)
5859
{
5860
if(S.Pool[i])
5861
{
5862
MicroProfilePrintf(CB, Handle, "S.ThreadGroupTime%d = [", i);
5863
float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
5864
for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
5865
{
5866
MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] / nAggregateFrames * fToMs);
5867
}
5868
MicroProfilePrintf(CB, Handle, "];\n");
5869
}
5870
}
5871
MicroProfilePrintf(CB, Handle, "\nS.ThreadGroupTimeArray = [");
5872
for(uint32_t i = 0; i < S.nNumLogs; ++i)
5873
{
5874
if(S.Pool[i])
5875
{
5876
MicroProfilePrintf(CB, Handle, "S.ThreadGroupTime%d,", i);
5877
}
5878
}
5879
MicroProfilePrintf(CB, Handle, "];\n");
5880
5881
for(uint32_t i = 0; i < S.nNumLogs; ++i)
5882
{
5883
if(S.Pool[i])
5884
{
5885
MicroProfilePrintf(CB, Handle, "S.ThreadGroupTimeTotal%d = [", i);
5886
float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
5887
for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
5888
{
5889
MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] * fToMs);
5890
}
5891
MicroProfilePrintf(CB, Handle, "];\n");
5892
}
5893
}
5894
MicroProfilePrintf(CB, Handle, "\nS.ThreadGroupTimeTotalArray = [");
5895
for(uint32_t i = 0; i < S.nNumLogs; ++i)
5896
{
5897
if(S.Pool[i])
5898
{
5899
MicroProfilePrintf(CB, Handle, "S.ThreadGroupTimeTotal%d,", i);
5900
}
5901
}
5902
MicroProfilePrintf(CB, Handle, "];");
5903
5904
MicroProfilePrintf(CB, Handle, "\nS.ThreadIds = [");
5905
for(uint32_t i = 0; i < S.nNumLogs; ++i)
5906
{
5907
if(S.Pool[i])
5908
{
5909
MicroProfileThreadIdType ThreadId = S.Pool[i]->nThreadId;
5910
if(!ThreadId)
5911
{
5912
ThreadId = (MicroProfileThreadIdType)-1;
5913
}
5914
MicroProfilePrintf(CB, Handle, "%" PRIu64 ",", (uint64_t)ThreadId);
5915
}
5916
else
5917
{
5918
MicroProfilePrintf(CB, Handle, "-1,");
5919
}
5920
}
5921
MicroProfilePrintf(CB, Handle, "];\n\n");
5922
5923
for(int i = 0; i < (int)S.nNumCounters; ++i)
5924
{
5925
bool IsDouble = (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) != 0;
5926
if(0 != (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DETAILED) && !IsDouble)
5927
{
5928
int64_t nCounterMax = S.nCounterMax[i];
5929
int64_t nCounterMin = S.nCounterMin[i];
5930
uint32_t nBaseIndex = S.nCounterHistoryPut;
5931
MicroProfilePrintf(CB, Handle, "\nS.CounterHistoryArray%d =[", i);
5932
for(uint32_t j = 0; j < MICROPROFILE_GRAPH_HISTORY; ++j)
5933
{
5934
uint32_t nHistoryIndex = (nBaseIndex + j) % MICROPROFILE_GRAPH_HISTORY;
5935
int64_t nValue = MicroProfileClamp(S.nCounterHistory[nHistoryIndex][i], nCounterMin, nCounterMax);
5936
MicroProfilePrintf(CB, Handle, "%lld,", nValue);
5937
}
5938
MicroProfilePrintf(CB, Handle, "];\n");
5939
5940
int64_t nCounterHeightBase = nCounterMax;
5941
int64_t nCounterOffset = 0;
5942
if(nCounterMin < 0)
5943
{
5944
nCounterHeightBase = nCounterMax - nCounterMin;
5945
nCounterOffset = -nCounterMin;
5946
}
5947
double fRcp = nCounterHeightBase ? (1.0 / nCounterHeightBase) : 0;
5948
5949
MicroProfilePrintf(CB, Handle, "\nS.CounterHistoryArrayPrc%d =[", i);
5950
for(uint32_t j = 0; j < MICROPROFILE_GRAPH_HISTORY; ++j)
5951
{
5952
uint32_t nHistoryIndex = (nBaseIndex + j) % MICROPROFILE_GRAPH_HISTORY;
5953
int64_t nValue = MicroProfileClamp(S.nCounterHistory[nHistoryIndex][i], nCounterMin, nCounterMax);
5954
float fPrc = (nValue + nCounterOffset) * fRcp;
5955
MicroProfilePrintf(CB, Handle, "%f,", fPrc);
5956
}
5957
MicroProfilePrintf(CB, Handle, "];\n");
5958
MicroProfilePrintf(CB, Handle, "S.CounterHistory%d = MakeCounterHistory(%d, S.CounterHistoryArray%d, S.CounterHistoryArrayPrc%d)\n", i, i, i, i);
5959
}
5960
else
5961
{
5962
MicroProfilePrintf(CB, Handle, "S.CounterHistory%d;\n", i);
5963
}
5964
}
5965
5966
MicroProfilePrintf(CB, Handle, "\nS.CounterInfo = [");
5967
5968
for(int i = 0; i < (int)S.nNumCounters; ++i)
5969
{
5970
bool IsDouble = (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) != 0;
5971
float fCounterPrc = 0.f;
5972
float fBoxPrc = 1.f;
5973
double dCounter, dLimit, dMax, dMin;
5974
char Formatted[64];
5975
char FormattedLimit[64];
5976
5977
if(!IsDouble)
5978
{
5979
uint64_t nCounter = S.Counters[i].load();
5980
uint64_t nLimit = S.CounterInfo[i].nLimit;
5981
fCounterPrc = 0.f;
5982
if(nLimit)
5983
{
5984
fCounterPrc = (float)nCounter / nLimit;
5985
if(fCounterPrc > 1.f)
5986
{
5987
fBoxPrc = 1.f / fCounterPrc;
5988
fCounterPrc = 1.f;
5989
}
5990
}
5991
MicroProfileFormatCounter(S.CounterInfo[i].eFormat, nCounter, Formatted, sizeof(Formatted) - 1);
5992
MicroProfileFormatCounter(S.CounterInfo[i].eFormat, S.CounterInfo[i].nLimit, FormattedLimit, sizeof(FormattedLimit) - 1);
5993
5994
dCounter = (double)nCounter;
5995
dMin = (double)S.nCounterMin[i];
5996
dMax = (double)S.nCounterMax[i];
5997
dLimit = (double)nLimit;
5998
}
5999
else
6000
{
6001
dCounter = S.CountersDouble[i].load();
6002
dLimit = S.CounterInfo[i].dLimit;
6003
fCounterPrc = 0.f;
6004
if(dLimit > 0.f)
6005
{
6006
fCounterPrc = (float)(dCounter / dLimit);
6007
if(fCounterPrc > 1.f)
6008
{
6009
fBoxPrc = 1.f / fCounterPrc;
6010
fCounterPrc = 1.f;
6011
}
6012
}
6013
MicroProfileFormatCounterDouble(S.CounterInfo[i].eFormat, dCounter, Formatted, sizeof(Formatted) - 1);
6014
MicroProfileFormatCounterDouble(S.CounterInfo[i].eFormat, S.CounterInfo[i].dLimit, FormattedLimit, sizeof(FormattedLimit) - 1);
6015
dMin = (double)S.dCounterMin[i];
6016
dMax = (double)S.dCounterMax[i];
6017
}
6018
MicroProfilePrintf(CB,
6019
Handle,
6020
"MakeCounter(%d, %d, %d, %d, %d, '%s', %f, %f, %f, '%s', %f, '%s', %f, %f, %d, S.CounterHistory%d),",
6021
i,
6022
S.CounterInfo[i].nParent,
6023
S.CounterInfo[i].nSibling,
6024
S.CounterInfo[i].nFirstChild,
6025
S.CounterInfo[i].nLevel,
6026
S.CounterInfo[i].pName,
6027
dCounter,
6028
dMin,
6029
dMax,
6030
Formatted,
6031
dLimit,
6032
FormattedLimit,
6033
fCounterPrc,
6034
fBoxPrc,
6035
S.CounterInfo[i].eFormat == MICROPROFILE_COUNTER_FORMAT_BYTES ? 1 : 0,
6036
i);
6037
}
6038
MicroProfilePrintf(CB, Handle, "];\n\n");
6039
6040
const int64_t nTickStart = S.Frames[nFirstFrame].nFrameStartCpu;
6041
const int64_t nTickEnd = S.Frames[nLastFrame].nFrameStartCpu;
6042
int64_t nTickStartGpu = S.Frames[nFirstFrame].nFrameStartGpu;
6043
6044
int64_t nTickReferenceCpu, nTickReferenceGpu;
6045
int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
6046
int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
6047
int nTickReference = 0;
6048
if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu))
6049
{
6050
nTickStartGpu = (nTickStart - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu;
6051
nTickReference = 1;
6052
}
6053
6054
uprintf("dumping %d frames\n", nNumFrames);
6055
uprintf("dumping frame %d to %d\n", nFirstFrame, nLastFrame);
6056
6057
uint32_t* nTimerCounter = (uint32_t*)alloca(sizeof(uint32_t) * S.nTotalTimers);
6058
memset(nTimerCounter, 0, sizeof(uint32_t) * S.nTotalTimers);
6059
6060
{
6061
MicroProfilePrintf(CB, Handle, " //Timeline begin\n");
6062
MicroProfileThreadLog* pLog = &S.TimelineLog;
6063
uint32_t nFrameIndexFirst = (nFirstFrame) % MICROPROFILE_MAX_FRAME_HISTORY;
6064
uint32_t nFrameIndexLast = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
6065
{
6066
// find the frame that has an active marker the furtest distance from the selected range
6067
int nDelta = 0;
6068
int nOffset = 0;
6069
for(uint32_t i = nFrameIndexFirst; i != nFrameIndexLast; i = (i + 1) % MICROPROFILE_MAX_FRAME_HISTORY)
6070
{
6071
int D = (int)S.Frames[i].nTimelineFrameMax - nOffset;
6072
nDelta = MicroProfileMax(D, nDelta);
6073
nOffset++;
6074
}
6075
nFrameIndexFirst = (nFirstFrame - nDelta) % MICROPROFILE_MAX_FRAME_HISTORY;
6076
}
6077
6078
uint32_t nLogStart = S.Frames[nFrameIndexFirst].nLogStartTimeline;
6079
uint32_t nLogEnd = S.Frames[nFrameIndexLast].nLogStartTimeline;
6080
float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
6081
6082
#define pp(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__)
6083
6084
if(nLogStart != nLogEnd)
6085
{
6086
uint32_t nLogType;
6087
float fTime;
6088
int f = 0;
6089
6090
pp("S.TimelineColorArray=[");
6091
for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
6092
{
6093
uint64_t v = pLog->Log[k];
6094
uint64_t nIndex = MicroProfileLogGetTimerIndex(v);
6095
uint64_t nTick = MicroProfileLogGetTick(v);
6096
(void)nTick;
6097
nLogType = MicroProfileLogGetType(v);
6098
switch(nLogType)
6099
{
6100
case MP_LOG_ENTER:
6101
break;
6102
case MP_LOG_LEAVE:
6103
pp("%c'%s'", f++ ? ',' : ' ', "#ff8080");
6104
break;
6105
6106
case MP_LOG_EXTENDED:
6107
case MP_LOG_EXTENDED_NO_DATA:
6108
uint32_t payload = MicroProfileLogGetExtendedPayload(v);
6109
if(nIndex == ETOKEN_CUSTOM_COLOR)
6110
{
6111
uint32_t nColor = payload;
6112
pp("%c'#%02x%02x%02x'", f++ ? ',' : ' ', MICROPROFILE_UNPACK_RED(nColor) & 0xff, MICROPROFILE_UNPACK_GREEN(nColor) & 0xff, MICROPROFILE_UNPACK_BLUE(nColor) & 0xff);
6113
}
6114
k += MicroProfileLogGetDataSize(v);
6115
break;
6116
}
6117
}
6118
pp("];\n");
6119
6120
f = 0;
6121
pp("S.TimelineIdArray=[");
6122
for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
6123
{
6124
uint64_t v = pLog->Log[k];
6125
uint64_t nIndex = MicroProfileLogGetTimerIndex(v);
6126
uint64_t nTick = MicroProfileLogGetTick(v);
6127
(void)nTick;
6128
nLogType = MicroProfileLogGetType(v);
6129
switch(nLogType)
6130
{
6131
case MP_LOG_ENTER:
6132
case MP_LOG_LEAVE:
6133
case MP_LOG_EXTENDED_NO_DATA:
6134
6135
break;
6136
case MP_LOG_EXTENDED:
6137
if(nIndex == ETOKEN_CUSTOM_ID)
6138
{
6139
pp("%c%d", f++ ? ',' : ' ', (uint32_t)nTick);
6140
}
6141
k += MicroProfileLogGetDataSize(v);
6142
break;
6143
}
6144
}
6145
pp("];\n");
6146
6147
f = 0;
6148
6149
pp("S.TimelineArray=[");
6150
for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
6151
{
6152
uint64_t v = pLog->Log[k];
6153
nLogType = MicroProfileLogGetType(v);
6154
switch(nLogType)
6155
{
6156
case MP_LOG_ENTER:
6157
case MP_LOG_LEAVE:
6158
fTime = MicroProfileLogTickDifference(nTickStart, v) * fToMs;
6159
pp("%c%f", f++ ? ',' : ' ', fTime);
6160
break;
6161
case MP_LOG_EXTENDED:
6162
k += MicroProfileLogGetDataSize(v);
6163
break;
6164
case MP_LOG_EXTENDED_NO_DATA:
6165
break;
6166
}
6167
}
6168
pp("];\n");
6169
pp("S.TimelineNames=[");
6170
f = 0;
6171
char String[MICROPROFILE_MAX_STRING + 1];
6172
for(uint32_t k = nLogStart; k != nLogEnd;)
6173
{
6174
uint64_t v = pLog->Log[k];
6175
nLogType = MicroProfileLogGetType(v);
6176
uint64_t nIndex = MicroProfileLogGetTimerIndex(v);
6177
uint64_t nTick = MicroProfileLogGetTick(v);
6178
(void)nTick;
6179
switch(nLogType)
6180
{
6181
case MP_LOG_ENTER:
6182
case MP_LOG_LEAVE:
6183
if(nIndex == ETOKEN_CUSTOM_NAME && nLogType == MP_LOG_LEAVE)
6184
{
6185
// pp(f++ ? ",''" : "''");
6186
}
6187
k = (k + 1) % MICROPROFILE_BUFFER_SIZE;
6188
break;
6189
case MP_LOG_EXTENDED_NO_DATA:
6190
k = (k + 1) % MICROPROFILE_BUFFER_SIZE;
6191
break;
6192
case MP_LOG_EXTENDED:
6193
uint32_t nSize = MicroProfileLogGetDataSize(v);
6194
6195
if(nIndex == ETOKEN_CUSTOM_ID)
6196
{
6197
char* pSource = (char*)&pLog->Log[(k + 1) % MICROPROFILE_BUFFER_SIZE];
6198
const char* pOut = nullptr;
6199
if(nSize == 0)
6200
{
6201
pOut = "";
6202
}
6203
else if(k + nSize <= MICROPROFILE_BUFFER_SIZE)
6204
{
6205
pOut = pSource;
6206
}
6207
else
6208
{
6209
pOut = &String[0];
6210
char* pDest = &String[0];
6211
MP_ASSERT(nSize * 8 < sizeof(MICROPROFILE_MAX_STRING) + 1);
6212
uint32_t Index = (k + 1) % MICROPROFILE_BUFFER_SIZE;
6213
for(uint32_t l = 0; l < nSize; ++l)
6214
{
6215
memcpy(pDest, (char*)pLog->Log[Index], sizeof(uint64_t));
6216
Index = (Index + 1) % MICROPROFILE_BUFFER_SIZE;
6217
}
6218
}
6219
if(f++)
6220
{
6221
pp(",'%s'", pOut);
6222
}
6223
else
6224
{
6225
pp("'%s'", pOut);
6226
}
6227
}
6228
k = (k + 1 + nSize) % MICROPROFILE_BUFFER_SIZE;
6229
break;
6230
}
6231
}
6232
pp("];\n");
6233
}
6234
MicroProfilePrintf(CB, Handle, " //Timeline end\n");
6235
}
6236
6237
MicroProfilePrintf(CB, Handle, "S.Frames = Array(%d);\n", nNumFrames);
6238
for(uint32_t i = 0; i < nNumFrames; ++i)
6239
{
6240
uint32_t nFrameIndex = (nFirstFrame + i) % MICROPROFILE_MAX_FRAME_HISTORY;
6241
uint32_t nFrameIndexNext = (nFrameIndex + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
6242
6243
for(uint32_t j = 0; j < S.nNumLogs; ++j)
6244
{
6245
MicroProfileThreadLog* pLog = S.Pool[j];
6246
int64_t nStartTickBase = pLog->nGpu ? nTickStartGpu : nTickStart;
6247
uint32_t nLogStart = S.Frames[nFrameIndex].nLogStart[j];
6248
uint32_t nLogEnd = S.Frames[nFrameIndexNext].nLogStart[j];
6249
uint32_t nLogType;
6250
float fToMs;
6251
uint64_t nStartTick;
6252
float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
6253
float fToMsBase = MicroProfileTickToMsMultiplier(pLog->nGpu ? nTicksPerSecondGpu : nTicksPerSecondCpu);
6254
MicroProfilePrintf(CB, Handle, "S.ts_%d_%d = [", i, j);
6255
if(nLogStart != nLogEnd)
6256
{
6257
int f = 0;
6258
for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
6259
{
6260
float fTime;
6261
MicroProfileLogEntry v = pLog->Log[k];
6262
nLogType = MicroProfileLogGetType(v);
6263
fToMs = fToMsBase;
6264
nStartTick = nStartTickBase;
6265
switch(nLogType)
6266
{
6267
case MP_LOG_EXTENDED:
6268
{
6269
fTime = 0.f;
6270
k += MicroProfileLogGetDataSize(v);
6271
break;
6272
}
6273
case MP_LOG_EXTENDED_NO_DATA:
6274
{
6275
uint32_t nTimerIndex = (uint32_t)MicroProfileLogGetTimerIndex(v);
6276
if(nTimerIndex == ETOKEN_GPU_CPU_TIMESTAMP)
6277
{
6278
fToMs = fToMsCpu;
6279
nStartTick = nTickStart;
6280
fTime = MicroProfileLogTickDifference(nStartTick, v) * fToMs;
6281
}
6282
else
6283
{
6284
fTime = 0.f;
6285
}
6286
break;
6287
}
6288
default:
6289
fTime = MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs;
6290
}
6291
MicroProfilePrintf(CB, Handle, f++ ? ",%f" : "%f", fTime);
6292
}
6293
}
6294
MicroProfilePrintf(CB, Handle, "];\n");
6295
6296
MicroProfilePrintf(CB, Handle, "S.tt_%d_%d = [", i, j);
6297
if(nLogStart != nLogEnd)
6298
{
6299
uint32_t k = nLogStart;
6300
MicroProfilePrintf(CB, Handle, "%d", MicroProfileLogGetType(pLog->Log[k]));
6301
for(k = (k + 1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
6302
{
6303
uint64_t v = pLog->Log[k];
6304
uint32_t nLogType2 = MicroProfileLogGetType(v);
6305
6306
if(nLogType2 > MP_LOG_ENTER)
6307
nLogType2 |= (MicroProfileLogGetExtendedToken(v))
6308
<< 2; // pack extended token here.. this way all code can check agains ENTER/LEAVE, and only the ext code needs to care about the top bits.
6309
MicroProfilePrintf(CB, Handle, ",%d", nLogType2);
6310
if(nLogType2 == MP_LOG_EXTENDED)
6311
k += MicroProfileLogGetDataSize(v);
6312
}
6313
}
6314
MicroProfilePrintf(CB, Handle, "];\n");
6315
6316
MicroProfilePrintf(CB, Handle, "S.ti_%d_%d = [", i, j);
6317
if(nLogStart != nLogEnd)
6318
{
6319
for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
6320
{
6321
uint64_t v = pLog->Log[k];
6322
nLogType = MicroProfileLogGetType(v);
6323
const char* pFormat = k == nLogStart ? "%d" : ",%d";
6324
if(nLogType == MP_LOG_ENTER || nLogType == MP_LOG_LEAVE)
6325
{
6326
uint32_t nTimerIndex = (uint32_t)MicroProfileLogGetTimerIndex(pLog->Log[k]);
6327
if(ETOKEN_CSTR_PTR == nTimerIndex)
6328
{
6329
MP_ASSERT(k + 1 != nLogEnd);
6330
uint64_t v1 = pLog->Log[(k + 1) % MICROPROFILE_BUFFER_SIZE];
6331
6332
const char* pString = (const char*)MicroProfileLogGetExtendedPayloadNoDataPtr(v1);
6333
uintptr_t value;
6334
if(!MicroProfileHashTableGet(&StringsHashTable, (uint64_t)pString, &value))
6335
{
6336
MP_BREAK(); // should be covered earlier.
6337
}
6338
MicroProfilePrintf(CB, Handle, pFormat, value);
6339
}
6340
else
6341
{
6342
if(nTimerIndex < S.nTotalTimers)
6343
{
6344
nTimerCounter[nTimerIndex]++;
6345
}
6346
MicroProfilePrintf(CB, Handle, pFormat, nTimerIndex);
6347
}
6348
}
6349
else
6350
{
6351
uint64_t ExtendedToken = MicroProfileLogGetExtendedToken(v);
6352
uint64_t PayloadNoData = MicroProfileLogGetExtendedPayloadNoData(v);
6353
switch(ExtendedToken)
6354
{
6355
case ETOKEN_GPU_CPU_SOURCE_THREAD:
6356
MicroProfilePrintf(CB, Handle, pFormat, PayloadNoData);
6357
break;
6358
default:
6359
MicroProfilePrintf(CB, Handle, pFormat, -1);
6360
}
6361
6362
if(nLogType == MP_LOG_EXTENDED)
6363
k += MicroProfileLogGetDataSize(v);
6364
}
6365
}
6366
}
6367
MicroProfilePrintf(CB, Handle, "];\n");
6368
}
6369
6370
MicroProfilePrintf(CB, Handle, "S.ts%d = [", i);
6371
for(uint32_t j = 0; j < S.nNumLogs; ++j)
6372
{
6373
MicroProfilePrintf(CB, Handle, "S.ts_%d_%d,", i, j);
6374
}
6375
MicroProfilePrintf(CB, Handle, "];\n");
6376
MicroProfilePrintf(CB, Handle, "S.tt%d = [", i);
6377
for(uint32_t j = 0; j < S.nNumLogs; ++j)
6378
{
6379
MicroProfilePrintf(CB, Handle, "S.tt_%d_%d,", i, j);
6380
}
6381
MicroProfilePrintf(CB, Handle, "];\n");
6382
6383
MicroProfilePrintf(CB, Handle, "S.ti%d = [", i);
6384
for(uint32_t j = 0; j < S.nNumLogs; ++j)
6385
{
6386
MicroProfilePrintf(CB, Handle, "S.ti_%d_%d,", i, j);
6387
}
6388
MicroProfilePrintf(CB, Handle, "];\n");
6389
6390
int64_t nFrameStart = S.Frames[nFrameIndex].nFrameStartCpu;
6391
int64_t nFrameEnd = S.Frames[nFrameIndexNext].nFrameStartCpu;
6392
6393
float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
6394
float fFrameMs = MicroProfileLogTickDifference(nTickStart, nFrameStart) * fToMs;
6395
float fFrameEndMs = MicroProfileLogTickDifference(nTickStart, nFrameEnd) * fToMs;
6396
float fFrameGpuMs = 0;
6397
float fFrameGpuEndMs = 0;
6398
if(nTickReference)
6399
{
6400
fFrameGpuMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndex].nFrameStartGpu) * fToMsGPU;
6401
fFrameGpuEndMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndexNext].nFrameStartGpu) * fToMsGPU;
6402
}
6403
MicroProfilePrintf(CB, Handle, "S.Frames[%d] = MakeFrame(%d, %f, %f, %f, %f, S.ts%d, S.tt%d, S.ti%d);\n", i, 0, fFrameMs, fFrameEndMs, fFrameGpuMs, fFrameGpuEndMs, i, i, i);
6404
}
6405
6406
uint32_t nContextSwitchStart = 0;
6407
uint32_t nContextSwitchEnd = 0;
6408
MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nTickStart, nTickEnd);
6409
6410
uprintf("CONTEXT SWITCH SEARCH .... %d %d %d .... %lld, %lld\n", nContextSwitchStart, nContextSwitchEnd, nContextSwitchEnd - nContextSwitchStart, nTickStart, nTickEnd);
6411
6412
uint32_t nWrittenBefore = S.nWebServerDataSent;
6413
MicroProfilePrintf(CB, Handle, "S.CSwitchThreadInOutCpu = [");
6414
for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j + 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
6415
{
6416
MicroProfileContextSwitch CS = S.ContextSwitch[j];
6417
int nCpu = CS.nCpu;
6418
MicroProfilePrintf(CB, Handle, "%d,%d,%d,", CS.nThreadIn, CS.nThreadOut, nCpu);
6419
}
6420
MicroProfilePrintf(CB, Handle, "];\n");
6421
MicroProfilePrintf(CB, Handle, "S.CSwitchTime = [");
6422
float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
6423
for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j + 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
6424
{
6425
MicroProfileContextSwitch CS = S.ContextSwitch[j];
6426
float fTime = MicroProfileLogTickDifference(nTickStart, CS.nTicks) * fToMsCpu;
6427
MicroProfilePrintf(CB, Handle, "%f,", fTime);
6428
}
6429
MicroProfilePrintf(CB, Handle, "];\n");
6430
6431
MicroProfilePrintf(CB, Handle, "S.CSwitchThreads = {");
6432
6433
MicroProfileThreadInfo* pThreadInfo = nullptr;
6434
uint32_t nNumThreads = MicroProfileGetThreadInfoArray(&pThreadInfo);
6435
for(uint32_t i = 0; i < nNumThreads; ++i)
6436
{
6437
const char* p1 = pThreadInfo[i].pThreadModule ? pThreadInfo[i].pThreadModule : "?";
6438
const char* p2 = pThreadInfo[i].pProcessModule ? pThreadInfo[i].pProcessModule : "?";
6439
6440
MicroProfilePrintf(CB,
6441
Handle,
6442
"%" PRId64 ":{\'tid\':%" PRId64 ",\'pid\':%" PRId64 ",\'t\':\'%s\',\'p\':\'%s\'},",
6443
(uint64_t)pThreadInfo[i].tid,
6444
(uint64_t)pThreadInfo[i].tid,
6445
(uint64_t)pThreadInfo[i].pid,
6446
p1,
6447
p2);
6448
}
6449
6450
MicroProfilePrintf(CB, Handle, "};\n");
6451
MicroProfilePrintf(CB, Handle, "S.CoreEfficiencyClass = [");
6452
for(uint32_t i = 0; i < MICROPROFILE_MAX_CPU_CORES; ++i)
6453
{
6454
MicroProfilePrintf(CB, Handle, "%d,", S.CoreEfficiencyClass[i]);
6455
}
6456
MicroProfilePrintf(CB, Handle, "];\n");
6457
6458
{
6459
MicroProfilePrintf(CB, Handle, "//String Table\n");
6460
MicroProfilePrintf(CB, Handle, "S.StringTable = {}\n");
6461
// dump string table
6462
MicroProfileHashTableIterator beg = MicroProfileGetHashTableIteratorBegin(&StringsHashTable);
6463
MicroProfileHashTableIterator end = MicroProfileGetHashTableIteratorEnd(&StringsHashTable);
6464
while(beg != end)
6465
{
6466
uint64_t Key = beg->Key;
6467
uint64_t Value = beg->Value;
6468
MicroProfilePrintf(CB, Handle, "S.StringTable[%d] = '%s';\n", Value, (const char*)Key);
6469
beg++;
6470
}
6471
}
6472
6473
uint32_t nWrittenAfter = S.nWebServerDataSent;
6474
6475
MicroProfilePrintf(CB, Handle, "//CSwitch Size %d\n", nWrittenAfter - nWrittenBefore);
6476
6477
for(size_t i = 0; i < g_MicroProfileHtml_end_count; ++i)
6478
{
6479
CB(Handle, g_MicroProfileHtml_end_sizes[i] - 1, g_MicroProfileHtml_end[i]);
6480
}
6481
6482
uint32_t* nGroupCounter = (uint32_t*)alloca(sizeof(uint32_t) * S.nGroupCount);
6483
6484
memset(nGroupCounter, 0, sizeof(uint32_t) * S.nGroupCount);
6485
for(uint32_t i = 0; i < S.nTotalTimers; ++i)
6486
{
6487
uint32_t nGroupIndex = S.TimerInfo[i].nGroupIndex;
6488
nGroupCounter[nGroupIndex] += nTimerCounter[i];
6489
}
6490
6491
uint32_t* nGroupCounterSort = (uint32_t*)alloca(sizeof(uint32_t) * S.nGroupCount);
6492
uint32_t* nTimerCounterSort = (uint32_t*)alloca(sizeof(uint32_t) * S.nTotalTimers);
6493
for(uint32_t i = 0; i < S.nGroupCount; ++i)
6494
{
6495
nGroupCounterSort[i] = i;
6496
}
6497
for(uint32_t i = 0; i < S.nTotalTimers; ++i)
6498
{
6499
nTimerCounterSort[i] = i;
6500
}
6501
std::sort(nGroupCounterSort, nGroupCounterSort + S.nGroupCount, [nGroupCounter](const uint32_t l, const uint32_t r) { return nGroupCounter[l] > nGroupCounter[r]; });
6502
6503
std::sort(nTimerCounterSort, nTimerCounterSort + S.nTotalTimers, [nTimerCounter](const uint32_t l, const uint32_t r) { return nTimerCounter[l] > nTimerCounter[r]; });
6504
6505
MicroProfilePrintf(CB, Handle, "\n<!--\nMarker Per Group\n");
6506
for(uint32_t i = 0; i < S.nGroupCount; ++i)
6507
{
6508
uint32_t idx = nGroupCounterSort[i];
6509
MicroProfilePrintf(CB, Handle, "%8d:%s\n", nGroupCounter[idx], S.GroupInfo[idx].pName);
6510
}
6511
MicroProfilePrintf(CB, Handle, "Marker Per Timer\n");
6512
for(uint32_t i = 0; i < S.nTotalTimers; ++i)
6513
{
6514
uint32_t idx = nTimerCounterSort[i];
6515
MicroProfilePrintf(CB, Handle, "%8d:%s(%s)\n", nTimerCounter[idx], S.TimerInfo[idx].pName, S.GroupInfo[S.TimerInfo[idx].nGroupIndex].pName);
6516
}
6517
MicroProfilePrintf(CB, Handle, "\n-->\n");
6518
6519
memcpy(S.nActiveGroups, nActiveGroup, sizeof(S.nActiveGroups));
6520
S.AnyActive = AnyActive;
6521
#if MICROPROFILE_DEBUG
6522
int64_t nTicksEnd = MP_TICK();
6523
float fMs = fToMsCpu * (nTicksEnd - S.nPauseTicks);
6524
uprintf("html dump took %6.2fms\n", fMs);
6525
#endif
6526
6527
#undef pp
6528
6529
S.nPauseTicks = 0;
6530
}
6531
6532
// Output sink for the dump writers: appends one chunk of already-formatted
// text to a C stdio stream.
//   Handle - the destination stream, passed as an opaque pointer (a FILE*).
//   nSize  - number of bytes to write from pData.
//   pData  - start of the chunk; it is written as-is, no terminator is added.
// The result of fwrite is not inspected, so write failures are silent.
void MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData)
{
	FILE* const pStream = static_cast<FILE*>(Handle);
	// The chunk is submitted as a single item of nSize bytes.
	fwrite(pData, nSize, 1, pStream);
}
6536
6537
void MicroProfileDumpToFile()
6538
{
6539
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
6540
if(S.nDumpFileNextFrame & 1)
6541
{
6542
char Path[MICROPROFILE_MAX_PATH];
6543
int Length = snprintf(Path, sizeof(S.HtmlDumpPath), "%s.html", S.HtmlDumpPath);
6544
if(Length > 0 && Length < MICROPROFILE_MAX_PATH)
6545
{
6546
FILE* F = fopen(Path, "w");
6547
if(F)
6548
{
6549
MicroProfileDumpHtml(MicroProfileWriteFile, F, S.DumpFrameCount, S.HtmlDumpPath);
6550
fclose(F);
6551
}
6552
}
6553
}
6554
if(S.nDumpFileNextFrame & 2)
6555
{
6556
#if MICROPROFILE_LEGACY_CSV
6557
MicroProfileDumpCsvLegacy();
6558
#else
6559
MicroProfileDumpCsv(S.DumpFrameCount);
6560
#endif
6561
}
6562
}
6563
6564
// Pushes whatever is currently staged in S.WebServerBuffer to Socket with a
// single send() call and marks the staging buffer as empty.
// The result of send() is not checked.
void MicroProfileFlushSocket(MpSocket Socket)
{
	const char* pStaged = &S.WebServerBuffer[0];
	const uint32_t nStagedBytes = S.WebServerPut;
	send(Socket, pStaged, nStagedBytes, 0);
	// Next write starts at the beginning of the staging buffer again.
	S.WebServerPut = 0;
}
6569
6570
void MicroProfileWriteSocket(void* Handle, size_t nSize, const char* pData)
6571
{
6572
S.nWebServerDataSent += nSize;
6573
MpSocket Socket = *(MpSocket*)Handle;
6574
if(nSize > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE / 2)
6575
{
6576
MicroProfileFlushSocket(Socket);
6577
send(Socket, pData, (int)nSize, 0);
6578
}
6579
else
6580
{
6581
memcpy(&S.WebServerBuffer[S.WebServerPut], pData, nSize);
6582
S.WebServerPut += (uint32_t)nSize;
6583
if(S.WebServerPut > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE / 2)
6584
{
6585
MicroProfileFlushSocket(Socket);
6586
}
6587
}
6588
}
6589
6590
#if MICROPROFILE_MINIZ
6591
#ifndef MICROPROFILE_COMPRESS_BUFFER_SIZE
6592
#define MICROPROFILE_COMPRESS_BUFFER_SIZE (256 << 10)
6593
#endif
6594
6595
#define MICROPROFILE_COMPRESS_CHUNK (MICROPROFILE_COMPRESS_BUFFER_SIZE / 2)
6596
struct MicroProfileCompressedSocketState
6597
{
6598
unsigned char DeflateOut[MICROPROFILE_COMPRESS_CHUNK];
6599
unsigned char DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
6600
mz_stream Stream;
6601
MpSocket Socket;
6602
uint32_t nSize;
6603
uint32_t nCompressedSize;
6604
uint32_t nFlushes;
6605
uint32_t nMemmoveBytes;
6606
};
6607
6608
// Sends the compressed bytes currently sitting in pState->DeflateOut (if any)
// to the socket, accounts for them in nCompressedSize, and hands the whole
// output buffer back to the deflate stream.
// The result of send() is not checked.
void MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState* pState)
{
	mz_stream& Deflate = pState->Stream;
	unsigned char* const pOutBegin = pState->DeflateOut;
	// avail_out is the unused tail of the buffer, so the produced data ends
	// at (capacity - avail_out).
	unsigned char* const pOutEnd = pOutBegin + (MICROPROFILE_COMPRESS_CHUNK - Deflate.avail_out);
	const ptrdiff_t nPendingBytes = pOutEnd - pOutBegin;
	if(nPendingBytes != 0)
	{
		send(pState->Socket, (const char*)pOutBegin, nPendingBytes, 0);
		pState->nCompressedSize += nPendingBytes;
	}
	// Rewind the output window to the full, empty buffer.
	Deflate.avail_out = MICROPROFILE_COMPRESS_CHUNK;
	Deflate.next_out = pOutBegin;
}
6621
// Prepares pState for a new compressed transfer to Socket: clears the
// statistics, points the stream at the (empty) input/output staging buffers and
// initialises deflate with the default compression level.
void MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState* pState, MpSocket Socket)
{
	pState->Socket = Socket;
	pState->nSize = 0;
	pState->nCompressedSize = 0;
	pState->nFlushes = 0;
	pState->nMemmoveBytes = 0;

	mz_stream& Stream = pState->Stream;
	memset(&Stream, 0, sizeof(Stream));
	Stream.next_in = &pState->DeflateIn[0];
	Stream.avail_in = 0;
	Stream.next_out = &pState->DeflateOut[0];
	Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK;
	mz_deflateInit(&Stream, Z_DEFAULT_COMPRESSION);
}
6636
// Finishes the deflate stream, sends all remaining compressed bytes and
// releases the stream.
//
// mz_deflate(MZ_FINISH) returns MZ_OK (not MZ_STREAM_END) when the output
// buffer fills up before all pending data has been emitted; in that case it
// must be called again with fresh output space. The previous single call
// silently truncated the stream in that situation, so drain in a loop.
void MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState* pState)
{
	mz_stream& Stream = pState->Stream;
	MicroProfileCompressedSocketFlush(pState);
	int r = MZ_OK;
	do
	{
		r = mz_deflate(&Stream, MZ_FINISH);
		// Flushing resets next_out/avail_out, so every iteration has a full output chunk.
		MicroProfileCompressedSocketFlush(pState);
	} while(r == MZ_OK);
	MP_ASSERT(r == MZ_STREAM_END);
	r = mz_deflateEnd(&Stream);
	MP_ASSERT(r == MZ_OK);
	(void)r;
}
6646
6647
void MicroProfileCompressedWriteSocket(void* Handle, size_t nSize, const char* pData)
6648
{
6649
MicroProfileCompressedSocketState* pState = (MicroProfileCompressedSocketState*)Handle;
6650
mz_stream& Stream = pState->Stream;
6651
const unsigned char* pDeflateInEnd = Stream.next_in + Stream.avail_in;
6652
const unsigned char* pDeflateInStart = &pState->DeflateIn[0];
6653
const unsigned char* pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
6654
pState->nSize += (uint32_t)nSize;
6655
if((ptrdiff_t)nSize <= pDeflateInRealEnd - pDeflateInEnd)
6656
{
6657
memcpy((void*)pDeflateInEnd, pData, nSize);
6658
Stream.avail_in += (uint32_t)nSize;
6659
MP_ASSERT(Stream.next_in + Stream.avail_in <= pDeflateInRealEnd);
6660
return;
6661
}
6662
int Flush = 0;
6663
while(nSize)
6664
{
6665
pDeflateInEnd = Stream.next_in + Stream.avail_in;
6666
if(Flush)
6667
{
6668
pState->nFlushes++;
6669
MicroProfileCompressedSocketFlush(pState);
6670
pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
6671
if(pDeflateInEnd == pDeflateInRealEnd)
6672
{
6673
if(Stream.avail_in)
6674
{
6675
MP_ASSERT(pDeflateInStart != Stream.next_in);
6676
memmove((void*)pDeflateInStart, Stream.next_in, Stream.avail_in);
6677
pState->nMemmoveBytes += Stream.avail_in;
6678
}
6679
Stream.next_in = pDeflateInStart;
6680
pDeflateInEnd = Stream.next_in + Stream.avail_in;
6681
}
6682
}
6683
size_t nSpace = pDeflateInRealEnd - pDeflateInEnd;
6684
size_t nBytes = MicroProfileMin(nSpace, nSize);
6685
MP_ASSERT(nBytes + pDeflateInEnd <= pDeflateInRealEnd);
6686
memcpy((void*)pDeflateInEnd, pData, nBytes);
6687
Stream.avail_in += (uint32_t)nBytes;
6688
nSize -= nBytes;
6689
pData += nBytes;
6690
int r = mz_deflate(&Stream, MZ_NO_FLUSH);
6691
Flush = r == MZ_BUF_ERROR || nBytes == 0 || Stream.avail_out == 0 ? 1 : 0;
6692
MP_ASSERT(r == MZ_BUF_ERROR || r == MZ_OK);
6693
if(r == MZ_BUF_ERROR)
6694
{
6695
r = mz_deflate(&Stream, MZ_SYNC_FLUSH);
6696
}
6697
}
6698
}
6699
#endif
6700
6701
#ifndef MicroProfileSetNonBlocking // fcntl doesnt work on a some unix like platforms..
6702
// Switches Socket between blocking (NonBlocking == 0) and non-blocking
// (NonBlocking != 0) mode.
//
// On non-Windows platforms the current file status flags are read first. If
// that read fails (fcntl returns -1) the socket is left untouched; previously
// the -1 was OR-ed into the new flag word and passed to F_SETFL.
void MicroProfileSetNonBlocking(MpSocket Socket, int NonBlocking)
{
#ifdef _WIN32
	u_long nonBlocking = NonBlocking ? 1 : 0;
	ioctlsocket(Socket, FIONBIO, &nonBlocking);
#else
	const int Options = fcntl(Socket, F_GETFL);
	if(Options == -1)
	{
		return;
	}
	const int NewOptions = NonBlocking ? (Options | O_NONBLOCK) : (Options & (~O_NONBLOCK));
	fcntl(Socket, F_SETFL, NewOptions);
#endif
}
6719
#endif
6720
6721
void MicroProfileWebServerStart()
6722
{
6723
#ifdef _WIN32
6724
WSADATA wsa;
6725
if(WSAStartup(MAKEWORD(2, 2), &wsa))
6726
{
6727
S.ListenerSocket = (MpSocket)-1;
6728
return;
6729
}
6730
#endif
6731
6732
S.ListenerSocket = socket(PF_INET, SOCK_STREAM, 6);
6733
MP_ASSERT(!MP_INVALID_SOCKET(S.ListenerSocket));
6734
MicroProfileSetNonBlocking(S.ListenerSocket, 1);
6735
6736
{
6737
int r = 0;
6738
int on = 1;
6739
#if defined(_WIN32)
6740
r = setsockopt(S.ListenerSocket, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on));
6741
#else
6742
r = setsockopt(S.ListenerSocket, SOL_SOCKET, SO_REUSEADDR, (void*)&on, sizeof(on));
6743
#endif
6744
(void)r;
6745
}
6746
6747
int nStartPort = S.nWebServerPort;
6748
struct sockaddr_in Addr;
6749
Addr.sin_family = AF_INET;
6750
Addr.sin_addr.s_addr = INADDR_ANY;
6751
for(int i = 0; i < 20; ++i)
6752
{
6753
Addr.sin_port = htons(nStartPort + i);
6754
if(0 == bind(S.ListenerSocket, (sockaddr*)&Addr, sizeof(Addr)))
6755
{
6756
S.nWebServerPort = (uint32_t)(nStartPort + i);
6757
break;
6758
}
6759
}
6760
listen(S.ListenerSocket, 8);
6761
}
6762
6763
void MicroProfileWebServerJoin()
6764
{
6765
if(S.WebSocketThreadRunning)
6766
{
6767
MicroProfileThreadJoin(&S.WebSocketSendThread);
6768
}
6769
S.WebSocketThreadJoined = 1;
6770
}
6771
6772
void MicroProfileWebServerStop()
6773
{
6774
MP_ASSERT(S.WebSocketThreadJoined);
6775
#ifdef _WIN32
6776
closesocket(S.ListenerSocket);
6777
WSACleanup();
6778
#else
6779
close(S.ListenerSocket);
6780
#endif
6781
}
6782
// Result of classifying the path of an HTTP GET request (see MicroProfileParseGet).
enum MicroProfileGetCommand
{
	EMICROPROFILE_GET_COMMAND_DUMP = 0,           // "<digits>": dump a number of frames
	EMICROPROFILE_GET_COMMAND_DUMP_RANGE = 1,     // "r/<start>/<end>": dump a frame range
	EMICROPROFILE_GET_COMMAND_LIVE = 2,           // empty path, or a path starting with 'b' or 'p'
	EMICROPROFILE_GET_COMMAND_FAVICON = 3,        // "favicon.ico" / "favicon.png"
	EMICROPROFILE_GET_COMMAND_SERVICE_WORKER = 4, // "service-worker.js"
	EMICROPROFILE_GET_COMMAND_UNKNOWN = 5,        // anything else
};
6791
// Frame selection extracted from a GET path by MicroProfileParseGet.
struct MicroProfileParseGetResult
{
	// Number of frames requested.
	uint64_t nFrames;
	// First frame of a range request; (uint64_t)-1 for a plain "<digits>" dump.
	uint64_t nFrameStart;
};
6796
// Classifies the path part of an HTTP GET request (without the leading '/').
//
//   ""                         -> LIVE
//   "favicon.ico"/"favicon.png"-> FAVICON
//   "service-worker.js"        -> SERVICE_WORKER
//   "b..." / "p..."            -> LIVE (also sets S.nWSWasConnected so the default
//                                 preset is not loaded when the url names one)
//   "r/<start>/<end>"          -> DUMP_RANGE, pResult = {end - start, start}; requires end > start
//   "<digits>"                 -> DUMP, pResult->nFrames = value, or
//                                 MICROPROFILE_WEBSERVER_DEFAULT_FRAMES when the value is 0
//   anything else              -> UNKNOWN
//
// pResult is only written for DUMP and DUMP_RANGE.
//
// The path is untrusted network input, so numbers must start with a digit
// (strtoull would otherwise accept whitespace and a sign, and a negative value
// would wrap to a huge frame index) and the plain frame count is clamped to
// 0x7fffffff instead of going through atoi, which is undefined on overflow.
MicroProfileGetCommand MicroProfileParseGet(const char* pGet, MicroProfileParseGetResult* pResult)
{
	if('\0' == *pGet)
	{
		return EMICROPROFILE_GET_COMMAND_LIVE;
	}
	if(0 == strcmp(pGet, "favicon.ico") || 0 == strcmp(pGet, "favicon.png"))
	{
		return EMICROPROFILE_GET_COMMAND_FAVICON;
	}
	if(0 == strcmp(pGet, "service-worker.js"))
	{
		return EMICROPROFILE_GET_COMMAND_SERVICE_WORKER;
	}

	const char* pStart = pGet;
	if(*pStart == 'b' || *pStart == 'p')
	{
		S.nWSWasConnected = 1; // do not load default when url has one specified.
		return EMICROPROFILE_GET_COMMAND_LIVE;
	}

	if(*pStart == 'r') // range: r/<start>/<end>
	{
		if('/' != *++pStart)
		{
			return EMICROPROFILE_GET_COMMAND_UNKNOWN;
		}
		++pStart;
		if(!isdigit((unsigned char)*pStart))
		{
			return EMICROPROFILE_GET_COMMAND_UNKNOWN;
		}
		char* pEnd = nullptr;
		const uint64_t nFrameStart = strtoull(pStart, &pEnd, 10);
		if(pEnd == pStart || *pEnd != '/')
		{
			return EMICROPROFILE_GET_COMMAND_UNKNOWN;
		}

		pStart = pEnd + 1;
		if(!isdigit((unsigned char)*pStart))
		{
			return EMICROPROFILE_GET_COMMAND_UNKNOWN;
		}
		const uint64_t nFrameEnd = strtoull(pStart, &pEnd, 10);
		if(pEnd == pStart || nFrameEnd <= nFrameStart)
		{
			return EMICROPROFILE_GET_COMMAND_UNKNOWN;
		}
		pResult->nFrames = nFrameEnd - nFrameStart;
		pResult->nFrameStart = nFrameStart;
		return EMICROPROFILE_GET_COMMAND_DUMP_RANGE;
	}

	// Plain frame count: the whole path must consist of decimal digits.
	for(const char* pChar = pGet; *pChar != '\0'; ++pChar)
	{
		if(*pChar < '0' || *pChar > '9')
		{
			return EMICROPROFILE_GET_COMMAND_UNKNOWN;
		}
	}
	const uint64_t nMaxFrames = 0x7fffffffull;
	uint64_t nFrames = strtoull(pStart, nullptr, 10);
	if(nFrames > nMaxFrames)
	{
		nFrames = nMaxFrames;
	}
	pResult->nFrameStart = (uint64_t)-1;
	if(nFrames)
	{
		pResult->nFrames = nFrames;
	}
	else
	{
		pResult->nFrames = MICROPROFILE_WEBSERVER_DEFAULT_FRAMES;
	}
	return EMICROPROFILE_GET_COMMAND_DUMP;
}
6863
6864
// Base64-encodes nLen bytes from pIn into pOut, padding the final group with '='.
// Writes exactly 4 * ceil(nLen / 3) characters and does NOT append a terminating
// NUL; the caller provides the space and terminates the string if needed.
void MicroProfileBase64Encode(char* pOut, const uint8_t* pIn, uint32_t nLen)
{
	static const char* CODES = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
	char* pWrite = pOut;
	for(uint32_t nPos = 0; nPos < nLen; nPos += 3)
	{
		// Gather up to three input bytes; missing ones contribute zero bits.
		const uint32_t nAvail = nLen - nPos;
		const int nByte0 = pIn[nPos];
		const int nByte1 = nAvail > 1 ? pIn[nPos + 1] : 0;
		const int nByte2 = nAvail > 2 ? pIn[nPos + 2] : 0;

		pWrite[0] = CODES[(nByte0 & 0xfc) >> 2];
		pWrite[1] = CODES[((nByte0 & 0x3) << 4) | ((nByte1 & 0xF0) >> 4)];
		pWrite[2] = nAvail > 1 ? CODES[((nByte1 & 0x0F) << 2) | ((nByte2 & 0xC0) >> 6)] : '=';
		pWrite[3] = nAvail > 2 ? CODES[nByte2 & 0x3F] : '=';
		pWrite += 4;
	}
}
6901
6902
// begin: SHA-1 in C
6903
// ftp://ftp.funet.fi/pub/crypt/hash/sha/sha1.c
6904
// SHA-1 in C
6905
// By Steve Reid <steve@edmweb.com>
6906
// 100% Public Domain
6907
6908
// Running state of a SHA-1 computation.
typedef struct
{
	// Five 32-bit chaining words; seeded by MicroProfile_SHA1_Init.
	uint32_t state[5];
	// Message length in bits: count[0] is the low word, count[1] the high word.
	uint32_t count[2];
	// Input bytes that do not yet form a complete 64-byte block.
	unsigned char buffer[64];
} MicroProfile_SHA1_CTX;
6914
#include <string.h>
6915
#ifndef _WIN32
6916
#include <netinet/in.h>
6917
#endif
6918
6919
static void MicroProfile_SHA1_Transform(uint32_t[5], const unsigned char[64]);

// Hash a single 512-bit block. This is the core of the algorithm.
//
// state  : the five chaining words, updated in place.
// buffer : the 64-byte input block; it is only read.
//
// The block is loaded into a local 16-word schedule with explicit big-endian
// byte assembly. The earlier implementation cast `buffer` to a uint32_t union,
// byte-swapped it in place with htonl (writing through a const parameter) and
// depended on a set of round macros (rol/blk0/blk/R0..R4); those macros were
// used only by this function and are no longer needed.
static void MicroProfile_SHA1_Transform(uint32_t state[5], const unsigned char buffer[64])
{
	uint32_t w[16];
	for(int i = 0; i < 16; ++i)
	{
		const unsigned char* p = buffer + 4 * i;
		w[i] = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 8) | (uint32_t)p[3];
	}

	// Copy context->state[] to working vars
	uint32_t a = state[0];
	uint32_t b = state[1];
	uint32_t c = state[2];
	uint32_t d = state[3];
	uint32_t e = state[4];

	// 4 rounds of 20 operations each.
	for(int t = 0; t < 80; ++t)
	{
		if(t >= 16)
		{
			// Message schedule kept in a 16-word circular buffer.
			const uint32_t x = w[(t + 13) & 15] ^ w[(t + 8) & 15] ^ w[(t + 2) & 15] ^ w[t & 15];
			w[t & 15] = (x << 1) | (x >> 31);
		}

		uint32_t f;
		uint32_t k;
		if(t < 20)
		{
			f = (b & (c ^ d)) ^ d;
			k = 0x5A827999u;
		}
		else if(t < 40)
		{
			f = b ^ c ^ d;
			k = 0x6ED9EBA1u;
		}
		else if(t < 60)
		{
			f = ((b | c) & d) | (b & c);
			k = 0x8F1BBCDCu;
		}
		else
		{
			f = b ^ c ^ d;
			k = 0xCA62C1D6u;
		}

		const uint32_t next = ((a << 5) | (a >> 27)) + f + e + k + w[t & 15];
		e = d;
		d = c;
		c = (b << 30) | (b >> 2);
		b = a;
		a = next;
	}

	// Add the working vars back into context.state[]
	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
	state[4] += e;
}
7051
7052
// Resets context to the start of a new SHA-1 computation: loads the SHA-1
// initialization constants and zeroes the bit counter.
void MicroProfile_SHA1_Init(MicroProfile_SHA1_CTX* context)
{
	// SHA1 initialization constants
	const uint32_t kSeed[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 };
	for(int i = 0; i < 5; ++i)
	{
		context->state[i] = kSeed[i];
	}
	context->count[1] = 0;
	context->count[0] = 0;
}
7062
7063
// Run your data through this.
7064
7065
// Feeds len bytes from data into the hash. Complete 64-byte blocks are
// transformed immediately; any remainder stays in context->buffer.
void MicroProfile_SHA1_Update(MicroProfile_SHA1_CTX* context, const unsigned char* data, unsigned int len)
{
	// Bytes already waiting in the buffer (bit count / 8, modulo 64).
	unsigned int nBuffered = (context->count[0] >> 3) & 63;

	// Advance the 64-bit bit counter, carrying from the low word into the high word.
	const unsigned int nAddedBits = len << 3;
	context->count[0] += nAddedBits;
	if(context->count[0] < nAddedBits)
	{
		context->count[1]++;
	}
	context->count[1] += (len >> 29);

	// Bytes needed to complete the current block; 64 for every block after the first.
	unsigned int nNeeded = 64 - nBuffered;
	while(len >= nNeeded)
	{
		memcpy(&context->buffer[nBuffered], data, nNeeded);
		MicroProfile_SHA1_Transform(context->state, context->buffer);
		data += nNeeded;
		len -= nNeeded;
		nNeeded = 64;
		nBuffered = 0;
	}

	memcpy(&context->buffer[nBuffered], data, len);
}
7086
7087
// Add padding and return the message digest.
7088
7089
void MicroProfile_SHA1_Final(unsigned char digest[20], MicroProfile_SHA1_CTX* context)
7090
{
7091
uint32_t i, j;
7092
unsigned char finalcount[8];
7093
7094
for(i = 0; i < 8; i++)
7095
{
7096
finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); // Endian independent
7097
}
7098
MicroProfile_SHA1_Update(context, (unsigned char*)"\200", 1);
7099
while((context->count[0] & 504) != 448)
7100
{
7101
MicroProfile_SHA1_Update(context, (unsigned char*)"\0", 1);
7102
}
7103
MicroProfile_SHA1_Update(context, finalcount, 8); // Should cause a SHA1Transform()
7104
for(i = 0; i < 20; i++)
7105
{
7106
digest[i] = (unsigned char)((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
7107
}
7108
// Wipe variables
7109
i = j = 0;
7110
memset(context->buffer, 0, 64);
7111
memset(context->state, 0, 20);
7112
memset(context->count, 0, 8);
7113
memset(&finalcount, 0, 8);
7114
}
7115
7116
#undef rol
7117
#undef blk0
7118
#undef blk
7119
#undef R0
7120
#undef R1
7121
#undef R2
7122
#undef R3
7123
#undef R4
7124
7125
// end: SHA-1 in C
7126
7127
void MicroProfileWebSocketSendState(MpSocket C);
7128
void MicroProfileWebSocketSendEnabled(MpSocket C);
7129
void MicroProfileWSPrintStart(MpSocket C);
7130
void MicroProfileWSPrintf(const char* pFmt, ...);
7131
void MicroProfileWSPrintEnd();
7132
void MicroProfileWSFlush();
7133
bool MicroProfileWebSocketReceive(MpSocket C);
7134
7135
// Element kinds carried in the upper byte of a packed websocket id
// (see MicroProfileWebSocketIdPack / MicroProfileWebSocketCommand).
enum
{
	TYPE_NONE = 0,     // no-op
	TYPE_TIMER = 1,    // element is a timer index
	TYPE_GROUP = 2,    // element is a group index
	TYPE_CATEGORY = 3, // element is a category index
	TYPE_SETTING = 4,  // element is one of the SETTING_* values
	TYPE_COUNTER = 5,  // element is a counter index
};
7144
7145
// Element values for TYPE_SETTING commands; each one toggles a global option
// in MicroProfileWebSocketCommand.
enum
{
	SETTING_FORCE_ENABLE = 0,         // toggles "enable all groups"
	SETTING_CONTEXT_SWITCH_TRACE = 1, // starts/stops the context switch trace
	SETTING_PLATFORM_MARKERS = 2,     // toggles platform markers
};
7151
7152
// Message kinds for websocket traffic; the value is written as the "k" field
// of an outgoing JSON message (see MicroProfileWebSocketSendPresets).
enum
{
	MSG_TIMER_TREE = 1,
	MSG_ENABLED = 2,
	MSG_FRAME = 3,
	MSG_LOADSETTINGS = 4,
	MSG_PRESETS = 5,
	MSG_CURRENTSETTINGS = 6,
	MSG_COUNTERS = 7,
	MSG_FUNCTION_RESULTS = 8,
	MSG_INACTIVE_FRAME = 9,
	MSG_FUNCTION_NAMES = 10,
	MSG_INSTRUMENT_ERROR = 11,
	MSG_QUERY_INDEX = 12,
	// MSG_MODULE_NAME = 12,
};
7168
7169
// View identifiers; VIEW_SIZE is the number of views.
// NOTE(review): presumably these mirror the view modes of the web client - confirm.
enum
{
	VIEW_GRAPH_SPLIT = 0,
	VIEW_GRAPH_PERCENTILE = 1,
	VIEW_GRAPH_THREAD_GROUP = 2,
	VIEW_BAR = 3,
	VIEW_BAR_ALL = 4,
	VIEW_BAR_SINGLE = 5,
	VIEW_COUNTERS = 6,
	VIEW_SIZE = 7,
};
7180
7181
void MicroProfileSocketDumpState()
7182
{
7183
fd_set Read, Write, Error;
7184
FD_ZERO(&Read);
7185
FD_ZERO(&Write);
7186
FD_ZERO(&Error);
7187
MpSocket LastSocket = 1;
7188
for(uint32_t i = 0; i < S.nNumWebSockets; ++i)
7189
{
7190
LastSocket = MicroProfileMax(LastSocket, S.WebSockets[i] + 1);
7191
FD_SET(S.WebSockets[i], &Read);
7192
FD_SET(S.WebSockets[i], &Write);
7193
FD_SET(S.WebSockets[i], &Error);
7194
}
7195
timeval tv;
7196
tv.tv_sec = 0;
7197
tv.tv_usec = 0;
7198
7199
if(-1 == select(LastSocket, &Read, &Write, &Error, &tv))
7200
{
7201
MP_ASSERT(0);
7202
}
7203
for(uint32_t i = 0; i < S.nNumWebSockets; i++)
7204
{
7205
MpSocket s = S.WebSockets[i];
7206
uprintf("%" PRId64 " ", (uint64_t)s);
7207
7208
if(FD_ISSET(s, &Error))
7209
{
7210
uprintf("e");
7211
}
7212
else
7213
{
7214
uprintf("_");
7215
}
7216
if(FD_ISSET(s, &Read))
7217
{
7218
uprintf("r");
7219
}
7220
else
7221
{
7222
uprintf(" ");
7223
}
7224
if(FD_ISSET(s, &Write))
7225
{
7226
uprintf("w");
7227
}
7228
else
7229
{
7230
uprintf(" ");
7231
}
7232
}
7233
uprintf("\n");
7234
for(uint32_t i = 1; i < S.nNumWebSockets; i++)
7235
{
7236
MpSocket s = S.WebSockets[i];
7237
int error_code;
7238
socklen_t error_code_size = sizeof(error_code);
7239
int r = getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&error_code, &error_code_size);
7240
MP_ASSERT(r >= 0);
7241
if(error_code != 0)
7242
{
7243
#ifdef _WIN32
7244
char buffer[1024];
7245
strerror_s(buffer, sizeof(buffer) - 1, error_code);
7246
fprintf(stderr, "socket error: %d %s\n", (int)s, buffer);
7247
#else
7248
fprintf(stderr, "socket error: %d %s\n", (int)s, strerror(error_code));
7249
#endif
7250
MP_ASSERT(0);
7251
}
7252
}
7253
}
7254
7255
bool MicroProfileSocketSend2(MpSocket Connection, const void* pMessage, int nLen);
7256
void* MicroProfileSocketSenderThread(void*)
7257
{
7258
MicroProfileOnThreadCreate("MicroProfileSocketSenderThread");
7259
while(!S.nMicroProfileShutdown)
7260
{
7261
if(S.nSocketFail)
7262
{
7263
MicroProfileSleep(100);
7264
continue;
7265
}
7266
7267
uint32_t nEnd = MICROPROFILE_WEBSOCKET_BUFFER_SIZE;
7268
uint32_t nGet = S.WSBuf.nSendGet.load();
7269
uint32_t nPut = S.WSBuf.nSendPut.load();
7270
uint32_t nSendStart = 0;
7271
uint32_t nSendAmount = 0;
7272
if(nGet > nPut)
7273
{
7274
nSendStart = nGet;
7275
nSendAmount = nEnd - nGet;
7276
}
7277
else if(nGet < nPut)
7278
{
7279
nSendStart = nGet;
7280
nSendAmount = nPut - nGet;
7281
}
7282
7283
if(nSendAmount)
7284
{
7285
MICROPROFILE_SCOPE(g_MicroProfileSendLoop);
7286
MICROPROFILE_COUNTER_LOCAL_ADD_ATOMIC(g_MicroProfileBytesPerFlip, nSendAmount);
7287
if(!MicroProfileSocketSend2(S.WebSockets[0], &S.WSBuf.SendBuffer[nSendStart], nSendAmount))
7288
{
7289
S.nSocketFail = 1;
7290
}
7291
else
7292
{
7293
S.WSBuf.nSendGet.store((nGet + nSendAmount) % MICROPROFILE_WEBSOCKET_BUFFER_SIZE);
7294
}
7295
}
7296
else
7297
{
7298
MicroProfileSleep(20);
7299
}
7300
}
7301
MicroProfileOnThreadExit();
7302
return 0;
7303
}
7304
7305
// Queues nLen bytes from pMessage into the websocket send ring buffer (S.WSBuf),
// which is drained by MicroProfileSocketSenderThread. Blocks (sleeping 20ms at a
// time) while the buffer is full, and gives up once S.nSocketFail is set.
// Connection is not used by this function.
void MicroProfileSocketSend(MpSocket Connection, const void* pMessage, int nLen)
{
	if(S.nSocketFail || nLen <= 0)
	{
		return;
	}
	MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSocketSend", MP_GREEN4);
	while(nLen != 0)
	{
		MP_ASSERT(nLen > 0);
		const uint32_t nCapacity = MICROPROFILE_WEBSOCKET_BUFFER_SIZE;
		const uint32_t nGet = S.WSBuf.nSendGet.load();
		const uint32_t nPut = S.WSBuf.nSendPut.load();

		// Contiguous free space starting at nPut. One slot always stays empty so a
		// full buffer can be told apart from an empty one.
		uint32_t nFree = 0;
		if(nPut < nGet)
		{
			nFree = nGet - nPut - 1;
		}
		else if(nGet == 0)
		{
			nFree = nCapacity - nPut - 1;
		}
		else
		{
			nFree = nCapacity - nPut;
		}
		MP_ASSERT((int)nFree >= 0);

		const uint32_t nAmount = MicroProfileMin(nLen, (int)nFree);
		if(0 == nAmount)
		{
			if(S.nSocketFail)
			{
				return;
			}
			MicroProfileSleep(20);
			continue;
		}

		memcpy(&S.WSBuf.SendBuffer[nPut], pMessage, nAmount);
		pMessage = (void*)((char*)pMessage + nAmount);
		nLen -= nAmount;
		S.WSBuf.nSendPut.store((nPut + nAmount) % MICROPROFILE_WEBSOCKET_BUFFER_SIZE);
	}
}
7353
7354
// Sends nLen bytes from pMessage on Connection, retrying (with a 20ms sleep)
// while the socket reports that the operation would block.
//
// Returns true once every byte has been sent; false if sending is disabled
// (S.nSocketFail), nLen <= 0, the socket has a pending error, or send() fails
// with any error other than "would block".
//
// Fixes relative to the previous version:
//  - on Windows the failure reason is taken from WSAGetLastError(); Winsock does
//    not report socket errors through errno, so a would-block condition was
//    misclassified;
//  - error_code is initialised and the getsockopt() result is checked, so a
//    failed query can no longer lead to a read of an indeterminate value.
bool MicroProfileSocketSend2(MpSocket Connection, const void* pMessage, int nLen)
{
	if(S.nSocketFail || nLen <= 0)
	{
		return false;
	}
	// MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSocketSend2", 0);
#ifndef _WIN32
	int error_code = 0;
	socklen_t error_code_size = sizeof(error_code);
	if(0 != getsockopt(Connection, SOL_SOCKET, SO_ERROR, &error_code, &error_code_size) || error_code != 0)
	{
		return false;
	}
#endif

	const char* pCursor = (const char*)pMessage;
	while(nLen > 0)
	{
		const int nSent = (int)send(Connection, pCursor, nLen, 0);
		if(nSent < 0)
		{
#ifdef _WIN32
			const bool bWouldBlock = WSAGetLastError() == WSAEWOULDBLOCK;
#else
			const int error = errno;
			const bool bWouldBlock = error == EAGAIN || error == EWOULDBLOCK;
#endif
			if(bWouldBlock)
			{
				MicroProfileSleep(20);
				continue;
			}
			return false;
		}
		nLen -= nSent;
		pCursor += nSent;
	}
	return true;
}
7401
7402
uint32_t MicroProfileWebSocketIdPack(uint32_t type, uint32_t element)
7403
{
7404
MP_ASSERT(type < 255);
7405
MP_ASSERT(element < 0xffffff);
7406
return type << 24 | element;
7407
}
7408
// Splits a packed websocket id into its type (bits 24..31) and element
// (bits 0..23); inverse of MicroProfileWebSocketIdPack.
void MicroProfileWebSocketIdUnpack(uint32_t nPacked, uint32_t& type, uint32_t& element)
{
	const uint32_t kElementMask = 0xffffff;
	const uint32_t kTypeMask = 0xff;
	type = (nPacked >> 24) & kTypeMask;
	element = nPacked & kElementMask;
}
7413
7414
// First byte of a websocket frame header, accessible either as named bit-fields
// or as the raw byte `v`. Bit-field placement is compiler-defined; the
// declaration order below puts `opcode` first and `FIN` last.
struct MicroProfileWebSocketHeader0
{
	union
	{
		struct
		{
			uint8_t opcode : 4; // frame opcode
			uint8_t RSV3 : 1;   // reserved bit 3
			uint8_t RSV2 : 1;   // reserved bit 2
			uint8_t RSV1 : 1;   // reserved bit 1
			uint8_t FIN : 1;    // final-fragment flag
		};
		uint8_t v; // whole header byte
	};
};
7429
7430
// Second byte of a websocket frame header, accessible either as named
// bit-fields or as the raw byte `v`.
struct MicroProfileWebSocketHeader1
{
	union
	{
		struct
		{
			uint8_t payload : 7; // payload length, or 126/127 when extended length bytes follow
			uint8_t MASK : 1;    // mask flag
		};
		uint8_t v; // whole header byte
	};
};
7442
7443
// Frames nLen bytes at pMessage as a single, final websocket frame with opcode 1
// and queues the header followed by the payload through MicroProfileSocketSend.
//
// Lengths up to 125 are stored directly in the header; larger ones use the
// 2-byte (payload == 126) or 8-byte (payload == 127) big-endian extended length.
// pMessage must be S.WSBuf.pBuffer, which has room for the header reserved in
// front of it. Always returns true.
bool MicroProfileWebSocketSend(MpSocket Connection, const char* pMessage, uint64_t nLen)
{
	MicroProfileWebSocketHeader0 h0;
	MicroProfileWebSocketHeader1 h1;
	h0.v = 0;
	h1.v = 0;
	h0.opcode = 1;
	h0.FIN = 1;

	uint32_t nExtraSizeBytes = 0;
	uint8_t nExtraSize[8];
	if(nLen <= 125)
	{
		h1.payload = nLen;
	}
	else
	{
		if(nLen > 0xffff)
		{
			nExtraSizeBytes = 8;
			h1.payload = 127;
		}
		else
		{
			h1.payload = 126;
			nExtraSizeBytes = 2;
		}

		// Store the length most-significant byte first.
		uint64_t nRemaining = nLen;
		for(uint32_t i = nExtraSizeBytes; i > 0; --i)
		{
			nExtraSize[i - 1] = nRemaining & 0xff;
			nRemaining >>= 8;
		}

		// Decode again to double check the encoding.
		uint32_t nSize = 0;
		for(uint32_t i = 0; i < nExtraSizeBytes; i++)
		{
			nSize <<= 8;
			nSize += nExtraSize[i];
		}
		MP_ASSERT(nSize == nLen); // verify
	}

	MP_ASSERT(pMessage == S.WSBuf.pBuffer);                // space for header is preallocated here
	MP_ASSERT(pMessage == S.WSBuf.pBufferAllocation + 20); // space for header is preallocated here
	MP_ASSERT(nExtraSizeBytes < 18);

	// The header is also written into the reserved space in front of the payload.
	char* pTmp = (char*)(pMessage - nExtraSizeBytes - 2);
	memcpy(pTmp + 2, &nExtraSize[0], nExtraSizeBytes);
	pTmp[1] = *(char*)&h1;
	pTmp[0] = *(char*)&h0;

	// The pieces are queued individually rather than as one contiguous send:
	// MicroProfileSocketSend(Connection, pTmp, nExtraSizeBytes + 2 + nLen);
	MicroProfileSocketSend(Connection, &h0, 1);
	MicroProfileSocketSend(Connection, &h1, 1);
	if(nExtraSizeBytes)
	{
		MicroProfileSocketSend(Connection, &nExtraSize[0], nExtraSizeBytes);
	}
	MicroProfileSocketSend(Connection, pMessage, nLen);
	return true;
}
7503
7504
void MicroProfileWebSocketClearTimers()
7505
{
7506
while(S.WebSocketTimers > -1)
7507
{
7508
int nNext = S.TimerInfo[S.WebSocketTimers].nWSNext;
7509
S.TimerInfo[S.WebSocketTimers].nWSNext = -2;
7510
S.WebSocketTimers = nNext;
7511
}
7512
MP_ASSERT(S.WebSocketTimers == -1);
7513
while(S.WebSocketCounters > -1)
7514
{
7515
int nNext = S.CounterInfo[S.WebSocketCounters].nWSNext;
7516
S.CounterInfo[S.WebSocketCounters].nWSNext = -2;
7517
S.WebSocketCounters = nNext;
7518
}
7519
MP_ASSERT(S.WebSocketCounters == -1);
7520
7521
while(S.WebSocketGroups > -1)
7522
{
7523
int nNext = S.GroupInfo[S.WebSocketGroups].nWSNext;
7524
S.GroupInfo[S.WebSocketGroups].nWSNext = -2;
7525
S.WebSocketGroups = nNext;
7526
}
7527
MP_ASSERT(S.WebSocketGroups == -1);
7528
S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
7529
}
7530
void MicroProfileWebSocketToggleTimer(uint32_t nTimer)
7531
{
7532
if(nTimer < S.nTotalTimers)
7533
{
7534
auto& TI = S.TimerInfo[nTimer];
7535
int* pPrev = &S.WebSocketTimers;
7536
while(*pPrev > -1 && *pPrev != (int)nTimer)
7537
{
7538
MP_ASSERT(*pPrev < (int)S.nTotalTimers && *pPrev >= 0);
7539
pPrev = &S.TimerInfo[*pPrev].nWSNext;
7540
}
7541
if(TI.nWSNext >= -1)
7542
{
7543
MP_ASSERT(*pPrev == (int)nTimer);
7544
*pPrev = TI.nWSNext;
7545
TI.nWSNext = -2;
7546
}
7547
else
7548
{
7549
MP_ASSERT(*pPrev == -1);
7550
TI.nWSNext = -1;
7551
*pPrev = (int)nTimer;
7552
}
7553
S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
7554
}
7555
}
7556
7557
void MicroProfileWebSocketToggleCounter(uint32_t nCounter)
7558
{
7559
if(nCounter < S.nNumCounters)
7560
{
7561
auto& TI = S.CounterInfo[nCounter];
7562
int* pPrev = &S.WebSocketCounters;
7563
while(*pPrev > -1 && *pPrev != (int)nCounter)
7564
{
7565
MP_ASSERT(*pPrev < (int)S.nNumCounters && *pPrev >= 0);
7566
pPrev = &S.CounterInfo[*pPrev].nWSNext;
7567
}
7568
if(TI.nWSNext >= -1)
7569
{
7570
MP_ASSERT(*pPrev == (int)nCounter);
7571
*pPrev = TI.nWSNext;
7572
TI.nWSNext = -2;
7573
}
7574
else
7575
{
7576
MP_ASSERT(*pPrev == -1);
7577
TI.nWSNext = -1;
7578
*pPrev = (int)nCounter;
7579
}
7580
S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
7581
}
7582
}
7583
7584
void MicroProfileWebSocketToggleGroup(uint32_t nGroup)
7585
{
7586
if(nGroup < S.nGroupCount)
7587
{
7588
auto& TI = S.GroupInfo[nGroup];
7589
int* pPrev = &S.WebSocketGroups;
7590
while(*pPrev > -1 && *pPrev != (int)nGroup)
7591
{
7592
MP_ASSERT(*pPrev < (int)S.nGroupCount && *pPrev >= 0);
7593
pPrev = &S.GroupInfo[*pPrev].nWSNext;
7594
}
7595
if(TI.nWSNext >= -1)
7596
{
7597
MP_ASSERT(*pPrev == (int)nGroup);
7598
*pPrev = TI.nWSNext;
7599
TI.nWSNext = -2;
7600
}
7601
else
7602
{
7603
MP_ASSERT(*pPrev == -1);
7604
TI.nWSNext = -1;
7605
*pPrev = (int)nGroup;
7606
}
7607
S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
7608
}
7609
}
7610
7611
bool MicroProfileWebSocketTimerEnabled(uint32_t nTimer)
7612
{
7613
if(nTimer < S.nTotalTimers)
7614
{
7615
return S.TimerInfo[nTimer].nWSNext > -2;
7616
}
7617
return false;
7618
}
7619
7620
bool MicroProfileWebSocketCounterEnabled(uint32_t nCounter)
7621
{
7622
if(nCounter < S.nNumCounters)
7623
{
7624
return S.CounterInfo[nCounter].nWSNext > -2;
7625
}
7626
return false;
7627
}
7628
void MicroProfileWebSocketCommand(uint32_t nCommand)
7629
{
7630
uint32_t nType, nElement;
7631
MicroProfileWebSocketIdUnpack(nCommand, nType, nElement);
7632
switch(nType)
7633
{
7634
case TYPE_NONE:
7635
break;
7636
case TYPE_SETTING:
7637
switch(nElement)
7638
{
7639
case SETTING_FORCE_ENABLE:
7640
MicroProfileSetEnableAllGroups(!MicroProfileGetEnableAllGroups());
7641
break;
7642
case SETTING_CONTEXT_SWITCH_TRACE:
7643
if(!S.bContextSwitchRunning)
7644
{
7645
MicroProfileStartContextSwitchTrace();
7646
}
7647
else
7648
{
7649
MicroProfileStopContextSwitchTrace();
7650
}
7651
break;
7652
case SETTING_PLATFORM_MARKERS:
7653
MicroProfilePlatformMarkersSetEnabled(!MicroProfilePlatformMarkersGetEnabled());
7654
break;
7655
}
7656
S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
7657
break;
7658
case TYPE_TIMER:
7659
MicroProfileWebSocketToggleTimer(nElement);
7660
break;
7661
case TYPE_GROUP:
7662
MicroProfileToggleGroup(nElement);
7663
break;
7664
case TYPE_CATEGORY:
7665
MicroProfileToggleCategory(nElement);
7666
break;
7667
case TYPE_COUNTER:
7668
MicroProfileWebSocketToggleCounter(nElement);
7669
break;
7670
default:
7671
uprintf("unknown type %d\n", nType);
7672
}
7673
}
7674
#define MICROPROFILE_PRESET_HEADER_MAGIC2 0x28586813
7675
#define MICROPROFILE_PRESET_HEADER_VERSION2 0x00000200
7676
7677
// Header record of a binary settings file: six consecutive 32-bit fields.
// NOTE(review): presumably nMagic/nVersion correspond to
// MICROPROFILE_PRESET_HEADER_MAGIC2/VERSION2 - confirm against the reader/writer.
struct MicroProfileSettingsFileHeader
{
	uint32_t nMagic;         // file magic
	uint32_t nVersion;       // format version
	uint32_t nNumHeaders;    // number of MicroProfileSettingsHeader entries
	uint32_t nHeadersOffset; // offset of the header entries
	uint32_t nMaxJsonSize;   // largest json size among the entries
	uint32_t nMaxNameSize;   // largest name size among the entries
};
7686
// Location of one named settings entry: offset/size pairs for its json text and
// for its name.
struct MicroProfileSettingsHeader
{
	uint32_t nJsonOffset; // offset of the json text
	uint32_t nJsonSize;   // size of the json text
	uint32_t nNameOffset; // offset of the name
	uint32_t nNameSize;   // size of the name
};
7693
7694
template <typename T>
7695
void MicroProfileParseSettings(const char* pFileName, T CB)
7696
{
7697
std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
7698
7699
FILE* F = fopen(pFileName, "rb");
7700
if(!F)
7701
{
7702
return;
7703
}
7704
long nFileSize = 0;
7705
fseek(F, 0, SEEK_END);
7706
nFileSize = ftell(F);
7707
char* pFile = 0;
7708
char* pAlloc = 0;
7709
if(nFileSize > (32 << 10))
7710
{
7711
pFile = pAlloc = (char*)MP_ALLOC(nFileSize + 1, 1);
7712
}
7713
else
7714
{
7715
pFile = (char*)alloca(nFileSize + 1);
7716
}
7717
fseek(F, 0, SEEK_SET);
7718
if(1 != fread(pFile, nFileSize, 1, F))
7719
{
7720
uprintf("failed to read settings file\n");
7721
fclose(F);
7722
return;
7723
}
7724
fclose(F);
7725
pFile[nFileSize] = '\0';
7726
7727
char* pPos = pFile;
7728
char* pEnd = pFile + nFileSize;
7729
7730
while(pPos != pEnd)
7731
{
7732
const char* pName = 0;
7733
int nNameLen = 0;
7734
const char* pJson = 0;
7735
int nJsonLen = 0;
7736
int Failed = 0;
7737
7738
auto SkipWhite = [&](char* pPos, const char* pEnd)
7739
{
7740
while(pPos != pEnd)
7741
{
7742
if(isspace(*pPos))
7743
{
7744
pPos++;
7745
}
7746
else if('#' == *pPos)
7747
{
7748
while(pPos != pEnd && *pPos != '\n')
7749
{
7750
++pPos;
7751
}
7752
}
7753
else
7754
{
7755
break;
7756
}
7757
}
7758
return pPos;
7759
};
7760
7761
auto ParseName = [&](char* pPos, char* pEnd, const char** ppName, int* pLen)
7762
{
7763
pPos = SkipWhite(pPos, pEnd);
7764
int nLen = 0;
7765
*ppName = pPos;
7766
7767
while(pPos != pEnd && (isalpha(*pPos) || isdigit(*pPos) || *pPos == '_'))
7768
{
7769
nLen++;
7770
pPos++;
7771
}
7772
*pLen = nLen;
7773
if(pPos == pEnd || !isspace(*pPos))
7774
{
7775
Failed = 1;
7776
return pEnd;
7777
}
7778
*pPos++ = '\0';
7779
return pPos;
7780
};
7781
7782
auto ParseJson = [&](char* pPos, char* pEnd, const char** pJson, int* pLen) -> char*
7783
{
7784
pPos = SkipWhite(pPos, pEnd);
7785
if(*pPos != '{' || pPos == pEnd)
7786
{
7787
Failed = 1;
7788
return pPos;
7789
}
7790
*pJson = pPos++;
7791
int nLen = 1;
7792
int nDepth = 1;
7793
while(pPos != pEnd && nDepth != 0)
7794
{
7795
nLen++;
7796
char nChar = *pPos++;
7797
if(nChar == '{')
7798
{
7799
nDepth++;
7800
}
7801
else if(nChar == '}')
7802
{
7803
nDepth--;
7804
}
7805
}
7806
if(pPos == pEnd || !isspace(*pPos))
7807
{
7808
Failed = 1;
7809
return pEnd;
7810
}
7811
*pLen = nLen;
7812
*pPos++ = '\0';
7813
return pPos;
7814
};
7815
7816
pPos = ParseName(pPos, pEnd, &pName, &nNameLen);
7817
pPos = ParseJson(pPos, pEnd, &pJson, &nJsonLen);
7818
if(Failed)
7819
{
7820
break;
7821
}
7822
if(!CB(pName, nNameLen, pJson, nJsonLen))
7823
{
7824
break;
7825
}
7826
}
7827
if(pAlloc)
7828
MP_FREE(pAlloc);
7829
}
7830
7831
bool MicroProfileSavePresets(const char* pSettingsName, const char* pJsonSettings)
7832
{
7833
std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
7834
7835
FILE* F = fopen(S.pSettingsTemp, "w");
7836
if(!F)
7837
{
7838
return false;
7839
}
7840
7841
bool bWritten = false;
7842
7843
MicroProfileParseSettings(S.pSettings,
7844
[&](const char* pName, uint32_t nNameSize, const char* pJson, uint32_t nJsonSize) -> bool
7845
{
7846
fwrite(pName, nNameSize, 1, F);
7847
fputc(' ', F);
7848
if(0 != MP_STRCASECMP(pSettingsName, pName))
7849
{
7850
fwrite(pJson, nJsonSize, 1, F);
7851
}
7852
else
7853
{
7854
bWritten = true;
7855
fwrite(pJsonSettings, strlen(pJsonSettings), 1, F);
7856
}
7857
fputc('\n', F);
7858
return true;
7859
});
7860
if(!bWritten)
7861
{
7862
fwrite(pSettingsName, strlen(pSettingsName), 1, F);
7863
fputc(' ', F);
7864
fwrite(pJsonSettings, strlen(pJsonSettings), 1, F);
7865
fputc('\n', F);
7866
}
7867
fflush(F);
7868
fclose(F);
7869
#ifdef MICROPROFILE_MOVE_FILE
7870
MICROPROFILE_MOVE_FILE(S.pSettingsTemp, S.pSettings);
7871
#elif defined(_WIN32)
7872
MoveFileExA(S.pSettingsTemp, S.pSettings, MOVEFILE_REPLACE_EXISTING);
7873
#else
7874
rename(S.pSettingsTemp, S.pSettings);
7875
#endif
7876
return false;
7877
}
7878
7879
void MicroProfileWriteJsonString(const char* pJson, uint32_t nJsonLen)
7880
{
7881
char* pCur = (char*)pJson;
7882
char* pEnd = pCur + nJsonLen;
7883
MicroProfileWSPrintf("\"", pCur);
7884
while(pCur != pEnd)
7885
{
7886
char* pTag = strchr(pCur, '\"');
7887
if(pTag)
7888
{
7889
*pTag = '\0';
7890
MicroProfileWSPrintf("%s\\\"", pCur);
7891
*pTag = '\"';
7892
pCur = pTag + 1;
7893
}
7894
else
7895
{
7896
MicroProfileWSPrintf("%s\"", pCur);
7897
pCur = pEnd;
7898
}
7899
}
7900
};
7901
7902
// Sends one MSG_PRESETS message to Connection. The payload has two objects:
//   "p": presets parsed from S.pSettings, preceded by a built-in empty "Default" entry
//   "r": presets parsed from S.pSettingsReadOnly
// Each preset's JSON is sent as an escaped string via MicroProfileWriteJsonString.
void MicroProfileWebSocketSendPresets(MpSocket Connection)
{
    std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
    uprintf("sending presets ... \n");
    MicroProfileWSPrintStart(Connection);
    MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{", MSG_PRESETS);

    // writable presets; "Default" is always first, so every parsed entry gets a leading comma
    MicroProfileWSPrintf("\"p\":{\"Default\":\"{}\"");
    MicroProfileParseSettings(S.pSettings,
                              [](const char* pPresetName, uint32_t /*nNameLen*/, const char* pPresetJson, uint32_t nPresetJsonLen)
                              {
                                  MicroProfileWSPrintf(",\"%s\":", pPresetName);
                                  MicroProfileWriteJsonString(pPresetJson, nPresetJsonLen);
                                  return true;
                              });

    // read-only presets; the first entry is prefixed by a space, later ones by a comma
    MicroProfileWSPrintf("},\"r\":{");
    bool bFirst = true;
    MicroProfileParseSettings(S.pSettingsReadOnly,
                              [&bFirst](const char* pPresetName, uint32_t /*nNameLen*/, const char* pPresetJson, uint32_t nPresetJsonLen)
                              {
                                  const char cSeparator = bFirst ? ' ' : ',';
                                  MicroProfileWSPrintf("%c\"%s\":", cSeparator, pPresetName);
                                  MicroProfileWriteJsonString(pPresetJson, nPresetJsonLen);
                                  bFirst = false;
                                  return true;
                              });

    MicroProfileWSPrintf("}}}");
    MicroProfileWSFlush();
    MicroProfileWSPrintEnd();
}
7933
7934
#define LOAD_PRESET_DEFAULT 0x1
7935
#define LOAD_PRESET_READONLY 0x2
7936
7937
void MicroProfileLoadPresets(const char* pSettingsName, uint32_t nLoadPresetType)
7938
{
7939
std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
7940
const char* pPresetFiles[] = { S.pSettings, S.pSettingsReadOnly };
7941
for(uint32_t i = 0; i < 2; ++i)
7942
{
7943
if(nLoadPresetType & (1u << i))
7944
{
7945
const char* pPresetFile = pPresetFiles[i];
7946
bool bReadOnly = (1u << i) == LOAD_PRESET_READONLY;
7947
bool bSuccess = false;
7948
MicroProfileParseSettings(pPresetFile,
7949
[&bSuccess, bReadOnly, pSettingsName](const char* pName, uint32_t l0, const char* pJson, uint32_t l1)
7950
{
7951
if(0 == MP_STRCASECMP(pName, pSettingsName))
7952
{
7953
uint32_t nLen = (uint32_t)strlen(pJson) + 1;
7954
if(nLen > S.nJsonSettingsBufferSize)
7955
{
7956
if(S.pJsonSettings)
7957
S.pJsonSettings = nullptr;
7958
S.pJsonSettings = (char*)MP_ALLOC(nLen, 1);
7959
S.nJsonSettingsBufferSize = nLen;
7960
}
7961
S.pJsonSettingsName = pSettingsName;
7962
memcpy(S.pJsonSettings, pJson, nLen);
7963
S.nJsonSettingsPending = 1;
7964
S.bJsonSettingsReadOnly = bReadOnly ? 1 : 0;
7965
bSuccess = true;
7966
return false;
7967
}
7968
return true;
7969
});
7970
if(bSuccess)
7971
return;
7972
}
7973
}
7974
}
7975
7976
// Reads one WebSocket frame from Connection and dispatches on the first payload byte.
//
// Returns false when the connection should be dropped: a header/length/mask/payload
// read failed or came up short, a frame with h0.v == 0x88 arrived, a reserved bit was
// set, the 64-bit length had its top bit set, or the receive buffer could not be grown.
// Returns true after a frame was consumed, including frames with an unknown command.
//
// The payload is stored NUL-terminated in a function-static buffer that is reused
// between calls.
bool MicroProfileWebSocketReceive(MpSocket Connection)
{

    //  0                   1                   2                   3
    //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    // +-+-+-+-+-------+-+-------------+-------------------------------+
    // |F|R|R|R| opcode|M| Payload len |    Extended payload length    |
    // |I|S|S|S|  (4)  |A|     (7)     |             (16/64)           |
    // |N|V|V|V|       |S|             |   (if payload len==126/127)   |
    // | |1|2|3|       |K|             |                               |
    // +-+-+-+-+-------+-+-------------+ - - - - - - - - - - - - - - - +

    // All function-scope locals are declared before the first `goto fail` so that no
    // jump crosses an initialization.
    int r;
    uint64_t nSize;
    uint64_t nSizeBytes = 0;
    uint64_t nReceived = 0;
    // Zero mask makes the XOR below a no-op for frames that carry no mask.
    uint8_t Mask[4] = { 0, 0, 0, 0 };
    static unsigned char* Bytes = 0;
    static uint64_t BytesAllocated = 0;
    MicroProfileWebSocketHeader0 h0;
    MicroProfileWebSocketHeader1 h1;
    static_assert(sizeof(h0) == 1, "");
    static_assert(sizeof(h1) == 1, "");
    r = recv(Connection, (char*)&h0, 1, 0);
    if(1 != r)
        goto fail;
    r = recv(Connection, (char*)&h1, 1, 0);
    if(1 != r)
        goto fail;

    // presumably FIN + close opcode (0x8) per the diagram above
    if(h0.v == 0x88)
    {
        goto fail;
    }

    if(h0.RSV1 != 0 || h0.RSV2 != 0 || h0.RSV3 != 0)
        goto fail;

    nSize = h1.payload;
    nSizeBytes = 0;
    switch(nSize)
    {
    case 126:
        nSizeBytes = 2;
        break;
    case 127:
        nSizeBytes = 8;
        break;
    default:
        break;
    }
    if(nSizeBytes)
    {
        // extended length, big-endian
        uint8_t BytesMessage[8];
        r = recv(Connection, (char*)&BytesMessage[0], (int)nSizeBytes, 0);
        if((int)nSizeBytes != r)
            goto fail;
        nSize = 0;
        for(uint32_t i = 0; i < nSizeBytes; i++)
        {
            nSize <<= 8;
            nSize += BytesMessage[i];
        }
        // RFC 6455: the most significant bit of a 64-bit length must be 0. Rejecting
        // it also keeps nSize + 1 below from wrapping.
        if(nSize >> 63)
            goto fail;
    }

    if(h1.MASK)
    {
        r = recv(Connection, (char*)&Mask[0], 4, 0);
        if(4 != r)
            goto fail;
    }

    MICROPROFILE_COUNTER_LOCAL_ADD_ATOMIC(g_MicroProfileBytesPerFlip, nSize);
    if(nSize + 1 > BytesAllocated)
    {
        // +1 for the NUL terminator appended after the payload
        unsigned char* pGrown = (unsigned char*)MP_REALLOC(Bytes, nSize + 1);
        if(!pGrown)
            goto fail;
        Bytes = pGrown;
        BytesAllocated = nSize + 1;
    }

    // recv may return fewer bytes than asked for; keep reading until the whole
    // payload is in. A short or failed read desynchronizes the stream, so give up.
    while(nReceived < nSize)
    {
        const uint64_t nRemaining = nSize - nReceived;
        const int nChunk = nRemaining > (uint64_t)(1 << 30) ? (1 << 30) : (int)nRemaining;
        r = recv(Connection, (char*)Bytes + nReceived, nChunk, 0);
        if(r <= 0)
            goto fail;
        nReceived += (uint64_t)r;
    }
    for(uint64_t i = 0; i < nSize; ++i)
        Bytes[i] ^= Mask[i & 3];

    Bytes[nSize] = '\0';
    switch(Bytes[0])
    {
    case 'a':
    {
        S.nAggregateFlip = strtoll((const char*)&Bytes[1], nullptr, 10);
    }
    break;
    case 's':
    {
        // "s<name>,<json>": split at the first comma
        char* pJson = strchr((char*)Bytes, ',');
        if(pJson && *pJson != '\0')
        {
            *pJson = '\0';
            MicroProfileSavePresets((const char*)Bytes + 1, (const char*)pJson + 1);
        }
        break;
    }

    case 'l':
    {
        MicroProfileLoadPresets((const char*)Bytes + 1, LOAD_PRESET_DEFAULT);
        break;
    }
    case 'm':
    {
        MicroProfileLoadPresets((const char*)Bytes + 1, LOAD_PRESET_READONLY);
        break;
    }
    case 'd':
    {
        MicroProfileWebSocketClearTimers();
        memset(&S.nActiveGroupsWanted, 0, sizeof(S.nActiveGroupsWanted));
        S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
        break;
    }
    case 'c':
    {
        char* pStr = (char*)Bytes + 1;
        uint32_t Message = strtol(pStr, nullptr, 10);
        MicroProfileWebSocketCommand(Message);
    }
    break;
    case 'f':
        MicroProfileToggleFrozen();
        break;
    case 'v':
        S.nWSViewMode = (int)Bytes[1] - '0';
        break;
    case 'r':
        uprintf("got clear message\n");
        S.nAggregateClear = 1;
        break;
    case 'x':
        MicroProfileWebSocketClearTimers();
        break;
#if MICROPROFILE_DYNAMIC_INSTRUMENT
    case 'D': // instrumentation without loading queryable symbols.
    {
        uprintf("got INSTRUMENT Message: %s\n", (const char*)&Bytes[0]);
        char* pGet = (char*)&Bytes[1];
        uint32_t nNumArguments = 0;
#ifdef _WIN32
        r = sscanf_s(pGet, "%d", &nNumArguments);
#else
        r = sscanf(pGet, "%d", &nNumArguments);
#endif
        if(r != 1)
        {
            uprintf("failed to parse..\n");
            break;
        }
        // skip the count and any spaces in front of the first module name
        while(' ' == *pGet || (*pGet >= '0' && *pGet <= '9'))
        {
            pGet++;
        }
        // bounds the alloca sizes below
        if(nNumArguments > 200)
            nNumArguments = 200;
        uint32_t nParsedArguments = 0;
        const char* pModule = 0;
        const char* pSymbol = 0;
        const char** pModules = (const char**)(alloca(sizeof(const char*) * nNumArguments));
        const char** pSymbols = (const char**)(alloca(sizeof(const char*) * nNumArguments));
        // Returns the next '!'-terminated token (terminated in place), or 0 when none is left.
        auto Next = [&pGet]() -> const char*
        {
            if(!pGet)
                return 0;
            const char* pRet = pGet;
            pGet = (char*)strchr(pRet, '!');
            if(!pGet)
            {
                return 0;
            }
            *pGet++ = '\0';
            return (const char*)pRet;
        };
        do
        {
            pModule = Next();
            pSymbol = Next();
            if(pModule && pSymbol)
            {
                pModules[nParsedArguments] = pModule;
                pSymbols[nParsedArguments] = pSymbol;
                uprintf("found symbol %s ::: %s \n", pModule, pSymbol);
                nParsedArguments++;
                if(nParsedArguments == nNumArguments)
                {
                    break;
                }
            }
        } while(pGet);

        MicroProfileInstrumentWithoutSymbols(pModules, pSymbols, nParsedArguments);

        break;
    }
    case 'I':
    case 'i':
    {
        uprintf("got Message: %s\n", (const char*)&Bytes[0]);
        void* p = 0;
        uint32_t nColor = 0x0;
        int nMinBytes = 0;
        int nMaxCalls = 0;
        int nCharsRead = 0;
#ifdef _WIN32
        r = sscanf_s((const char*)&Bytes[1], "%p %x %d %d%n", &p, &nColor, &nMinBytes, &nMaxCalls, &nCharsRead);
#else
        r = sscanf((const char*)&Bytes[1], "%p %x %d %d%n", &p, &nColor, &nMinBytes, &nMaxCalls, &nCharsRead);
#endif
        if(r == 4)
        {
            const char* pModule = (const char*)&Bytes[1];
            // int nNumChars = stbsp_snprintf(0, 0, "%p %x", p, nColor);
            pModule += nCharsRead;
            while(*pModule != ' ' && *pModule != '\0')
                ++pModule;

            if(*pModule == '\0')
                break;

            pModule++;
            const char* pName = pModule;
            while(*pName != '!' && *pName != '\0')
            {
                pName++;
            }
            if(*pName == '!')
            {
                // name and module separately
                *(char*)pName = '\0';
                pName++;
            }
            else
            {
                // name only
                pName = pModule;
                pModule = "";
            }

            uprintf("scanning for ptr %p %x mod:'%s' name'%s'\n", p, nColor, pModule, pName);
            if(Bytes[0] == 'I')
            {
                MicroProfileInstrumentFunctionsCalled(p, pModule, pName, nMinBytes, nMaxCalls);
            }
            else
            {
                MicroProfileInstrumentFunction(p, pModule, pName, nColor);
            }
        }
    }
    break;
    case 'S':
        uprintf("loading symbols...\n");
        MicroProfileSymbolInitialize(true);
        break;
    case 'q':
        MicroProfileSymbolQueryFunctions(Connection, 1 + (const char*)Bytes);
        break;
    case 'L':
        uprintf("LOAD MODULE: '%s'\n", 1 + (const char*)Bytes);
        MicroProfileSymbolInitialize(true, 1 + (const char*)Bytes);
        break;
#else
    case 'D':
    case 'I':
    case 'i':
    case 'S':
    case 'q':
    case 'L':
        break;
#endif
    default:
        uprintf("got unknown message size %lld: '%s'\n", (long long)nSize, Bytes);
    }
    return true;

fail:
    return false;
}
8262
void MicroProfileWebSocketSendPresets(MpSocket Connection);
8263
8264
// Completes the WebSocket upgrade for Connection and registers it.
//
// The accept token is base64(SHA1(pWebSocketKey + GUID)). After the 101 reply is sent,
// the socket is appended to S.WebSockets, the per-connection "sent" counters are
// reset, and the initial state and presets are sent. On the first connection the
// "Default" preset is loaded; on later connections the current settings (if any) are
// sent again.
void MicroProfileWebSocketHandshake(MpSocket Connection, char* pWebSocketKey)
{
    // reset web socket buffer
    S.WSBuf.nSendPut.store(0);
    S.WSBuf.nSendGet.store(0);
    S.nSocketFail = 0;

    const char* pMagicGuid = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11";
    const char* pResponseHead = "HTTP/1.1 101 Switching Protocols\r\n"
                                "Upgrade: websocket\r\n"
                                "Connection: Upgrade\r\n"
                                "Sec-WebSocket-Accept: ";

    // key + GUID is the SHA-1 input
    char KeyAndGuid[512];
    int nLen = stbsp_snprintf(KeyAndGuid, sizeof(KeyAndGuid) - 1, "%s%s", pWebSocketKey, pMagicGuid);
    // uprintf("encode buffer is '%s' %d, %d\n", KeyAndGuid, nLen, (int)strlen(KeyAndGuid));

    uint8_t Digest[20];
    MicroProfile_SHA1_CTX Sha1;
    MicroProfile_SHA1_Init(&Sha1);
    MicroProfile_SHA1_Update(&Sha1, (unsigned char*)KeyAndGuid, nLen);
    MicroProfile_SHA1_Final((unsigned char*)&Digest[0], &Sha1);

    char AcceptToken[(2 + sizeof(Digest) / 3) * 4];
    memset(&AcceptToken[0], 0, sizeof(AcceptToken));
    MicroProfileBase64Encode(&AcceptToken[0], &Digest[0], sizeof(Digest));

    char Reply[11024];
    nLen = stbsp_snprintf(Reply, sizeof(Reply) - 1, "%s%s\r\n\r\n", pResponseHead, AcceptToken);
    MP_ASSERT(nLen >= 0);
    MicroProfileSocketSend(Connection, Reply, nLen);
    S.WebSockets[S.nNumWebSockets++] = Connection;

    // nothing has been sent on this connection yet
    S.WSCategoriesSent = 0;
    S.WSGroupsSent = 0;
    S.WSTimersSent = 0;
    S.WSCountersSent = 0;
    S.nJsonSettingsPending = 0;
#if MICROPROFILE_DYNAMIC_INSTRUMENT
    S.WSFunctionsInstrumentedSent = 0;
    S.WSSymbolModulesSent = 0;
    {
        uint64_t nTickBegin = MP_TICK();
        MicroProfileSymbolUpdateModuleList();
        uint64_t nTickEnd = MP_TICK();
        float fElapsedMs = float(MicroProfileTickToMsMultiplierCpu()) * (nTickEnd - nTickBegin);
        (void)fElapsedMs;
        uprintf("update module list time %6.2fms\n", fElapsedMs);
    }
#endif

    MicroProfileWebSocketSendState(Connection);
    MicroProfileWebSocketSendPresets(Connection);
    if(S.nWSWasConnected)
    {
        // reconnect: tell the client where we are
#if MICROPROFILE_DYNAMIC_INSTRUMENT
        MicroProfileWSPrintStart(Connection);
        MicroProfileWSPrintf("{\"k\":\"%d\",\"qp\":%d}", MSG_QUERY_INDEX, S.nQueryProcessed);
        MicroProfileWSFlush();
        MicroProfileWSPrintEnd();
#endif
        if(S.pJsonSettings)
        {
            const char* pCurrentName = S.pJsonSettingsName ? S.pJsonSettingsName : "";
            MicroProfileWSPrintStart(Connection);
            MicroProfileWSPrintf("{\"k\":\"%d\",\"ro\":%d,\"name\":\"%s\",\"v\":%s}", MSG_CURRENTSETTINGS, S.bJsonSettingsReadOnly ? 1 : 0, pCurrentName, S.pJsonSettings);
            MicroProfileWSFlush();
            MicroProfileWSPrintEnd();
        }
    }
    else
    {
        // first connection: load the default preset
        S.nWSWasConnected = 1;
        MicroProfileLoadPresets("Default", LOAD_PRESET_DEFAULT | LOAD_PRESET_READONLY);
    }
}
8340
8341
void MicroProfileWebSocketSendCounters()
8342
{
8343
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileWebSocketSendCounters", MP_GREEN4);
8344
if(S.nWSViewMode == VIEW_COUNTERS)
8345
{
8346
MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":[", MSG_COUNTERS);
8347
for(uint32_t i = 0; i < S.nNumCounters; ++i)
8348
{
8349
bool IsDouble = (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) != 0;
8350
if(IsDouble)
8351
{
8352
double dCounter = S.CountersDouble[i].load();
8353
MicroProfileWSPrintf("%c%f", i == 0 ? ' ' : ',', dCounter);
8354
}
8355
else
8356
{
8357
uint64_t nCounter = S.Counters[i].load();
8358
MicroProfileWSPrintf("%c%lld", i == 0 ? ' ' : ',', nCounter);
8359
}
8360
}
8361
MicroProfileWSPrintf("]}");
8362
MicroProfileWSFlush();
8363
}
8364
}
8365
8366
#if MICROPROFILE_DYNAMIC_INSTRUMENT
8367
void MicroProfileSymbolSendModuleState()
8368
{
8369
if(S.WSSymbolModulesSent != S.SymbolNumModules || S.nSymbolsDirty.load()) // todo: tag when modulestate is updated.
8370
{
8371
S.nSymbolsDirty.exchange(0);
8372
MicroProfileWSPrintf(",\"M\":[");
8373
bool bFirst = true;
8374
for(int i = 0; i < S.SymbolNumModules; ++i)
8375
{
8376
MicroProfileSymbolModule& M = S.SymbolModules[i];
8377
const char* pModuleName = (const char*)M.pBaseString;
8378
uint64_t nAddrBegin = M.Regions[0].nBegin;
8379
// intptr_t nProgress = M.nProgress;
8380
intptr_t nProgressTarget = M.nProgressTarget;
8381
nProgressTarget = MicroProfileMax(intptr_t(1), M.nProgressTarget);
8382
// nProgress = MicroProfileMin(nProgressTarget, M.nProgress);
8383
float fLoadPrc = M.nProgress / float(nProgressTarget);
8384
uint64_t nNumSymbols = M.nSymbolsLoaded;
8385
#define FMT "{\"n\":\"%s\",\"a\":\"%llx\",\"s\":\"%lld\", \"p\":%f, \"d\":%d}"
8386
MicroProfileWSPrintf(bFirst ? FMT : ("," FMT), pModuleName, nAddrBegin, nNumSymbols, fLoadPrc, M.bDownloading ? 1 : 0);
8387
#undef FMT
8388
bFirst = false;
8389
}
8390
MicroProfileWSPrintf("]");
8391
S.WSSymbolModulesSent = S.SymbolNumModules;
8392
}
8393
}
8394
#endif
8395
8396
// Sends the per-frame update to Connection.
//
// When a new frame is available (or the profiler is frozen) a MSG_FRAME message is
// sent with frame time, group timings, optional per-thread group timings, and — only
// when the frame is new — the values of the subscribed timers and counters. Otherwise
// a short MSG_INACTIVE_FRAME message is sent. Both paths are followed by the counter
// view message, and with dynamic instrumentation enabled by pending query results,
// function names and errors.
void MicroProfileWebSocketSendFrame(MpSocket Connection)
{
    const bool bNewFrame = S.nFrameCurrent != S.WebSocketFrameLast[0];
    if(bNewFrame || S.nFrozen)
    {
        MicroProfileWebSocketSendState(Connection);
        MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileWebSocketSendFrame", MP_GREEN4);
        MicroProfileWSPrintStart(Connection);
        float fTickToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
        float fTickToMsGpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
        MicroProfileFrameState* pCurrent = &S.Frames[S.nFrameCurrent];
        MicroProfileFrameState* pNext = &S.Frames[S.nFrameNext];

        uint64_t nFrameTicks = pNext->nFrameStartCpu - pCurrent->nFrameStartCpu;
        uint64_t nFrameId = pCurrent->nFrameId;
        double fFrameMs = nFrameTicks * fTickToMsCpu;
        MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"t\":%f,\"f\":%lld,\"a\":%d,\"fr\":%d,\"m\":%d", MSG_FRAME, fFrameMs, nFrameId, MicroProfileGetCurrentAggregateFrames(), S.nFrozen, S.nWSViewMode);
#if MICROPROFILE_DYNAMIC_INSTRUMENT
        MicroProfileWSPrintf(",\"s\":{\"n\":%d,\"f\":%d,\"r\":%d,\"l\":%d,\"q\":%d}",
                             S.SymbolNumModules,
                             S.SymbolState.nModuleLoadsFinished.load(),
                             S.SymbolState.nModuleLoadsRequested.load(),
                             S.SymbolState.nSymbolsLoaded.load(),
                             S.pPendingQuery ? 1 : 0);
        MicroProfileSymbolSendModuleState();
#endif

        // Writes [incl ms, excl ms, count] for every group with non-zero exclusive ticks.
        auto WriteTickArray = [fTickToMsCpu, fTickToMsGpu](MicroProfile::GroupTime* pGroups)
        {
            MicroProfileWSPrintf("[");
            bool bAny = false;
            for(uint32_t nGroup = 0; nGroup < MICROPROFILE_MAX_GROUPS; ++nGroup)
            {
                uint64_t nTicksExcl = pGroups[nGroup].nTicksExclusive;
                if(!nTicksExcl)
                {
                    continue;
                }
                uint64_t nTicks = pGroups[nGroup].nTicks;
                float fCount = (float)pGroups[nGroup].nCount;
                float fToMs = S.GroupInfo[nGroup].Type == MicroProfileTokenTypeCpu ? fTickToMsCpu : fTickToMsGpu;

                MicroProfileWSPrintf("%c[%f,%f,%f]", bAny ? ',' : ' ', nTicks * fToMs, nTicksExcl * fToMs, fCount);
                bAny = true;
            }
            MicroProfileWSPrintf("]");
        };
        // Writes the packed group ids for the same groups, in the same order.
        auto WriteIndexArray = [](MicroProfile::GroupTime* pGroups)
        {
            MicroProfileWSPrintf("[");
            bool bAny = false;
            for(uint32_t nGroup = 0; nGroup < MICROPROFILE_MAX_GROUPS; ++nGroup)
            {
                uint64_t nTicksExcl = pGroups[nGroup].nTicksExclusive;
                if(!nTicksExcl)
                {
                    continue;
                }
                uint32_t id = MicroProfileWebSocketIdPack(TYPE_GROUP, nGroup);
                MicroProfileWSPrintf("%c%d", bAny ? ',' : ' ', id);
                bAny = true;
            }
            MicroProfileWSPrintf("]");
        };

        MicroProfileWSPrintf(",\"g\":");
        WriteTickArray(S.FrameGroup);
        MicroProfileWSPrintf(",\"gi\":");
        WriteIndexArray(S.FrameGroup);
        if(S.nWSViewMode == VIEW_GRAPH_THREAD_GROUP)
        {
            MicroProfileWSPrintf(",\"gt\":[");
            bool bAnyThread = false;
            for(uint32_t nThread = 0; nThread < MICROPROFILE_MAX_THREADS; ++nThread)
            {
                if(0 == (S.FrameGroupThreadValid[nThread / 32] & (1 << (nThread % 32))))
                {
                    continue;
                }
                if(bAnyThread)
                    MicroProfileWSPrintf(",{");
                else
                    MicroProfileWSPrintf("{");
                MicroProfileThreadLog* pLog = S.Pool[nThread];
                MicroProfileWSPrintf("\"i\":%d,\"n\":\"%s\",\"g\":", nThread, pLog->ThreadName);
                WriteTickArray(&S.FrameGroupThread[nThread][0]);
                MicroProfileWSPrintf(",\"gi\":");
                WriteIndexArray(&S.FrameGroupThread[nThread][0]);
                MicroProfileWSPrintf("}");
                bAnyThread = true;
            }
            MicroProfileWSPrintf("]");
        }

        if(S.nFrameCurrent != S.WebSocketFrameLast[0])
        {
            // subscribed timers: walk the linked list threaded through nWSNext
            MicroProfileWSPrintf(",\"x\":{\"t\":{");
            int nTimer = S.WebSocketTimers;
            while(nTimer >= 0)
            {
                MicroProfileTimerInfo& TI = S.TimerInfo[nTimer];
                float fTickToMs = TI.Type == MicroProfileTokenTypeGpu ? fTickToMsGpu : fTickToMsCpu;
                uint32_t id = MicroProfileWebSocketIdPack(TYPE_TIMER, nTimer);
                double fTimerMs = fTickToMs * S.Frame[nTimer].nTicks;
                float fCount = (float)S.Frame[nTimer].nCount;
                float fTimerExclMs = fTickToMs * S.FrameExclusive[nTimer];
                if(!MicroProfileGroupActive(TI.nGroupIndex))
                {
                    // inactive groups report zeros
                    fTimerMs = fCount = fTimerExclMs = 0.f;
                }
                const int nTimerNext = TI.nWSNext;
                MicroProfileWSPrintf("\"%d\":[%f,%f,%f]%c", id, fTimerMs, fTimerExclMs, fCount, nTimerNext == -1 ? ' ' : ',');
                nTimer = nTimerNext;
            }

            // subscribed counters: same list scheme
            MicroProfileWSPrintf("}, \"c\":{");
            int nCounter = S.WebSocketCounters;
            while(nCounter >= 0)
            {
                MicroProfileCounterInfo& CI = S.CounterInfo[nCounter];
                uint32_t id = MicroProfileWebSocketIdPack(TYPE_COUNTER, nCounter);
                int nCounterNext = CI.nWSNext;
                const char cTail = nCounterNext < 0 ? ' ' : ',';
                if(0 != (CI.nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE))
                {
                    double value = S.CountersDouble[nCounter].load();
                    MicroProfileWSPrintf("\"%d\":%f%c", id, value, cTail);
                }
                else
                {
                    uint64_t value = S.Counters[nCounter].load();
                    MicroProfileWSPrintf("\"%d\":%lld%c", id, value, cTail);
                }
                nCounter = nCounterNext;
            }
            // "g" is currently always sent empty
            MicroProfileWSPrintf("}, \"g\":{");
            MicroProfileWSPrintf("}}");
        }
        MicroProfileWSPrintf("}}");
        MicroProfileWSFlush();
        MicroProfileWebSocketSendCounters();
        MicroProfileWSPrintEnd();
        S.WebSocketFrameLast[0] = S.nFrameCurrent;
    }
    else
    {
        MicroProfileWSPrintStart(Connection);
        MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"fr\":%d,\"m\":%d", MSG_INACTIVE_FRAME, S.nFrozen, S.nWSViewMode);
#if MICROPROFILE_DYNAMIC_INSTRUMENT
        MicroProfileWSPrintf(",\"s\":{\"n\":%d,\"f\":%d,\"r\":%d,\"l\":%d,\"q\":%d}",
                             S.SymbolNumModules,
                             S.SymbolState.nModuleLoadsFinished.load(),
                             S.SymbolState.nModuleLoadsRequested.load(),
                             S.SymbolState.nSymbolsLoaded.load(),
                             S.pPendingQuery ? 1 : 0);
#endif
        MicroProfileWSPrintf("}}");
        MicroProfileWSFlush();
        MicroProfileWebSocketSendCounters();
        MicroProfileWSPrintEnd();
    }
#if MICROPROFILE_DYNAMIC_INSTRUMENT
    MicroProfileSymbolQuerySendResult(Connection);
    MicroProfileSymbolSendFunctionNames(Connection);
    MicroProfileSymbolSendErrors(Connection);
#endif
}
8560
8561
void MicroProfileWebSocketFrame()
8562
{
8563
if(!S.nNumWebSockets)
8564
{
8565
return;
8566
}
8567
MICROPROFILE_SCOPEI("MicroProfile", "Websocket-update", MP_GREEN4);
8568
fd_set Read, Write, Error;
8569
FD_ZERO(&Read);
8570
FD_ZERO(&Write);
8571
FD_ZERO(&Error);
8572
MpSocket LastSocket = 1;
8573
for(uint32_t i = 0; i < S.nNumWebSockets; ++i)
8574
{
8575
LastSocket = MicroProfileMax(LastSocket, S.WebSockets[i] + 1);
8576
FD_SET(S.WebSockets[i], &Read);
8577
FD_SET(S.WebSockets[i], &Write);
8578
FD_SET(S.WebSockets[i], &Error);
8579
}
8580
timeval tv;
8581
tv.tv_sec = 0;
8582
tv.tv_usec = 0;
8583
8584
if(-1 == select(LastSocket, &Read, &Write, &Error, &tv))
8585
{
8586
MP_ASSERT(0);
8587
}
8588
for(uint32_t i = 0; i < S.nNumWebSockets;)
8589
{
8590
MpSocket s = S.WebSockets[i];
8591
bool bConnected = true;
8592
if(FD_ISSET(s, &Error))
8593
{
8594
MP_ASSERT(0); // todo, remove & fix.
8595
}
8596
if(FD_ISSET(s, &Read))
8597
{
8598
bConnected = MicroProfileWebSocketReceive(s);
8599
}
8600
if(FD_ISSET(s, &Write))
8601
{
8602
if(S.nJsonSettingsPending)
8603
{
8604
MicroProfileWSPrintStart(s);
8605
MicroProfileWSPrintf(
8606
"{\"k\":\"%d\",\"ro\":%d,\"name\":\"%s\",\"v\":%s}", MSG_LOADSETTINGS, S.bJsonSettingsReadOnly ? 1 : 0, S.pJsonSettingsName ? S.pJsonSettingsName : "", S.pJsonSettings);
8607
MicroProfileWSFlush();
8608
MicroProfileWSPrintEnd();
8609
S.nJsonSettingsPending = 0;
8610
}
8611
if(S.nWebSocketDirty)
8612
{
8613
MicroProfileFlipEnabled();
8614
MicroProfileWebSocketSendEnabled(s);
8615
S.nWebSocketDirty = 0;
8616
}
8617
MicroProfileWebSocketSendFrame(s);
8618
}
8619
if(S.nSocketFail)
8620
{
8621
bConnected = false;
8622
}
8623
S.nSocketFail = 0;
8624
8625
if(!bConnected)
8626
{
8627
uprintf("removing socket %" PRId64 "\n", (uint64_t)s);
8628
8629
#ifndef _WIN32
8630
shutdown(S.WebSockets[i], SHUT_WR);
8631
#else
8632
shutdown(S.WebSockets[i], 1);
8633
#endif
8634
char tmp[128];
8635
int r = 1;
8636
while(r > 0)
8637
{
8638
r = recv(S.WebSockets[i], tmp, sizeof(tmp), 0);
8639
}
8640
#ifdef _WIN32
8641
closesocket(S.WebSockets[i]);
8642
#else
8643
close(S.WebSockets[i]);
8644
#endif
8645
8646
--S.nNumWebSockets;
8647
S.WebSockets[i] = S.WebSockets[S.nNumWebSockets];
8648
uprintf("done removing\n");
8649
}
8650
else
8651
{
8652
++i;
8653
}
8654
}
8655
if(S.nWasFrozen)
8656
{
8657
S.nWasFrozen--;
8658
}
8659
}
8660
8661
// Opens a websocket print session on socket C. Asserts that no session is open and
// that the buffer is empty; MicroProfileWSFlush sends to the socket stored here.
void MicroProfileWSPrintStart(MpSocket C)
{
    MP_ASSERT(S.WSBuf.Socket == 0); // previous session was closed with MicroProfileWSPrintEnd
    MP_ASSERT(S.WSBuf.nPut == 0);   // and nothing is left unflushed
    S.WSBuf.Socket = C;
}
8667
8668
void MicroProfileResizeWSBuf(uint32_t nMinSize = 0)
8669
{
8670
uint32_t nNewSize = MicroProfileMax(S.WSBuf.nPut + 2 * (nMinSize + 2 + 20), MicroProfileMax(S.WSBuf.nBufferSize * 3 / 2, (uint32_t)MICROPROFILE_WEBSOCKET_BUFFER_SIZE));
8671
S.WSBuf.pBufferAllocation = (char*)MICROPROFILE_REALLOC(S.WSBuf.pBufferAllocation, nNewSize);
8672
S.WSBuf.pBuffer = S.WSBuf.pBufferAllocation + 20;
8673
S.WSBuf.nBufferSize = nNewSize - 20;
8674
}
8675
8676
char* MicroProfileWSPrintfCallback(const char* buf, void* user, int len)
8677
{
8678
MP_ASSERT(S.WSBuf.nPut == buf - S.WSBuf.pBuffer);
8679
S.WSBuf.nPut += len;
8680
if(S.WSBuf.nPut + STB_SPRINTF_MIN + 2 >= S.WSBuf.nBufferSize) //
8681
{
8682
MicroProfileResizeWSBuf(S.WSBuf.nPut + STB_SPRINTF_MIN);
8683
}
8684
return S.WSBuf.pBuffer + S.WSBuf.nPut;
8685
}
8686
8687
void MicroProfileWSPrintf(const char* pFmt, ...)
8688
{
8689
if(!S.WSBuf.nBufferSize)
8690
{
8691
MicroProfileResizeWSBuf(STB_SPRINTF_MIN * 2);
8692
}
8693
va_list args;
8694
va_start(args, pFmt);
8695
MP_ASSERT(S.WSBuf.nPut + STB_SPRINTF_MIN < S.WSBuf.nBufferSize);
8696
stbsp_vsprintfcb(MicroProfileWSPrintfCallback, 0, S.WSBuf.pBuffer + S.WSBuf.nPut, pFmt, args);
8697
va_end(args);
8698
}
8699
8700
void MicroProfileWSPrintEnd()
8701
{
8702
MP_ASSERT(S.WSBuf.nPut == 0);
8703
S.WSBuf.Socket = 0;
8704
}
8705
8706
void MicroProfileWSFlush()
8707
{
8708
MP_ASSERT(S.WSBuf.Socket != 0);
8709
MP_ASSERT(S.WSBuf.nPut != 0);
8710
MicroProfileWebSocketSend(S.WSBuf.Socket, &S.WSBuf.pBuffer[0], S.WSBuf.nPut);
8711
S.WSBuf.nPut = 0;
8712
}
8713
void MicroProfileWebSocketSendEnabledMessage(uint32_t id, int bEnabled)
8714
{
8715
MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"id\":%d,\"e\":%d}}", MSG_ENABLED, id, bEnabled ? 1 : 0);
8716
MicroProfileWSFlush();
8717
}
8718
// Sends the enabled state of every category, group, timer and counter to socket C,
// followed by the three global settings (force-enable, context switch trace,
// platform markers). Each item goes out as its own MSG_ENABLED message.
void MicroProfileWebSocketSendEnabled(MpSocket C)
{
    MICROPROFILE_SCOPEI("MicroProfile", "Websocket-SendEnabled", MP_GREEN4);
    MicroProfileWSPrintStart(C);

    for(uint32_t nCategory = 0; nCategory < S.nCategoryCount; ++nCategory)
    {
        const uint32_t nId = MicroProfileWebSocketIdPack(TYPE_CATEGORY, nCategory);
        MicroProfileWebSocketSendEnabledMessage(nId, MicroProfileCategoryEnabled(nCategory));
    }

    for(uint32_t nGroup = 0; nGroup < S.nGroupCount; ++nGroup)
    {
        const uint32_t nId = MicroProfileWebSocketIdPack(TYPE_GROUP, nGroup);
        MicroProfileWebSocketSendEnabledMessage(nId, MicroProfileGroupEnabled(nGroup));
    }

    for(uint32_t nTimer = 0; nTimer < S.nTotalTimers; ++nTimer)
    {
        const uint32_t nId = MicroProfileWebSocketIdPack(TYPE_TIMER, nTimer);
        MicroProfileWebSocketSendEnabledMessage(nId, MicroProfileWebSocketTimerEnabled(nTimer));
    }

    for(uint32_t nCounter = 0; nCounter < S.nNumCounters; ++nCounter)
    {
        const uint32_t nId = MicroProfileWebSocketIdPack(TYPE_COUNTER, nCounter);
        MicroProfileWebSocketSendEnabledMessage(nId, MicroProfileWebSocketCounterEnabled(nCounter));
    }

    // global settings
    MicroProfileWebSocketSendEnabledMessage(MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_FORCE_ENABLE), MicroProfileGetEnableAllGroups());
    MicroProfileWebSocketSendEnabledMessage(MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_CONTEXT_SWITCH_TRACE), S.bContextSwitchRunning);
    MicroProfileWebSocketSendEnabledMessage(MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_PLATFORM_MARKERS), MicroProfilePlatformMarkersGetEnabled());

    MicroProfileWSPrintEnd();
}
8745
void MicroProfileWebSocketSendEntry(uint32_t id, uint32_t parent, const char* pName, int nEnabled, uint32_t nColor, uint32_t nType)
8746
{
8747
MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"id\":%d,\"pid\":%d,", MSG_TIMER_TREE, id, parent);
8748
MicroProfileWSPrintf("\"name\":\"%s\",", pName);
8749
MicroProfileWSPrintf("\"e\":%d,", nEnabled);
8750
MicroProfileWSPrintf("\"type\":%d,", nType);
8751
if(nColor == 0x42)
8752
{
8753
MicroProfileWSPrintf("\"color\":\"\"");
8754
}
8755
else
8756
{
8757
MicroProfileWSPrintf("\"color\":\"#%02x%02x%02x\"", MICROPROFILE_UNPACK_RED(nColor) & 0xff, MICROPROFILE_UNPACK_GREEN(nColor) & 0xff, MICROPROFILE_UNPACK_BLUE(nColor) & 0xff);
8758
}
8759
8760
MicroProfileWSPrintf("}}");
8761
MicroProfileWSFlush();
8762
}
8763
8764
void MicroProfileWebSocketSendCounterEntry(uint32_t id, uint32_t parent, const char* pName, int nEnabled, int64_t nLimit, int nFormat)
8765
{
8766
MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"id\":%d,\"pid\":%d,", MSG_TIMER_TREE, id, parent);
8767
MicroProfileWSPrintf("\"name\":\"%s\",", pName);
8768
MicroProfileWSPrintf("\"e\":%d,", nEnabled);
8769
MicroProfileWSPrintf("\"limit\":%lld,", nLimit);
8770
MicroProfileWSPrintf("\"format\":%d", nFormat);
8771
MicroProfileWSPrintf("}}");
8772
MicroProfileWSFlush();
8773
}
8774
8775
// Sends tree entries for every category, group, timer and counter that socket C has
// not been told about yet, then records the new counts as sent. Does nothing when
// all four "sent" counts already match the current counts.
//
// The root "All" entry and the optional setting entries (context switch trace,
// platform markers) are sent again whenever anything is sent.
void MicroProfileWebSocketSendState(MpSocket C)
{
    const bool bUpToDate = S.WSCategoriesSent == S.nCategoryCount && S.WSGroupsSent == S.nGroupCount && S.WSTimersSent == S.nTotalTimers && S.WSCountersSent == S.nNumCounters;
    if(bUpToDate)
    {
        return;
    }

    MicroProfileWSPrintStart(C);
    uint32_t root = MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_FORCE_ENABLE);
    MicroProfileWebSocketSendEntry(root, 0, "All", MicroProfileGetEnableAllGroups(), (uint32_t)-1, 0);

    // categories hang off the root
    for(uint32_t i = S.WSCategoriesSent; i < S.nCategoryCount; ++i)
    {
        MicroProfileCategory& CI = S.CategoryInfo[i];
        uint32_t id = MicroProfileWebSocketIdPack(TYPE_CATEGORY, i);
        uint32_t parent = root;
        MicroProfileWebSocketSendEntry(id, parent, CI.pName, MicroProfileCategoryEnabled(i), 0xffffffff, 0);
    }

    // groups hang off their category
    for(uint32_t i = S.WSGroupsSent; i < S.nGroupCount; ++i)
    {
        MicroProfileGroupInfo& GI = S.GroupInfo[i];
        uint32_t id = MicroProfileWebSocketIdPack(TYPE_GROUP, i);
        uint32_t parent = MicroProfileWebSocketIdPack(TYPE_CATEGORY, GI.nCategory);
        MicroProfileWebSocketSendEntry(id, parent, GI.pName, MicroProfileGroupEnabled(i), GI.nColor, GI.Type);
    }

    // timers hang off their group
    for(uint32_t i = S.WSTimersSent; i < S.nTotalTimers; ++i)
    {
        MicroProfileTimerInfo& TI = S.TimerInfo[i];
        uint32_t id = MicroProfileWebSocketIdPack(TYPE_TIMER, i);
        uint32_t parent = MicroProfileWebSocketIdPack(TYPE_GROUP, TI.nGroupIndex);
        MicroProfileWebSocketSendEntry(id, parent, TI.pName, MicroProfileWebSocketTimerEnabled(i), TI.nColor, TI.Type);
    }

    // counters form their own tree; nParent == -1 marks a top-level counter
    for(uint32_t i = S.WSCountersSent; i < S.nNumCounters; ++i)
    {
        MicroProfileCounterInfo& CI = S.CounterInfo[i];
        uint32_t id = MicroProfileWebSocketIdPack(TYPE_COUNTER, i);
        uint32_t parent = CI.nParent == -1 ? 0u : MicroProfileWebSocketIdPack(TYPE_COUNTER, CI.nParent);
        MicroProfileWebSocketSendCounterEntry(id, parent, CI.pName, MicroProfileWebSocketCounterEnabled(i), CI.nLimit, CI.eFormat);
    }
#if MICROPROFILE_CONTEXT_SWITCH_TRACE
    MicroProfileWebSocketSendEntry(MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_CONTEXT_SWITCH_TRACE), 0, "Context Switch Trace", S.bContextSwitchRunning, (uint32_t)-1, 0);
#endif
#if MICROPROFILE_PLATFORM_MARKERS
    // Report the platform-marker state (the same source MicroProfileWebSocketSendEnabled
    // uses for this setting) and pass the type argument explicitly, as the other
    // setting entries do.
    MicroProfileWebSocketSendEntry(MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_PLATFORM_MARKERS), 0, "Platform Markers", MicroProfilePlatformMarkersGetEnabled(), (uint32_t)-1, 0);
#endif
    MicroProfileWSPrintEnd();

    S.WSCategoriesSent = S.nCategoryCount;
    S.WSGroupsSent = S.nGroupCount;
    S.WSTimersSent = S.nTotalTimers;
    S.WSCountersSent = S.nNumCounters;
}
8828
8829
bool MicroProfileWebServerUpdate()
8830
{
8831
MICROPROFILE_SCOPEI("MicroProfile", "Webserver-update", MP_GREEN4);
8832
MpSocket Connection = accept(S.ListenerSocket, 0, 0);
8833
bool bServed = false;
8834
MicroProfileWebSocketFrame();
8835
if(!MP_INVALID_SOCKET(Connection))
8836
{
8837
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
8838
char Req[8192];
8839
int nReceived = recv(Connection, Req, sizeof(Req) - 1, 0);
8840
if(nReceived > 0)
8841
{
8842
Req[nReceived] = '\0';
8843
uprintf("req received\n%s", Req);
8844
8845
#define MICROPROFILE_HTML_PNG_HEADER "HTTP/1.0 200 OK\r\nContent-Type: image/png\r\n\r\n"
8846
#define MICROPROFILE_HTML_JS_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/javascript\r\n\r\n"
8847
#if MICROPROFILE_MINIZ
8848
// Expires: Tue, 01 Jan 2199 16:00:00 GMT\r\n
8849
#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nContent-Encoding: deflate\r\n\r\n"
8850
#else
8851
#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\n\r\n"
8852
#endif
8853
8854
char* pHttp = strstr(Req, "HTTP/");
8855
8856
char* pGet = strstr(Req, "GET /");
8857
char* pHost = strstr(Req, "Host: ");
8858
char* pWebSocketKey = strstr(Req, "Sec-WebSocket-Key: ");
8859
auto Terminate = [](char* pString)
8860
{
8861
char* pEnd = pString;
8862
while(*pEnd != '\0')
8863
{
8864
if(*pEnd == '\r' || *pEnd == '\n' || *pEnd == ' ')
8865
{
8866
*pEnd = '\0';
8867
return;
8868
}
8869
pEnd++;
8870
}
8871
};
8872
8873
if(pWebSocketKey)
8874
{
8875
if(S.nNumWebSockets) // only allow 1
8876
{
8877
return false;
8878
}
8879
pWebSocketKey += sizeof("Sec-WebSocket-Key: ") - 1;
8880
Terminate(pWebSocketKey);
8881
MicroProfileWebSocketHandshake(Connection, pWebSocketKey);
8882
return false;
8883
}
8884
8885
if(pHost)
8886
{
8887
pHost += sizeof("Host: ") - 1;
8888
Terminate(pHost);
8889
}
8890
8891
if(pHttp && pGet)
8892
{
8893
*pHttp = '\0';
8894
pGet += sizeof("GET /") - 1;
8895
Terminate(pGet);
8896
MicroProfileParseGetResult R;
8897
auto P = MicroProfileParseGet(pGet, &R);
8898
switch(P)
8899
{
8900
case EMICROPROFILE_GET_COMMAND_SERVICE_WORKER:
8901
{
8902
MicroProfileSetNonBlocking(Connection, 1);
8903
uint64_t nTickStart = MP_TICK();
8904
send(Connection, MICROPROFILE_HTML_JS_HEADER, sizeof(MICROPROFILE_HTML_JS_HEADER) - 1, 0);
8905
const char* JsCode = "self.addEventListener(\"fetch\", () => {}); \r\n\r\n";
8906
send(Connection, JsCode, (int)strlen(JsCode), 0);
8907
break;
8908
}
8909
case EMICROPROFILE_GET_COMMAND_FAVICON:
8910
{
8911
MicroProfileSetNonBlocking(Connection, 1);
8912
uint64_t nTickStart = MP_TICK();
8913
send(Connection, MICROPROFILE_HTML_PNG_HEADER, sizeof(MICROPROFILE_HTML_PNG_HEADER) - 1, 0);
8914
extern const uint32_t uprof_512[];
8915
extern const uint32_t uprof_512_len;
8916
const char* pFile = (const char*)&uprof_512[0];
8917
uint32_t nFileSize = uprof_512_len;
8918
send(Connection, pFile, nFileSize, 0);
8919
}
8920
break;
8921
case EMICROPROFILE_GET_COMMAND_LIVE:
8922
{
8923
MicroProfileSetNonBlocking(Connection, 0);
8924
uint64_t nTickStart = MP_TICK();
8925
send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER) - 1, 0);
8926
uint64_t nDataStart = S.nWebServerDataSent;
8927
S.WebServerPut = 0;
8928
#if 0 == MICROPROFILE_MINIZ
8929
MicroProfileDumpHtmlLive(MicroProfileWriteSocket, &Connection);
8930
uint64_t nDataEnd = S.nWebServerDataSent;
8931
uint64_t nTickEnd = MP_TICK();
8932
uint64_t nDiff = (nTickEnd - nTickStart);
8933
float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
8934
int nKb = ((nDataEnd-nDataStart)>>10) + 1;
8935
int nCompressedKb = nKb;
8936
MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n<!-- Sent %dkb in %.2fms-->\n\n",nKb, fMs);
8937
MicroProfileFlushSocket(Connection);
8938
#else
8939
MicroProfileCompressedSocketState CompressState;
8940
MicroProfileCompressedSocketStart(&CompressState, Connection);
8941
MicroProfileDumpHtmlLive(MicroProfileCompressedWriteSocket, &CompressState);
8942
S.nWebServerDataSent += CompressState.nSize;
8943
uint64_t nDataEnd = S.nWebServerDataSent;
8944
uint64_t nTickEnd = MP_TICK();
8945
uint64_t nDiff = (nTickEnd - nTickStart);
8946
float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
8947
int nKb = ((nDataEnd - nDataStart) >> 10) + 1;
8948
int nCompressedKb = ((CompressState.nCompressedSize) >> 10) + 1;
8949
MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
8950
MicroProfileCompressedSocketFinish(&CompressState);
8951
MicroProfileFlushSocket(Connection);
8952
#endif
8953
8954
uprintf("\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
8955
(void)nCompressedKb;
8956
}
8957
break;
8958
case EMICROPROFILE_GET_COMMAND_DUMP_RANGE:
8959
case EMICROPROFILE_GET_COMMAND_DUMP:
8960
{
8961
{
8962
MicroProfileSetNonBlocking(Connection, 0);
8963
uint64_t nTickStart = MP_TICK();
8964
send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER) - 1, 0);
8965
uint64_t nDataStart = S.nWebServerDataSent;
8966
S.WebServerPut = 0;
8967
#if 0 == MICROPROFILE_MINIZ
8968
MicroProfileDumpHtml(MicroProfileWriteSocket, &Connection, R.nFrames, pHost, R.nFrameStart);
8969
uint64_t nDataEnd = S.nWebServerDataSent;
8970
uint64_t nTickEnd = MP_TICK();
8971
uint64_t nDiff = (nTickEnd - nTickStart);
8972
float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
8973
int nKb = ((nDataEnd-nDataStart)>>10) + 1;
8974
int nCompressedKb = nKb;
8975
MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n<!-- Sent %dkb in %.2fms-->\n\n",nKb, fMs);
8976
MicroProfileFlushSocket(Connection);
8977
#else
8978
MicroProfileCompressedSocketState CompressState;
8979
MicroProfileCompressedSocketStart(&CompressState, Connection);
8980
8981
MicroProfileDumpHtml(MicroProfileCompressedWriteSocket, &CompressState, R.nFrames, pHost, R.nFrameStart);
8982
8983
S.nWebServerDataSent += CompressState.nSize;
8984
uint64_t nDataEnd = S.nWebServerDataSent;
8985
uint64_t nTickEnd = MP_TICK();
8986
uint64_t nDiff = (nTickEnd - nTickStart);
8987
float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
8988
int nKb = ((nDataEnd - nDataStart) >> 10) + 1;
8989
int nCompressedKb = ((CompressState.nCompressedSize) >> 10) + 1;
8990
MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
8991
MicroProfileCompressedSocketFinish(&CompressState);
8992
MicroProfileFlushSocket(Connection);
8993
#endif
8994
8995
uprintf("\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
8996
(void)nCompressedKb;
8997
}
8998
}
8999
break;
9000
case EMICROPROFILE_GET_COMMAND_UNKNOWN:
9001
{
9002
uprintf("unknown get command %s\n", pGet);
9003
}
9004
break;
9005
}
9006
}
9007
}
9008
#ifdef _WIN32
9009
closesocket(Connection);
9010
#else
9011
close(Connection);
9012
#endif
9013
}
9014
return bServed;
9015
}
9016
#endif
9017
9018
#if MICROPROFILE_CONTEXT_SWITCH_TRACE
9019
// functions that need to be implemented per platform.
9020
void* MicroProfileTraceThread(void* unused);
9021
int MicroProfileIsLocalThread(uint32_t nThreadId);
9022
9023
void MicroProfileStartContextSwitchTrace()
9024
{
9025
if(!S.bContextSwitchRunning && !S.nMicroProfileShutdown)
9026
{
9027
S.bContextSwitchRunning = true;
9028
S.bContextSwitchStop = false;
9029
MicroProfileThreadStart(&S.ContextSwitchThread, MicroProfileTraceThread);
9030
}
9031
}
9032
9033
void MicroProfileJoinContextSwitchTrace()
9034
{
9035
if(S.bContextSwitchStop)
9036
{
9037
MicroProfileThreadJoin(&S.ContextSwitchThread);
9038
}
9039
}
9040
9041
void MicroProfileStopContextSwitchTrace()
9042
{
9043
if(S.bContextSwitchRunning)
9044
{
9045
S.bContextSwitchStop = true;
9046
}
9047
}
9048
9049
#ifdef _WIN32
9050
#define INITGUID
9051
#include <evntcons.h>
9052
#include <evntrace.h>
9053
#include <strsafe.h>
9054
9055
// Event class GUID that the context switch callbacks compare against each event's
// Header.Guid before looking at the event type.
static GUID g_MicroProfileThreadClassGuid = { 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c };
9056
9057
// Payload layout the context switch callbacks read from PEVENT_TRACE::MofData.
// Only NewThreadId and OldThreadId are consumed by the callbacks in this file.
struct MicroProfileSCSwitch
{
    // Thread ids: the thread being switched in and the one being switched out.
    uint32_t NewThreadId;
    uint32_t OldThreadId;

    // Per-switch scheduler details, one byte each.
    int8_t NewThreadPriority;
    int8_t OldThreadPriority;
    uint8_t PreviousCState;
    int8_t SpareByte;
    int8_t OldThreadWaitReason;
    int8_t OldThreadWaitMode;
    int8_t OldThreadState;
    int8_t OldThreadWaitIdealProcessor;

    // Trailing 32-bit fields.
    uint32_t NewThreadWaitTime;
    uint32_t Reserved;
};
9072
9073
// In-process event callback: turns each context switch event (thread class GUID,
// type 36) into a MicroProfileContextSwitch and hands it to MicroProfileContextSwitchPut.
// Events where both thread ids are zero are dropped.
VOID WINAPI MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent)
{
    if(!(pEvent->Header.Guid == g_MicroProfileThreadClassGuid))
    {
        return;
    }
    if(pEvent->Header.Class.Type != 36)
    {
        return;
    }

    MicroProfileSCSwitch* pPayload = (MicroProfileSCSwitch*)pEvent->MofData;
    if(pPayload->NewThreadId == 0 && pPayload->OldThreadId == 0)
    {
        return;
    }

    MicroProfileContextSwitch Entry;
    Entry.nThreadOut = pPayload->OldThreadId;
    Entry.nThreadIn = pPayload->NewThreadId;
    Entry.nCpu = pEvent->BufferContext.ProcessorNumber;
    Entry.nTicks = pEvent->Header.TimeStamp.QuadPart;
    MicroProfileContextSwitchPut(&Entry);
}
9092
9093
// Buffer callback for the in-process trace. Returns TRUE while the trace is running
// and no stop has been requested, FALSE otherwise.
ULONG WINAPI MicroProfileBufferCallback(PEVENT_TRACE_LOGFILEA Buffer)
{
    const bool bKeepGoing = !S.bContextSwitchStop && S.bContextSwitchRunning;
    if(bKeepGoing)
    {
        return TRUE;
    }
    return FALSE;
}
9097
9098
struct MicroProfileKernelTraceProperties : public EVENT_TRACE_PROPERTIES
9099
{
9100
char dummy[sizeof(KERNEL_LOGGER_NAME)];
9101
};
9102
9103
void MicroProfileContextSwitchShutdownTrace()
9104
{
9105
TRACEHANDLE SessionHandle = 0;
9106
MicroProfileKernelTraceProperties sessionProperties;
9107
9108
ZeroMemory(&sessionProperties, sizeof(sessionProperties));
9109
sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
9110
sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
9111
sessionProperties.Wnode.ClientContext = 1; // QPC clock resolution
9112
sessionProperties.Wnode.Guid = SystemTraceControlGuid;
9113
sessionProperties.BufferSize = 1;
9114
sessionProperties.NumberOfBuffers = 128;
9115
sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH;
9116
sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
9117
sessionProperties.MaximumFileSize = 0;
9118
sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
9119
sessionProperties.LogFileNameOffset = 0;
9120
9121
EVENT_TRACE_LOGFILEA log;
9122
ZeroMemory(&log, sizeof(log));
9123
log.LoggerName = (LPSTR)KERNEL_LOGGER_NAMEA;
9124
log.ProcessTraceMode = 0;
9125
TRACEHANDLE hLog = OpenTraceA(&log);
9126
if(hLog)
9127
{
9128
ControlTrace(SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties, EVENT_TRACE_CONTROL_STOP);
9129
}
9130
CloseTrace(hLog);
9131
}
9132
9133
typedef VOID(WINAPI* EventCallback)(PEVENT_TRACE);
9134
typedef ULONG(WINAPI* BufferCallback)(PEVENT_TRACE_LOGFILEA);
9135
// Starts a real-time kernel logger session with context switch and process events
// enabled, then processes it on the calling thread until ProcessTrace returns.
//
// EvtCb     callback invoked for each event.
// BufferCB  callback invoked per buffer; its return value is what the callbacks in this
//           file use to ask for processing to stop.
//
// Returns false if StartTrace fails. Returns true once ProcessTrace has returned and the
// session has been shut down again.
bool MicroProfileStartWin32Trace(EventCallback EvtCb, BufferCallback BufferCB)
{
    MicroProfileContextSwitchShutdownTrace();
    ULONG status = ERROR_SUCCESS;
    TRACEHANDLE SessionHandle = 0;
    MicroProfileKernelTraceProperties sessionProperties;

    ZeroMemory(&sessionProperties, sizeof(sessionProperties));
    sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
    sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
    sessionProperties.Wnode.ClientContext = 1; // QPC clock resolution
    sessionProperties.Wnode.Guid = SystemTraceControlGuid;
    sessionProperties.BufferSize = 1;
    sessionProperties.NumberOfBuffers = 128;
    sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH | EVENT_TRACE_FLAG_PROCESS;
    sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
    sessionProperties.MaximumFileSize = 0;
    sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
    sessionProperties.LogFileNameOffset = 0;

    // Stop any session still registered under the kernel logger name, then start ours.
    StopTrace(NULL, KERNEL_LOGGER_NAME, &sessionProperties);
    status = StartTrace((PTRACEHANDLE)&SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties);

    if(ERROR_SUCCESS != status)
    {
        return false;
    }

    EVENT_TRACE_LOGFILEA log;
    ZeroMemory(&log, sizeof(log));

    // EVENT_TRACE_LOGFILEA needs a narrow string. KERNEL_LOGGER_NAME is a TCHAR literal,
    // so casting it to LPSTR is wrong in UNICODE builds; use the explicit ANSI name, as
    // MicroProfileContextSwitchShutdownTrace does.
    log.LoggerName = (LPSTR)KERNEL_LOGGER_NAMEA;
    log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
    log.EventCallback = EvtCb;
    log.BufferCallback = BufferCB;

    TRACEHANDLE hLog = OpenTraceA(&log);
    ProcessTrace(&hLog, 1, 0, 0);
    CloseTrace(hLog);
    MicroProfileContextSwitchShutdownTrace();
    return true;
}
9177
9178
#include <psapi.h>
9179
#include <tlhelp32.h>
9180
#include <winternl.h>
9181
#define ThreadQuerySetWin32StartAddress 9
9182
typedef LONG NTSTATUS;
9183
typedef NTSTATUS(WINAPI* pNtQIT)(HANDLE, LONG, PVOID, ULONG, PULONG);
9184
// NT status code for success. The hex literal must be a single token; with a space in
// the middle, any use of the macro fails to compile.
#define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
9185
#define ThreadQuerySetWin32StartAddress 9
9186
#undef Process32First
9187
#undef Process32Next
9188
#undef PROCESSENTRY32
9189
#undef Module32First
9190
#undef Module32Next
9191
#undef MODULEENTRY32
9192
9193
struct MicroProfileWin32ContextSwitchShared
9194
{
9195
std::atomic<int64_t> nPut;
9196
std::atomic<int64_t> nGet;
9197
std::atomic<int64_t> nQuit;
9198
std::atomic<int64_t> nTickTrace;
9199
std::atomic<int64_t> nTickProgram;
9200
enum
9201
{
9202
BUFFER_SIZE = (2 << 20) / sizeof(MicroProfileContextSwitch),
9203
};
9204
MicroProfileContextSwitch Buffer[BUFFER_SIZE];
9205
};
9206
9207
struct MicroProfileWin32ThreadInfo
9208
{
9209
struct Process
9210
{
9211
uint32_t pid;
9212
uint32_t nNumModules;
9213
uint32_t nModuleStart;
9214
const char* pProcessModule;
9215
};
9216
struct Module
9217
{
9218
int64_t nBase;
9219
int64_t nEnd;
9220
const char* pName;
9221
};
9222
enum
9223
{
9224
MAX_PROCESSES = 5 * 1024,
9225
MAX_THREADS = 20 * 1024,
9226
MAX_MODULES = 20 * 1024,
9227
MAX_STRINGS = 16 * 1024,
9228
MAX_CHARS = 128 * 1024,
9229
};
9230
uint32_t nNumProcesses;
9231
uint32_t nNumThreads;
9232
uint32_t nStringOffset;
9233
uint32_t nNumStrings;
9234
uint32_t nNumModules;
9235
Process P[MAX_PROCESSES];
9236
Module M[MAX_MODULES];
9237
MicroProfileThreadInfo T[MAX_THREADS];
9238
const char* pStrings[MAX_STRINGS];
9239
char StringData[MAX_CHARS];
9240
};
9241
9242
// Single process/module/thread snapshot; cleared and refilled by MicroProfileWin32InitThreadInfo2.
static MicroProfileWin32ThreadInfo g_ThreadInfo;
9243
9244
const char* MicroProfileWin32ThreadInfoAddString(const char* pString)
9245
{
9246
size_t nLen = strlen(pString);
9247
uint32_t nHash = *(uint32_t*)pString;
9248
nHash ^= (nHash >> 16);
9249
enum
9250
{
9251
MAX_SEARCH = 256,
9252
};
9253
for(uint32_t i = 0; i < MAX_SEARCH; ++i)
9254
{
9255
uint32_t idx = (i + nHash) % MicroProfileWin32ThreadInfo::MAX_STRINGS;
9256
if(0 == g_ThreadInfo.pStrings[idx])
9257
{
9258
g_ThreadInfo.pStrings[idx] = &g_ThreadInfo.StringData[g_ThreadInfo.nStringOffset];
9259
memcpy(&g_ThreadInfo.StringData[g_ThreadInfo.nStringOffset], pString, nLen + 1);
9260
g_ThreadInfo.nStringOffset += (uint32_t)(nLen + 1);
9261
return g_ThreadInfo.pStrings[idx];
9262
}
9263
if(0 == strcmp(g_ThreadInfo.pStrings[idx], pString))
9264
{
9265
return g_ThreadInfo.pStrings[idx];
9266
}
9267
}
9268
return "internal hash table fail: should never happen";
9269
}
9270
// Appends the modules of process P to g_ThreadInfo.M and counts them in P.nNumModules.
// Modules beyond MAX_MODULES are ignored. The caller is expected to have set
// P.nModuleStart and cleared P.nNumModules beforehand.
void MicroProfileWin32ExtractModules(MicroProfileWin32ThreadInfo::Process& P)
{
    HANDLE hModuleSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, P.pid);
    // CreateToolhelp32Snapshot reports failure with INVALID_HANDLE_VALUE, not NULL.
    if(INVALID_HANDLE_VALUE == hModuleSnapshot || NULL == hModuleSnapshot)
    {
        return;
    }

    MODULEENTRY32 me;
    // Module32First requires dwSize to be initialised and fails otherwise.
    me.dwSize = sizeof(me);
    if(Module32First(hModuleSnapshot, &me))
    {
        do
        {
            if(g_ThreadInfo.nNumModules < MicroProfileWin32ThreadInfo::MAX_MODULES)
            {
                auto& M = g_ThreadInfo.M[g_ThreadInfo.nNumModules++];
                P.nNumModules++;
                intptr_t nBase = (intptr_t)me.modBaseAddr;
                intptr_t nEnd = nBase + me.modBaseSize;
                M.nBase = nBase;
                M.nEnd = nEnd;
                M.pName = MicroProfileWin32ThreadInfoAddString(&me.szModule[0]);
            }
        } while(Module32Next(hModuleSnapshot, &me));
    }
    CloseHandle(hModuleSnapshot);
}
9293
void MicroProfileWin32InitThreadInfo2()
9294
{
9295
memset(&g_ThreadInfo, 0, sizeof(g_ThreadInfo));
9296
#if MICROPROFILE_DEBUG
9297
float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
9298
#endif
9299
9300
HANDLE hSnap = CreateToolhelp32Snapshot(TH32CS_SNAPALL, 0);
9301
PROCESSENTRY32 pe32;
9302
THREADENTRY32 te32;
9303
te32.dwSize = sizeof(THREADENTRY32);
9304
pe32.dwSize = sizeof(PROCESSENTRY32);
9305
{
9306
#if MICROPROFILE_DEBUG
9307
int64_t nTickStart = MP_TICK();
9308
#endif
9309
if(Process32First(hSnap, &pe32))
9310
{
9311
do
9312
{
9313
9314
MicroProfileWin32ThreadInfo::Process P;
9315
P.pid = pe32.th32ProcessID;
9316
P.pProcessModule = MicroProfileWin32ThreadInfoAddString(pe32.szExeFile);
9317
g_ThreadInfo.P[g_ThreadInfo.nNumProcesses++] = P;
9318
} while(Process32Next(hSnap, &pe32) && g_ThreadInfo.nNumProcesses < MicroProfileWin32ThreadInfo::MAX_PROCESSES);
9319
}
9320
#if MICROPROFILE_DEBUG
9321
int64_t nTicksEnd = MP_TICK();
9322
float fMs = fToMsCpu * (nTicksEnd - nTickStart);
9323
uprintf("Process iteration %6.2fms processes %d\n", fMs, g_ThreadInfo.nNumProcesses);
9324
#endif
9325
}
9326
{
9327
#if MICROPROFILE_DEBUG
9328
int64_t nTickStart = MP_TICK();
9329
#endif
9330
for(uint32_t i = 0; i < g_ThreadInfo.nNumProcesses; ++i)
9331
{
9332
g_ThreadInfo.P[i].nModuleStart = g_ThreadInfo.nNumModules;
9333
g_ThreadInfo.P[i].nNumModules = 0;
9334
MicroProfileWin32ExtractModules(g_ThreadInfo.P[i]);
9335
}
9336
#if MICROPROFILE_DEBUG
9337
int64_t nTicksEnd = MP_TICK();
9338
float fMs = fToMsCpu * (nTicksEnd - nTickStart);
9339
uprintf("Module iteration %6.2fms NumModules %d\n", fMs, g_ThreadInfo.nNumModules);
9340
#endif
9341
}
9342
9343
pNtQIT NtQueryInformationThread = (pNtQIT)GetProcAddress(GetModuleHandleA("ntdll.dll"), "NtQueryInformationThread");
9344
intptr_t dwStartAddress;
9345
ULONG olen;
9346
uint32_t nThreadsTested = 0;
9347
uint32_t nThreadsSucceeded = 0;
9348
9349
if(Thread32First(hSnap, &te32))
9350
{
9351
#if MICROPROFILE_DEBUG
9352
int64_t nTickStart = MP_TICK();
9353
#endif
9354
do
9355
{
9356
nThreadsTested++;
9357
const char* pModule = "?";
9358
HANDLE hThread = OpenThread(THREAD_QUERY_INFORMATION, FALSE, te32.th32ThreadID);
9359
if(hThread)
9360
{
9361
9362
NTSTATUS ntStatus = NtQueryInformationThread(hThread, (THREADINFOCLASS)ThreadQuerySetWin32StartAddress, &dwStartAddress, sizeof(dwStartAddress), &olen);
9363
if(0 == ntStatus)
9364
{
9365
uint32_t nProcessIndex = (uint32_t)-1;
9366
for(uint32_t i = 0; i < g_ThreadInfo.nNumProcesses; ++i)
9367
{
9368
if(g_ThreadInfo.P[i].pid == te32.th32OwnerProcessID)
9369
{
9370
nProcessIndex = i;
9371
break;
9372
}
9373
}
9374
if(nProcessIndex != (uint32_t)-1)
9375
{
9376
uint32_t nModuleStart = g_ThreadInfo.P[nProcessIndex].nModuleStart;
9377
uint32_t nNumModules = g_ThreadInfo.P[nProcessIndex].nNumModules;
9378
for(uint32_t i = 0; i < nNumModules; ++i)
9379
{
9380
auto& M = g_ThreadInfo.M[nModuleStart + i];
9381
if(M.nBase <= dwStartAddress && M.nEnd >= dwStartAddress)
9382
{
9383
pModule = M.pName;
9384
}
9385
}
9386
}
9387
}
9388
}
9389
if(hThread)
9390
CloseHandle(hThread);
9391
{
9392
MicroProfileThreadInfo T;
9393
T.pid = te32.th32OwnerProcessID;
9394
T.tid = te32.th32ThreadID;
9395
const char* pProcess = "unknown";
9396
for(uint32_t i = 0; i < g_ThreadInfo.nNumProcesses; ++i)
9397
{
9398
if(g_ThreadInfo.P[i].pid == T.pid)
9399
{
9400
pProcess = g_ThreadInfo.P[i].pProcessModule;
9401
break;
9402
}
9403
}
9404
T.pProcessModule = pProcess;
9405
T.pThreadModule = MicroProfileWin32ThreadInfoAddString(pModule);
9406
T.nIsLocal = GetCurrentProcessId() == T.pid ? 1 : 0;
9407
nThreadsSucceeded++;
9408
g_ThreadInfo.T[g_ThreadInfo.nNumThreads++] = T;
9409
}
9410
9411
} while(Thread32Next(hSnap, &te32) && g_ThreadInfo.nNumThreads < MicroProfileWin32ThreadInfo::MAX_THREADS);
9412
9413
#if MICROPROFILE_DEBUG
9414
int64_t nTickEnd = MP_TICK();
9415
float fMs = fToMsCpu * (nTickEnd - nTickStart);
9416
uprintf("Thread iteration %6.2fms Threads %d\n", fMs, g_ThreadInfo.nNumThreads);
9417
#endif
9418
}
9419
}
9420
9421
void MicroProfileWin32UpdateThreadInfo()
9422
{
9423
static int nWasRunning = 1;
9424
static int nOnce = 0;
9425
int nRunning = MicroProfileAnyGroupActive() ? 1 : 0;
9426
9427
if((0 == nRunning && 1 == nWasRunning) || nOnce == 0)
9428
{
9429
nOnce = 1;
9430
MicroProfileWin32InitThreadInfo2();
9431
}
9432
nWasRunning = nRunning;
9433
}
9434
9435
// Returns a display name of the form "p:<process> t:<thread module>" for nThreadId, or
// "?" when the thread is not in the snapshot. The result points at a function-local
// static buffer that is overwritten by the next call.
const char* MicroProfileThreadNameFromId(MicroProfileThreadIdType nThreadId)
{
    MicroProfileWin32UpdateThreadInfo();
    static char s_Name[1024];

    const uint32_t nCount = g_ThreadInfo.nNumThreads;
    uint32_t nFound = 0;
    while(nFound < nCount && !(g_ThreadInfo.T[nFound].tid == nThreadId))
    {
        ++nFound;
    }

    if(nFound < nCount)
    {
        sprintf_s(s_Name, "p:%s t:%s", g_ThreadInfo.T[nFound].pProcessModule, g_ThreadInfo.T[nFound].pThreadModule);
    }
    else
    {
        sprintf_s(s_Name, "?");
    }
    return s_Name;
}
9450
9451
#define MICROPROFILE_FILEMAPPING "microprofile-shared"
9452
#ifdef MICROPROFILE_WIN32_COLLECTOR
9453
#define MICROPROFILE_WIN32_CSWITCH_TIMEOUT 15 // seconds to wait before collector exits
9454
static MicroProfileWin32ContextSwitchShared* g_pShared = 0;
9455
// Event callback for the stand-alone collector process. Context switch events are
// pushed into the shared ring buffer (and counted as skips when it is full). Whenever
// the packet count is a multiple of 4096, it also publishes a heartbeat tick, prints
// progress, and requests quit if the profiled process has not updated its own tick for
// MICROPROFILE_WIN32_CSWITCH_TIMEOUT seconds.
VOID WINAPI MicroProfileContextSwitchCallbackCollector(PEVENT_TRACE pEvent)
{
    static int64_t nPackets = 0;
    static int64_t nSkips = 0;

    const bool bIsCSwitch = (pEvent->Header.Guid == g_MicroProfileThreadClassGuid) && (pEvent->Header.Class.Type == 36);
    if(bIsCSwitch)
    {
        MicroProfileSCSwitch* pPayload = (MicroProfileSCSwitch*)pEvent->MofData;
        const bool bBothIdle = (pPayload->NewThreadId == 0) && (pPayload->OldThreadId == 0);
        if(!bBothIdle)
        {
            MicroProfileContextSwitch Entry;
            Entry.nThreadOut = pPayload->OldThreadId;
            Entry.nThreadIn = pPayload->NewThreadId;
            Entry.nCpu = pEvent->BufferContext.ProcessorNumber;
            Entry.nTicks = pEvent->Header.TimeStamp.QuadPart;

            int64_t nPut = g_pShared->nPut.load(std::memory_order_relaxed);
            int64_t nGet = g_pShared->nGet.load(std::memory_order_relaxed);
            nPackets++;
            if(nPut - nGet < MicroProfileWin32ContextSwitchShared::BUFFER_SIZE)
            {
                // Room available: write the slot first, then publish it with a release store.
                g_pShared->Buffer[nPut % MicroProfileWin32ContextSwitchShared::BUFFER_SIZE] = Entry;
                g_pShared->nPut.store(nPut + 1, std::memory_order_release);
                nSkips = 0;
            }
            else
            {
                nSkips++;
            }
        }
    }

    // Runs for every event, not only context switches.
    if(0 != (nPackets % (4 << 10)))
    {
        return;
    }

    int64_t nTickTrace = MP_TICK();
    g_pShared->nTickTrace.store(nTickTrace);
    int64_t nTickProgram = g_pShared->nTickProgram.load();
    float fTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
    float fTime = fabs(fTickToMs * (nTickTrace - nTickProgram));
    printf("\rRead %" PRId64 " CSwitch Packets, Skips %" PRId64 " Time difference %6.3fms ", nPackets, nSkips, fTime);
    fflush(stdout);
    if(fTime > MICROPROFILE_WIN32_CSWITCH_TIMEOUT * 1000)
    {
        g_pShared->nQuit.store(1);
    }
}
9502
9503
// Buffer callback for the collector: returns FALSE once the shared quit flag is set,
// TRUE otherwise.
ULONG WINAPI MicroProfileBufferCallbackCollector(PEVENT_TRACE_LOGFILEA Buffer)
{
    if(g_pShared->nQuit.load())
    {
        return FALSE;
    }
    return TRUE;
}
9507
9508
int main(int argc, char* argv[])
9509
{
9510
if(argc != 2)
9511
{
9512
return 1;
9513
}
9514
printf("using file '%s'\n", argv[1]);
9515
HANDLE hMemory = OpenFileMappingA(FILE_MAP_ALL_ACCESS, FALSE, argv[1]);
9516
if(hMemory == NULL)
9517
{
9518
return 1;
9519
}
9520
g_pShared = (MicroProfileWin32ContextSwitchShared*)MapViewOfFile(hMemory, FILE_MAP_ALL_ACCESS, 0, 0, sizeof(MicroProfileWin32ContextSwitchShared));
9521
9522
if(g_pShared != NULL)
9523
{
9524
MicroProfileStartWin32Trace(MicroProfileContextSwitchCallbackCollector, MicroProfileBufferCallbackCollector);
9525
UnmapViewOfFile(g_pShared);
9526
}
9527
9528
CloseHandle(hMemory);
9529
return 0;
9530
}
9531
#endif
9532
#include <shellapi.h>
9533
void* MicroProfileTraceThread(void* unused)
9534
{
9535
MicroProfileOnThreadCreate("ContextSwitchThread");
9536
MicroProfileContextSwitchShutdownTrace();
9537
if(!MicroProfileStartWin32Trace(MicroProfileContextSwitchCallback, MicroProfileBufferCallback))
9538
{
9539
MicroProfileContextSwitchShutdownTrace();
9540
// not running as admin. try and start other process.
9541
MicroProfileWin32ContextSwitchShared* pShared = 0;
9542
char Filename[512];
9543
time_t t = time(NULL);
9544
_snprintf_s(Filename, sizeof(Filename), "%s_%d", MICROPROFILE_FILEMAPPING, (int)t);
9545
9546
HANDLE hMemory = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, sizeof(MicroProfileWin32ContextSwitchShared), Filename);
9547
if(hMemory != NULL)
9548
{
9549
pShared = (MicroProfileWin32ContextSwitchShared*)MapViewOfFile(hMemory, FILE_MAP_ALL_ACCESS, 0, 0, sizeof(MicroProfileWin32ContextSwitchShared));
9550
if(pShared != NULL)
9551
{
9552
#ifdef _M_IX86
9553
#define CSWITCH_EXE "microprofile-win32-cswitch_x86.exe"
9554
#else
9555
#define CSWITCH_EXE "microprofile-win32-cswitch_x64.exe"
9556
#endif
9557
pShared->nTickProgram.store(MP_TICK());
9558
pShared->nTickTrace.store(MP_TICK());
9559
HINSTANCE Instance = ShellExecuteA(NULL, "runas", CSWITCH_EXE, Filename, "", SW_SHOWMINNOACTIVE);
9560
int64_t nInstance = (int64_t)Instance;
9561
if(nInstance >= 32)
9562
{
9563
int64_t nPut, nGet;
9564
while(!S.bContextSwitchStop)
9565
{
9566
nPut = pShared->nPut.load(std::memory_order_acquire);
9567
nGet = pShared->nGet.load(std::memory_order_relaxed);
9568
if(nPut == nGet)
9569
{
9570
Sleep(20);
9571
}
9572
else
9573
{
9574
for(int64_t i = nGet; i != nPut; i++)
9575
{
9576
MicroProfileContextSwitchPut(&pShared->Buffer[i % MicroProfileWin32ContextSwitchShared::BUFFER_SIZE]);
9577
}
9578
pShared->nGet.store(nPut, std::memory_order_release);
9579
pShared->nTickProgram.store(MP_TICK());
9580
}
9581
}
9582
pShared->nQuit.store(1);
9583
}
9584
}
9585
UnmapViewOfFile(pShared);
9586
}
9587
CloseHandle(hMemory);
9588
}
9589
S.bContextSwitchRunning = false;
9590
MicroProfileOnThreadExit();
9591
return 0;
9592
}
9593
9594
// Looks up nThreadId in the (possibly refreshed) thread snapshot. Returns a copy of the
// matching entry, or an entry built from just the thread id when there is no match.
MicroProfileThreadInfo MicroProfileGetThreadInfo(MicroProfileThreadIdType nThreadId)
{
    MicroProfileWin32UpdateThreadInfo();

    const uint32_t nCount = g_ThreadInfo.nNumThreads;
    uint32_t nIndex = 0;
    while(nIndex < nCount)
    {
        if(g_ThreadInfo.T[nIndex].tid == nThreadId)
        {
            return g_ThreadInfo.T[nIndex];
        }
        ++nIndex;
    }

    MicroProfileThreadInfo Fallback((uint32_t)nThreadId, 0, 0);
    return Fallback;
}
9608
// Rebuilds the thread snapshot unconditionally, points *pThreadArray at its thread
// table, and returns the number of valid entries.
uint32_t MicroProfileGetThreadInfoArray(MicroProfileThreadInfo** pThreadArray)
{
    MicroProfileWin32InitThreadInfo2();
    *pThreadArray = g_ThreadInfo.T;
    const uint32_t nCount = g_ThreadInfo.nNumThreads;
    return nCount;
}
9614
9615
#elif defined(__APPLE__)
9616
#include <sys/time.h>
9617
void* MicroProfileTraceThread(void* unused)
9618
{
9619
FILE* pFile = fopen("mypipe", "r");
9620
if(!pFile)
9621
{
9622
uprintf("CONTEXT SWITCH FAILED TO OPEN FILE: make sure to run dtrace script\n");
9623
S.bContextSwitchRunning = false;
9624
return 0;
9625
}
9626
uprintf("STARTING TRACE THREAD\n");
9627
char* pLine = 0;
9628
size_t cap = 0;
9629
size_t len = 0;
9630
struct timeval tv;
9631
9632
gettimeofday(&tv, NULL);
9633
9634
uint64_t nsSinceEpoch = ((uint64_t)(tv.tv_sec) * 1000000 + (uint64_t)(tv.tv_usec)) * 1000;
9635
uint64_t nTickEpoch = MP_TICK();
9636
uint32_t nLastThread[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS] = { 0 };
9637
mach_timebase_info_data_t sTimebaseInfo;
9638
mach_timebase_info(&sTimebaseInfo);
9639
S.bContextSwitchRunning = true;
9640
9641
uint64_t nProcessed = 0;
9642
uint64_t nProcessedLast = 0;
9643
while((len = getline(&pLine, &cap, pFile)) > 0 && !S.bContextSwitchStop)
9644
{
9645
nProcessed += len;
9646
if(nProcessed - nProcessedLast > 10 << 10)
9647
{
9648
nProcessedLast = nProcessed;
9649
uprintf("processed %llukb %llukb\n", (nProcessed - nProcessedLast) >> 10, nProcessed >> 10);
9650
}
9651
9652
char* pX = strchr(pLine, 'X');
9653
if(pX)
9654
{
9655
int cpu = atoi(pX + 1);
9656
char* pX2 = strchr(pX + 1, 'X');
9657
char* pX3 = strchr(pX2 + 1, 'X');
9658
int thread = atoi(pX2 + 1);
9659
char* lala;
9660
int64_t timestamp = strtoll(pX3 + 1, &lala, 10);
9661
MicroProfileContextSwitch Switch;
9662
9663
// convert to ticks.
9664
uint64_t nDeltaNsSinceEpoch = timestamp - nsSinceEpoch;
9665
uint64_t nDeltaTickSinceEpoch = sTimebaseInfo.numer * nDeltaNsSinceEpoch / sTimebaseInfo.denom;
9666
uint64_t nTicks = nDeltaTickSinceEpoch + nTickEpoch;
9667
if(cpu < MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS)
9668
{
9669
Switch.nThreadOut = nLastThread[cpu];
9670
Switch.nThreadIn = thread;
9671
nLastThread[cpu] = thread;
9672
Switch.nCpu = cpu;
9673
Switch.nTicks = nTicks;
9674
MicroProfileContextSwitchPut(&Switch);
9675
}
9676
}
9677
}
9678
uprintf("EXITING TRACE THREAD\n");
9679
S.bContextSwitchRunning = false;
9680
return 0;
9681
}
9682
9683
// No thread table on this platform: returns an entry built from the thread id and two zeros.
MicroProfileThreadInfo MicroProfileGetThreadInfo(MicroProfileThreadIdType nThreadId)
{
    const uint32_t nId = (uint32_t)nThreadId;
    return MicroProfileThreadInfo(nId, 0, 0);
}
9688
// No thread table on this platform: clears *pThreadArray and reports zero entries.
uint32_t MicroProfileGetThreadInfoArray(MicroProfileThreadInfo** pThreadArray)
{
    const uint32_t nCount = 0;
    *pThreadArray = 0;
    return nCount;
}
9693
9694
#endif
9695
#else
9696
9697
// Context switch tracing is compiled out: returns an entry built from the thread id and two zeros.
MicroProfileThreadInfo MicroProfileGetThreadInfo(MicroProfileThreadIdType nThreadId)
{
    const uint32_t nId = (uint32_t)nThreadId;
    return MicroProfileThreadInfo(nId, 0, 0);
}
9702
// Context switch tracing is compiled out: clears *pThreadArray and reports zero entries.
uint32_t MicroProfileGetThreadInfoArray(MicroProfileThreadInfo** pThreadArray)
{
    const uint32_t nCount = 0;
    *pThreadArray = 0;
    return nCount;
}
9707
// Context switch tracing is compiled out: there is nothing to stop.
void MicroProfileStopContextSwitchTrace() {}
9710
// Context switch tracing is compiled out: there is no thread to join.
void MicroProfileJoinContextSwitchTrace() {}
9713
// Context switch tracing is compiled out: there is nothing to start.
void MicroProfileStartContextSwitchTrace() {}
9716
9717
#endif
9718
9719
#if MICROPROFILE_GPU_TIMERS
9720
void MicroProfileGpuShutdownPlatform()
9721
{
9722
if(S.pGPU)
9723
{
9724
MicroProfileGpuShutdown();
9725
MP_FREE(S.pGPU);
9726
S.pGPU = nullptr;
9727
MicroProfileGpuInsertTimeStamp_Callback = nullptr;
9728
MicroProfileGpuGetTimeStamp_Callback = nullptr;
9729
MicroProfileTicksPerSecondGpu_Callback = nullptr;
9730
MicroProfileGetGpuTickReference_Callback = nullptr;
9731
MicroProfileGpuFlip_Callback = nullptr;
9732
MicroProfileGpuShutdown_Callback = nullptr;
9733
}
9734
}
9735
9736
void MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType eType,
9737
MicroProfileGpuTimerState* pGPU,
9738
MicroProfileGpuInsertTimeStamp_CB InsertTimeStamp,
9739
MicroProfileGpuGetTimeStamp_CB GetTimeStamp,
9740
MicroProfileTicksPerSecondGpu_CB TicksPerSecond,
9741
MicroProfileGetGpuTickReference_CB GetTickReference,
9742
MicroProfileGpuFlip_CB Flip,
9743
MicroProfileGpuShutdown_CB Shutdown)
9744
{
9745
9746
MP_ASSERT(S.pGPU == nullptr);
9747
pGPU->Type = eType;
9748
S.pGPU = pGPU;
9749
9750
MicroProfileGpuInsertTimeStamp_Callback = InsertTimeStamp;
9751
MicroProfileGpuGetTimeStamp_Callback = GetTimeStamp;
9752
MicroProfileTicksPerSecondGpu_Callback = TicksPerSecond;
9753
MicroProfileGetGpuTickReference_Callback = GetTickReference;
9754
MicroProfileGpuFlip_Callback = Flip;
9755
MicroProfileGpuShutdown_Callback = Shutdown;
9756
}
9757
#endif
9758
9759
#if MICROPROFILE_GPU_TIMERS_D3D11
9760
//:'######:::'########::'##::::'##::::'########:::'#######::'########:::::'##::::::'##:::
9761
//'##... ##:: ##.... ##: ##:::: ##:::: ##.... ##:'##.... ##: ##.... ##::'####::::'####:::
9762
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##:..::::: ##: ##:::: ##::.. ##::::.. ##:::
9763
// ##::'####: ########:: ##:::: ##:::: ##:::: ##::'#######:: ##:::: ##:::: ##:::::: ##:::
9764
// ##::: ##:: ##.....::: ##:::: ##:::: ##:::: ##::...... ##: ##:::: ##:::: ##:::::: ##:::
9765
// ##::: ##:: ##:::::::: ##:::: ##:::: ##:::: ##:'##:::: ##: ##:::: ##:::: ##:::::: ##:::
9766
//. ######::: ##::::::::. #######::::: ########::. #######:: ########:::'######::'######:
9767
//:......::::..::::::::::.......::::::........::::.......:::........::::......:::......::
9768
uint32_t MicroProfileGpuInsertTimeStampD3D11(void* pContext_)
9769
{
9770
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
9771
if(!pGPU)
9772
return 0;
9773
MicroProfileD3D11Frame& Frame = pGPU->m_QueryFrames[pGPU->m_nQueryFrame];
9774
uint32_t nStart = Frame.m_nQueryStart;
9775
if(Frame.m_nRateQueryStarted)
9776
{
9777
uint32_t nIndex = (uint32_t)-1;
9778
do
9779
{
9780
nIndex = Frame.m_nQueryCount.load();
9781
if(nIndex + 1 >= Frame.m_nQueryCountMax)
9782
{
9783
return (uint32_t)-1;
9784
}
9785
} while(!Frame.m_nQueryCount.compare_exchange_weak(nIndex, nIndex + 1));
9786
nIndex += nStart;
9787
uint32_t nQueryIndex = nIndex % MICROPROFILE_D3D11_MAX_QUERIES;
9788
9789
ID3D11Query* pQuery = (ID3D11Query*)pGPU->m_pQueries[nQueryIndex];
9790
ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)pContext_;
9791
pContext->End(pQuery);
9792
return nQueryIndex;
9793
}
9794
return (uint32_t)-1;
9795
}
9796
9797
uint64_t MicroProfileGpuGetTimeStampD3D11(uint32_t nIndex)
9798
{
9799
if(nIndex == (uint32_t)-1)
9800
{
9801
return (uint64_t)-1;
9802
}
9803
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
9804
if(!pGPU)
9805
return 0;
9806
9807
int64_t nResult = pGPU->m_nQueryResults[nIndex];
9808
MP_ASSERT(nResult != -1);
9809
return nResult;
9810
}
9811
9812
bool MicroProfileGpuGetDataD3D11(void* pQuery, void* pData, uint32_t nDataSize)
9813
{
9814
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
9815
if(!pGPU)
9816
return false;
9817
9818
HRESULT hr;
9819
do
9820
{
9821
hr = ((ID3D11DeviceContext*)pGPU->m_pImmediateContext)->GetData((ID3D11Query*)pQuery, pData, nDataSize, 0);
9822
} while(hr == S_FALSE);
9823
switch(hr)
9824
{
9825
case DXGI_ERROR_DEVICE_REMOVED:
9826
case DXGI_ERROR_INVALID_CALL:
9827
case E_INVALIDARG:
9828
MP_BREAK();
9829
return false;
9830
}
9831
return true;
9832
}
9833
9834
uint64_t MicroProfileTicksPerSecondGpuD3D11()
9835
{
9836
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
9837
if(!pGPU)
9838
return 1;
9839
9840
return pGPU->m_nQueryFrequency;
9841
}
9842
9843
uint32_t MicroProfileGpuFlipD3D11(void* pDeviceContext_)
9844
{
9845
if(!pDeviceContext_)
9846
{
9847
return (uint32_t)-1;
9848
}
9849
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
9850
if(!pGPU)
9851
return 0;
9852
9853
ID3D11DeviceContext* pDeviceContext = (ID3D11DeviceContext*)pDeviceContext_;
9854
uint32_t nFrameTimeStamp = MicroProfileGpuInsertTimeStamp(pDeviceContext);
9855
MicroProfileD3D11Frame& CurrentFrame = pGPU->m_QueryFrames[pGPU->m_nQueryFrame];
9856
ID3D11DeviceContext* pImmediateContext = (ID3D11DeviceContext*)pGPU->m_pImmediateContext;
9857
if(CurrentFrame.m_nRateQueryStarted)
9858
{
9859
pImmediateContext->End((ID3D11Query*)CurrentFrame.m_pRateQuery);
9860
}
9861
uint32_t nNextFrame = (pGPU->m_nQueryFrame + 1) % MICROPROFILE_GPU_FRAME_DELAY;
9862
pGPU->m_nQueryPut = (CurrentFrame.m_nQueryStart + CurrentFrame.m_nQueryCount) % MICROPROFILE_D3D11_MAX_QUERIES;
9863
MicroProfileD3D11Frame& OldFrame = pGPU->m_QueryFrames[nNextFrame];
9864
if(OldFrame.m_nRateQueryStarted)
9865
{
9866
struct RateQueryResult
9867
{
9868
uint64_t nFrequency;
9869
BOOL bDisjoint;
9870
};
9871
RateQueryResult Result;
9872
if(MicroProfileGpuGetDataD3D11(OldFrame.m_pRateQuery, &Result, sizeof(Result)))
9873
{
9874
if(pGPU->m_nQueryFrequency != (int64_t)Result.nFrequency)
9875
{
9876
if(pGPU->m_nQueryFrequency)
9877
{
9878
OutputDebugStringA("Query freq changing");
9879
}
9880
pGPU->m_nQueryFrequency = Result.nFrequency;
9881
}
9882
uint32_t nStart = OldFrame.m_nQueryStart;
9883
uint32_t nCount = OldFrame.m_nQueryCount;
9884
for(uint32_t i = 0; i < nCount; ++i)
9885
{
9886
uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D11_MAX_QUERIES;
9887
9888
if(!MicroProfileGpuGetDataD3D11(pGPU->m_pQueries[nIndex], &pGPU->m_nQueryResults[nIndex], sizeof(uint64_t)))
9889
{
9890
pGPU->m_nQueryResults[nIndex] = -1;
9891
}
9892
}
9893
}
9894
else
9895
{
9896
uint32_t nStart = OldFrame.m_nQueryStart;
9897
uint32_t nCount = OldFrame.m_nQueryCount;
9898
9899
for(uint32_t i = 0; i < nCount; ++i)
9900
{
9901
uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D11_MAX_QUERIES;
9902
pGPU->m_nQueryResults[nIndex] = -1;
9903
}
9904
}
9905
pGPU->m_nQueryGet = (OldFrame.m_nQueryStart + OldFrame.m_nQueryCount) % MICROPROFILE_D3D11_MAX_QUERIES;
9906
}
9907
9908
pGPU->m_nQueryFrame = nNextFrame;
9909
MicroProfileD3D11Frame& NextFrame = pGPU->m_QueryFrames[nNextFrame];
9910
pImmediateContext->Begin((ID3D11Query*)NextFrame.m_pRateQuery);
9911
NextFrame.m_nQueryStart = pGPU->m_nQueryPut;
9912
NextFrame.m_nQueryCount = 0;
9913
if(pGPU->m_nQueryPut >= pGPU->m_nQueryGet)
9914
{
9915
NextFrame.m_nQueryCountMax = (MICROPROFILE_D3D11_MAX_QUERIES - pGPU->m_nQueryPut) + pGPU->m_nQueryGet;
9916
}
9917
else
9918
{
9919
NextFrame.m_nQueryCountMax = pGPU->m_nQueryGet - pGPU->m_nQueryPut - 1;
9920
}
9921
if(NextFrame.m_nQueryCountMax)
9922
NextFrame.m_nQueryCountMax -= 1;
9923
NextFrame.m_nRateQueryStarted = 1;
9924
return nFrameTimeStamp;
9925
}
9926
9927
void MicroProfileGpuInitD3D11(void* pDevice_, void* pImmediateContext)
9928
{
9929
ID3D11Device* pDevice = (ID3D11Device*)pDevice_;
9930
9931
MicroProfileGpuTimerStateD3D11* pGPU = MP_ALLOC_OBJECT(MicroProfileGpuTimerStateD3D11);
9932
9933
MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType_D3D11,
9934
pGPU,
9935
MicroProfileGpuInsertTimeStampD3D11,
9936
MicroProfileGpuGetTimeStampD3D11,
9937
MicroProfileTicksPerSecondGpuD3D11,
9938
MicroProfileGetGpuTickReferenceD3D11,
9939
MicroProfileGpuFlipD3D11,
9940
MicroProfileGpuShutdownD3D11);
9941
9942
pGPU->m_pImmediateContext = pImmediateContext;
9943
9944
D3D11_QUERY_DESC Desc;
9945
Desc.MiscFlags = 0;
9946
Desc.Query = D3D11_QUERY_TIMESTAMP;
9947
for(uint32_t i = 0; i < MICROPROFILE_D3D11_MAX_QUERIES; ++i)
9948
{
9949
HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&pGPU->m_pQueries[i]);
9950
MP_ASSERT(hr == S_OK);
9951
pGPU->m_nQueryResults[i] = -1;
9952
}
9953
HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&pGPU->pSyncQuery);
9954
MP_ASSERT(hr == S_OK);
9955
9956
pGPU->m_nQueryPut = 0;
9957
pGPU->m_nQueryGet = 0;
9958
pGPU->m_nQueryFrame = 0;
9959
pGPU->m_nQueryFrequency = 0;
9960
Desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
9961
for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
9962
{
9963
pGPU->m_QueryFrames[i].m_nQueryStart = 0;
9964
pGPU->m_QueryFrames[i].m_nQueryCount = 0;
9965
pGPU->m_QueryFrames[i].m_nRateQueryStarted = 0;
9966
hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&pGPU->m_QueryFrames[i].m_pRateQuery);
9967
MP_ASSERT(hr == S_OK);
9968
}
9969
}
9970
9971
void MicroProfileGpuShutdownD3D11()
9972
{
9973
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
9974
if(!pGPU)
9975
return;
9976
9977
for(uint32_t i = 0; i < MICROPROFILE_D3D11_MAX_QUERIES; ++i)
9978
{
9979
if(pGPU->m_pQueries[i])
9980
{
9981
ID3D11Query* pQuery = (ID3D11Query*)pGPU->m_pQueries[i];
9982
pQuery->Release();
9983
pGPU->m_pQueries[i] = 0;
9984
}
9985
}
9986
for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
9987
{
9988
if(pGPU->m_QueryFrames[i].m_pRateQuery)
9989
{
9990
ID3D11Query* pQuery = (ID3D11Query*)pGPU->m_QueryFrames[i].m_pRateQuery;
9991
pQuery->Release();
9992
pGPU->m_QueryFrames[i].m_pRateQuery = 0;
9993
}
9994
}
9995
if(pGPU->pSyncQuery)
9996
{
9997
ID3D11Query* pSyncQuery = (ID3D11Query*)pGPU->pSyncQuery;
9998
pSyncQuery->Release();
9999
pGPU->pSyncQuery = 0;
10000
}
10001
}
10002
10003
int MicroProfileGetGpuTickReferenceD3D11(int64_t* pOutCPU, int64_t* pOutGpu)
10004
{
10005
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
10006
if(!pGPU)
10007
return 0;
10008
{
10009
MicroProfileD3D11Frame& Frame = pGPU->m_QueryFrames[pGPU->m_nQueryFrame];
10010
if(Frame.m_nRateQueryStarted)
10011
{
10012
ID3D11Query* pSyncQuery = (ID3D11Query*)pGPU->pSyncQuery;
10013
ID3D11DeviceContext* pImmediateContext = (ID3D11DeviceContext*)pGPU->m_pImmediateContext;
10014
pImmediateContext->End(pSyncQuery);
10015
10016
HRESULT hr;
10017
do
10018
{
10019
hr = pImmediateContext->GetData(pSyncQuery, pOutGpu, sizeof(*pOutGpu), 0);
10020
} while(hr == S_FALSE);
10021
*pOutCPU = MP_TICK();
10022
switch(hr)
10023
{
10024
case DXGI_ERROR_DEVICE_REMOVED:
10025
case DXGI_ERROR_INVALID_CALL:
10026
case E_INVALIDARG:
10027
MP_BREAK();
10028
return false;
10029
}
10030
MP_ASSERT(hr == S_OK);
10031
return 1;
10032
}
10033
}
10034
return 0;
10035
}
10036
// Returns the global GPU timer state as the D3D11 state, or nullptr when no GPU
// state is registered or it belongs to a different backend.
MicroProfileGpuTimerStateD3D11* MicroProfileGetGpuTimerStateD3D11()
{
	if(!S.pGPU || S.pGPU->Type != MicroProfileGpuTimerStateType_D3D11)
	{
		return nullptr;
	}
	return (MicroProfileGpuTimerStateD3D11*)S.pGPU;
}
10042
10043
#endif
10044
10045
#if MICROPROFILE_GPU_TIMERS_D3D12
10046
//:'######:::'########::'##::::'##::::'########:::'#######::'########:::::'##::::'#######::
10047
//'##... ##:: ##.... ##: ##:::: ##:::: ##.... ##:'##.... ##: ##.... ##::'####:::'##.... ##:
10048
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##:..::::: ##: ##:::: ##::.. ##:::..::::: ##:
10049
// ##::'####: ########:: ##:::: ##:::: ##:::: ##::'#######:: ##:::: ##:::: ##::::'#######::
10050
// ##::: ##:: ##.....::: ##:::: ##:::: ##:::: ##::...... ##: ##:::: ##:::: ##:::'##::::::::
10051
// ##::: ##:: ##:::::::: ##:::: ##:::: ##:::: ##:'##:::: ##: ##:::: ##:::: ##::: ##::::::::
10052
//. ######::: ##::::::::. #######::::: ########::. #######:: ########:::'######: #########:
10053
//:......::::..::::::::::.......::::::........::::.......:::........::::......::.........::
10054
#include <d3d12.h>
10055
uint32_t MicroProfileGpuInsertTimeStampD3D12(void* pContext)
10056
{
10057
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10058
if(!pGPU || !pContext)
10059
return 0;
10060
10061
ID3D12GraphicsCommandList* pCommandList = (ID3D12GraphicsCommandList*)pContext;
10062
10063
bool IsCopy = D3D12_COMMAND_LIST_TYPE_COPY == pCommandList->GetType();
10064
uint32_t nNode = pGPU->nCurrentNode;
10065
uint32_t nFrame = pGPU->nFrame;
10066
10067
ID3D12QueryHeap* pHeap = IsCopy ? pGPU->NodeState[nNode].pCopyQueueHeap : pGPU->NodeState[nNode].pHeap;
10068
10069
uint32_t nQueryIndex = IsCopy ? ((pGPU->nFrameCountCopyQueueTimeStamps.fetch_add(1) + pGPU->nFrameStartCopyQueueTimeStamps) % MICROPROFILE_D3D12_MAX_QUERIES)
10070
: ((pGPU->nFrameCountTimeStamps.fetch_add(1) + pGPU->nFrameStartTimeStamps) % MICROPROFILE_D3D12_MAX_QUERIES);
10071
10072
pCommandList->EndQuery(pHeap, D3D12_QUERY_TYPE_TIMESTAMP, nQueryIndex);
10073
MP_ASSERT(nQueryIndex <= 0xffff);
10074
uint32_t res = (IsCopy ? 0x80000000 : 0) | ((nFrame << 16) & 0x7fff0000) | (nQueryIndex);
10075
return res;
10076
}
10077
10078
void MicroProfileGpuFetchRange(uint32_t nBegin, int32_t nCount, uint64_t nFrame, int64_t nTimestampOffset)
10079
{
10080
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10081
if(!pGPU || nCount <= 0)
10082
return;
10083
void* pData = 0;
10084
// uprintf("fetch [%d-%d]\n", nBegin, nBegin + nCount);
10085
D3D12_RANGE Range = { sizeof(uint64_t) * nBegin, sizeof(uint64_t) * (nBegin + nCount) };
10086
pGPU->pBuffer->Map(0, &Range, &pData);
10087
memcpy(&pGPU->nResults[nBegin], nBegin + (uint64_t*)pData, nCount * sizeof(uint64_t));
10088
for(int i = 0; i < nCount; ++i)
10089
{
10090
pGPU->nQueryFrames[i + nBegin] = nFrame;
10091
pGPU->nResults[i + nBegin] -= nTimestampOffset;
10092
}
10093
pGPU->pBuffer->Unmap(0, 0);
10094
}
10095
void MicroProfileGpuFetchRangeCopy(uint32_t nBegin, int32_t nCount, uint64_t nFrame, int64_t nTimestampOffset)
10096
{
10097
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10098
if(!pGPU || nCount <= 0)
10099
return;
10100
void* pData = 0;
10101
D3D12_RANGE Range = { sizeof(uint64_t) * nBegin, sizeof(uint64_t) * (nBegin + nCount) };
10102
pGPU->pBufferCopy->Map(0, &Range, &pData);
10103
memcpy(&pGPU->nResultsCopy[nBegin], nBegin + (uint64_t*)pData, nCount * sizeof(uint64_t));
10104
for(int i = 0; i < nCount; ++i)
10105
{
10106
pGPU->nQueryFramesCopy[i + nBegin] = nFrame;
10107
pGPU->nResultsCopy[i + nBegin] -= nTimestampOffset;
10108
}
10109
pGPU->pBufferCopy->Unmap(0, 0);
10110
}
10111
void MicroProfileGpuWaitFenceD3D12(uint32_t nNode, uint64_t nFence)
10112
{
10113
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10114
if(!pGPU)
10115
return;
10116
10117
auto GetFence = [&]() -> uint64_t
10118
{
10119
uint64_t f0 = pGPU->NodeState[nNode].pFence->GetCompletedValue();
10120
uint64_t f1 = pGPU->NodeState[nNode].pFenceCopy->GetCompletedValue();
10121
return MicroProfileMin(f0, f1);
10122
};
10123
uint64_t nCompletedFrame = GetFence();
10124
// while(nCompletedFrame < nPending)
10125
// while(0 < nPending - nCompletedFrame)
10126
while(0 < (int64_t)(nFence - nCompletedFrame))
10127
{
10128
MICROPROFILE_SCOPEI("Microprofile", "gpu-wait", MP_GREEN4);
10129
Sleep(20); // todo: use event.
10130
nCompletedFrame = GetFence();
10131
if((uint64_t)-1 == nCompletedFrame) // likely device removed.
10132
return;
10133
}
10134
}
10135
10136
void MicroProfileGpuFetchResultsD3D12(uint64_t nFrame)
10137
{
10138
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10139
if(!pGPU)
10140
return;
10141
uint64_t nPending = pGPU->nPendingFrame;
10142
10143
// while(nPending <= nFrame)
10144
// while(0 <= nFrame - nPending)
10145
while(0 <= (int64_t)(nFrame - nPending))
10146
{
10147
uint32_t nInternal = nPending % MICROPROFILE_D3D_INTERNAL_DELAY;
10148
uint32_t nNode = pGPU->Frames[nInternal].nNode;
10149
MicroProfileGpuWaitFenceD3D12(nNode, nPending);
10150
int64_t nTimestampOffset = 0;
10151
if(nNode != 0)
10152
{
10153
// Adjust timestamp queries from GPU x to be in GPU 0's frame of reference
10154
HRESULT hr;
10155
int64_t nCPU0, nGPU0;
10156
hr = pGPU->NodeState[0].pCommandQueue->GetClockCalibration((uint64_t*)&nGPU0, (uint64_t*)&nCPU0);
10157
MP_ASSERT(hr == S_OK);
10158
int64_t nCPUx, nGPUx;
10159
hr = pGPU->NodeState[nNode].pCommandQueue->GetClockCalibration((uint64_t*)&nGPUx, (uint64_t*)&nCPUx);
10160
MP_ASSERT(hr == S_OK);
10161
int64_t nFreqCPU = MicroProfileTicksPerSecondCpu();
10162
int64_t nElapsedCPU = nCPUx - nCPU0;
10163
int64_t nElapsedGPU = pGPU->nFrequency * nElapsedCPU / nFreqCPU;
10164
nTimestampOffset = nGPUx - nGPU0 - nElapsedGPU;
10165
}
10166
10167
{
10168
uint32_t nTimeStampBegin = pGPU->Frames[nInternal].nTimeStampBegin;
10169
uint32_t nTimeStampCount = pGPU->Frames[nInternal].nTimeStampCount;
10170
MicroProfileGpuFetchRange(
10171
nTimeStampBegin, (nTimeStampBegin + nTimeStampCount) > MICROPROFILE_D3D12_MAX_QUERIES ? MICROPROFILE_D3D12_MAX_QUERIES - nTimeStampBegin : nTimeStampCount, nPending, nTimestampOffset);
10172
MicroProfileGpuFetchRange(0, (nTimeStampBegin + nTimeStampCount) - MICROPROFILE_D3D12_MAX_QUERIES, nPending, nTimestampOffset);
10173
}
10174
{
10175
uint32_t nTimeStampBegin = pGPU->Frames[nInternal].nTimeStampBeginCopyQueue;
10176
uint32_t nTimeStampCount = pGPU->Frames[nInternal].nTimeStampCountCopyQueue;
10177
MicroProfileGpuFetchRangeCopy(
10178
nTimeStampBegin, (nTimeStampBegin + nTimeStampCount) > MICROPROFILE_D3D12_MAX_QUERIES ? MICROPROFILE_D3D12_MAX_QUERIES - nTimeStampBegin : nTimeStampCount, nPending, nTimestampOffset);
10179
MicroProfileGpuFetchRangeCopy(0, (nTimeStampBegin + nTimeStampCount) - MICROPROFILE_D3D12_MAX_QUERIES, nPending, nTimestampOffset);
10180
}
10181
nPending = ++pGPU->nPendingFrame;
10182
MP_ASSERT(pGPU->nFrame > nPending);
10183
}
10184
}
10185
10186
uint64_t MicroProfileGpuGetTimeStampD3D12(uint32_t nIndex)
10187
{
10188
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10189
if(!pGPU)
10190
return 0;
10191
10192
uint32_t nFrame = nIndex >> 16;
10193
bool IsCopy = (nFrame & 0x8000) != 0;
10194
nFrame &= 0x7fff;
10195
uint32_t nQueryIndex = nIndex & 0xffff;
10196
uint32_t lala = IsCopy ? pGPU->nQueryFramesCopy[nQueryIndex] : pGPU->nQueryFrames[nQueryIndex];
10197
// uprintf("read TS [%d <- %lld]\n", nQueryIndex, pGPU->nResults[nQueryIndex]);
10198
MP_ASSERT(nIndex == 0 || (0x7fff & lala) == nFrame);
10199
uint64_t r = IsCopy ? pGPU->nResultsCopy[nQueryIndex] : pGPU->nResults[nQueryIndex];
10200
if(r == 0x7fffffffffffffff)
10201
{
10202
MP_BREAK();
10203
}
10204
return r;
10205
}
10206
10207
uint64_t MicroProfileTicksPerSecondGpuD3D12()
10208
{
10209
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10210
if(!pGPU)
10211
return 1;
10212
return pGPU->nFrequency;
10213
}
10214
10215
uint32_t MicroProfileGpuFlipD3D12(void* pContext)
10216
{
10217
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10218
if(!pGPU)
10219
return 0;
10220
uint32_t nNode = pGPU->nCurrentNode;
10221
uint32_t nFrameIndex = pGPU->nFrame % MICROPROFILE_D3D_INTERNAL_DELAY;
10222
uint32_t nCount = 0, nStart = 0;
10223
uint32_t nCountCopyQueue = 0, nStartCopyQueue = 0;
10224
10225
ID3D12CommandAllocator* pCommandAllocator = pGPU->Frames[nFrameIndex].pCommandAllocator;
10226
ID3D12CommandAllocator* pCommandAllocatorCopy = pGPU->Frames[nFrameIndex].pCommandAllocatorCopy;
10227
pCommandAllocator->Reset();
10228
pCommandAllocatorCopy->Reset();
10229
ID3D12GraphicsCommandList* pCommandList = pGPU->Frames[nFrameIndex].pCommandList[nNode];
10230
10231
pCommandList->Reset(pCommandAllocator, nullptr);
10232
10233
ID3D12GraphicsCommandList* pCommandListCopy = nullptr;
10234
10235
uint32_t nFrameTimeStamp = MicroProfileGpuInsertTimeStamp(pCommandList);
10236
10237
{
10238
nCount = pGPU->nFrameCountTimeStamps.exchange(0);
10239
nStart = pGPU->nFrameStartTimeStamps;
10240
pGPU->nFrameStartTimeStamps = (pGPU->nFrameStartTimeStamps + nCount) % MICROPROFILE_D3D12_MAX_QUERIES;
10241
uint32_t nEnd = MicroProfileMin(nStart + nCount, (uint32_t)MICROPROFILE_D3D12_MAX_QUERIES);
10242
MP_ASSERT(nStart != nEnd);
10243
uint32_t nSize = nEnd - nStart;
10244
pCommandList->ResolveQueryData(pGPU->NodeState[nNode].pHeap, D3D12_QUERY_TYPE_TIMESTAMP, nStart, nEnd - nStart, pGPU->pBuffer, nStart * sizeof(int64_t));
10245
if(nStart + nCount > MICROPROFILE_D3D12_MAX_QUERIES)
10246
{
10247
pCommandList->ResolveQueryData(pGPU->NodeState[nNode].pHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, nEnd + nStart - MICROPROFILE_D3D12_MAX_QUERIES, pGPU->pBuffer, 0);
10248
}
10249
pCommandList->Close();
10250
}
10251
10252
{
10253
pCommandListCopy = pGPU->Frames[nFrameIndex].pCommandListCopy[nNode];
10254
pCommandListCopy->Reset(pCommandAllocatorCopy, nullptr);
10255
10256
nCountCopyQueue = pGPU->nFrameCountCopyQueueTimeStamps.exchange(0);
10257
nStartCopyQueue = pGPU->nFrameStartCopyQueueTimeStamps;
10258
pGPU->nFrameStartCopyQueueTimeStamps = (nStartCopyQueue + nCountCopyQueue) % MICROPROFILE_D3D12_MAX_QUERIES;
10259
uint32_t nEnd = MicroProfileMin(nStartCopyQueue + nCountCopyQueue, (uint32_t)MICROPROFILE_D3D12_MAX_QUERIES);
10260
if(nStartCopyQueue != nEnd)
10261
{
10262
uint32_t nSize = nEnd - nStartCopyQueue;
10263
pCommandListCopy->ResolveQueryData(
10264
pGPU->NodeState[nNode].pCopyQueueHeap, D3D12_QUERY_TYPE_TIMESTAMP, nStartCopyQueue, nEnd - nStartCopyQueue, pGPU->pBufferCopy, nStartCopyQueue * sizeof(int64_t));
10265
if(nStartCopyQueue + nCountCopyQueue > MICROPROFILE_D3D12_MAX_QUERIES)
10266
{
10267
pCommandListCopy->ResolveQueryData(pGPU->NodeState[nNode].pCopyQueueHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, nEnd + nStartCopyQueue - MICROPROFILE_D3D12_MAX_QUERIES, pGPU->pBufferCopy, 0);
10268
}
10269
}
10270
pCommandListCopy->Close();
10271
}
10272
10273
if(pCommandList)
10274
{
10275
ID3D12CommandList* pList = pCommandList;
10276
pGPU->NodeState[nNode].pCommandQueue->ExecuteCommandLists(1, &pList);
10277
}
10278
if(pCommandListCopy)
10279
{
10280
ID3D12CommandList* pList = pCommandListCopy;
10281
pGPU->NodeState[nNode].pCommandQueueCopy->ExecuteCommandLists(1, &pList);
10282
}
10283
pGPU->NodeState[nNode].pCommandQueue->Signal(pGPU->NodeState[nNode].pFence, pGPU->nFrame);
10284
pGPU->NodeState[nNode].pCommandQueueCopy->Signal(pGPU->NodeState[nNode].pFenceCopy, pGPU->nFrame);
10285
pGPU->Frames[nFrameIndex].nTimeStampBegin = nStart;
10286
pGPU->Frames[nFrameIndex].nTimeStampCount = nCount;
10287
pGPU->Frames[nFrameIndex].nTimeStampBeginCopyQueue = nStartCopyQueue;
10288
pGPU->Frames[nFrameIndex].nTimeStampCountCopyQueue = nCountCopyQueue;
10289
10290
pGPU->Frames[nFrameIndex].nNode = nNode;
10291
10292
pGPU->nFrame++;
10293
// fetch from earlier frames
10294
10295
MicroProfileGpuFetchResultsD3D12(pGPU->nFrame - MICROPROFILE_GPU_FRAME_DELAY);
10296
return nFrameTimeStamp;
10297
}
10298
10299
void MicroProfileGpuInitD3D12(void* pDevice_, uint32_t nNodeCount, void** pCommandQueues_, void** pCommandQueuesCopy_)
10300
{
10301
MicroProfileGpuTimerStateD3D12* pGPU = MP_ALLOC_OBJECT(MicroProfileGpuTimerStateD3D12);
10302
memset(pGPU, 0, sizeof(MicroProfileGpuTimerStateD3D12));
10303
10304
MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType_D3D12,
10305
pGPU,
10306
MicroProfileGpuInsertTimeStampD3D12,
10307
MicroProfileGpuGetTimeStampD3D12,
10308
MicroProfileTicksPerSecondGpuD3D12,
10309
MicroProfileGetGpuTickReferenceD3D12,
10310
MicroProfileGpuFlipD3D12,
10311
MicroProfileGpuShutdownD3D12);
10312
10313
ID3D12Device* pDevice = (ID3D12Device*)pDevice_;
10314
10315
pGPU->pDevice = pDevice;
10316
pGPU->nNodeCount = nNodeCount;
10317
MP_ASSERT(pGPU->nNodeCount <= MICROPROFILE_D3D_MAX_NODE_COUNT);
10318
10319
for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
10320
{
10321
pGPU->NodeState[nNode].pCommandQueue = (ID3D12CommandQueue*)pCommandQueues_[nNode];
10322
pGPU->NodeState[nNode].pCommandQueueCopy = (ID3D12CommandQueue*)pCommandQueuesCopy_[nNode];
10323
if(nNode == 0)
10324
{
10325
pGPU->NodeState[nNode].pCommandQueue->GetTimestampFrequency((uint64_t*)&(pGPU->nFrequency));
10326
MP_ASSERT(pGPU->nFrequency);
10327
}
10328
else
10329
{
10330
// Don't support GPUs with different timer frequencies for now
10331
int64_t nFrequency;
10332
pGPU->NodeState[nNode].pCommandQueue->GetTimestampFrequency((uint64_t*)&nFrequency);
10333
MP_ASSERT(nFrequency == pGPU->nFrequency);
10334
}
10335
10336
D3D12_QUERY_HEAP_DESC QHDesc;
10337
QHDesc.Count = MICROPROFILE_D3D12_MAX_QUERIES;
10338
QHDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
10339
QHDesc.NodeMask = MP_NODE_MASK_ONE(nNode);
10340
HRESULT hr = pDevice->CreateQueryHeap(&QHDesc, IID_PPV_ARGS(&pGPU->NodeState[nNode].pHeap));
10341
MP_ASSERT(hr == S_OK);
10342
QHDesc.Type = D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP;
10343
hr = pDevice->CreateQueryHeap(&QHDesc, IID_PPV_ARGS(&pGPU->NodeState[nNode].pCopyQueueHeap));
10344
MP_ASSERT(hr == S_OK);
10345
10346
pDevice->CreateFence(pGPU->nPendingFrame, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pGPU->NodeState[nNode].pFence));
10347
pDevice->CreateFence(pGPU->nPendingFrame, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pGPU->NodeState[nNode].pFenceCopy));
10348
}
10349
10350
HRESULT hr;
10351
D3D12_HEAP_PROPERTIES HeapProperties;
10352
HeapProperties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
10353
HeapProperties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
10354
HeapProperties.CreationNodeMask = 0;
10355
HeapProperties.VisibleNodeMask = MP_NODE_MASK_ALL(pGPU->nNodeCount);
10356
HeapProperties.Type = D3D12_HEAP_TYPE_READBACK;
10357
10358
const size_t nResourceSize = MICROPROFILE_D3D12_MAX_QUERIES * 8;
10359
10360
D3D12_RESOURCE_DESC ResourceDesc;
10361
ResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
10362
ResourceDesc.Alignment = 0;
10363
ResourceDesc.Width = nResourceSize;
10364
ResourceDesc.Height = 1;
10365
ResourceDesc.DepthOrArraySize = 1;
10366
ResourceDesc.MipLevels = 1;
10367
ResourceDesc.Format = DXGI_FORMAT_UNKNOWN;
10368
ResourceDesc.SampleDesc.Count = 1;
10369
ResourceDesc.SampleDesc.Quality = 0;
10370
ResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
10371
ResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
10372
10373
hr = pDevice->CreateCommittedResource(&HeapProperties, D3D12_HEAP_FLAG_NONE, &ResourceDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&pGPU->pBuffer));
10374
MP_ASSERT(hr == S_OK);
10375
hr = pDevice->CreateCommittedResource(&HeapProperties, D3D12_HEAP_FLAG_NONE, &ResourceDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&pGPU->pBufferCopy));
10376
MP_ASSERT(hr == S_OK);
10377
10378
pGPU->nFrame = 0;
10379
pGPU->nPendingFrame = 0;
10380
10381
for(MicroProfileFrameD3D12& Frame : pGPU->Frames)
10382
{
10383
hr = pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&Frame.pCommandAllocator));
10384
MP_ASSERT(hr == S_OK);
10385
hr = pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&Frame.pCommandAllocatorCopy));
10386
MP_ASSERT(hr == S_OK);
10387
for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
10388
{
10389
hr = pDevice->CreateCommandList(MP_NODE_MASK_ONE(nNode), D3D12_COMMAND_LIST_TYPE_DIRECT, Frame.pCommandAllocator, nullptr, IID_PPV_ARGS(&Frame.pCommandList[nNode]));
10390
MP_ASSERT(hr == S_OK);
10391
hr = Frame.pCommandList[nNode]->Close();
10392
MP_ASSERT(hr == S_OK);
10393
hr = pDevice->CreateCommandList(MP_NODE_MASK_ONE(nNode), D3D12_COMMAND_LIST_TYPE_COPY, Frame.pCommandAllocatorCopy, nullptr, IID_PPV_ARGS(&Frame.pCommandListCopy[nNode]));
10394
MP_ASSERT(hr == S_OK);
10395
hr = Frame.pCommandListCopy[nNode]->Close();
10396
MP_ASSERT(hr == S_OK);
10397
}
10398
}
10399
}
10400
10401
void MicroProfileGpuShutdownD3D12()
10402
{
10403
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10404
if(!pGPU)
10405
return;
10406
for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
10407
{
10408
MicroProfileGpuWaitFenceD3D12(nNode, pGPU->nFrame - 1);
10409
}
10410
for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
10411
{
10412
pGPU->NodeState[nNode].pHeap->Release();
10413
pGPU->NodeState[nNode].pCopyQueueHeap->Release();
10414
pGPU->NodeState[nNode].pFence->Release();
10415
pGPU->NodeState[nNode].pFenceCopy->Release();
10416
}
10417
pGPU->pBuffer->Release();
10418
pGPU->pBufferCopy->Release();
10419
for(MicroProfileFrameD3D12& Frame : pGPU->Frames)
10420
{
10421
Frame.pCommandAllocator->Release();
10422
Frame.pCommandAllocatorCopy->Release();
10423
for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
10424
{
10425
Frame.pCommandList[nNode]->Release();
10426
Frame.pCommandListCopy[nNode]->Release();
10427
}
10428
}
10429
}
10430
void MicroProfileSetCurrentNodeD3D12(uint32_t nNode)
10431
{
10432
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10433
pGPU->nCurrentNode = nNode;
10434
}
10435
10436
int MicroProfileGetGpuTickReferenceD3D12(int64_t* pOutCPU, int64_t* pOutGpu)
10437
{
10438
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
10439
if(!pGPU)
10440
{
10441
*pOutCPU = 1;
10442
*pOutGpu = 1;
10443
return 1;
10444
}
10445
10446
HRESULT hr = pGPU->NodeState[0].pCommandQueue->GetClockCalibration((uint64_t*)pOutGpu, (uint64_t*)pOutCPU);
10447
MP_ASSERT(hr == S_OK);
10448
return 1;
10449
}
10450
10451
// Returns the global GPU timer state as the D3D12 state, or nullptr when no GPU
// state is registered or it belongs to a different backend.
MicroProfileGpuTimerStateD3D12* MicroProfileGetGpuTimerStateD3D12()
{
	if(!S.pGPU || S.pGPU->Type != MicroProfileGpuTimerStateType_D3D12)
	{
		return nullptr;
	}
	return (MicroProfileGpuTimerStateD3D12*)S.pGPU;
}
10457
10458
#endif
10459
10460
#if MICROPROFILE_GPU_TIMERS_VULKAN
10461
10462
//:'######:::'########::'##::::'##::::'##::::'##:'##::::'##:'##:::::::'##:::'##::::'###::::'##::: ##:
10463
//'##... ##:: ##.... ##: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: ##::'##::::'## ##::: ###:: ##:
10464
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: ##:'##::::'##:. ##:: ####: ##:
10465
// ##::'####: ########:: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: #####::::'##:::. ##: ## ## ##:
10466
// ##::: ##:: ##.....::: ##:::: ##::::. ##:: ##:: ##:::: ##: ##::::::: ##. ##::: #########: ##. ####:
10467
// ##::: ##:: ##:::::::: ##:::: ##:::::. ## ##::: ##:::: ##: ##::::::: ##:. ##:: ##.... ##: ##:. ###:
10468
//. ######::: ##::::::::. #######:::::::. ###::::. #######:: ########: ##::. ##: ##:::: ##: ##::. ##:
10469
//:......::::..::::::::::.......:::::::::...::::::.......:::........::..::::..::..:::::..::..::::..::
10470
10471
#ifndef MICROPROFILE_VULKAN_MAX_QUERIES
10472
#define MICROPROFILE_VULKAN_MAX_QUERIES (32 << 10)
10473
#endif
10474
10475
#define MICROPROFILE_VULKAN_MAX_NODE_COUNT 4
10476
#define MICROPROFILE_VULKAN_INTERNAL_DELAY 8
10477
10478
#include <vulkan/vulkan.h>
10479
struct MicroProfileGpuFrameVulkan
10480
{
10481
uint32_t nBegin;
10482
uint32_t nCount;
10483
uint32_t nNode;
10484
VkCommandBuffer CommandBuffer[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
10485
VkFence Fences[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
10486
};
10487
struct MicroProfileGpuTimerStateVulkan : public MicroProfileGpuTimerState
10488
{
10489
VkDevice Devices[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
10490
VkPhysicalDevice PhysicalDevices[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
10491
VkQueue Queues[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
10492
VkQueryPool QueryPool[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
10493
VkCommandPool CommandPool[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
10494
10495
uint32_t nNodeCount;
10496
uint32_t nCurrentNode;
10497
uint64_t nFrame;
10498
uint64_t nPendingFrame;
10499
uint32_t nFrameStart;
10500
std::atomic<uint32_t> nFrameCount;
10501
int64_t nFrequency;
10502
10503
uint16_t nQueryFrames[MICROPROFILE_VULKAN_MAX_QUERIES];
10504
int64_t nResults[MICROPROFILE_VULKAN_MAX_QUERIES];
10505
10506
MicroProfileGpuFrameVulkan Frames[MICROPROFILE_VULKAN_INTERNAL_DELAY];
10507
};
10508
10509
// Returns the global GPU timer state as the Vulkan state, or nullptr when no
// GPU state is registered or it belongs to a different backend.
MicroProfileGpuTimerStateVulkan* MicroProfileGetGpuTimerStateVulkan()
{
	if(!S.pGPU || S.pGPU->Type != MicroProfileGpuTimerStateType_Vulkan)
	{
		return nullptr;
	}
	return (MicroProfileGpuTimerStateVulkan*)S.pGPU;
}
10515
10516
uint32_t MicroProfileGpuInsertTimeStampVulkan(void* pContext)
10517
{
10518
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
10519
if(!pGPU)
10520
return 0;
10521
VkCommandBuffer CB = (VkCommandBuffer)pContext;
10522
uint32_t nNode = pGPU->nCurrentNode;
10523
uint32_t nFrame = pGPU->nFrame;
10524
uint32_t nQueryIndex = (pGPU->nFrameCount.fetch_add(1) + pGPU->nFrameStart) % MICROPROFILE_VULKAN_MAX_QUERIES;
10525
vkCmdWriteTimestamp(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, pGPU->QueryPool[nNode], nQueryIndex);
10526
MP_ASSERT(nQueryIndex <= 0xffff);
10527
// uprintf("insert timestamp %d :: %d ... ctx %p\n", nQueryIndex, nFrame, pContext);
10528
return ((nFrame << 16) & 0xffff0000) | (nQueryIndex);
10529
}
10530
10531
// Reads nCount 64-bit query results starting at nBegin from node nNode's query
// pool into pGPU->nResults, records a reset of that query range on
// CommandBuffer and tags each slot with nFrame.
// nTimestampOffset is accepted but not applied here.
// Does nothing when nCount <= 0 or the Vulkan backend is not active.
void MicroProfileGpuFetchRangeVulkan(VkCommandBuffer CommandBuffer, uint32_t nNode, uint32_t nBegin, int32_t nCount, uint64_t nFrame, int64_t nTimestampOffset)
{
	if(nCount <= 0)
		return;
	MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
	if(!pGPU)
		return;

	VkQueryPool Pool = pGPU->QueryPool[nNode];
	// 8 bytes per result, tightly packed.
	vkGetQueryPoolResults(pGPU->Devices[nNode], Pool, nBegin, nCount, 8 * nCount, &pGPU->nResults[nBegin], 8, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_PARTIAL_BIT);
	vkCmdResetQueryPool(CommandBuffer, Pool, nBegin, nCount);

	for(int nOffset = 0; nOffset < nCount; ++nOffset)
	{
		pGPU->nQueryFrames[nOffset + nBegin] = nFrame;
	}
}
10546
void MicroProfileGpuWaitFenceVulkan(uint32_t nNode, uint64_t nFrame)
10547
{
10548
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
10549
if(!pGPU)
10550
return;
10551
10552
int r;
10553
int c = 0;
10554
do
10555
{
10556
MICROPROFILE_SCOPEI("Microprofile", "gpu-wait", MP_GREEN4);
10557
r = vkWaitForFences(pGPU->Devices[nNode], 1, &pGPU->Frames[nFrame].Fences[nNode], 1, 1000 * 30);
10558
#if 0
10559
if(c++ > 1000 && (c%100) == 0)
10560
{
10561
uprintf("waiting really long time for fence\n");
10562
OutputDebugString("waiting really long time for fence\n");
10563
}
10564
#endif
10565
} while(r != VK_SUCCESS);
10566
}
10567
10568
// Reads back the timestamps of every pending frame up to and including nFrame.
// For each frame it waits on the frame slot's fence and fetches the (possibly
// wrapped) query range; the query resets are recorded on Buffer.
// Timestamp adjustment for nodes other than 0 is not implemented (asserts).
void MicroProfileGpuFetchResultsVulkan(VkCommandBuffer Buffer, uint64_t nFrame)
{
	MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
	if(!pGPU)
		return;

	// Signed difference keeps "nPending <= nFrame" correct across wrap-around.
	for(uint64_t nPending = pGPU->nPendingFrame; (int64_t)(nFrame - nPending) >= 0;)
	{
		const uint32_t nSlot = nPending % MICROPROFILE_VULKAN_INTERNAL_DELAY;
		const uint32_t nNode = pGPU->Frames[nSlot].nNode;
		MicroProfileGpuWaitFenceVulkan(nNode, nSlot);

		int64_t nTimestampOffset = 0;
		if(nNode != 0)
		{
			MP_ASSERT(0 && "NOT IMPLEMENTED");
			// note: timestamp adjustment not implemented.
		}

		// Part up to the end of the pool, then the wrapped part (a non-positive
		// count makes the fetch a no-op).
		const uint32_t nBegin = pGPU->Frames[nSlot].nBegin;
		const uint32_t nCount = pGPU->Frames[nSlot].nCount;
		const int32_t nHead = (nBegin + nCount) > MICROPROFILE_VULKAN_MAX_QUERIES ? MICROPROFILE_VULKAN_MAX_QUERIES - nBegin : nCount;
		const int32_t nTail = (nBegin + nCount) - MICROPROFILE_VULKAN_MAX_QUERIES;
		MicroProfileGpuFetchRangeVulkan(Buffer, nNode, nBegin, nHead, nPending, nTimestampOffset);
		MicroProfileGpuFetchRangeVulkan(Buffer, nNode, 0, nTail, nPending, nTimestampOffset);

		nPending = ++pGPU->nPendingFrame;
		MP_ASSERT(pGPU->nFrame > nPending);
	}
}
10599
10600
uint64_t MicroProfileGpuGetTimeStampVulkan(uint32_t nIndex)
10601
{
10602
if(nIndex == (uint32_t)-1)
10603
{
10604
return 0;
10605
}
10606
10607
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
10608
if(!pGPU)
10609
return 0;
10610
10611
uint32_t nFrame = nIndex >> 16;
10612
uint32_t nQueryIndex = nIndex & 0xffff;
10613
uint32_t lala = pGPU->nQueryFrames[nQueryIndex];
10614
MP_ASSERT((0xffff & lala) == nFrame);
10615
// uprintf("read TS [%d <- %lld]\n", nQueryIndex, pGPU->nResults[nQueryIndex]);
10616
return pGPU->nResults[nQueryIndex];
10617
}
10618
10619
uint64_t MicroProfileTicksPerSecondGpuVulkan()
10620
{
10621
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
10622
if(!pGPU)
10623
return 1;
10624
return pGPU->nFrequency;
10625
}
10626
10627
uint32_t MicroProfileGpuFlipVulkan(void* pContext)
10628
{
10629
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
10630
if(!pGPU)
10631
return 0;
10632
10633
uint32_t nNode = pGPU->nCurrentNode;
10634
uint32_t nFrameIndex = pGPU->nFrame % MICROPROFILE_VULKAN_INTERNAL_DELAY;
10635
uint32_t nCount = 0, nStart = 0;
10636
10637
VkCommandBuffer CommandBuffer = pGPU->Frames[nFrameIndex].CommandBuffer[nNode];
10638
auto& F = pGPU->Frames[nFrameIndex];
10639
VkFence Fence = F.Fences[nNode];
10640
VkDevice Device = pGPU->Devices[nNode];
10641
VkQueue Queue = pGPU->Queues[nNode];
10642
10643
vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1);
10644
uint32_t nFrameTimeStamp = MicroProfileGpuInsertTimeStamp(pContext);
10645
vkResetCommandBuffer(F.CommandBuffer[nNode], VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
10646
10647
VkCommandBufferBeginInfo CBI;
10648
CBI.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
10649
CBI.pNext = 0;
10650
CBI.pInheritanceInfo = 0;
10651
CBI.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
10652
vkBeginCommandBuffer(F.CommandBuffer[nNode], &CBI);
10653
vkResetFences(Device, 1, &Fence);
10654
10655
nCount = pGPU->nFrameCount.exchange(0);
10656
nStart = pGPU->nFrameStart;
10657
pGPU->nFrameStart = (pGPU->nFrameStart + nCount) % MICROPROFILE_VULKAN_MAX_QUERIES;
10658
uint32_t nEnd = MicroProfileMin(nStart + nCount, (uint32_t)MICROPROFILE_VULKAN_MAX_QUERIES);
10659
MP_ASSERT(nStart != nEnd);
10660
uint32_t nSize = nEnd - nStart;
10661
10662
pGPU->Frames[nFrameIndex].nBegin = nStart;
10663
pGPU->Frames[nFrameIndex].nCount = nCount;
10664
pGPU->Frames[nFrameIndex].nNode = nNode;
10665
pGPU->nFrame++;
10666
////fetch from earlier frames
10667
MicroProfileGpuFetchResultsVulkan(CommandBuffer, pGPU->nFrame - MICROPROFILE_GPU_FRAME_DELAY);
10668
10669
vkEndCommandBuffer(F.CommandBuffer[nNode]);
10670
VkSubmitInfo SubmitInfo = {};
10671
SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
10672
SubmitInfo.pNext = nullptr;
10673
SubmitInfo.waitSemaphoreCount = 0;
10674
SubmitInfo.pWaitSemaphores = nullptr;
10675
SubmitInfo.commandBufferCount = 1;
10676
SubmitInfo.pCommandBuffers = &CommandBuffer;
10677
SubmitInfo.signalSemaphoreCount = 0;
10678
SubmitInfo.pSignalSemaphores = nullptr;
10679
vkQueueSubmit(Queue, 1, &SubmitInfo, Fence);
10680
return nFrameTimeStamp;
10681
}
10682
10683
// Creates and registers the Vulkan GPU timer state.
// pDevices, pPhysicalDevices, pQueues and QueueFamily are parallel arrays with
// nNodeCount entries each. For every node a timestamp query pool and a command
// pool are created, and for every internal frame slot a primary command buffer
// and a fence. Frame slot 0 is additionally used once to reset the whole query
// pool on the GPU.
void MicroProfileGpuInitVulkan(VkDevice* pDevices, VkPhysicalDevice* pPhysicalDevices, VkQueue* pQueues, uint32_t* QueueFamily, uint32_t nNodeCount)
{
	MicroProfileGpuTimerStateVulkan* pGPU = MP_ALLOC_OBJECT(MicroProfileGpuTimerStateVulkan);
	memset(pGPU, 0, sizeof(MicroProfileGpuTimerStateVulkan));
	MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType_Vulkan,
								pGPU,
								MicroProfileGpuInsertTimeStampVulkan,
								MicroProfileGpuGetTimeStampVulkan,
								MicroProfileTicksPerSecondGpuVulkan,
								MicroProfileGetGpuTickReferenceVulkan,
								MicroProfileGpuFlipVulkan,
								MicroProfileGpuShutdownVulkan);

	pGPU->nNodeCount = nNodeCount;

	// Value-initialized so members that are not set explicitly
	// (pipelineStatistics) are zero instead of indeterminate.
	VkQueryPoolCreateInfo Q = {};
	Q.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
	Q.pNext = 0;
	Q.flags = 0;
	Q.queryType = VK_QUERY_TYPE_TIMESTAMP;
	// One extra query: index MICROPROFILE_VULKAN_MAX_QUERIES is used by
	// MicroProfileGetGpuTickReferenceVulkan.
	Q.queryCount = MICROPROFILE_VULKAN_MAX_QUERIES + 1;

	VkCommandPoolCreateInfo CreateInfo = {};
	CreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	CreateInfo.pNext = 0;
	CreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;

	VkResult r;
	for(uint32_t i = 0; i < nNodeCount; ++i)
	{
		pGPU->Devices[i] = pDevices[i];
		pGPU->PhysicalDevices[i] = pPhysicalDevices[i];
		pGPU->Queues[i] = pQueues[i];
		r = vkCreateQueryPool(pGPU->Devices[i], &Q, 0, &pGPU->QueryPool[i]);
		MP_ASSERT(r == VK_SUCCESS);

		CreateInfo.queueFamilyIndex = QueueFamily[i];
		r = vkCreateCommandPool(pGPU->Devices[i], &CreateInfo, 0, &pGPU->CommandPool[i]);
		MP_ASSERT(r == VK_SUCCESS);

		for(uint32_t j = 0; j < MICROPROFILE_VULKAN_INTERNAL_DELAY; ++j)
		{
			auto& F = pGPU->Frames[j];
			VkCommandBufferAllocateInfo AllocInfo = {};
			AllocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
			AllocInfo.pNext = 0;
			AllocInfo.commandBufferCount = 1;
			AllocInfo.commandPool = pGPU->CommandPool[i];
			AllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
			r = vkAllocateCommandBuffers(pGPU->Devices[i], &AllocInfo, &F.CommandBuffer[i]);
			MP_ASSERT(r == VK_SUCCESS);

			VkFenceCreateInfo FCI = {};
			FCI.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
			FCI.pNext = 0;
			// Slot 0 is submitted and waited on right below, so its fence starts
			// unsignaled; all other slots start signaled.
			FCI.flags = j == 0 ? 0 : VK_FENCE_CREATE_SIGNALED_BIT;
			r = vkCreateFence(pGPU->Devices[i], &FCI, 0, &F.Fences[i]);
			MP_ASSERT(r == VK_SUCCESS);
			if(j == 0)
			{
				// One-off submission that resets every query in the pool.
				VkCommandBufferBeginInfo CBI = {};
				CBI.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
				CBI.pNext = 0;
				CBI.pInheritanceInfo = 0;
				CBI.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
				vkBeginCommandBuffer(F.CommandBuffer[i], &CBI);
				vkCmdResetQueryPool(F.CommandBuffer[i], pGPU->QueryPool[i], 0, MICROPROFILE_VULKAN_MAX_QUERIES + 1);

				vkEndCommandBuffer(F.CommandBuffer[i]);
				VkSubmitInfo SubmitInfo = {};
				SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
				SubmitInfo.pNext = nullptr;
				SubmitInfo.waitSemaphoreCount = 0;
				SubmitInfo.pWaitSemaphores = nullptr;
				SubmitInfo.commandBufferCount = 1;
				SubmitInfo.pCommandBuffers = &F.CommandBuffer[i];
				SubmitInfo.signalSemaphoreCount = 0;
				SubmitInfo.pSignalSemaphores = nullptr;
				vkQueueSubmit(pQueues[i], 1, &SubmitInfo, F.Fences[i]);
				vkWaitForFences(pGPU->Devices[i], 1, &F.Fences[i], 1, (uint64_t)-1);
				vkResetCommandBuffer(F.CommandBuffer[i], VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
			}
		}
	}

	// timestampPeriod is the number of nanoseconds per timestamp tick, so the
	// tick frequency is 1e9 / period. Only the first physical device is queried.
	VkPhysicalDeviceProperties Properties;
	vkGetPhysicalDeviceProperties(pPhysicalDevices[0], &Properties);
	pGPU->nFrequency = 1000000000ll / Properties.limits.timestampPeriod;
}
10772
10773
// Shutdown callback that MicroProfileGpuInitVulkan registers with
// MicroProfileGpuInitPlatform. Currently a no-op: none of the objects created by
// MicroProfileGpuInitVulkan (query pools, command pools and buffers, fences) are
// destroyed here, so they leak.
// TODO: release the Vulkan objects once it is established that the devices are
// still alive when this callback runs.
void MicroProfileGpuShutdownVulkan()
{
}
10777
void MicroProfileSetCurrentNodeVulkan(uint32_t nNode)
10778
{
10779
10780
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
10781
if(!pGPU)
10782
return;
10783
pGPU->nCurrentNode = nNode;
10784
}
10785
10786
int MicroProfileGetGpuTickReferenceVulkan(int64_t* pOutCPU, int64_t* pOutGpu)
10787
{
10788
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
10789
if(!pGPU)
10790
return 0;
10791
10792
auto& F = pGPU->Frames[pGPU->nFrame % MICROPROFILE_VULKAN_INTERNAL_DELAY];
10793
uint32_t nGpu = pGPU->nCurrentNode;
10794
10795
VkCommandBufferBeginInfo CBI;
10796
CBI.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
10797
CBI.pNext = 0;
10798
CBI.pInheritanceInfo = 0;
10799
CBI.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
10800
VkCommandBuffer CB = F.CommandBuffer[nGpu];
10801
VkDevice Device = pGPU->Devices[nGpu];
10802
VkFence Fence = F.Fences[nGpu];
10803
10804
vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1);
10805
vkResetFences(Device, 1, &Fence);
10806
vkResetCommandBuffer(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
10807
vkBeginCommandBuffer(CB, &CBI);
10808
vkCmdResetQueryPool(CB, pGPU->QueryPool[nGpu], MICROPROFILE_VULKAN_MAX_QUERIES, 1);
10809
vkCmdWriteTimestamp(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, pGPU->QueryPool[nGpu], MICROPROFILE_VULKAN_MAX_QUERIES);
10810
vkEndCommandBuffer(CB);
10811
VkSubmitInfo SubmitInfo = {};
10812
SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
10813
SubmitInfo.pNext = nullptr;
10814
SubmitInfo.waitSemaphoreCount = 0;
10815
SubmitInfo.pWaitSemaphores = nullptr;
10816
SubmitInfo.commandBufferCount = 1;
10817
SubmitInfo.pCommandBuffers = &CB;
10818
SubmitInfo.signalSemaphoreCount = 0;
10819
SubmitInfo.pSignalSemaphores = nullptr;
10820
vkQueueSubmit(pGPU->Queues[nGpu], 1, &SubmitInfo, Fence);
10821
vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1);
10822
*pOutGpu = 0;
10823
vkGetQueryPoolResults(Device, pGPU->QueryPool[nGpu], MICROPROFILE_VULKAN_MAX_QUERIES, 1, 8, pOutGpu, 8, VK_QUERY_RESULT_64_BIT);
10824
*pOutCPU = MP_TICK();
10825
return 1;
10826
}
10827
#endif
10828
10829
#if MICROPROFILE_GPU_TIMERS_GL
10830
//:'######:::'########::'##::::'##:::::'######:::'##:::::::
10831
//'##... ##:: ##.... ##: ##:::: ##::::'##... ##:: ##:::::::
10832
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::..::: ##:::::::
10833
// ##::'####: ########:: ##:::: ##:::: ##::'####: ##:::::::
10834
// ##::: ##:: ##.....::: ##:::: ##:::: ##::: ##:: ##:::::::
10835
// ##::: ##:: ##:::::::: ##:::: ##:::: ##::: ##:: ##:::::::
10836
//. ######::: ##::::::::. #######:::::. ######::: ########:
10837
//:......::::..::::::::::.......:::::::......::::........::
10838
10839
void MicroProfileGpuInitGL()
10840
{
10841
MicroProfileGpuTimerStateGL* pGPU = MP_ALLOC_OBJECT(MicroProfileGpuTimerStateGL);
10842
memset(pGPU, 0, sizeof(MicroProfileGpuTimerStateGL));
10843
MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType_GL,
10844
pGPU,
10845
MicroProfileGpuInsertTimeStampGL,
10846
MicroProfileGpuGetTimeStampGL,
10847
MicroProfileTicksPerSecondGpuGL,
10848
MicroProfileGetGpuTickReferenceGL,
10849
MicroProfileGpuFlipGL,
10850
MicroProfileGpuShutdownGL);
10851
10852
pGPU->GLTimerPos = 0;
10853
glGenQueries(MICROPROFILE_GL_MAX_QUERIES, &pGPU->GLTimers[0]);
10854
}
10855
10856
uint32_t MicroProfileGpuInsertTimeStampGL(void* pContext)
10857
{
10858
MicroProfileGpuTimerStateGL* pGPU = MicroProfileGetGpuTimerStateGL();
10859
if(!pGPU)
10860
return 0;
10861
10862
uint32_t nIndex = (pGPU->GLTimerPos + 1) % MICROPROFILE_GL_MAX_QUERIES;
10863
glQueryCounter(pGPU->GLTimers[nIndex], GL_TIMESTAMP);
10864
pGPU->GLTimerPos = nIndex;
10865
return nIndex;
10866
}
10867
uint64_t MicroProfileGpuGetTimeStampGL(uint32_t nKey)
10868
{
10869
MicroProfileGpuTimerStateGL* pGPU = MicroProfileGetGpuTimerStateGL();
10870
if(!pGPU)
10871
return 0;
10872
10873
uint64_t result;
10874
glGetQueryObjectui64v(pGPU->GLTimers[nKey], GL_QUERY_RESULT, &result);
10875
return result;
10876
}
10877
10878
// GPU tick frequency reported for the GL backend: a fixed 1e9 ticks per second.
uint64_t MicroProfileTicksPerSecondGpuGL()
{
	const long long nTicksPerSecond = 1000000000ll;
	return nTicksPerSecond;
}
10882
10883
int MicroProfileGetGpuTickReferenceGL(int64_t* pOutCpu, int64_t* pOutGpu)
10884
{
10885
MicroProfileGpuTimerStateGL* pGPU = MicroProfileGetGpuTimerStateGL();
10886
if(!pGPU)
10887
return 0;
10888
10889
int64_t nGpuTimeStamp;
10890
glGetInteger64v(GL_TIMESTAMP, &nGpuTimeStamp);
10891
if(nGpuTimeStamp)
10892
{
10893
*pOutCpu = MP_TICK();
10894
*pOutGpu = nGpuTimeStamp;
10895
#if 0 // debug test if timestamp diverges
10896
static int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
10897
static int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
10898
static int64_t nGpuStart = 0;
10899
static int64_t nCpuStart = 0;
10900
if(!nCpuStart)
10901
{
10902
nCpuStart = *pOutCpu;
10903
nGpuStart = *pOutGpu;
10904
}
10905
static int nCountDown = 100;
10906
if(0 == nCountDown--)
10907
{
10908
int64_t nCurCpu = *pOutCpu;
10909
int64_t nCurGpu = *pOutGpu;
10910
double fDistanceCpu = (nCurCpu - nCpuStart) / (double)nTicksPerSecondCpu;
10911
double fDistanceGpu = (nCurGpu - nGpuStart) / (double)nTicksPerSecondGpu;
10912
10913
char buf[254];
10914
snprintf(buf, sizeof(buf)-1,"Distance %f %f diff %f\n", fDistanceCpu, fDistanceGpu, fDistanceCpu-fDistanceGpu);
10915
OutputDebugString(buf);
10916
nCountDown = 100;
10917
}
10918
#endif
10919
return 1;
10920
}
10921
return 0;
10922
}
10923
uint32_t MicroProfileGpuFlipGL(void* pContext)
10924
{
10925
return MicroProfileGpuInsertTimeStampGL(pContext);
10926
}
10927
10928
void MicroProfileGpuShutdownGL()
10929
{
10930
MicroProfileGpuTimerStateGL* pGPU = MicroProfileGetGpuTimerStateGL();
10931
if(!pGPU)
10932
return;
10933
10934
glDeleteQueries(MICROPROFILE_GL_MAX_QUERIES, &pGPU->GLTimers[0]);
10935
}
10936
10937
// Returns the registered GPU timer state viewed as the GL state, or nullptr when
// no state is registered or the registered state is of another type.
MicroProfileGpuTimerStateGL* MicroProfileGetGpuTimerStateGL()
{
	if(!S.pGPU)
	{
		return nullptr;
	}
	if(S.pGPU->Type != MicroProfileGpuTimerStateType_GL)
	{
		return nullptr;
	}
	return (MicroProfileGpuTimerStateGL*)S.pGPU;
}
10943
#endif
10944
10945
// Hashes a NUL-terminated string: starting from the seed 0xfeedba3e, each
// character updates the hash as h = c + h * 31 (modulo 2^32).
// note matching: code in javascript: microprofilelive.html: function StringHash(s)
uint32_t MicroProfileStringHash(const char* pString)
{
	uint32_t nHash = 0xfeedba3e;
	for(const char* pCursor = pString; *pCursor != 0; ++pCursor)
	{
		const char ch = *pCursor;
		// (h << 5) - h in the original form; identical to h * 31 in 32-bit arithmetic.
		nHash = nHash * 31u + ch;
	}
	return nHash;
}
10955
10956
const char* MicroProfileStrDup(const char* pStr)
10957
{
10958
size_t len = strlen(pStr) + 1;
10959
char* pOut = (char*)MP_ALLOC(len, 8);
10960
memcpy(pOut, pStr, len);
10961
return pOut;
10962
}
10963
10964
uint32_t MicroProfileColorFromString(const char* pString) // note matching code/constants in javascript: microprofilelive.html: function StringToColor(s)
10965
{
10966
// var h = StringHash(s);
10967
// var cidx = h % 360;
10968
// return "hsl(" + cidx + ",50%, 70%)"; //note: matching code constants in microprofile.cpp: MicroProfileColorFromString
10969
10970
float h = MicroProfileStringHash(pString) % 360;
10971
float s = 0.5f;
10972
float l = 0.7f;
10973
// from https://www.rapidtables.com/convert/color/hsl-to-rgb.html
10974
float c = (1 - fabsf(2 * l - 1)) * s;
10975
float x = c * (1 - fabsf(fmodf(h / 60, 2.f) - 1));
10976
float m = l - c / 2.f;
10977
float r = 0.f, g = 0.f, b = 0.f;
10978
if(h < 60)
10979
{
10980
r = c;
10981
g = x;
10982
}
10983
else if(h < 120.f)
10984
{
10985
r = x;
10986
g = c;
10987
}
10988
else if(h < 180.f)
10989
{
10990
g = c;
10991
b = x;
10992
}
10993
else if(h < 240.f)
10994
{
10995
g = x;
10996
b = c;
10997
}
10998
else if(h < 300.f)
10999
{
11000
r = x;
11001
b = c;
11002
}
11003
else
11004
{
11005
r = c;
11006
b = x;
11007
}
11008
r += m;
11009
g += m;
11010
b += m;
11011
11012
r *= 255.f;
11013
g *= 255.f;
11014
b *= 255.f;
11015
11016
uint32_t R = MicroProfileMin(0xffu, (uint32_t)r);
11017
uint32_t G = MicroProfileMin(0xffu, (uint32_t)g);
11018
uint32_t B = MicroProfileMin(0xffu, (uint32_t)b);
11019
11020
return (R << 16) | (G << 8) | B;
11021
}
11022
11023
#if MICROPROFILE_DYNAMIC_INSTRUMENT
11024
// '##::::'##::'#######:::'#######::'##:::'##:::::'######::'##::::'##::::'###::::'########::'########:'########::
11025
// ##:::: ##:'##.... ##:'##.... ##: ##::'##:::::'##... ##: ##:::: ##:::'## ##::: ##.... ##: ##.....:: ##.... ##:
11026
// ##:::: ##: ##:::: ##: ##:::: ##: ##:'##:::::: ##:::..:: ##:::: ##::'##:. ##:: ##:::: ##: ##::::::: ##:::: ##:
11027
// #########: ##:::: ##: ##:::: ##: #####:::::::. ######:: #########:'##:::. ##: ########:: ######::: ##:::: ##:
11028
// ##.... ##: ##:::: ##: ##:::: ##: ##. ##:::::::..... ##: ##.... ##: #########: ##.. ##::: ##...:::: ##:::: ##:
11029
// ##:::: ##: ##:::: ##: ##:::: ##: ##:. ##:::::'##::: ##: ##:::: ##: ##.... ##: ##::. ##:: ##::::::: ##:::: ##:
11030
// ##:::: ##:. #######::. #######:: ##::. ##::::. ######:: ##:::: ##: ##:::: ##: ##:::. ##: ########: ########::
11031
// ..:::::..:::.......::::.......:::..::::..::::::......:::..:::::..::..:::::..::..:::::..::........::........:::
11032
11033
#include <distorm.h>
11034
#include <mnemonics.h>
11035
11036
// BREAK_ON_PATCH_FAIL(): expands to MP_BREAK() when
// MICROPROFILE_BREAK_ON_PATCH_FAIL is enabled, otherwise to a statement that
// does nothing.
#if MICROPROFILE_BREAK_ON_PATCH_FAIL
#define BREAK_ON_PATCH_FAIL() MP_BREAK()
#else
#define BREAK_ON_PATCH_FAIL() ((void)0)
#endif
11044
11045
void* MicroProfileX64FollowJump(void* pSrc);
11046
bool MicroProfileCopyInstructionBytes(char* pDest,
11047
void* pSrc,
11048
const int nLimit,
11049
const int nMaxSize,
11050
char* pTrunk,
11051
intptr_t nTrunkSize,
11052
uint32_t nUsableJumpRegs,
11053
int* nBytesDest,
11054
int* nBytesSrc,
11055
uint32_t* pRegsWritten,
11056
uint32_t* nRetSafe);
11057
bool MicroProfilePatchFunction(void* f, int Argument, MicroProfileHookFunc enter, MicroProfileHookFunc leave, MicroProfilePatchError* pError);
11058
template <typename Callback>
11059
void MicroProfileIterateSymbols(Callback CB, uint32_t* nModules, uint32_t nNumModules);
11060
11061
bool MicroProfileDemangleName(const char* pName, char* OutName, uint32_t Size);
11062
bool MicroProfilePatchBeginSuspend();
11063
void MicroProfilePatchEndSuspend();
11064
bool MicroProfilePatchHasSuspendedThread(intptr_t Begin, intptr_t End);
11065
11066
// Integer type used for string match masks and its width in bits.
// The 64-bit variant is the one compiled; the 32-bit variant is kept as the
// disabled alternative.
#if 1
#define STRING_MATCH_SIZE 64
using uint_string_match = uint64_t;
#else
#define STRING_MATCH_SIZE 32
using uint_string_match = uint32_t;
#endif
11073
11074
struct MicroProfileStringMatchMask
11075
{
11076
uint_string_match nMask;
11077
uint_string_match M[64];
11078
};
11079
11080
struct MicroProfileSymbolDesc
11081
{
11082
const char* pName;
11083
const char* pShortName;
11084
intptr_t nAddress;
11085
intptr_t nAddressEnd;
11086
uint_string_match nMask;
11087
int nIgnoreSymbol;
11088
uint32_t nModule;
11089
};
11090
11091
struct MicroProfileSymbolBlock
11092
{
11093
MicroProfileSymbolBlock* pNext;
11094
uint32_t nNumSymbols;
11095
uint32_t nNumChars;
11096
uint_string_match nMask;
11097
MicroProfileStringMatchMask MatchMask;
11098
enum
11099
{
11100
ESIZE = 4 << 10,
11101
};
11102
union
11103
{
11104
MicroProfileSymbolDesc Symbols[ESIZE / sizeof(MicroProfileSymbolDesc)];
11105
char Chars[ESIZE];
11106
};
11107
};
11108
11109
// Callback signature that receives a symbol name and its address.
using MicroProfileOnSymbolCallback = void (*)(const char* pSymbolName, intptr_t nAddress);
11110
11111
// Small per-thread value stack used by MicroProfile_Patch_TLS_PUSH / _POP.
// Element 0 is a header: its low 32 bits hold the capacity (initialized to 16)
// and its high 32 bits hold the current depth (initially 0). Elements 1..16
// hold the pushed values.
MP_THREAD_LOCAL uintptr_t g_MicroProfile_TLS[17] = { 16 };
11112
11113
// Pushes t onto the per-thread stack g_MicroProfile_TLS.
// Element 0 of the array packs the capacity (low 32 bits) and the current depth
// (high 32 bits). Returns 1 when the value was stored, 0 when the stack is full
// (nothing is modified in that case).
extern "C" MP_NOINLINE uintptr_t MicroProfile_Patch_TLS_PUSH(uintptr_t t)
{
	uintptr_t* pStack = &g_MicroProfile_TLS[0];
	const uintptr_t nHeader = pStack[0];
	const uintptr_t nCapacity = (uint32_t)nHeader;
	const uintptr_t nDepth = (uint32_t)(nHeader >> 32);

	if(nDepth == nCapacity)
	{
		return 0;
	}

	const uintptr_t nNewDepth = nDepth + 1;
	pStack[0] = nCapacity | (nNewDepth << 32);
	pStack[nNewDepth] = t;
	return 1;
}
11130
// Pops and returns the most recently pushed value from the per-thread stack
// g_MicroProfile_TLS (see MicroProfile_Patch_TLS_PUSH for the header layout).
// Popping an empty stack triggers MP_BREAK() and returns 0.
extern "C" MP_NOINLINE uintptr_t MicroProfile_Patch_TLS_POP()
{
	uintptr_t* pStack = &g_MicroProfile_TLS[0];
	const uintptr_t nHeader = pStack[0];
	const uintptr_t nCapacity = (uint32_t)nHeader;
	const uintptr_t nDepth = (uint32_t)(nHeader >> 32);

	if(0 == nDepth)
	{
		MP_BREAK(); // this should never happen
		return 0;
	}

	pStack[0] = nCapacity | ((nDepth - 1) << 32);
	return pStack[nDepth];
}
11147
11148
char* MicroProfileInsertRegisterJump(char* pCode, intptr_t pDest, int reg)
11149
{
11150
MP_ASSERT(reg >= R_RAX && reg <= R_R15);
11151
int large = reg >= R_R8 ? 1 : 0;
11152
int offset = large ? (reg - R_R8) : (reg - R_RAX);
11153
unsigned char* uc = (unsigned char*)pCode;
11154
*uc++ = large ? 0x49 : 0x48;
11155
*uc++ = 0xb8 + offset;
11156
memcpy(uc, &pDest, 8);
11157
uc += 8;
11158
if(large)
11159
*uc++ = 0x41;
11160
*uc++ = 0xff;
11161
*uc++ = 0xe0 + offset;
11162
return (char*)uc;
11163
// 164: 48 b8 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %rax
11164
// 16e: 48 b9 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %rcx
11165
// 178: 48 ba 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %rdx
11166
// 182: 48 bb 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %rbx
11167
// 18c: 48 bc 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %rsp
11168
// 196: 48 bd 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %rbp
11169
// 1a0: 48 be 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %rsi
11170
// 1aa: 48 bf 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %rdi
11171
// 1b4: 49 b8 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %r8
11172
// 1be: 49 b9 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %r9
11173
// 1c8: 49 ba 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %r10
11174
// 1d2: 49 bb 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %r11
11175
// 1dc: 49 bc 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %r12
11176
// 1e6: 49 bd 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %r13
11177
// 1f0: 49 be 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %r14
11178
// 1fa: 49 bf 08 07 06 05 04 03 02 01 movabsq $72623859790382856, %r15
11179
// 204: ff e0 jmpq *%rax
11180
// 206: ff e1 jmpq *%rcx
11181
// 208: ff e2 jmpq *%rdx
11182
// 20a: ff e3 jmpq *%rbx
11183
// 20c: ff e4 jmpq *%rsp
11184
// 20e: ff e5 jmpq *%rbp
11185
// 210: ff e6 jmpq *%rsi
11186
// 212: ff e7 jmpq *%rdi
11187
// 214: 41 ff e0 jmpq *%r8
11188
// 217: 41 ff e1 jmpq *%r9
11189
// 21a: 41 ff e2 jmpq *%r10
11190
// 21d: 41 ff e3 jmpq *%r11
11191
// 220: 41 ff e4 jmpq *%r12
11192
// 223: 41 ff e5 jmpq *%r13
11193
// 226: 41 ff e6 jmpq *%r14
11194
// 229: 41 ff e7 jmpq *%r15
11195
}
11196
11197
char* MicroProfileInsertRelativeJump(char* pCode, intptr_t pDest)
11198
{
11199
intptr_t src = intptr_t(pCode) + 5;
11200
intptr_t off = pDest - src;
11201
MP_ASSERT(off > intptr_t(0xffffffff80000000) && off <= 0x7fffffff);
11202
int32_t i32off = (int32_t)off;
11203
unsigned char* uc = (unsigned char*)pCode;
11204
unsigned char* c = (unsigned char*)&i32off;
11205
*uc++ = 0xe9;
11206
memcpy(uc, c, 4);
11207
uc += 4;
11208
return (char*)uc;
11209
}
11210
11211
// Emits a 14 byte "push/ret" jump to the 64-bit address pDest at pCode and
// returns the address just past it:
//   68 imm32            push  low 32 bits of pDest
//   c7 44 24 04 imm32   mov   dword [rsp+4], high 32 bits of pDest
//   c3                  ret
char* MicroProfileInsertRetJump(char* pCode, intptr_t pDest)
{
	const uint32_t nLow = (uint32_t)pDest;
	const uint32_t nHigh = (uint32_t)(pDest >> 32);
	unsigned char* pOut = (unsigned char*)pCode;

	// push imm32
	pOut[0] = 0x68;
	memcpy(pOut + 1, &nLow, 4);

	// mov dword [rsp+4], imm32
	const unsigned char MovToStack[4] = { 0xc7, 0x44, 0x24, 0x04 };
	memcpy(pOut + 5, MovToStack, 4);
	memcpy(pOut + 9, &nHigh, 4);

	// ret
	pOut[13] = 0xc3;
	return (char*)(pOut + 14);
}
11228
11229
uint8_t* MicroProfileInsertMov(uint8_t* p, uint8_t* pend, int r, intptr_t value)
11230
{
11231
int Large = r >= R_R8 ? 1 : 0;
11232
int RegIndex = Large ? (r - R_R8) : (r - R_RAX);
11233
*p++ = Large ? 0x49 : 0x48;
11234
*p++ = 0xb8 + RegIndex; // + (reg - (large?(R_R8-R_RAX):0));
11235
intptr_t* pAddress = (intptr_t*)p;
11236
pAddress[0] = value;
11237
p = (uint8_t*)(pAddress + 1);
11238
MP_ASSERT(p < pend);
11239
return p;
11240
}
11241
11242
uint8_t* MicroProfileInsertCall(uint8_t* p, uint8_t* pend, int r)
11243
{
11244
int Large = r >= R_R8 ? 1 : 0;
11245
int RegIndex = Large ? (r - R_R8) : (r - R_RAX);
11246
if(Large)
11247
{
11248
*p++ = 0x41;
11249
}
11250
*p++ = 0xff;
11251
*p++ = 0xd0 + RegIndex;
11252
MP_ASSERT(p < pend);
11253
return p;
11254
}
11255
11256
bool MicroProfileStringMatch(const char* pSymbol, uint32_t nStartOffset, const char** pPatterns, uint32_t* nPatternLength, uint32_t nNumPatterns)
11257
{
11258
MP_ASSERT(nStartOffset <= nNumPatterns);
11259
const char* p = pSymbol;
11260
for(uint32_t i = nStartOffset; i < nNumPatterns; ++i)
11261
{
11262
p = MP_STRCASESTR(p, pPatterns[i]);
11263
if(p)
11264
{
11265
p += nPatternLength[i];
11266
}
11267
else
11268
{
11269
return false;
11270
}
11271
}
11272
return true;
11273
}
11274
11275
int MicroProfileStringMatchOffset(const char* pSymbol, const char** pPatterns, uint32_t* nPatternLength, uint32_t nNumPatterns)
11276
{
11277
int nOffset = 0;
11278
const char* p = pSymbol;
11279
for(uint32_t i = 0; i < nNumPatterns; ++i)
11280
{
11281
p = MP_STRCASESTR(p, pPatterns[i]);
11282
if(p)
11283
{
11284
p += nPatternLength[i];
11285
nOffset++;
11286
}
11287
else
11288
{
11289
break;
11290
}
11291
}
11292
return nOffset;
11293
}
11294
11295
void* MicroProfileX64FollowJump(void* pSrc)
11296
{
11297
for(uint32_t i = 0; i < S.DynamicTokenIndex; ++i)
11298
if(S.FunctionsInstrumented[i] == pSrc)
11299
return pSrc; // if already instrumented, do not follow the jump inserted by itself.
11300
11301
// uprintf("deref possible trampoline for %p\n", pSrc);
11302
_DecodeType dt = Decode64Bits;
11303
_DInst Instructions[1];
11304
unsigned int nCount = 0;
11305
11306
_CodeInfo ci;
11307
ci.code = (uint8_t*)pSrc;
11308
ci.codeLen = 15;
11309
ci.codeOffset = 0;
11310
ci.dt = dt;
11311
ci.features = DF_NONE;
11312
int r = distorm_decompose(&ci, Instructions, 1, &nCount);
11313
if(!r || nCount != 1)
11314
{
11315
return pSrc; // fail, just return
11316
}
11317
11318
auto& I = Instructions[0];
11319
if(I.opcode == I_JMP)
11320
{
11321
if(I.ops[0].type == O_PC)
11322
{
11323
if(I.ops[0].size == 0x20)
11324
{
11325
intptr_t p = (intptr_t)pSrc;
11326
p += I.size;
11327
p += I.imm.sdword;
11328
return (void*)p;
11329
}
11330
}
11331
else if(I.ops[0].type == O_SMEM)
11332
{
11333
if(I.ops[0].index == R_RIP)
11334
{
11335
intptr_t p = (intptr_t)pSrc;
11336
p += I.size;
11337
p += I.disp;
11338
void* pHest = *(void**)p;
11339
return pHest;
11340
}
11341
}
11342
uprintf("failed to interpret I_JMP %p %d %d\n", pSrc, I.ops[0].size, I.ops[0].type);
11343
return pSrc;
11344
MP_BREAK();
11345
}
11346
return pSrc;
11347
}
11348
11349
bool MicroProfileCopyInstructionBytes(char* pDest,
11350
void* pSrc,
11351
const int nLimit,
11352
const int nMaxSize,
11353
char* pTrunk,
11354
intptr_t nTrunkSize,
11355
const uint32_t nUsableJumpRegs,
11356
int* pBytesDest,
11357
int* pBytesSrc,
11358
uint32_t* pRegsWritten,
11359
uint32_t* pRetSafe)
11360
{
11361
11362
_DecodeType dt = Decode64Bits;
11363
_DInst Instructions[128];
11364
int rip[128] = { 0 };
11365
uint32_t nRegsWrittenInstr[128] = { 0 };
11366
int offsets[129] = { 0 };
11367
unsigned int nCount = 0;
11368
11369
_CodeInfo ci;
11370
ci.code = (uint8_t*)pSrc;
11371
ci.codeLen = nLimit + 15;
11372
ci.codeOffset = 0;
11373
ci.dt = dt;
11374
ci.features = DF_NONE;
11375
int r = distorm_decompose(&ci, Instructions, 128, &nCount);
11376
if(r != DECRES_SUCCESS)
11377
{
11378
BREAK_ON_PATCH_FAIL();
11379
return false;
11380
}
11381
int offset = 0;
11382
unsigned int i = 0;
11383
unsigned nInstructions = 0;
11384
int64_t nTrunkUsage = 0;
11385
offsets[0] = 0;
11386
uint32_t nRegsWritten = 0;
11387
11388
auto Align16 = [](intptr_t p) { return (p + 15) & (~15); };
11389
11390
{
11391
11392
intptr_t iTrunk = (intptr_t)pTrunk;
11393
intptr_t iTrunkEnd = iTrunk + nTrunkSize;
11394
intptr_t iTrunkAligned = (iTrunk + 15) & ~15;
11395
nTrunkSize = iTrunkEnd - iTrunkAligned;
11396
pTrunk = (char*)iTrunkAligned;
11397
}
11398
const uint8_t* pTrunkEnd = (uint8_t*)(pTrunk + nTrunkSize);
11399
11400
auto RegToBit = [](int r) -> uint32_t
11401
{
11402
if(r >= R_RAX && r <= R_R15)
11403
{
11404
return (1u << (r - R_RAX));
11405
}
11406
else if(r >= R_EAX && r <= R_R15D)
11407
{
11408
return (1u << (r - R_EAX));
11409
}
11410
else if(r >= R_AX && r <= R_R15W)
11411
{
11412
return (1u << (r - R_AX));
11413
}
11414
else if(r >= R_AL && r <= R_R15B)
11415
{
11416
return (1u << (r - R_AL));
11417
}
11418
return 0; // might hit on registers like RIP
11419
MP_BREAK();
11420
};
11421
#ifdef _WIN32
11422
const uint32_t nUsableRegisters = RegToBit(R_RAX) | RegToBit(R_R10) | RegToBit(R_R11);
11423
#else
11424
const uint32_t nUsableRegisters = RegToBit(R_RAX) | RegToBit(R_R10) | RegToBit(R_R11);
11425
#endif
11426
11427
int nBytesToMove = 0;
11428
for(i = 0; i < nCount; ++i)
11429
{
11430
nBytesToMove += Instructions[i].size;
11431
if(nBytesToMove >= nLimit)
11432
break;
11433
}
11434
*pBytesSrc = nBytesToMove;
11435
11436
uint32_t nRspMask = RegToBit(R_RSP);
11437
*pRetSafe = 1;
11438
11439
for(i = 0; i < nCount; ++i)
11440
{
11441
rip[i] = 0;
11442
auto& I = Instructions[i];
11443
// bool bHasRipReference = false;
11444
if(I.opcode == I_LEA)
11445
{
11446
}
11447
if(I.opcode == I_CALL)
11448
{
11449
auto& O = I.ops[0];
11450
if(O.type != O_PC || O.size != 0x20)
11451
{
11452
uprintf("unknown call encountered. cannot move\n");
11453
BREAK_ON_PATCH_FAIL();
11454
return false;
11455
}
11456
if((nRegsWritten & nUsableRegisters) == nUsableRegisters)
11457
{
11458
uprintf("call encountered, but register all regs was written to. TODO: push regs?\n");
11459
BREAK_ON_PATCH_FAIL();
11460
return false;
11461
}
11462
// return value might be used past return so preserve registers.
11463
#ifdef _WIN32
11464
nRegsWritten |= RegToBit(R_RAX);
11465
#else
11466
nRegsWritten |= RegToBit(R_RAX) | RegToBit(R_RDX);
11467
#endif
11468
}
11469
11470
switch(I.ops[0].type)
11471
{
11472
case O_REG:
11473
{
11474
uint32_t reg = I.ops[0].index;
11475
nRegsWritten |= RegToBit(reg);
11476
auto& O2 = I.ops[1];
11477
switch(O2.type)
11478
{
11479
case O_REG:
11480
case O_MEM:
11481
case O_SMEM:
11482
{
11483
// if register is RSP 'contaminated', it prevents us from using that to do retjmps
11484
uint32_t nMask = RegToBit(O2.index);
11485
if(nRspMask & nMask)
11486
{
11487
nRspMask |= RegToBit(reg);
11488
}
11489
}
11490
default:
11491
break;
11492
}
11493
break;
11494
}
11495
case O_MEM:
11496
case O_SMEM:
11497
{
11498
uint32_t reg = I.ops[0].index;
11499
if(nRspMask & RegToBit(reg))
11500
{
11501
uprintf("found contaminated reg at +%lld\n", (long long)I.addr);
11502
*pRetSafe = 0;
11503
}
11504
break;
11505
}
11506
}
11507
nRegsWrittenInstr[i] = nRegsWritten;
11508
for(int j = 0; j < 4; ++j)
11509
{
11510
auto& O = I.ops[j];
11511
11512
switch(O.type)
11513
{
11514
case O_REG:
11515
case O_SMEM:
11516
case O_MEM:
11517
{
11518
if(O.index == R_RIP)
11519
{
11520
if(j != 1)
11521
{
11522
uprintf("found non base reference of rip. fail\n");
11523
BREAK_ON_PATCH_FAIL();
11524
return false;
11525
}
11526
if(I.dispSize != 0x20 && I.dispSize != 0x10)
11527
{
11528
uprintf("found offset size != 32 && != 16 bit. not implemented\n");
11529
BREAK_ON_PATCH_FAIL();
11530
return false;
11531
}
11532
rip[i] = 1;
11533
nTrunkUsage += Align16(O.size / 8);
11534
if(nTrunkUsage > nTrunkSize)
11535
{
11536
uprintf("overuse of trunk %lld\n", (long long)nTrunkUsage);
11537
BREAK_ON_PATCH_FAIL();
11538
return false;
11539
}
11540
}
11541
break;
11542
}
11543
}
11544
}
11545
if(rip[i])
11546
{
11547
if(I.ops[0].type != O_REG)
11548
{
11549
uprintf("arg 0 should be O_REG, fail\n");
11550
BREAK_ON_PATCH_FAIL();
11551
return false;
11552
}
11553
if(I.ops[1].type != O_SMEM)
11554
{
11555
uprintf("arg 1 should be O_SMEM, fail was %d\n", O_SMEM);
11556
BREAK_ON_PATCH_FAIL();
11557
return false;
11558
}
11559
}
11560
int fc = META_GET_FC(Instructions[i].meta);
11561
switch(fc)
11562
{
11563
case FC_CALL:
11564
{
11565
break;
11566
}
11567
case FC_RET:
11568
case FC_SYS:
11569
case FC_UNC_BRANCH:
11570
case FC_CND_BRANCH:
11571
uprintf("found branch inst %d :: %d\n", fc, offset);
11572
BREAK_ON_PATCH_FAIL();
11573
return false;
11574
}
11575
offset += Instructions[i].size;
11576
offsets[i + 1] = offset;
11577
if(offset >= nLimit)
11578
{
11579
nInstructions = i + 1;
11580
break;
11581
}
11582
}
11583
if(nTrunkUsage > nTrunkSize)
11584
{
11585
uprintf("function using too much trunk space\n");
11586
BREAK_ON_PATCH_FAIL();
11587
return false;
11588
}
11589
if(offset < nLimit)
11590
{
11591
uprintf("function only had %d bytes of %d\n", offset, nLimit);
11592
BREAK_ON_PATCH_FAIL();
11593
return false;
11594
}
11595
11596
if(0 == *pRetSafe && 0 == (nUsableJumpRegs & ~nRegsWritten))
11597
{
11598
// if ret jump is unsafe all of the usable jump regs are taken, fail.
11599
uprintf("cannot patch function without breaking code]\n");
11600
BREAK_ON_PATCH_FAIL();
11601
MP_BREAK();
11602
return false;
11603
}
11604
11605
// MP_BREAK();
11606
*pRegsWritten = nRegsWritten;
11607
uint8_t* d = (uint8_t*)pDest;
11608
uint8_t* dend = d + nMaxSize;
11609
const uint8_t* s = (const uint8_t*)pSrc;
11610
11611
nTrunkUsage = 0;
11612
11613
for(i = 0; i < nInstructions; ++i)
11614
{
11615
auto& I = Instructions[i];
11616
unsigned size = Instructions[i].size;
11617
if(I.opcode == I_CALL)
11618
{
11619
// find reg
11620
uint32_t nRegsWritten = nRegsWrittenInstr[i];
11621
uint32_t nUsable = nUsableRegisters & ~nRegsWritten;
11622
MP_ASSERT(nUsable);
11623
int r = R_RAX;
11624
while(0 == (1 & nUsable))
11625
{
11626
nUsable >>= 1;
11627
r++;
11628
}
11629
11630
intptr_t p = offsets[i + 1];
11631
p += (intptr_t)pSrc;
11632
p += I.imm.sdword;
11633
d = MicroProfileInsertMov(d, dend, r, p);
11634
d = MicroProfileInsertCall(d, dend, r);
11635
s += size;
11636
}
11637
else if(rip[i])
11638
{
11639
if(I.opcode == I_LEA)
11640
{
11641
if(I.ops[0].type != O_REG)
11642
{
11643
MP_BREAK();
11644
}
11645
if(I.ops[1].index != R_RIP)
11646
{
11647
MP_BREAK();
11648
}
11649
int reg = I.ops[0].index - R_RAX;
11650
int large = I.ops[0].index >= R_R8 ? 1 : 0;
11651
*d++ = large ? 0x49 : 0x48;
11652
*d++ = 0xb8 + (reg - (large ? (R_R8 - R_RAX) : 0));
11653
// calculate the offset
11654
int64_t offset = offsets[i + 1] + I.disp;
11655
intptr_t base = (intptr_t)pSrc;
11656
11657
intptr_t sum = base + offset;
11658
intptr_t* pAddress = (intptr_t*)d;
11659
pAddress[0] = sum;
11660
s += size;
11661
d += 10;
11662
d = (uint8_t*)(pAddress + 1);
11663
}
11664
else
11665
{
11666
if(15 & (intptr_t)pTrunk)
11667
{
11668
MP_BREAK();
11669
}
11670
intptr_t t = (intptr_t)pTrunk;
11671
t = (t + 15) & ~15;
11672
pTrunk = (char*)t;
11673
auto& O = I.ops[1];
11674
uint32_t Op1Size = O.size / 8;
11675
11676
memcpy(d, s, size);
11677
int32_t DispOriginal = (int32_t)I.disp;
11678
const uint8_t* pOriginal = (s + size) + DispOriginal;
11679
11680
intptr_t DispNew = ((uint8_t*)pTrunk - (d + size));
11681
if(!((intptr_t)pTrunk + Op1Size <= (intptr_t)pTrunkEnd))
11682
{
11683
MP_BREAK();
11684
}
11685
memcpy(pTrunk, pOriginal, Op1Size);
11686
pTrunk += Align16(Op1Size);
11687
if(I.dispSize == 32)
11688
{
11689
int32_t off = (int32_t)DispNew;
11690
if(DispNew > 0x7fffffff || DispNew < 0)
11691
{
11692
MP_BREAK();
11693
}
11694
memcpy(d + size - 4, &off, 4);
11695
}
11696
else if(I.dispSize == 16)
11697
{
11698
int16_t off = (int16_t)DispNew;
11699
if(DispNew > 0x7fff || DispNew < 0)
11700
{
11701
MP_BREAK();
11702
}
11703
memcpy(d + size - 2, &off, 2);
11704
}
11705
11706
d += size;
11707
s += size;
11708
}
11709
}
11710
else
11711
{
11712
memcpy(d, s, size);
11713
d += size;
11714
s += size;
11715
}
11716
}
11717
11718
*pBytesDest = (int)(d - (uint8_t*)pDest);
11719
11720
return true;
11721
}
11722
11723
extern "C" void MicroProfileInterceptEnter(int a)
11724
{
11725
MicroProfileToken T = S.DynamicTokens[a];
11726
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
11727
MP_ASSERT(pLog->nStackScope < MICROPROFILE_STACK_MAX); // if youre hitting this assert your instrumenting a deeply nested function
11728
MicroProfileScopeStateC* pScopeState = &pLog->ScopeState[pLog->nStackScope++];
11729
pScopeState->Token = T;
11730
if(T)
11731
{
11732
pScopeState->nTick = MicroProfileEnterInternal(T);
11733
}
11734
else
11735
{
11736
pScopeState->nTick = MICROPROFILE_INVALID_TICK;
11737
}
11738
}
11739
extern "C" void MicroProfileInterceptLeave(int a)
11740
{
11741
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
11742
MP_ASSERT(pLog->nStackScope > 0); // if youre hitting this assert you probably have mismatched _ENTER/_LEAVE markers
11743
MicroProfileScopeStateC* pScopeState = &pLog->ScopeState[--pLog->nStackScope];
11744
MicroProfileLeaveInternal(pScopeState->Token, pScopeState->nTick);
11745
}
11746
11747
bool MicroProfileInstrumentFromAddressOnly(void* pFunction)
11748
{
11749
MicroProfileSymbolDesc* pDesc = MicroProfileSymbolFindFuction(pFunction);
11750
if(pDesc)
11751
{
11752
uprintf("Found function %p :: %s %s\n", (void*)pDesc->nAddress, pDesc->pName, pDesc->pShortName);
11753
uint32_t nColor = MicroProfileColorFromString(pDesc->pName);
11754
11755
return MicroProfileInstrumentFunction(pFunction, MicroProfileSymbolModuleGetString(pDesc->nModule), pDesc->pName, nColor);
11756
}
11757
else
11758
{
11759
uprintf("No Function Found %p\n", pFunction);
11760
return false;
11761
}
11762
}
11763
11764
template <typename CB>
11765
void MicroProfileInstrumentScanForFunctionCalls(CB Callback, void* pFunction, size_t nFunctionSize)
11766
{
11767
pFunction = MicroProfileX64FollowJump(pFunction);
11768
const intptr_t nCodeLen = nFunctionSize;
11769
const uint32_t nMaxInstructions = 15;
11770
intptr_t nOffset = 0;
11771
_DecodeType dt = Decode64Bits;
11772
_DInst Instructions[15];
11773
_CodeInfo ci;
11774
do
11775
{
11776
ci.code = nOffset + (uint8_t*)pFunction;
11777
ci.codeLen = nCodeLen - nOffset;
11778
ci.codeOffset = 0;
11779
ci.dt = dt;
11780
ci.features = DF_RETURN_FC_ONLY;
11781
uint32_t nCount = 0;
11782
uint32_t nOffsetNext = 0;
11783
11784
int r = distorm_decompose(&ci, Instructions, nMaxInstructions, &nCount);
11785
// uprintf("decomposed %d\n", nCount);
11786
if(r != DECRES_SUCCESS && r != DECRES_MEMORYERR)
11787
{
11788
BREAK_ON_PATCH_FAIL();
11789
return;
11790
}
11791
if(nCount == 0)
11792
{
11793
// no instructions left
11794
break;
11795
}
11796
// uprintf("instructions decoded %d %p ::\n", nCount, pFunction);
11797
for(int i = 0; i < (int)nCount; ++i)
11798
{
11799
// rip[i] = 0;
11800
auto& I = Instructions[i];
11801
// bool bHasRipReference = false;
11802
if(I.addr < nOffsetNext)
11803
{
11804
MP_BREAK();
11805
}
11806
nOffsetNext = I.addr + I.size;
11807
if(I.opcode == I_CALL)
11808
{
11809
auto& O = I.ops[0];
11810
if(O.type != O_PC || O.size != 0x20)
11811
{
11812
uprintf("non immediate call encountered. cannot follow\n");
11813
BREAK_ON_PATCH_FAIL();
11814
continue;
11815
}
11816
intptr_t pDst = nOffset + (intptr_t)pFunction;
11817
pDst += I.addr;
11818
pDst += I.size;
11819
pDst += I.imm.sdword;
11820
11821
void* fFun1 = MicroProfileX64FollowJump((void*)pDst);
11822
Callback(fFun1);
11823
}
11824
}
11825
nOffset += nOffsetNext;
11826
} while(nOffset < nCodeLen);
11827
}
11828
11829
void MicroProfileInstrumentFunctionsCalled(void* pFunction, const char* pModuleName, const char* pFunctionName, int nMinBytes, int nMaxCalls)
11830
{
11831
pFunction = MicroProfileX64FollowJump(pFunction);
11832
11833
MicroProfileSymbolDesc* pDesc = MicroProfileSymbolFindFuction(pFunction);
11834
if(pDesc)
11835
{
11836
uprintf("instrumenting child functions %p %p :: %s :: %s\n", (void*)pDesc->nAddress, (void*)pDesc->nAddressEnd, pDesc->pName, pDesc->pShortName);
11837
int a = 0;
11838
(void)a;
11839
}
11840
else
11841
{
11842
uprintf("could not find symbol info\n");
11843
return;
11844
}
11845
11846
const intptr_t nCodeLen = (intptr_t)pDesc->nAddressEnd - (intptr_t)pDesc->nAddress;
11847
11848
MicroProfilePatchBeginSuspend();
11849
int NumFunctionsInstrumented = 0;
11850
auto Callback = [&NumFunctionsInstrumented, nMinBytes, nMaxCalls](void* pFunc)
11851
{
11852
MicroProfileSymbolDesc* pDesc = MicroProfileSymbolFindFuction(pFunc);
11853
if(!pDesc)
11854
return;
11855
const char* pName = pDesc ? pDesc->pName : "??";
11856
intptr_t Size = pDesc->nAddressEnd - pDesc->nAddress;
11857
if(nMinBytes == 0 || Size >= nMinBytes)
11858
{
11859
if(0 == nMaxCalls || NumFunctionsInstrumented < nMaxCalls)
11860
{
11861
uprintf("** func Instrumented, count %d, size %d %s\n", NumFunctionsInstrumented, Size, pName);
11862
if(MicroProfileInstrumentFromAddressOnly(pFunc))
11863
{
11864
++NumFunctionsInstrumented;
11865
}
11866
}
11867
else
11868
{
11869
uprintf("** func Skipped, count %d>=%d :: %s\n", NumFunctionsInstrumented, nMaxCalls, pName);
11870
}
11871
}
11872
else
11873
{
11874
uprintf("** func Skipped, Size %d<%d :: %s\n", Size, nMinBytes, pName);
11875
}
11876
};
11877
MicroProfileInstrumentScanForFunctionCalls(Callback, pFunction, nCodeLen);
11878
11879
MicroProfilePatchEndSuspend();
11880
}
11881
11882
bool MicroProfileInstrumentFunction(void* pFunction, const char* pModuleName, const char* pFunctionName, uint32_t nColor)
11883
{
11884
MicroProfilePatchBeginSuspend();
11885
struct ScopeExit
11886
{
11887
~ScopeExit()
11888
{
11889
MicroProfilePatchEndSuspend();
11890
}
11891
} dummy;
11892
11893
MicroProfilePatchError Err;
11894
if(S.DynamicTokenIndex == MICROPROFILE_MAX_DYNAMIC_TOKENS)
11895
{
11896
uprintf("instrument failing, out of dynamic tokens %d\n", S.DynamicTokenIndex);
11897
return false;
11898
}
11899
for(uint32_t i = 0; i < S.DynamicTokenIndex; ++i)
11900
{
11901
if(S.FunctionsInstrumented[i] == pFunction)
11902
{
11903
uprintf("function %p already instrumented\n", pFunction);
11904
return false;
11905
}
11906
}
11907
if(MicroProfilePatchFunction(pFunction, S.DynamicTokenIndex, MicroProfileInterceptEnter, MicroProfileInterceptLeave, &Err))
11908
{
11909
MicroProfileToken Tok = S.DynamicTokens[S.DynamicTokenIndex] = MicroProfileGetToken("PATCHED", pFunctionName, nColor, MicroProfileTokenTypeCpu, 0);
11910
S.FunctionsInstrumented[S.DynamicTokenIndex] = pFunction;
11911
S.FunctionsInstrumentedName[S.DynamicTokenIndex] = MicroProfileStringIntern(pFunctionName);
11912
S.FunctionsInstrumentedModuleNames[S.DynamicTokenIndex] = MicroProfileStringIntern(pModuleName);
11913
S.DynamicTokenIndex++;
11914
11915
uint16_t nGroup = MicroProfileGetGroupIndex(Tok);
11916
if(!MicroProfileGroupActive(nGroup))
11917
{
11918
MicroProfileGroupSetEnabled(nGroup);
11919
}
11920
#if MICROPROFILE_WEBSERVER
11921
MicroProfileWebSocketToggleTimer(MicroProfileGetTimerIndex(Tok));
11922
#endif
11923
11924
return false;
11925
}
11926
else
11927
{
11928
bool bFound = false;
11929
for(int i = 0; i < S.nNumPatchErrors; ++i)
11930
{
11931
if(Err.nCodeSize == S.PatchErrors[i].nCodeSize && 0 == memcmp(Err.Code, S.PatchErrors[i].Code, Err.nCodeSize))
11932
{
11933
bFound = true;
11934
break;
11935
}
11936
}
11937
if(!bFound && S.nNumPatchErrors < MICROPROFILE_MAX_PATCH_ERRORS)
11938
{
11939
memcpy(&S.PatchErrors[S.nNumPatchErrors++], &Err, sizeof(Err));
11940
}
11941
bFound = false;
11942
for(int i = 0; i < S.nNumPatchErrorFunctions; ++i)
11943
{
11944
if(0 == strcmp(pFunctionName, S.PatchErrorFunctionNames[i]))
11945
{
11946
bFound = true;
11947
}
11948
}
11949
if(!bFound && S.nNumPatchErrorFunctions < MICROPROFILE_MAX_PATCH_ERRORS)
11950
{
11951
S.PatchErrorFunctionNames[S.nNumPatchErrorFunctions++] = pFunctionName;
11952
}
11953
uprintf("interception fail!!\n");
11954
return false;
11955
}
11956
}
11957
11958
// Forward declarations of internal helpers referenced before their definitions.

// instrumentation setup
void MicroProfileInstrumentPreInit();

// symbol table loading / teardown
void MicroProfileSymbolInitializeInternal();
void MicroProfileSymbolFreeDataInternal();

// worker-thread control for symbol loading and symbol queries
void MicroProfileSymbolKickThread();
void MicroProfileQueryJoinThread();
11963
11964
bool MicroProfileSymbolInitialize(bool bStartLoad, const char* pModuleName)
11965
{
11966
if(!bStartLoad)
11967
return S.SymbolState.nModuleLoadsFinished.load() != 0;
11968
// int nRequests = 0;
11969
{
11970
MicroProfileScopeLock L(MicroProfileMutex());
11971
for(int i = 0; i < S.SymbolNumModules; ++i)
11972
{
11973
if(0 == pModuleName || 0 == strcmp(pModuleName, (const char*)S.SymbolModules[i].pBaseString))
11974
{
11975
if(0 == S.SymbolModules[i].nModuleLoadRequested.exchange(1))
11976
{
11977
S.SymbolState.nModuleLoadsRequested.fetch_add(1);
11978
}
11979
}
11980
}
11981
}
11982
11983
// todo: unload modules
11984
MicroProfileSymbolKickThread();
11985
return S.SymbolState.nModuleLoadsRequested.load() == S.SymbolState.nModuleLoadsFinished.load();
11986
11987
// if(S.SymbolState.nState == MICROPROFILE_SYMBOLSTATE_DEFAULT)
11988
// {
11989
// if(!bStartLoad)
11990
// return false;
11991
// {
11992
// MicroProfileScopeLock L(MicroProfileMutex());
11993
// S.SymbolState.nState.store(MICROPROFILE_SYMBOLSTATE_LOADING);
11994
// S.SymbolState.nSymbolsLoaded.store(0);
11995
// }
11996
// MicroProfileSymbolKickThread();
11997
// return false;
11998
// }
11999
// if(nRequests)
12000
// {
12001
// }
12002
// if(S.SymbolState.nState.load() == MICROPROFILE_SYMBOLSTATE_DONE)
12003
// {
12004
// MicroProfileQueryJoinThread();
12005
// }
12006
// if(S.SymbolState.nState == MICROPROFILE_SYMBOLSTATE_DONE && bStartLoad)
12007
// {
12008
// MicroProfileSymbolFreeDataInternal();
12009
// {
12010
// MicroProfileScopeLock L(MicroProfileMutex());
12011
// S.SymbolState.nState.store(MICROPROFILE_SYMBOLSTATE_LOADING);
12012
// S.SymbolState.nSymbolsLoaded.store(0);
12013
// }
12014
// MicroProfileSymbolKickThread();
12015
// return false;
12016
12017
// }
12018
// else
12019
// {
12020
// return S.SymbolState.nState == MICROPROFILE_SYMBOLSTATE_DONE;
12021
// }
12022
}
12023
12024
void MicroProfileSymbolFreeDataInternal()
12025
{
12026
{
12027
uprintf("todod;....\n");
12028
MP_BREAK();
12029
// MP_ASSERT(S.SymbolState.nState == MICROPROFILE_SYMBOLSTATE_DONE);
12030
12031
S.nNumPatchErrorFunctions = 0;
12032
memset(S.PatchErrorFunctionNames, 0, sizeof(S.PatchErrorFunctionNames));
12033
12034
for(int i = 0; i < S.SymbolNumModules; ++i)
12035
{
12036
12037
while(S.SymbolModules[i].pSymbolBlock)
12038
{
12039
MicroProfileSymbolBlock* pBlock = S.SymbolModules[i].pSymbolBlock;
12040
S.SymbolModules[i].pSymbolBlock = pBlock->pNext;
12041
MP_FREE(pBlock);
12042
MICROPROFILE_COUNTER_SUB("/MicroProfile/Symbols/Allocs", 1);
12043
MICROPROFILE_COUNTER_SUB("/MicroProfile/Symbols/Memory", sizeof(MicroProfileSymbolBlock));
12044
}
12045
}
12046
memset(&S.SymbolModules[0], 0, sizeof(S.SymbolModules));
12047
memset(&S.SymbolModuleNameBuffer[0], 0, sizeof(S.SymbolModuleNameBuffer));
12048
S.SymbolModuleNameOffset = 0;
12049
S.SymbolNumModules = 0;
12050
}
12051
}
12052
#if STRING_MATCH_SIZE == 64
12053
int MicroProfileCharacterMaskCharIndex(char c)
12054
{
12055
if(c >= 'A' && c <= 'Z')
12056
c = 'a' + (c - 'A');
12057
// abcdefghijklmnopqrstuvwxyz
12058
if(c >= 'a' && c <= 'z')
12059
{
12060
int b = c - 'a';
12061
return b;
12062
}
12063
if(c >= '0' && c <= '9')
12064
{
12065
int b = c - '0';
12066
return b + 26;
12067
}
12068
switch(c)
12069
{
12070
case ':':
12071
return 37;
12072
case ';':
12073
return 38;
12074
case '\\':
12075
return 39;
12076
case '\'':
12077
return 40;
12078
case '\"':
12079
return 41;
12080
case '/':
12081
return 42;
12082
case '{':
12083
return 43;
12084
case '}':
12085
return 44;
12086
case '(':
12087
return 45;
12088
case ')':
12089
return 46;
12090
case '[':
12091
return 47;
12092
case ']':
12093
return 48;
12094
case '<':
12095
return 49;
12096
case '>':
12097
return 50;
12098
case '.':
12099
return 51;
12100
case ',':
12101
return 52; // special characters
12102
case ' ':
12103
return -1; // special characters
12104
}
12105
return 63;
12106
}
12107
12108
uint64_t MicroProfileCharacterMaskChar(char c)
12109
{
12110
uint64_t nMask = 1;
12111
int nIndex = MicroProfileCharacterMaskCharIndex(c);
12112
if(nIndex == -1)
12113
return 0;
12114
return nMask << nIndex;
12115
}
12116
12117
#else
12118
uint32_t MicroProfileCharacterMaskChar(char c)
12119
{
12120
if(c >= 'A' && c <= 'Z')
12121
c = 'a' + (c - 'A');
12122
// abcdefghijklmnopqrstuvwxyz
12123
if(c >= 'a' && c <= 'z')
12124
{
12125
int b = c - 'a';
12126
b = MicroProfileMin(20, b); // squish the last together
12127
// static int once = 0;
12128
// if(0 == once)
12129
//{
12130
// for(int i = 20; i < 28; ++i)
12131
// {
12132
// uprintf("char %d is %c\n", i, (char)('a' + i));
12133
// }
12134
// once = 1;
12135
//}
12136
uint32_t v = 1;
12137
return v << b;
12138
}
12139
if(c >= '0' && c <= '9')
12140
{
12141
int b = c - '0';
12142
b += 21;
12143
if(b < 21 || b > 30)
12144
MP_BREAK();
12145
return 1 << b;
12146
}
12147
switch(c)
12148
{
12149
case ':':
12150
case ';':
12151
case '\\':
12152
case '\'':
12153
case '\"':
12154
case '/':
12155
case '{':
12156
case '}':
12157
case '(':
12158
case ')':
12159
case '[':
12160
case ']':
12161
return 1u << 31; // special characters
12162
case ' ':
12163
return 0;
12164
}
12165
return 0;
12166
}
12167
int MicroProfileCharacterMaskCharIndex(char c)
12168
{
12169
if(c >= 'A' && c <= 'Z')
12170
c = 'a' + (c - 'A');
12171
// abcdefghijklmnopqrstuvwxyz
12172
if(c >= 'a' && c <= 'z')
12173
{
12174
int b = c - 'a';
12175
b = MicroProfileMin(20, b); // squish the last together
12176
static int once = 0;
12177
if(0 == once)
12178
{
12179
for(int i = 20; i < 28; ++i)
12180
{
12181
uprintf("char %d is %c\n", i, (char)('a' + i));
12182
}
12183
once = 1;
12184
}
12185
return b;
12186
}
12187
if(c >= '0' && c <= '9')
12188
{
12189
int b = c - '0';
12190
b += 21;
12191
if(b < 21 || b > 30)
12192
MP_BREAK();
12193
return b;
12194
}
12195
switch(c)
12196
{
12197
case ':':
12198
case ';':
12199
case '\\':
12200
case '\'':
12201
case '\"':
12202
case '/':
12203
case '{':
12204
case '}':
12205
case '(':
12206
case ')':
12207
case '[':
12208
case ']':
12209
return 31; // special characters
12210
case ' ':
12211
return -1;
12212
}
12213
return 1;
12214
}
12215
#endif
12216
12217
// Returns the OR of MicroProfileCharacterMaskChar over every character of the
// NUL-terminated string pStr (0 for an empty string).
uint_string_match MicroProfileCharacterMaskString(const char* pStr)
{
	uint_string_match nCombined = 0;
	for(const char* pCursor = pStr; *pCursor != 0; ++pCursor)
	{
		nCombined |= MicroProfileCharacterMaskChar(*pCursor);
	}
	return nCombined;
}
12227
12228
void MicroProfileCharacterMaskString2(const char* pStr, MicroProfileStringMatchMask& M)
12229
{
12230
uint_string_match nMask = 0;
12231
char c = 0;
12232
int nLast = -1;
12233
while(0 != (c = *pStr++))
12234
{
12235
nMask |= MicroProfileCharacterMaskChar(c);
12236
int nIndex = MicroProfileCharacterMaskCharIndex(c);
12237
if(nIndex >= 0 && nLast >= 0)
12238
{
12239
MP_ASSERT(nIndex < STRING_MATCH_SIZE);
12240
M.M[nLast] |= 1llu << nIndex;
12241
}
12242
nLast = nIndex;
12243
}
12244
M.nMask |= nMask;
12245
}
12246
12247
bool MicroProfileCharacterMatch(const MicroProfileStringMatchMask& Block, const MicroProfileStringMatchMask& String)
12248
{
12249
if(String.nMask != (Block.nMask & String.nMask))
12250
return false;
12251
for(uint32_t i = 0; i < STRING_MATCH_SIZE; ++i)
12252
{
12253
if(String.M[i] != (Block.M[i] & String.M[i]))
12254
return false;
12255
}
12256
return true;
12257
}
12258
12259
uint32_t MicroProfileSymbolGetModule(const char* pString, intptr_t nBaseAddr)
12260
{
12261
12262
for(int i = 0; i < S.SymbolNumModules; ++i)
12263
{
12264
auto& M = S.SymbolModules[i];
12265
for(int j = 0; j < M.nNumExecutableRegions; ++j)
12266
{
12267
if(M.Regions[j].nBegin <= nBaseAddr && nBaseAddr < M.Regions[j].nEnd)
12268
return i;
12269
}
12270
}
12271
MP_BREAK(); // should never happen.
12272
return 0;
12273
}
12274
12275
void MicroProfileSymbolMergeExecutableRegions()
12276
{
12277
for(int i = 0; i < S.SymbolNumModules; ++i)
12278
{
12279
auto& M = S.SymbolModules[i];
12280
if(M.nNumExecutableRegions > 1)
12281
{
12282
std::sort(&M.Regions[0], &M.Regions[M.nNumExecutableRegions], [](const MicroProfileSymbolModuleRegion& l, const MicroProfileSymbolModuleRegion& r) { return l.nBegin < r.nBegin; });
12283
12284
int p = 0;
12285
int g = 1;
12286
while(g < M.nNumExecutableRegions)
12287
{
12288
if(M.Regions[p].nEnd == M.Regions[g].nBegin)
12289
{
12290
M.Regions[p].nEnd = M.Regions[g].nEnd;
12291
g++;
12292
}
12293
else
12294
{
12295
++p;
12296
if(p != g)
12297
M.Regions[p] = M.Regions[g];
12298
g++;
12299
}
12300
}
12301
M.nNumExecutableRegions = p + 1;
12302
}
12303
}
12304
for(int i = 0; i < S.SymbolNumModules; ++i)
12305
{
12306
auto& M = S.SymbolModules[i];
12307
uprintf("region %s %s\n", M.pTrimmedString, M.pBaseString);
12308
for(int j = 0; j < M.nNumExecutableRegions; ++j)
12309
uprintf("\t[%p-%p]\n", (void*)M.Regions[j].nBegin, (void*)M.Regions[j].nEnd);
12310
}
12311
}
12312
12313
uint32_t MicroProfileSymbolInitModule(const char* pString_, intptr_t nAddrBegin, intptr_t nAddrEnd)
12314
{
12315
const char* pString = MicroProfileStringInternSlash(pString_);
12316
for(int i = 0; i < S.SymbolNumModules; ++i)
12317
{
12318
auto& M = S.SymbolModules[i];
12319
for(int j = 0; j < M.nNumExecutableRegions; ++j)
12320
{
12321
if(M.Regions[j].nBegin <= nAddrBegin && nAddrEnd < M.Regions[j].nEnd)
12322
{
12323
MP_ASSERT(pString == M.pBaseString);
12324
return i;
12325
}
12326
}
12327
}
12328
12329
for(int i = 0; i < S.SymbolNumModules; ++i)
12330
{
12331
auto& M = S.SymbolModules[i];
12332
if(M.pBaseString == pString)
12333
{
12334
MP_ASSERT((intptr_t)pString != -2);
12335
for(int j = 0; j < M.nNumExecutableRegions; ++j)
12336
if(nAddrBegin == M.Regions[j].nBegin)
12337
return i;
12338
12339
if(M.nNumExecutableRegions == MICROPROFILE_MAX_MODULE_EXEC_REGIONS)
12340
{
12341
return (uint32_t)-1;
12342
}
12343
M.Regions[M.nNumExecutableRegions].nBegin = nAddrBegin;
12344
M.Regions[M.nNumExecutableRegions].nEnd = nAddrEnd;
12345
// uprintf("added module region %d %p %p %s \n", M.nNumExecutableRegions, (void*)nAddrBegin, (void*)nAddrEnd, pString);
12346
M.nNumExecutableRegions++;
12347
return i;
12348
}
12349
}
12350
12351
MP_ASSERT((intptr_t)pString != -2);
12352
// trim untill last path char
12353
const char* pTrimmedString = pString;
12354
12355
const char* pWork = pTrimmedString;
12356
bool bLastSeperator = false;
12357
while(*pWork != '\0')
12358
{
12359
if(bLastSeperator)
12360
pTrimmedString = pWork;
12361
bLastSeperator = *pWork == '\\' || *pWork == '/';
12362
12363
pWork++;
12364
}
12365
int nLen = (int)strlen(pTrimmedString) + 1;
12366
// uprintf("STRING '%s' :: trimmedstring %s . len %d\n", pString, pTrimmedString, nLen);
12367
12368
const char* pTrimmedIntern = MicroProfileStringIntern(pTrimmedString);
12369
if(S.SymbolModuleNameOffset + nLen > MICROPROFILE_INSTRUMENT_MAX_MODULE_CHARS)
12370
return 0;
12371
memcpy(S.SymbolModuleNameOffset + &S.SymbolModuleNameBuffer[0], pTrimmedString, nLen);
12372
12373
MP_ASSERT(S.SymbolNumModules < MICROPROFILE_INSTRUMENT_MAX_MODULES);
12374
S.SymbolModules[S.SymbolNumModules].nModuleBase = nAddrBegin;
12375
S.SymbolModules[S.SymbolNumModules].nMatchOffset = 0;
12376
S.SymbolModules[S.SymbolNumModules].nStringOffset = S.SymbolModuleNameOffset;
12377
S.SymbolModules[S.SymbolNumModules].pBaseString = (const char*)pString;
12378
S.SymbolModules[S.SymbolNumModules].pTrimmedString = pTrimmedIntern;
12379
S.SymbolModules[S.SymbolNumModules].Regions[0].nBegin = nAddrBegin;
12380
S.SymbolModules[S.SymbolNumModules].Regions[0].nEnd = nAddrEnd;
12381
S.SymbolModules[S.SymbolNumModules].nNumExecutableRegions = 1;
12382
S.SymbolModules[S.SymbolNumModules].bDownloading = false;
12383
S.SymbolModules[S.SymbolNumModules].nProgress = 0;
12384
S.SymbolModules[S.SymbolNumModules].nProgressTarget = 0;
12385
12386
S.SymbolModuleNameOffset += nLen;
12387
return S.SymbolNumModules++;
12388
}
12389
12390
const char* MicroProfileSymbolModuleGetString(uint32_t nIndex)
12391
{
12392
MP_ASSERT(S.SymbolNumModules > (int)nIndex);
12393
return S.SymbolModules[nIndex].nStringOffset + &S.SymbolModuleNameBuffer[0];
12394
}
12395
12396
// Decides whether a symbol must be excluded from instrumentation.
// Symbols containing "MicroProfile" are always ignored when
// MICROPROFILE_INSTRUMENT_MICROPROFILE is 0; otherwise only those that also
// contain one of a fixed list of substrings. On Windows, names starting with
// "__" and a fixed list of runtime helpers are ignored as well.
bool MicroProfileSymbolIgnoreSymbol(const char* pName)
{
	if(nullptr != strstr(pName, "MicroProfile"))
	{
#if MICROPROFILE_INSTRUMENT_MICROPROFILE == 0
		return true;
#else
		// just for debugging: skip these so we can play around with the sample projects
		static const char* const kProfilerInternals[] = { "Log", "Scope", "Tick", "Enter", "Leave", "Thread", "Mutex" };
		for(const char* pNeedle : kProfilerInternals)
		{
			if(nullptr != strstr(pName, pNeedle))
				return true;
		}
#endif
	}
#ifdef _WIN32
	if(pName[0] == '_' && pName[1] == '_')
		return true;
	static const char* const kRuntimeHelpers[] = { "__security_check_cookie", "_RTC_CheckStackVars", "__chkstk", "std::_Atomic", "_Init_thread_header", "_Init_thread_footer" };
	for(const char* pNeedle : kRuntimeHelpers)
	{
		if(nullptr != strstr(pName, pNeedle))
			return true;
	}
#endif
	return false;
}
12421
12422
void MicroProfileSymbolInitializeInternal()
12423
{
12424
uprintf("Starting load...\n");
12425
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSymbolInitialize", MP_CYAN);
12426
12427
auto AllocBlock = []() -> MicroProfileSymbolBlock*
12428
{
12429
MicroProfileSymbolBlock* pBlock = MP_ALLOC_OBJECT(MicroProfileSymbolBlock);
12430
MICROPROFILE_COUNTER_ADD("/MicroProfile/Symbols/Allocs", 1);
12431
MICROPROFILE_COUNTER_ADD("/MicroProfile/Symbols/Memory", sizeof(MicroProfileSymbolBlock));
12432
MICROPROFILE_COUNTER_CONFIG_ONCE("/MicroProfile/Symbols/Memory", MICROPROFILE_COUNTER_FORMAT_BYTES, 0, 0);
12433
memset(pBlock, 0, sizeof(MicroProfileSymbolBlock));
12434
return pBlock;
12435
};
12436
12437
auto SymbolCallback = [&](const char* pName, const char* pShortName, intptr_t nAddress, intptr_t nAddressEnd, uint32_t nModuleId)
12438
{
12439
MICROPROFILE_SCOPEI("microprofile", "SymbolCallback", MP_AUTO);
12440
uint32_t nModule = nModuleId;
12441
if(MicroProfileHashTableGetPtr(&S.SymbolModules[nModule].AddressToSymbol, (void*)nAddress))
12442
{
12443
return;
12444
}
12445
char Demangled[1024];
12446
if(MicroProfileDemangleName(pName, Demangled, sizeof(Demangled)))
12447
{
12448
pName = &Demangled[0];
12449
pShortName = &Demangled[0];
12450
}
12451
intptr_t delta = nAddressEnd - nAddress;
12452
S.SymbolModules[nModule].nProgress = MicroProfileMax(delta, S.SymbolModules[nModule].nProgress);
12453
S.nSymbolsDirty++;
12454
12455
int nIgnoreSymbol = MicroProfileSymbolIgnoreSymbol(pName) ? 1 : 0;
12456
12457
MicroProfileSymbolBlock* pActiveBlock = S.SymbolModules[nModule].pSymbolBlock;
12458
if(!pActiveBlock)
12459
{
12460
pActiveBlock = AllocBlock();
12461
pActiveBlock->pNext = S.SymbolModules[nModule].pSymbolBlock;
12462
S.SymbolModules[nModule].pSymbolBlock = pActiveBlock;
12463
}
12464
12465
if(pName == pShortName)
12466
{
12467
pShortName = 0;
12468
}
12469
uint32_t nLen = (uint32_t)strlen(pName) + 1;
12470
12471
if(nLen > MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN)
12472
nLen = MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN;
12473
uint32_t nLenShort = (uint32_t)(pShortName ? 1 + strlen(pShortName) : 0);
12474
if(nLenShort && nLenShort > MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN)
12475
nLenShort = MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN;
12476
uint32_t S0 = sizeof(MicroProfileSymbolDesc) * pActiveBlock->nNumSymbols;
12477
uint32_t S1 = pActiveBlock->nNumChars;
12478
uint32_t S3 = nLenShort + nLen + sizeof(MicroProfileSymbolDesc) + 64;
12479
if(S0 + S1 + S3 >= MicroProfileSymbolBlock::ESIZE)
12480
{
12481
MicroProfileSymbolBlock* pNewBlock = AllocBlock();
12482
MP_ASSERT(pActiveBlock == S.SymbolModules[nModule].pSymbolBlock);
12483
pNewBlock->pNext = pActiveBlock;
12484
S.SymbolModules[nModule].pSymbolBlock = pNewBlock;
12485
pActiveBlock = pNewBlock;
12486
}
12487
S0 = sizeof(MicroProfileSymbolDesc) * pActiveBlock->nNumSymbols;
12488
S1 = pActiveBlock->nNumChars;
12489
S3 = nLenShort + nLen + sizeof(MicroProfileSymbolDesc);
12490
MP_ASSERT(S0 + S1 + S3 < MicroProfileSymbolBlock::ESIZE);
12491
pActiveBlock->nNumChars += nLen;
12492
char* pStr = &pActiveBlock->Chars[MicroProfileSymbolBlock::ESIZE - pActiveBlock->nNumChars - 1];
12493
memcpy(pStr, pName, nLen);
12494
pStr[nLen - 1] = '\0';
12495
MicroProfileSymbolDesc& E = pActiveBlock->Symbols[pActiveBlock->nNumSymbols++];
12496
MicroProfileHashTableSetPtr(&S.SymbolModules[nModule].AddressToSymbol, (void*)nAddress, &E);
12497
12498
E.pName = pStr;
12499
E.nAddress = nAddress;
12500
E.nAddressEnd = nAddressEnd;
12501
E.nIgnoreSymbol = nIgnoreSymbol;
12502
E.nModule = nModule;
12503
if(pShortName && strlen(pShortName))
12504
{
12505
pActiveBlock->nNumChars += nLenShort;
12506
char* pStrShort = &pActiveBlock->Chars[MicroProfileSymbolBlock::ESIZE - pActiveBlock->nNumChars - 1];
12507
memcpy(pStrShort, pShortName, nLenShort);
12508
pStrShort[nLenShort - 1] = '\0';
12509
E.pShortName = pStrShort;
12510
}
12511
else
12512
{
12513
E.pShortName = E.pName;
12514
}
12515
#define SYMDBG 0
12516
#if SYMDBG
12517
uprintf("Got symbol %lld %lld %f .. %llx %llx %llx %s\n",
12518
S.SymbolModules[nModule].nProgress,
12519
S.SymbolModules[nModule].nProgressTarget,
12520
S.SymbolModules[nModule].nProgressTarget ? float(S.SymbolModules[nModule].nProgress) / float(S.SymbolModules[nModule].nProgressTarget) : 0.f,
12521
(int64_t)E.nAddress,
12522
(int64_t)S.SymbolModules[nModule].nAddrBegin,
12523
(int64_t)S.SymbolModules[nModule].nAddrEnd,
12524
E.pName);
12525
if(E.nAddress < (int64_t)S.SymbolModules[nModule].nAddrBegin || E.nAddress > (int64_t)S.SymbolModules[nModule].nAddrEnd)
12526
{
12527
MP_BREAK();
12528
}
12529
#endif
12530
E.nMask = MicroProfileCharacterMaskString(E.pShortName);
12531
MicroProfileCharacterMaskString2(E.pShortName, pActiveBlock->MatchMask);
12532
12533
pActiveBlock->nMask |= E.nMask;
12534
MICROPROFILE_COUNTER_ADD("/MicroProfile/Symbols/Count", 1);
12535
if(nIgnoreSymbol)
12536
{
12537
MICROPROFILE_COUNTER_ADD("/MicroProfile/Symbols/Ignored", 1);
12538
}
12539
#if SYMDBG
12540
MicroProfileSleep(10);
12541
#endif
12542
#undef SYMDBG
12543
12544
S.SymbolModules[nModule].nSymbolsLoaded.fetch_add(1);
12545
S.nSymbolsDirty.exchange(1);
12546
S.SymbolState.nSymbolsLoaded.fetch_add(1);
12547
MP_ASSERT((intptr_t)E.pShortName >= (intptr_t)&E); // assert pointer arithmetic is correct.
12548
};
12549
do
12550
{
12551
uint32_t nModuleLoad[MICROPROFILE_INSTRUMENT_MAX_MODULES];
12552
uint32_t nNumModulesRequested = 0;
12553
for(int i = 0; i < S.SymbolNumModules; ++i)
12554
{
12555
if(S.SymbolModules[i].nModuleLoadRequested.load() != 0 && S.SymbolModules[i].nModuleLoadFinished.load() == 0)
12556
{
12557
nModuleLoad[nNumModulesRequested] = i;
12558
S.SymbolModules[i].nProgress = 0;
12559
MicroProfileHashTableInit(&S.SymbolModules[i].AddressToSymbol, 256, 64, MicroProfileHashTableComparePtr, MicroProfileHashTableHashPtr);
12560
nNumModulesRequested++;
12561
}
12562
}
12563
if(0 == nNumModulesRequested)
12564
{
12565
break;
12566
}
12567
MicroProfileIterateSymbols(SymbolCallback, nModuleLoad, nNumModulesRequested);
12568
S.SymbolState.nModuleLoadsFinished.fetch_add(nNumModulesRequested);
12569
for(uint32_t i = 0; i < nNumModulesRequested; ++i)
12570
{
12571
if(S.SymbolModules[nModuleLoad[i]].nModuleLoadRequested.load() == S.SymbolModules[nModuleLoad[i]].nModuleLoadFinished.load())
12572
{
12573
S.SymbolModules[nModuleLoad[i]].nProgress = S.SymbolModules[nModuleLoad[i]].nProgressTarget;
12574
S.nSymbolsDirty.exchange(1);
12575
}
12576
}
12577
} while(1);
12578
}
12579
12580
// Looks up the symbol registered at exactly pAddress, searching the modules in
// index order. Returns the descriptor of the first hit, or nullptr when the
// address is unknown or that first hit is flagged as ignored.
MicroProfileSymbolDesc* MicroProfileSymbolFindFuction(void* pAddress)
{
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		MicroProfileSymbolDesc* pHit = nullptr;
		if(!MicroProfileHashTableGetPtr(&S.SymbolModules[nModule].AddressToSymbol, pAddress, &pHit))
		{
			continue;
		}
		// the first module that knows the address decides the result
		if(0 != pHit->nIgnoreSymbol)
		{
			return nullptr;
		}
		return pHit;
	}
	return nullptr;
}
12595
12596
// Capacities of a MicroProfileFunctionQuery.
// number of filter sub-patterns (and per-pattern masks) a query can hold
#define MICROPROFILE_MAX_FILTER 32
// number of matching symbols a query can return
#define MICROPROFILE_MAX_QUERY_RESULTS 32
// size in bytes of the query's filter string buffer
#define MICROPROFILE_MAX_FILTER_STRING 1024
12599
12600
struct MicroProfileFunctionQuery
12601
{
12602
MicroProfileFunctionQuery* pNext;
12603
uint32_t nState;
12604
const char* pFilterStrings[MICROPROFILE_MAX_FILTER];
12605
uint32_t nPatternLength[MICROPROFILE_MAX_FILTER];
12606
int nMaxFilter;
12607
12608
uint32_t nModuleFilterMatch[MICROPROFILE_INSTRUMENT_MAX_MODULES]; // prematch the modules, so it can be skipped during search
12609
uint32_t nMask[MICROPROFILE_MAX_FILTER]; // masks for subpatterns skipped
12610
MicroProfileStringMatchMask MatchMask[MICROPROFILE_MAX_FILTER]; // masks for subpatterns skipped
12611
12612
// results
12613
MicroProfileSymbolDesc* Results[MICROPROFILE_MAX_QUERY_RESULTS];
12614
uint32_t nNumResults;
12615
char FilterString[MICROPROFILE_MAX_FILTER_STRING];
12616
12617
uint32_t QueryId;
12618
};
12619
12620
// Returns a zero-filled query object, taken from S.pQueryFreeList when one is
// available and freshly allocated otherwise. Runs under MicroProfileMutex().
// NOTE(review): S.nNumQueryAllocated is incremented once for every call and a
// second time when a new object is allocated - confirm this double count is intended.
MicroProfileFunctionQuery* MicroProfileAllocFunctionQuery()
{
	MicroProfileScopeLock L(MicroProfileMutex());
	S.nNumQueryAllocated++;

	MicroProfileFunctionQuery* pQuery = nullptr;
	if(S.pQueryFreeList == 0)
	{
		// nothing to recycle
		pQuery = MP_ALLOC_OBJECT(MicroProfileFunctionQuery);
		MICROPROFILE_COUNTER_ADD("MicroProfile/Symbols/FunctionQuery", 1);
		MICROPROFILE_COUNTER_ADD("MicroProfile/Symbols/FunctionQueryMem", sizeof(MicroProfileFunctionQuery));
		S.nNumQueryAllocated++;
	}
	else
	{
		// pop the head of the free list
		pQuery = S.pQueryFreeList;
		S.pQueryFreeList = pQuery->pNext;
		S.nNumQueryFree--;
	}
	memset(pQuery, 0, sizeof(MicroProfileFunctionQuery));
	return pQuery;
}
12641
// Pushes pQ onto the head of S.pQueryFreeList so that a later
// MicroProfileAllocFunctionQuery() can reuse it.
// This function does not lock; the callers visible in this file hold the
// MicroProfile mutex while calling it.
void MicroProfileFreeFunctionQuery(MicroProfileFunctionQuery* pQ)
{
	pQ->pNext = S.pQueryFreeList;
	S.pQueryFreeList = pQ;
	// The allocator decrements this counter whenever it pops the free list, so
	// it has to be incremented here to stay balanced.
	S.nNumQueryFree++;
}
12646
12647
// Runs one symbol query: walks every symbol block of every loaded module and
// appends matching symbols to pQuery->Results.
//
// The search stops early when the result array is full
// (MICROPROFILE_MAX_QUERY_RESULTS) or when S.pPendingQuery becomes non-null,
// i.e. a newer query has been posted while this one was running.
// A timing / statistics line is printed through uprintf when done.
void MicroProfileProcessQuery(MicroProfileFunctionQuery* pQuery)
{
	MicroProfileFunctionQuery& Q = *pQuery;

	int nBlocksTested = 0, nSymbolsTested = 0, nStringsTested = 0, nStringsTested0 = 0;
	int nBlocks = 0;

	int64_t t = MP_TICK();
	int64_t tt = 0; // tick at which the result array became full; 0 if it never did

	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		uint32_t nModuleMatchOffset = Q.nModuleFilterMatch[nModule];
		MicroProfileSymbolBlock* pSymbols = S.SymbolModules[nModule].pSymbolBlock;

		uint32_t nMaskQ = Q.nMask[nModuleMatchOffset];
		MicroProfileStringMatchMask& MatchMaskQ = Q.MatchMask[nModuleMatchOffset];

		while(pSymbols && 0 == S.pPendingQuery && Q.nNumResults < MICROPROFILE_MAX_QUERY_RESULTS)
		{
			MICROPROFILE_SCOPEI("MicroProfile", "SymbolQueryLoop", MP_YELLOW);
			nBlocks++;
			// cheap block-level reject before looking at individual symbols
			if(MicroProfileCharacterMatch(pSymbols->MatchMask, MatchMaskQ))
			{
				nBlocksTested++;
				for(uint32_t nSymbol = 0; nSymbol < pSymbols->nNumSymbols && 0 == S.pPendingQuery && Q.nNumResults < MICROPROFILE_MAX_QUERY_RESULTS; ++nSymbol)
				{
					MicroProfileSymbolDesc& E = pSymbols->Symbols[nSymbol];
					if(0 != E.nIgnoreSymbol)
					{
						continue;
					}
					nSymbolsTested++;
					// per-symbol mask reject: every bit of the query mask must be set
					if(nMaskQ != (nMaskQ & E.nMask))
					{
						continue;
					}
					nStringsTested++;
					MP_ASSERT((int)E.nModule < S.SymbolNumModules);
					if(MicroProfileStringMatch(E.pShortName, nModuleMatchOffset, &Q.pFilterStrings[0], Q.nPatternLength, Q.nMaxFilter))
					{
						if(Q.nNumResults < MICROPROFILE_MAX_QUERY_RESULTS)
						{
							Q.Results[Q.nNumResults++] = &E;
							if(Q.nNumResults == MICROPROFILE_MAX_QUERY_RESULTS)
								tt = MP_TICK();
						}
					}
					if(Q.nNumResults < MICROPROFILE_MAX_QUERY_RESULTS)
						nStringsTested0++;
				}
			}
			pSymbols = pSymbols->pNext;
		}
	}

	int64_t tend = MP_TICK();
	float ToMS = MicroProfileTickToMsMultiplierCpu();
	float TIME = (tend - t) * ToMS;
	// When the result array never filled up, tt is still 0 and (tt - t) would
	// be a meaningless negative value; report the full duration instead.
	float TIME0 = ((tt != 0 ? tt : tend) - t) * ToMS;
	uprintf(" %6.3fms [%6.3f]: %5d/%5d blocks tested. %5d symbols %5d/%5d string compares\n", TIME, TIME0, nBlocksTested, nBlocks, nSymbolsTested, nStringsTested, nStringsTested0);
}
12715
12716
void* MicroProfileQueryThread(void* p)
12717
{
12718
MicroProfileOnThreadCreate("MicroProfileSymbolThread");
12719
{
12720
while(1)
12721
{
12722
MicroProfileSleep(100); // todo:: use an event instead
12723
MicroProfileScopeLock L(MicroProfileMutex());
12724
if(S.pPendingQuery != nullptr)
12725
{
12726
MICROPROFILE_SCOPEI("MicroProfile", "SymbolQuery", MP_WHEAT);
12727
MicroProfileFunctionQuery* pQuery = S.pPendingQuery;
12728
12729
MP_ASSERT(pQuery->QueryId > S.nQueryProcessed);
12730
S.pPendingQuery = 0;
12731
L.Unlock();
12732
12733
// uprintf("processing query %d\n", pQuery->QueryId);
12734
MicroProfileProcessQuery(pQuery);
12735
12736
L.Lock();
12737
S.nQueryProcessed = MicroProfileMax(pQuery->QueryId, S.nQueryProcessed);
12738
12739
pQuery->pNext = S.pFinishedQuery;
12740
S.pFinishedQuery = pQuery;
12741
}
12742
if(S.SymbolState.nModuleLoadsRequested.load() != S.SymbolState.nModuleLoadsFinished.load())
12743
{
12744
L.Unlock();
12745
MicroProfileSymbolInitializeInternal();
12746
L.Lock();
12747
}
12748
}
12749
12750
S.SymbolThreadFinished = 1;
12751
}
12752
MicroProfileOnThreadExit();
12753
return 0;
12754
}
12755
12756
void MicroProfileQueryJoinThread()
12757
{
12758
if(S.SymbolThreadFinished)
12759
{
12760
MicroProfileThreadJoin(&S.SymbolThread);
12761
S.SymbolThreadFinished = 0;
12762
S.SymbolThreadRunning = 0;
12763
}
12764
}
12765
void MicroProfileSymbolKickThread()
12766
{
12767
// MicroProfileQueryJoinThread();
12768
if(S.SymbolThreadRunning == 0)
12769
{
12770
S.SymbolThreadRunning = 1;
12771
MicroProfileThreadStart(&S.SymbolThread, MicroProfileQueryThread);
12772
}
12773
}
12774
#if MICROPROFILE_WEBSERVER
12775
// Sends the names of instrumented functions that have not been sent yet
// (indices [S.WSFunctionsInstrumentedSent, S.DynamicTokenIndex)) as one
// MSG_FUNCTION_NAMES message, then records them as sent.
void MicroProfileSymbolSendFunctionNames(MpSocket Connection)
{
	if(S.WSFunctionsInstrumentedSent >= S.DynamicTokenIndex)
	{
		return; // nothing new
	}

	MicroProfileWSPrintStart(Connection);
	MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":[", MSG_FUNCTION_NAMES);
	bool bNeedSeparator = false;
	for(uint32_t nIndex = S.WSFunctionsInstrumentedSent; nIndex < S.DynamicTokenIndex; ++nIndex)
	{
		const char* pFunctionName = S.FunctionsInstrumentedName[nIndex];
		const char* pModuleName = S.FunctionsInstrumentedModuleNames[nIndex];
		// every entry after the first is prefixed with a comma
		MicroProfileWSPrintf(bNeedSeparator ? ",[\"%s\",\"%s\",\"%s\"]" : "[\"%s\",\"%s\",\"%s\"]", pFunctionName, pModuleName, "unused");
		bNeedSeparator = true;
	}
	MicroProfileWSPrintf("]}");
	MicroProfileWSFlush();
	MicroProfileWSPrintEnd();

	S.WSFunctionsInstrumentedSent = S.DynamicTokenIndex;
}
12796
12797
// Sends all recorded patch errors (code bytes as hex, message, "already
// instrumented" flag) plus the names of the functions that failed to patch as
// one MSG_INSTRUMENT_ERROR message, then resets both error lists.
void MicroProfileSymbolSendErrors(MpSocket Connection)
{
	if(!S.nNumPatchErrors)
	{
		return;
	}

	MicroProfileWSPrintStart(Connection);
	MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"version\":\"%d.%d\",\"data\":[", MSG_INSTRUMENT_ERROR, MICROPROFILE_MAJOR_VERSION, MICROPROFILE_MINOR_VERSION);

	// "data": one object per patch error
	for(int nError = 0; nError < S.nNumPatchErrors; ++nError)
	{
		MicroProfilePatchError& E = S.PatchErrors[nError];
		if(nError != 0)
			MicroProfileWSPrintf(",");
		MicroProfileWSPrintf("{\"code\":\"");
		for(int nByte = 0; nByte < E.nCodeSize; ++nByte)
			MicroProfileWSPrintf("%02x", E.Code[nByte] & 0xff);
		MicroProfileWSPrintf("\",\"message\":\"%s\",\"already\":%d}", &E.Message[0], E.AlreadyInstrumented);
	}

	// "functions": names of the functions that could not be patched
	MicroProfileWSPrintf("],\"functions\":[");
	for(int nFunction = 0; nFunction < S.nNumPatchErrorFunctions; ++nFunction)
	{
		if(nFunction != 0)
			MicroProfileWSPrintf(",");
		MicroProfileWSPrintf("\"%s\"", S.PatchErrorFunctionNames[nFunction]);
	}
	MicroProfileWSPrintf("]}}");

	MicroProfileWSFlush();
	MicroProfileWSPrintEnd();

	// everything has been reported; start collecting from scratch
	S.nNumPatchErrors = 0;
	S.nNumPatchErrorFunctions = 0;
}
12838
12839
// Drains S.pFinishedQuery, keeps only the finished query with the highest
// QueryId (all others go back to the free list) and sends its results as one
// MSG_FUNCTION_RESULTS message. Does nothing when no query has finished.
void MicroProfileSymbolQuerySendResult(MpSocket Connection)
{
	MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSymbolQuerySendResult", MP_PINK2);

	MicroProfileFunctionQuery* pNewest = 0;
	{
		MicroProfileScopeLock L(MicroProfileMutex());
		while(S.pFinishedQuery != nullptr)
		{
			// pop the list head
			MicroProfileFunctionQuery* pCandidate = S.pFinishedQuery;
			S.pFinishedQuery = pCandidate->pNext;

			if(!pNewest)
			{
				pNewest = pCandidate;
			}
			else if(pCandidate->QueryId > pNewest->QueryId)
			{
				// candidate supersedes what we held so far
				MicroProfileFreeFunctionQuery(pNewest);
				pNewest = pCandidate;
			}
			else
			{
				MicroProfileFreeFunctionQuery(pCandidate);
			}
		}
	}

	if(!pNewest)
	{
		return;
	}

	uprintf("Sending result for query %d\n", pNewest->QueryId);
	MicroProfileWSPrintStart(Connection);
	MicroProfileWSPrintf("{\"k\":\"%d\",\"q\":%d,\"v\":[", MSG_FUNCTION_RESULTS, pNewest->QueryId);
	for(uint32_t nResult = 0; nResult < pNewest->nNumResults; ++nResult)
	{
		MicroProfileSymbolDesc& E = *pNewest->Results[nResult];
		// all entries but the first carry a leading comma
		const char* pFormat = nResult == 0 ? "{\"a\":\"%p\",\"n\":\"%s\",\"sn\":\"%s\",\"m\":\"%s\"}" : ",{\"a\":\"%p\",\"n\":\"%s\",\"sn\":\"%s\",\"m\":\"%s\"}";
		MicroProfileWSPrintf(pFormat, E.nAddress, E.pName, E.pShortName, MicroProfileSymbolModuleGetString(E.nModule));
	}
	MicroProfileWSPrintf("]}");
	MicroProfileWSFlush();
	MicroProfileWSPrintEnd();

	// hand the query object back under the lock
	MicroProfileScopeLock L(MicroProfileMutex());
	MicroProfileFreeFunctionQuery(pNewest);
}
12901
#endif
12902
12903
// Posts a new symbol query for the worker thread.
//
// pFilter has the form "<QueryId>x<filter text>": a decimal query id, the
// separator 'x', then the filter. The filter text is split in place into
// tokens at whitespace and '*'; at most MICROPROFILE_MAX_FILTER tokens are
// kept. The query replaces the pending one only when there is none, or when
// the pending one has a lower id. Connection is not used here.
// Requests without the 'x' separator are ignored.
void MicroProfileSymbolQueryFunctions(MpSocket Connection, const char* pFilter)
{
	MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSymbolQueryFunctions", MP_WHEAT);

	if(!MicroProfileSymbolInitialize(false))
	{
		return;
	}
	{
		int QueryId = atoi(pFilter);
		pFilter = strchr(pFilter, 'x');
		if(!pFilter)
		{
			// malformed request: no separator, so there is no filter to parse
			return;
		}
		pFilter++;
		MicroProfileScopeLock L(MicroProfileMutex());
		if(0 == S.pPendingQuery || S.pPendingQuery->QueryId < (uint32_t)QueryId)
		{
			// reuse the still-pending (now superseded) query object if there is one
			MicroProfileFunctionQuery* pQuery = S.pPendingQuery;
			if(!pQuery)
			{
				S.pPendingQuery = pQuery = MicroProfileAllocFunctionQuery();
			}
			MP_ASSERT(pQuery->pNext == 0);
			memset(pQuery, 0, sizeof(*pQuery));

			MicroProfileFunctionQuery& Q = *pQuery;
			Q.QueryId = QueryId;

			// copy the filter (including its terminator), truncating if needed
			uint32_t nLen = (uint32_t)strlen(pFilter) + 1;
			if(nLen >= MICROPROFILE_MAX_FILTER_STRING)
				nLen = MICROPROFILE_MAX_FILTER_STRING - 1;

			memcpy(Q.FilterString, pFilter, nLen);
			Q.FilterString[nLen] = '\0';

			// split into tokens: separators become terminators, and the first
			// character after a separator starts a new token
			char* pBuffer = Q.FilterString;
			bool bStartString = true;
			for(uint32_t i = 0; i < nLen; ++i)
			{
				char c = pBuffer[i];
				if(c == '\0')
				{
					break;
				}
				// isspace() requires a value representable as unsigned char
				if(isspace((unsigned char)c) || c == '*')
				{
					pBuffer[i] = '\0';
					bStartString = true;
				}
				else
				{
					if(bStartString)
					{
						if(Q.nMaxFilter < MICROPROFILE_MAX_FILTER)
						{
							const char* pstr = &pBuffer[i];
							Q.nMask[Q.nMaxFilter] = MicroProfileCharacterMaskString(pstr);
							MicroProfileCharacterMaskString2(pstr, Q.MatchMask[Q.nMaxFilter]);
							Q.pFilterStrings[Q.nMaxFilter++] = &pBuffer[i];
						}
					}
					bStartString = false;
				}
			}
			memset(Q.nModuleFilterMatch, 0xff, sizeof(Q.nModuleFilterMatch));
			for(int i = 0; i < S.SymbolNumModules; ++i)
			{
				Q.nModuleFilterMatch[i] = MicroProfileStringMatchOffset(MicroProfileSymbolModuleGetString(i), Q.pFilterStrings, Q.nPatternLength, Q.nMaxFilter);
			}

			// NOTE(review): Q.nPatternLength is only assigned inside the
			// disabled block below, so the matchers receive all-zero lengths
			// - confirm that this is what they expect.
#if 0
			uprintf("query %d::",QueryId);
			for(int i = 0; i < Q.nMaxFilter; ++i)
			{
				Q.nPatternLength[i] = (uint32_t)strlen(Q.pFilterStrings[i]);
				uprintf("'%s' ", Q.pFilterStrings[i]);
			}
			uprintf("\n");
#endif
		}
	}
	MicroProfileSymbolKickThread();
}
12984
12985
#if defined(_WIN32)
12986
// '##::::'##::'#######:::'#######::'##:::'##::::'##:::::'##:'####:'##::: ##::'#######:::'#######::
12987
// ##:::: ##:'##.... ##:'##.... ##: ##::'##::::: ##:'##: ##:. ##:: ###:: ##:'##.... ##:'##.... ##:
12988
// ##:::: ##: ##:::: ##: ##:::: ##: ##:'##:::::: ##: ##: ##:: ##:: ####: ##:..::::: ##:..::::: ##:
12989
// #########: ##:::: ##: ##:::: ##: #####::::::: ##: ##: ##:: ##:: ## ## ##::'#######:::'#######::
12990
// ##.... ##: ##:::: ##: ##:::: ##: ##. ##:::::: ##: ##: ##:: ##:: ##. ####::...... ##:'##::::::::
12991
// ##:::: ##: ##:::: ##: ##:::: ##: ##:. ##::::: ##: ##: ##:: ##:: ##:. ###:'##:::: ##: ##::::::::
12992
// ##:::: ##:. #######::. #######:: ##::. ##::::. ###. ###::'####: ##::. ##:. #######:: #########:
12993
// ..:::::..:::.......::::.......:::..::::..::::::...::...:::....::..::::..:::.......:::.........::
12994
12995
#ifdef _WIN32
12996
static void* MicroProfileAllocExecutableMemory(void* pBase, size_t s);
12997
static void* MicroProfileAllocExecutableMemoryFar(size_t s);
12998
static void MicroProfileMakeMemoryExecutable(void* p, size_t s);
12999
static void MicroProfileMakeWriteable(void* p_, size_t size, DWORD* oldFlags);
13000
static void MicroProfileRestore(void* p_, size_t size, DWORD* oldFlags);
13001
13002
extern "C" void microprofile_tramp_enter_patch();
13003
extern "C" void microprofile_tramp_enter();
13004
extern "C" void microprofile_tramp_code_begin();
13005
extern "C" void microprofile_tramp_code_end();
13006
extern "C" void microprofile_tramp_intercept0();
13007
extern "C" void microprofile_tramp_end();
13008
extern "C" void microprofile_tramp_exit();
13009
extern "C" void microprofile_tramp_leave();
13010
extern "C" void microprofile_tramp_trunk();
13011
extern "C" void microprofile_tramp_call_patch_pop();
13012
extern "C" void microprofile_tramp_call_patch_push();
13013
13014
bool MicroProfilePatchFunction(void* f, int Argument, MicroProfileHookFunc enter, MicroProfileHookFunc leave, MicroProfilePatchError* pError)
13015
{
13016
char* pOriginal = (char*)f;
13017
13018
f = MicroProfileX64FollowJump(f);
13019
if(MicroProfilePatchHasSuspendedThread((intptr_t)f, (intptr_t)f + 32))
13020
{
13021
uprintf("failed to patch, thread running in patch position");
13022
return false;
13023
}
13024
intptr_t t_enter = (intptr_t)microprofile_tramp_enter;
13025
intptr_t t_enter_patch_offset = (intptr_t)microprofile_tramp_enter_patch - t_enter;
13026
intptr_t t_code_begin_offset = (intptr_t)microprofile_tramp_code_begin - t_enter;
13027
intptr_t t_code_end_offset = (intptr_t)microprofile_tramp_code_end - t_enter;
13028
intptr_t t_code_intercept0_offset = (intptr_t)microprofile_tramp_intercept0 - t_enter;
13029
intptr_t t_code_exit_offset = (intptr_t)microprofile_tramp_exit - t_enter;
13030
intptr_t t_code_leave_offset = (intptr_t)microprofile_tramp_leave - t_enter;
13031
13032
intptr_t t_code_call_patch_push_offset = (intptr_t)microprofile_tramp_call_patch_push - t_enter;
13033
intptr_t t_code_call_patch_pop_offset = (intptr_t)microprofile_tramp_call_patch_pop - t_enter;
13034
intptr_t codemaxsize = t_code_end_offset - t_code_begin_offset;
13035
intptr_t t_end_offset = (intptr_t)microprofile_tramp_end - t_enter;
13036
intptr_t t_trunk_offset = (intptr_t)microprofile_tramp_trunk - t_enter;
13037
int t_trunk_size = (int)((intptr_t)microprofile_tramp_end - (intptr_t)microprofile_tramp_trunk);
13038
13039
char* ptramp = (char*)MicroProfileAllocExecutableMemory(f, t_end_offset);
13040
if(!ptramp)
13041
ptramp = (char*)MicroProfileAllocExecutableMemoryFar(t_end_offset);
13042
13043
intptr_t offset = ((intptr_t)f + 6 - (intptr_t)ptramp);
13044
13045
uint32_t nBytesToCopy = 14;
13046
if(offset < 0x80000000 && offset > -0x7fffffff)
13047
{
13048
/// offset is small enough to insert a relative jump
13049
nBytesToCopy = 5;
13050
}
13051
13052
memcpy(ptramp, (void*)t_enter, t_end_offset);
13053
13054
int nInstructionBytesDest = 0;
13055
char* pInstructionMoveDest = ptramp + t_code_begin_offset;
13056
char* pTrunk = ptramp + t_trunk_offset;
13057
13058
int nInstructionBytesSrc = 0;
13059
uint32_t nRegsWritten = 0;
13060
uint32_t nRetSafe = 1;
13061
uint32_t nUsableJumpRegs = (1 << R_RAX) | (1 << R_R10) | (1 << R_R11);
13062
static_assert(R_RAX == 0, "R_RAX must be 0");
13063
if(!MicroProfileCopyInstructionBytes(
13064
pInstructionMoveDest, f, nBytesToCopy, (int)codemaxsize, pTrunk, t_trunk_size, nUsableJumpRegs, &nInstructionBytesDest, &nInstructionBytesSrc, &nRegsWritten, &nRetSafe))
13065
{
13066
if(pError)
13067
{
13068
const char* pCode = (const char*)f;
13069
memset(pError->Code, 0, sizeof(pError->Code));
13070
memcpy(pError->Code, pCode, nInstructionBytesSrc);
13071
int off = stbsp_snprintf(pError->Message, sizeof(pError->Message), "Failed to move %d code bytes ", nInstructionBytesSrc);
13072
pError->nCodeSize = nInstructionBytesSrc;
13073
for(int i = 0; i < nInstructionBytesSrc; ++i)
13074
{
13075
off += stbsp_snprintf(off + pError->Message, sizeof(pError->Message) - off, "%02x ", 0xff & pCode[i]);
13076
}
13077
uprintf("%s\n", pError->Message);
13078
}
13079
return false;
13080
}
13081
13082
intptr_t phome = nInstructionBytesSrc + (intptr_t)f;
13083
uint32_t reg = nUsableJumpRegs & ~nRegsWritten;
13084
if(0 == reg)
13085
{
13086
if(0 == nRetSafe)
13087
MP_BREAK(); // should be caught earlier
13088
MicroProfileInsertRetJump(pInstructionMoveDest + nInstructionBytesDest, phome);
13089
}
13090
else
13091
{
13092
int r = R_RAX;
13093
while((reg & 1) == 0)
13094
{
13095
reg >>= 1;
13096
r++;
13097
}
13098
MicroProfileInsertRegisterJump(pInstructionMoveDest + nInstructionBytesDest, phome, r);
13099
}
13100
13101
// PATCH 1 TRAMP EXIT
13102
intptr_t microprofile_tramp_exit = (intptr_t)ptramp + t_code_exit_offset;
13103
memcpy(ptramp + t_enter_patch_offset + 2, (void*)&microprofile_tramp_exit, 8);
13104
13105
char* pintercept = t_code_intercept0_offset + ptramp;
13106
13107
// PATCH 1.5 Argument
13108
memcpy(pintercept - 4, (void*)&Argument, 4);
13109
13110
// PATCH 2 INTERCEPT0
13111
intptr_t addr = (intptr_t)enter; //&intercept0;
13112
memcpy(pintercept + 2, (void*)&addr, 8);
13113
13114
// PATHC 2.5 argument
13115
memcpy(ptramp + t_code_exit_offset + 3, (void*)&Argument, 4);
13116
13117
intptr_t microprofile_tramp_leave = (intptr_t)ptramp + t_code_leave_offset;
13118
// PATCH 3 INTERCEPT1
13119
intptr_t addr1 = (intptr_t)leave; //&intercept1;
13120
memcpy((char*)microprofile_tramp_leave + 2, (void*)&addr1, 8);
13121
13122
intptr_t patch_push_addr = (intptr_t)(&MicroProfile_Patch_TLS_PUSH);
13123
intptr_t patch_pop_addr = (intptr_t)(&MicroProfile_Patch_TLS_POP);
13124
memcpy((char*)ptramp + t_code_call_patch_push_offset + 2, &patch_push_addr, 8);
13125
memcpy((char*)ptramp + t_code_call_patch_pop_offset + 2, &patch_pop_addr, 8);
13126
MicroProfileMakeMemoryExecutable(ptramp, t_end_offset);
13127
13128
{
13129
// PATCH 4 DEST FUNC
13130
13131
DWORD OldFlags[2] = { 0 };
13132
MicroProfileMakeWriteable(f, nInstructionBytesSrc, OldFlags);
13133
char* pp = (char*)f;
13134
char* ppend = pp + nInstructionBytesSrc;
13135
13136
if(nInstructionBytesSrc < 14)
13137
{
13138
pp = MicroProfileInsertRelativeJump((char*)pp, (intptr_t)ptramp);
13139
}
13140
else
13141
{
13142
pp = MicroProfileInsertRegisterJump((char*)pp, (intptr_t)ptramp, R_RAX);
13143
}
13144
13145
while(pp != ppend)
13146
{
13147
char c = (unsigned char)0x90;
13148
MP_ASSERT((unsigned char)c == (unsigned char)0x90);
13149
*pp++ = (unsigned char)0x90;
13150
}
13151
MicroProfileRestore(f, nInstructionBytesSrc, OldFlags);
13152
}
13153
return true;
13154
}
13155
13156
static void MicroProfileMakeWriteable(void* p_, size_t s, DWORD* oldFlags)
13157
{
13158
static uint64_t nPageSize = 4 << 10;
13159
13160
intptr_t aligned = (intptr_t)p_;
13161
aligned = (aligned & (~(nPageSize - 1)));
13162
intptr_t aligned_end = (intptr_t)p_;
13163
aligned_end += s;
13164
aligned_end = (aligned_end + nPageSize - 1) & (~(nPageSize - 1));
13165
uint32_t nNumPages = (uint32_t)((aligned_end - aligned) / nPageSize);
13166
MP_ASSERT(nNumPages >= 1 && nNumPages <= 2);
13167
for(uint32_t i = 0; i < nNumPages; ++i)
13168
{
13169
if(!VirtualProtect((void*)(aligned + nPageSize * i), nPageSize, PAGE_EXECUTE_READWRITE, oldFlags + i))
13170
{
13171
MP_BREAK();
13172
}
13173
}
13174
//*(unsigned char*)p_ = 0x90;
13175
}
13176
13177
static void MicroProfileRestore(void* p_, size_t s, DWORD* oldFlags)
13178
{
13179
static uint64_t nPageSize = 4 << 10;
13180
13181
intptr_t aligned = (intptr_t)p_;
13182
aligned = (aligned & (~(nPageSize - 1)));
13183
intptr_t aligned_end = (intptr_t)p_;
13184
aligned_end += s;
13185
aligned_end = (aligned_end + nPageSize - 1) & (~(nPageSize - 1));
13186
uint32_t nNumPages = (uint32_t)((aligned_end - aligned) / nPageSize);
13187
DWORD Dummy;
13188
for(uint32_t i = 0; i < nNumPages; ++i)
13189
{
13190
if(!VirtualProtect((void*)(aligned + nPageSize * i), nPageSize, oldFlags[i], &Dummy))
13191
{
13192
MP_BREAK();
13193
}
13194
}
13195
}
13196
13197
void* MicroProfileAllocExecutableMemoryUp(intptr_t nBase, size_t s, uint32_t RegionIndex)
13198
{
13199
SYSTEM_INFO si;
13200
GetSystemInfo(&si);
13201
size_t Granularity = si.dwAllocationGranularity << 1;
13202
nBase = (nBase / Granularity) * Granularity;
13203
intptr_t nEnd = nBase + 0x80000000;
13204
13205
for(uint32_t i = RegionIndex; i < S.MemoryRegions.Size; i++)
13206
{
13207
// try and allocate 2x before
13208
nBase = S.MemoryRegions[i].Start + S.MemoryRegions[i].Size + Granularity;
13209
nBase = (nBase / Granularity) * Granularity;
13210
13211
if(nBase >= nEnd)
13212
break;
13213
void* pMemory = VirtualAlloc((void*)nBase, s, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
13214
if(pMemory)
13215
{
13216
return pMemory;
13217
}
13218
}
13219
return nullptr;
13220
}
13221
13222
static void MicroProfileUpdateMemoryRegions()
13223
{
13224
MicroProfileArrayClear(S.MemoryRegions);
13225
SYSTEM_INFO si;
13226
GetSystemInfo(&si);
13227
13228
BYTE* Addr = (BYTE*)si.lpMinimumApplicationAddress;
13229
BYTE* MaxAddr = (BYTE*)si.lpMaximumApplicationAddress;
13230
// uprintf("updating memory regions\n");
13231
uint32_t idx = 0;
13232
(void)idx;
13233
while(Addr < MaxAddr)
13234
{
13235
MEMORY_BASIC_INFORMATION mbi;
13236
SIZE_T Result = VirtualQuery(Addr, &mbi, sizeof(mbi));
13237
if(Result == 0)
13238
break;
13239
MicroProfileInstrumentMemoryRegion region;
13240
region.Start = (intptr_t)mbi.BaseAddress;
13241
region.Size = (intptr_t)mbi.RegionSize;
13242
MicroProfileArrayPushBack(S.MemoryRegions, region);
13243
// uprintf("Memory Region %d: %p(%p) %p .. State=%08x Protect=%08x Type=%08x\n", idx++, mbi.BaseAddress, mbi.AllocationBase, (intptr_t)mbi.BaseAddress + mbi.RegionSize, mbi.State, mbi.Protect,
13244
// mbi.Type);
13245
Addr = (BYTE*)mbi.BaseAddress + mbi.RegionSize;
13246
}
13247
uprintf("Iterated %d regions\n", S.MemoryRegions.Size);
13248
}
13249
13250
static void* MicroProfileAllocExecutableMemoryDown(intptr_t nBase, size_t s, uint32_t RegionIndex)
13251
{
13252
SYSTEM_INFO si;
13253
GetSystemInfo(&si);
13254
size_t Granularity = si.dwAllocationGranularity << 1;
13255
intptr_t nEnd = nBase - 0x80000000;
13256
13257
for(int32_t i = RegionIndex; i >= 0; i--)
13258
{
13259
// try and allocate 2x before
13260
nBase = S.MemoryRegions[i].Start - Granularity;
13261
nBase = (nBase / Granularity) * Granularity;
13262
if(nBase < nEnd)
13263
break;
13264
void* pMemory = VirtualAlloc((void*)nBase, s, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
13265
if(pMemory)
13266
{
13267
return pMemory;
13268
}
13269
}
13270
return nullptr;
13271
}
13272
13273
static void* MicroProfileAllocExecutableMemory(void* pBase, size_t s)
13274
{
13275
uint32_t RegionIndex = 0;
13276
for(uint32_t i = 0; i < S.MemoryRegions.Size; ++i)
13277
{
13278
auto& R = S.MemoryRegions[i];
13279
if(R.Start <= (intptr_t)pBase && (intptr_t)pBase < R.Start + R.Size)
13280
{
13281
RegionIndex = i;
13282
break;
13283
}
13284
}
13285
13286
s = (s + 4095) & ~(4095);
13287
intptr_t nBase = (intptr_t)pBase;
13288
void* pResult = 0;
13289
if(0 == pResult && nBase > 0x40000000)
13290
{
13291
pResult = MicroProfileAllocExecutableMemoryDown(nBase - 0x40000000, s, RegionIndex);
13292
if(0 == pResult)
13293
{
13294
pResult = MicroProfileAllocExecutableMemoryUp(nBase - 0x40000000, s, RegionIndex);
13295
}
13296
}
13297
if(0 == pResult && nBase < 0xffffffff40000000)
13298
{
13299
pResult = MicroProfileAllocExecutableMemoryUp(nBase + 0x40000000, s, RegionIndex);
13300
if(0 == pResult)
13301
{
13302
pResult = MicroProfileAllocExecutableMemoryUp(nBase + 0x40000000, s, RegionIndex);
13303
}
13304
}
13305
return pResult;
13306
}
13307
static void* MicroProfileAllocExecutableMemoryFar(size_t s)
13308
{
13309
static uint64_t nPageSize = 4 << 10;
13310
s = (s + (nPageSize - 1)) & (~(nPageSize - 1));
13311
13312
void* pMem = VirtualAlloc(0, s, MEM_COMMIT, PAGE_READWRITE);
13313
MP_ASSERT(pMem);
13314
13315
// uprintf("Allocating %zu %p\n", s, pMem);
13316
return pMem;
13317
}
13318
static void MicroProfileMakeMemoryExecutable(void* p, size_t s)
13319
{
13320
static uint64_t nPageSize = 4 << 10;
13321
s = (s + (nPageSize - 1)) & (~(nPageSize - 1));
13322
DWORD Unused;
13323
if(!VirtualProtect(p, s, PAGE_EXECUTE_READ, &Unused))
13324
{
13325
MP_BREAK();
13326
}
13327
}
13328
#endif
13329
13330
// Writes pStr into [pOutBegin, pOutEnd) with everything inside (...) and <...>
// removed, brackets included, e.g. "ns::Vec<int>::push(float) const" becomes
// "ns::Vec::push const".
//
// Returns the number of characters written, not counting the terminator.
// The output is truncated to fit and is always null-terminated when the
// buffer holds at least one byte. Names longer than 1024 characters are not
// processed (empty output, returns 0).
//
// An unmatched ')' or '>' (as in "operator>") is dropped without changing the
// nesting depth. Letting the depth go negative would cause later opening
// brackets to be emitted and the text that follows to be dropped.
int MicroProfileTrimFunctionName(const char* pStr, char* pOutBegin, char* pOutEnd)
{
	if(pOutBegin >= pOutEnd)
	{
		return 0; // no room at all, not even for the terminator
	}
	char* const pLast = pOutEnd - 1; // reserved for the terminator

	const int l = (int)strlen(pStr) - 1;
	if(l >= 1024 || pOutBegin == pLast)
	{
		*pOutBegin = '\0';
		return 0;
	}

	int sz = 0;
	int nDepth = 0; // current bracket nesting depth
	const char* p = pStr;
	const char* pEnd = pStr + l + 1;
	while(p != pEnd && pOutBegin != pLast)
	{
		char c = *p++;
		if(c == '(' || c == '<')
		{
			nDepth++;
		}
		else if(c == ')' || c == '>')
		{
			if(nDepth > 0)
			{
				nDepth--;
			}
			continue;
		}

		if(nDepth == 0)
		{
			*pOutBegin++ = c;
			sz++;
		}
	}

	*pOutBegin = '\0';
	return sz;
}
13365
13366
int MicroProfileFindFunctionName(const char* pStr, const char** ppStart)
13367
{
13368
int l = (int)strlen(pStr) - 1;
13369
if(l < 1024)
13370
{
13371
char b[1024] = { 0 };
13372
char* put = &b[0];
13373
13374
const char* p = pStr;
13375
const char* pEnd = pStr + l + 1;
13376
int in = 0;
13377
while(p != pEnd)
13378
{
13379
char c = *p++;
13380
if(c == '(' || c == '<')
13381
{
13382
in++;
13383
}
13384
else if(c == ')' || c == '>')
13385
{
13386
in--;
13387
continue;
13388
}
13389
13390
if(in == 0)
13391
{
13392
*put++ = c;
13393
}
13394
}
13395
13396
*put++ = '\0';
13397
uprintf("trimmed %s\n", b);
13398
}
13399
13400
// int nFirstParen = l;
13401
int nNumParen = 0;
13402
int c = 0;
13403
13404
while(l >= 0 && pStr[l] != ')' && c++ < sizeof(" const") - 1)
13405
{
13406
l--;
13407
}
13408
if(pStr[l] == ')')
13409
{
13410
do
13411
{
13412
if(pStr[l] == ')')
13413
{
13414
nNumParen++;
13415
}
13416
else if(pStr[l] == '(')
13417
{
13418
nNumParen--;
13419
}
13420
l--;
13421
} while(nNumParen > 0 && l >= 0);
13422
}
13423
else
13424
{
13425
*ppStart = pStr;
13426
return 0;
13427
}
13428
while(l >= 0 && isspace(pStr[l]))
13429
{
13430
--l;
13431
}
13432
int nLast = l;
13433
while(l >= 0 && !isspace(pStr[l]))
13434
{
13435
l--;
13436
}
13437
int nFirst = l;
13438
if(nFirst == nLast)
13439
return 0;
13440
int nCount = nLast - nFirst + 1;
13441
*ppStart = pStr + nFirst;
13442
return nCount;
13443
}
13444
13445
#include <dbghelp.h>
13446
#include <psapi.h>
13447
#include <tlhelp32.h>
13448
#include <winternl.h>
13449
struct MicroProfileQueryContext
13450
{
13451
13452
const char* pFilterStrings[MICROPROFILE_MAX_FILTER];
13453
uint32_t nPatternLength[MICROPROFILE_MAX_FILTER];
13454
int nMaxFilter = 0;
13455
char TempBuffer[128];
13456
uint32_t size = 0;
13457
bool bFirst = false;
13458
};
13459
13460
// Module enumeration callback: registers the module [BaseOfDll, BaseOfDll +
// image size) with MicroProfileSymbolInitModule. Always returns TRUE so the
// enumeration continues. UserContext is not used.
BOOL CALLBACK MicroProfileEnumModules(_In_ PCTSTR ModuleName, _In_ DWORD64 BaseOfDll, _In_opt_ PVOID UserContext)
{
	// Zero-initialized so SizeOfImage is 0 rather than garbage when the
	// query below fails.
	MODULEINFO MI = {};
	if(!GetModuleInformation(GetCurrentProcess(), (HMODULE)BaseOfDll, &MI, sizeof(MI)))
	{
		uprintf("GetModuleInformation failed, error %d\n", (int)GetLastError());
	}

	MEMORY_BASIC_INFORMATION B;
	int r = VirtualQuery((LPCVOID)BaseOfDll, (MEMORY_BASIC_INFORMATION*)&B, sizeof(B));
	int r1 = GetLastError(); // capture before any other API call can overwrite it
	if(r == 0)
	{
		char buffer[1024];
		stbsp_snprintf(buffer, sizeof(buffer) - 1, "Error %d\n", r1);
		// buffer is a narrow string, so call the ANSI entry point explicitly
		OutputDebugStringA(buffer);
		MP_BREAK();
	}
	MicroProfileSymbolInitModule(ModuleName, BaseOfDll, BaseOfDll + MI.SizeOfImage);
	return TRUE;
}
13478
13479
namespace
{
// Type-erased symbol callback: lets an arbitrary callable be passed through a
// C-style void* user-context parameter and invoked via a virtual call.
struct QueryCallbackBase
{
	// virtual so that destroying an implementation through the base is safe
	virtual ~QueryCallbackBase() = default;

	// pName / pShortName: full and trimmed symbol name; addr / addrend:
	// address range of the symbol; nModuleId: id of the owning module.
	virtual void CB(const char* pName, const char* pShortName, intptr_t addr, intptr_t addrend, uint32_t nModuleId) = 0;
};

// Adapter that stores a callable of type T by value and forwards CB() to it.
template <typename T>
struct QueryCallbackImpl : public QueryCallbackBase
{
	T t;
	QueryCallbackImpl(T t)
		: t(t)
	{
	}
	void CB(const char* pName, const char* pShortName, intptr_t addr, intptr_t addrend, uint32_t nModuleId) override
	{
		t(pName, pShortName, addr, addrend, nModuleId);
	}
};
} // namespace
13499
13500
static uint32_t nLastModuleIdWin32 = (uint32_t)-1;
13501
static intptr_t nLastModuleBaseWin32 = (intptr_t)-1;
13502
13503
// Symbol enumeration callback. For symbols whose Tag is 5 or 10 it forwards
// the name, the bracket-stripped short name (null when empty), the address
// range and the owning module id to the QueryCallbackBase passed in
// UserContext. Always returns TRUE to keep enumerating.
//
// The module id of the most recently seen module base is cached in
// nLastModuleIdWin32 / nLastModuleBaseWin32 to avoid a lookup per symbol.
BOOL MicroProfileQueryContextEnumSymbols(_In_ PSYMBOL_INFO pSymInfo, _In_ ULONG SymbolSize, _In_opt_ PVOID UserContext)
{
	uint32_t nModuleId = nLastModuleIdWin32;
	const bool bSameModuleAsLast = nLastModuleBaseWin32 == (intptr_t)pSymInfo->ModBase;
	if(!bSameModuleAsLast)
	{
		nLastModuleIdWin32 = nModuleId = MicroProfileSymbolGetModule((const char*)(intptr_t)-2, pSymInfo->ModBase);
		nLastModuleBaseWin32 = (intptr_t)pSymInfo->ModBase;
	}

	// presumably SymTagFunction (5) and SymTagPublicSymbol (10) - confirm against SymTagEnum
	const bool bWantedTag = pSymInfo->Tag == 5 || pSymInfo->Tag == 10;
	if(!bWantedTag)
	{
		return TRUE;
	}

	char ShortName[1024];
	const int nShortLength = MicroProfileTrimFunctionName(pSymInfo->Name, &ShortName[0], &ShortName[1024]);
	const char* pShortName = nShortLength ? &ShortName[0] : 0;
	const intptr_t nBegin = (intptr_t)pSymInfo->Address;
	const intptr_t nEnd = pSymInfo->Size + (intptr_t)pSymInfo->Address;

	QueryCallbackBase* pCallback = (QueryCallbackBase*)UserContext;
	pCallback->CB(pSymInfo->Name, pShortName, nBegin, nEnd, nModuleId);
	return TRUE;
}
13524
13525
bool MicroProfileDemangleName(const char* pName, char* OutName, uint32_t Size)
13526
{
13527
MICROPROFILE_SCOPEI("microprofile", "SymbolDemangle", MP_AUTO);
13528
if(UnDecorateSymbolName(pName, OutName, Size, UNDNAME_NAME_ONLY))
13529
{
13530
return true;
13531
}
13532
return false;
13533
}
13534
13535
// Reads the CodeView (RSDS) debug record of the loaded image at hMod and
// returns its PDB GUID, age and file name. Returns false when the image
// headers are not valid, there is no debug directory, or no RSDS record
// exists.
bool MicroProfileExtractPdbInfo(HMODULE hMod, GUID& guid, DWORD& age, char pdbName[MAX_PATH])
{
	// layout of a PDB 7.0 CodeView record
	struct CV_INFO_PDB70
	{
		DWORD CvSignature;	 // "RSDS"
		GUID Signature;		 // GUID
		DWORD Age;			 // Age
		char PdbFileName[1]; // Null-terminated string
	};

	BYTE* pImage = (BYTE*)hMod;

	IMAGE_DOS_HEADER* pDosHeader = (IMAGE_DOS_HEADER*)pImage;
	if(pDosHeader->e_magic != IMAGE_DOS_SIGNATURE)
		return false;

	IMAGE_NT_HEADERS* pNtHeaders = (IMAGE_NT_HEADERS*)(pImage + pDosHeader->e_lfanew);
	if(pNtHeaders->Signature != IMAGE_NT_SIGNATURE)
		return false;

	auto& DebugDirectory = pNtHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG];
	if(!DebugDirectory.VirtualAddress || !DebugDirectory.Size)
		return false;

	IMAGE_DEBUG_DIRECTORY* pEntries = (IMAGE_DEBUG_DIRECTORY*)(pImage + DebugDirectory.VirtualAddress);
	int nNumEntries = DebugDirectory.Size / sizeof(IMAGE_DEBUG_DIRECTORY);
	for(int nEntry = 0; nEntry < nNumEntries; nEntry++)
	{
		if(pEntries[nEntry].Type != IMAGE_DEBUG_TYPE_CODEVIEW)
			continue;

		auto pRecord = (CV_INFO_PDB70*)(pImage + pEntries[nEntry].AddressOfRawData);
		if(pRecord->CvSignature != 'SDSR')
			continue; // "RSDS"

		guid = pRecord->Signature;
		age = pRecord->Age;
		strcpy_s(pdbName, MAX_PATH, pRecord->PdbFileName);
		return true;
	}
	return false;
}
13572
13573
// Finds the PDB that belongs to Module and stores its path in outPath.
// If the path recorded in the image can be opened it is used directly;
// otherwise SymFindFileInPath is asked to locate the file by GUID and age.
// Returns true when a path was found.
bool MicroProfileDownloadPDB(HMODULE Module, HANDLE Process, char outPath[MAX_PATH])
{
	GUID PdbGuid;
	DWORD PdbAge;
	char PdbName[MAX_PATH];
	const bool bHasPdbInfo = MicroProfileExtractPdbInfo(Module, PdbGuid, PdbAge, PdbName);
	if(!bHasPdbInfo)
	{
		uprintf("Failed to download pdb\n");
		MP_BREAK();
		return false;
	}
	uprintf("pdb name %s age %d\n", PdbName, PdbAge);

	// the recorded path is usable as is when the file exists locally
	if(FILE* pLocalFile = fopen(PdbName, "r"))
	{
		fclose(pLocalFile);
		strcpy_s(outPath, MAX_PATH, PdbName);
		return true;
	}

	const BOOL bFound = SymFindFileInPath(Process,
										  NULL,
										  PdbName,
										  (PVOID)&PdbGuid, // GUID
										  PdbAge,		   // Age
										  0,			   // FileSize (not used for PDBs)
										  SSRVOPT_GUIDPTR, // we're passing GUID pointer
										  outPath,
										  NULL,
										  NULL);
	return bFound != 0;
}
13606
13607
#include "PDB.h"
13608
#include "PDB_DBIStream.h"
13609
#include "PDB_IPIStream.h"
13610
#include "PDB_InfoStream.h"
13611
#include "PDB_NamesStream.h"
13612
#include "PDB_RawFile.h"
13613
#include "PDB_TPIStream.h"
13614
13615
template <typename Callback>
13616
void MicroProfileLoadRawPDB(Callback CB, const char* Filename, uint64_t Base, uint32_t nModuleId)
13617
{
13618
auto OnSymbol = [CB, Base, nModuleId](const char* Sym, uint32_t Offset, uint32_t Size)
13619
{
13620
char FunctionName[1024];
13621
int ret = 0;
13622
int l = MicroProfileTrimFunctionName(Sym, &FunctionName[0], &FunctionName[1024]);
13623
const char* fname = l ? &FunctionName[0] : nullptr;
13624
CB(Sym, fname, (intptr_t)Offset + Base, (intptr_t)Offset + Base + Size, nModuleId);
13625
};
13626
13627
void* File = CreateFileA(Filename, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, nullptr);
13628
13629
if(File == INVALID_HANDLE_VALUE)
13630
{
13631
MP_BREAK();
13632
}
13633
13634
void* FileMapping = CreateFileMappingA(File, nullptr, PAGE_READONLY, 0, 0, nullptr);
13635
13636
if(FileMapping == nullptr)
13637
{
13638
CloseHandle(File);
13639
MP_BREAK();
13640
}
13641
13642
void* BaseAddress = MapViewOfFile(FileMapping, FILE_MAP_READ, 0, 0, 0);
13643
13644
if(BaseAddress == nullptr)
13645
{
13646
CloseHandle(FileMapping);
13647
CloseHandle(File);
13648
}
13649
13650
BY_HANDLE_FILE_INFORMATION FileInformation;
13651
const bool GetInformationResult = GetFileInformationByHandle(File, &FileInformation);
13652
if(!GetInformationResult)
13653
{
13654
UnmapViewOfFile(BaseAddress);
13655
CloseHandle(FileMapping);
13656
CloseHandle(File);
13657
13658
MP_BREAK();
13659
}
13660
13661
const size_t FileSizeHighBytes = static_cast<size_t>(FileInformation.nFileSizeHigh) << 32;
13662
const size_t FileSizeLowBytes = FileInformation.nFileSizeLow;
13663
const size_t FileSize = FileSizeHighBytes | FileSizeLowBytes;
13664
13665
const PDB::RawFile RawPdbFile = PDB::CreateRawFile(BaseAddress);
13666
if(PDB::HasValidDBIStream(RawPdbFile) != PDB::ErrorCode::Success)
13667
{
13668
MP_BREAK();
13669
}
13670
const PDB::InfoStream InfoStream(RawPdbFile);
13671
if(InfoStream.UsesDebugFastLink())
13672
{
13673
MP_BREAK();
13674
}
13675
13676
// const PDB::Header* h = InfoStream.GetHeader();
13677
// uprintf("Version %u, signature %u, age %u, GUID %08x-%04x-%04x-%02x%02x%02x%02x%02x%02x%02x%02x\n",
13678
// static_cast<uint32_t>(h->version), h->signature, h->age,
13679
// h->guid.Data1, h->guid.Data2, h->guid.Data3,
13680
// h->guid.Data4[0], h->guid.Data4[1], h->guid.Data4[2], h->guid.Data4[3], h->guid.Data4[4], h->guid.Data4[5], h->guid.Data4[6], h->guid.Data4[7]);
13681
13682
const PDB::DBIStream DbiStream = PDB::CreateDBIStream(RawPdbFile);
13683
if(PDB::ErrorCode::Success != DbiStream.HasValidSymbolRecordStream(RawPdbFile))
13684
{
13685
MP_BREAK();
13686
}
13687
13688
if(PDB::ErrorCode::Success != DbiStream.HasValidPublicSymbolStream(RawPdbFile))
13689
{
13690
MP_BREAK();
13691
}
13692
13693
if(PDB::ErrorCode::Success != DbiStream.HasValidGlobalSymbolStream(RawPdbFile))
13694
{
13695
MP_BREAK();
13696
}
13697
13698
if(PDB::ErrorCode::Success != DbiStream.HasValidSectionContributionStream(RawPdbFile))
13699
{
13700
MP_BREAK();
13701
}
13702
13703
if(PDB::ErrorCode::Success != DbiStream.HasValidImageSectionStream(RawPdbFile))
13704
{
13705
MP_BREAK();
13706
}
13707
13708
const PDB::ImageSectionStream ImageSectionStream = DbiStream.CreateImageSectionStream(RawPdbFile);
13709
const PDB::ModuleInfoStream ModuleInfoStream = DbiStream.CreateModuleInfoStream(RawPdbFile);
13710
const PDB::CoalescedMSFStream SymbolRecordStream = DbiStream.CreateSymbolRecordStream(RawPdbFile);
13711
13712
const PDB::ArrayView<PDB::ModuleInfoStream::Module> modules = ModuleInfoStream.GetModules();
13713
13714
for(const PDB::ModuleInfoStream::Module& module : modules)
13715
{
13716
if(!module.HasSymbolStream())
13717
{
13718
continue;
13719
}
13720
13721
const PDB::ModuleSymbolStream moduleSymbolStream = module.CreateSymbolStream(RawPdbFile);
13722
moduleSymbolStream.ForEachSymbol(
13723
[&ImageSectionStream, &OnSymbol](const PDB::CodeView::DBI::Record* record)
13724
{
13725
// only grab function symbols from the module streams
13726
const char* name = nullptr;
13727
uint32_t rva = 0u;
13728
uint32_t size = 0u;
13729
if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_FRAMEPROC)
13730
{
13731
// functionSymbols[functionSymbols.size() - 1].frameProc = record;
13732
return;
13733
}
13734
else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_THUNK32)
13735
{
13736
if(record->data.S_THUNK32.thunk == PDB::CodeView::DBI::ThunkOrdinal::TrampolineIncremental)
13737
{
13738
// we have never seen incremental linking thunks stored inside a S_THUNK32 symbol, but better safe than sorry
13739
name = "ILT";
13740
rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_THUNK32.section, record->data.S_THUNK32.offset);
13741
size = 5u;
13742
}
13743
}
13744
else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_TRAMPOLINE)
13745
{
13746
// incremental linking thunks are stored in the linker module
13747
name = "ILT";
13748
rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_TRAMPOLINE.thunkSection, record->data.S_TRAMPOLINE.thunkOffset);
13749
size = 5u;
13750
}
13751
else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_LPROC32)
13752
{
13753
name = record->data.S_LPROC32.name;
13754
rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_LPROC32.section, record->data.S_LPROC32.offset);
13755
size = record->data.S_LPROC32.codeSize;
13756
}
13757
else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_GPROC32)
13758
{
13759
name = record->data.S_GPROC32.name;
13760
rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_GPROC32.section, record->data.S_GPROC32.offset);
13761
size = record->data.S_GPROC32.codeSize;
13762
}
13763
else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_LPROC32_ID)
13764
{
13765
name = record->data.S_LPROC32_ID.name;
13766
rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_LPROC32_ID.section, record->data.S_LPROC32_ID.offset);
13767
size = record->data.S_LPROC32_ID.codeSize;
13768
}
13769
else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_GPROC32_ID)
13770
{
13771
name = record->data.S_GPROC32_ID.name;
13772
rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_GPROC32_ID.section, record->data.S_GPROC32_ID.offset);
13773
size = record->data.S_GPROC32_ID.codeSize;
13774
}
13775
13776
if(rva == 0u)
13777
{
13778
return;
13779
}
13780
// uprintf("func %p / %d .. %s \n", rva, size, name);
13781
OnSymbol(name, rva, size);
13782
});
13783
}
13784
const PDB::PublicSymbolStream PublicSymbolStream = DbiStream.CreatePublicSymbolStream(RawPdbFile);
13785
{
13786
const PDB::ArrayView<PDB::HashRecord> HashRecords = PublicSymbolStream.GetRecords();
13787
const size_t Count = HashRecords.GetLength();
13788
13789
for(const PDB::HashRecord& HashRecord : HashRecords)
13790
{
13791
const PDB::CodeView::DBI::Record* Record = PublicSymbolStream.GetRecord(SymbolRecordStream, HashRecord);
13792
if(Record->header.kind != PDB::CodeView::DBI::SymbolRecordKind::S_PUB32)
13793
{
13794
continue;
13795
}
13796
13797
if((PDB_AS_UNDERLYING(Record->data.S_PUB32.flags) & PDB_AS_UNDERLYING(PDB::CodeView::DBI::PublicSymbolFlags::Function)) == 0u)
13798
{
13799
continue;
13800
}
13801
13802
const uint32_t rva = ImageSectionStream.ConvertSectionOffsetToRVA(Record->data.S_PUB32.section, Record->data.S_PUB32.offset);
13803
if(rva == 0u)
13804
{
13805
continue;
13806
}
13807
OnSymbol(Record->data.S_PUB32.name, rva, 0);
13808
}
13809
}
13810
UnmapViewOfFile(BaseAddress);
13811
CloseHandle(FileMapping);
13812
CloseHandle(File);
13813
}
13814
13815
// Returns true when any instruction pointer recorded in S.SuspendState lies inside the
// inclusive address range [Begin, End].
bool MicroProfilePatchHasSuspendedThread(intptr_t Begin, intptr_t End)
{
	MicroProfileSuspendState& Suspend = S.SuspendState;
	uint32_t nThread = 0;
	while(nThread < Suspend.NumSuspended)
	{
		const intptr_t nInstructionPointer = Suspend.SuspendedIP[nThread];
		const bool bOutsideRange = nInstructionPointer < Begin || End < nInstructionPointer;
		if(!bOutsideRange)
		{
			return true;
		}
		++nThread;
	}
	return false;
}
13826
13827
bool MicroProfilePatchBeginSuspend()
13828
{
13829
MicroProfileSuspendState& State = S.SuspendState;
13830
13831
if(State.SuspendCounter++ > 0)
13832
return true;
13833
MicroProfileUpdateMemoryRegions();
13834
13835
MicroProfileMutex().lock();
13836
MP_ASSERT(State.NumSuspended == 0);
13837
13838
DWORD ProcessId = GetCurrentProcessId();
13839
DWORD ThreadId = GetCurrentThreadId();
13840
13841
HANDLE hSnap = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
13842
if(hSnap == INVALID_HANDLE_VALUE)
13843
{
13844
return false;
13845
}
13846
THREADENTRY32 te{};
13847
te.dwSize = sizeof(te);
13848
State.NumSuspended = 0;
13849
13850
if(Thread32First(hSnap, &te))
13851
{
13852
do
13853
{
13854
if(te.th32OwnerProcessID != ProcessId)
13855
continue;
13856
if(te.th32ThreadID == ThreadId)
13857
continue;
13858
HANDLE hThread = OpenThread(THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION, FALSE, te.th32ThreadID);
13859
if(!hThread)
13860
{
13861
continue;
13862
}
13863
DWORD PrevCount = SuspendThread(hThread);
13864
if(PrevCount == (DWORD)-1)
13865
{
13866
CloseHandle(hThread);
13867
continue;
13868
}
13869
13870
CONTEXT ctx{};
13871
ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; // Rip + registers
13872
if(GetThreadContext(hThread, &ctx))
13873
{
13874
State.SuspendedIP[State.NumSuspended] = (intptr_t)ctx.Rip;
13875
}
13876
if(State.NumSuspended < MICROPROFILE_SUSPEND_MAX)
13877
{
13878
State.Suspended[State.NumSuspended++] = hThread;
13879
}
13880
} while(Thread32Next(hSnap, &te));
13881
}
13882
else
13883
{
13884
uprintf("Thread32First failed %08x\n", GetLastError());
13885
CloseHandle(hSnap);
13886
return false;
13887
}
13888
CloseHandle(hSnap);
13889
return State.NumSuspended > 0;
13890
}
13891
13892
void MicroProfilePatchEndSuspend()
13893
{
13894
MicroProfileSuspendState& State = S.SuspendState;
13895
if(0 == --State.SuspendCounter)
13896
{
13897
13898
for(uint32_t i = 0; i < State.NumSuspended; ++i)
13899
{
13900
ResumeThread(State.Suspended[i]);
13901
CloseHandle(State.Suspended[i]);
13902
}
13903
State.NumSuspended = 0;
13904
MicroProfileMutex().unlock();
13905
}
13906
}
13907
13908
template <typename Callback>
13909
void MicroProfileIterateSymbols(Callback CB, uint32_t* nModules, uint32_t nNumModules)
13910
{
13911
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileIterateSymbols", MP_PINK3);
13912
QueryCallbackImpl<Callback> Context(CB);
13913
if(MicroProfileSymInit())
13914
{
13915
// uprintf("symbols loaded!\n");
13916
// API_VERSION* pv = ImagehlpApiVersion();
13917
// uprintf("VERSION %d.%d.%d\n", pv->MajorVersion, pv->MinorVersion, pv->Revision);
13918
13919
nLastModuleBaseWin32 = -1;
13920
if(SymEnumerateModules64(GetCurrentProcess(), (PSYM_ENUMMODULES_CALLBACK64)MicroProfileEnumModules, NULL))
13921
{
13922
}
13923
QueryCallbackBase* pBase = &Context;
13924
if(nNumModules)
13925
{
13926
HANDLE hProcess = GetCurrentProcess();
13927
char buffer[sizeof(SYMBOL_INFO) + MAX_SYM_NAME * sizeof(TCHAR)];
13928
PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)buffer;
13929
uint64_t t0 = MP_TICK();
13930
for(uint32_t i = 0; i < nNumModules; ++i)
13931
{
13932
uint32_t nModule = nModules[i];
13933
int64_t nBytes = 0;
13934
MEMORY_BASIC_INFORMATION B;
13935
13936
for(int j = 0; j < S.SymbolModules[nModule].nNumExecutableRegions; ++j)
13937
{
13938
intptr_t b = S.SymbolModules[nModule].Regions[j].nBegin;
13939
intptr_t e = S.SymbolModules[nModule].Regions[j].nEnd;
13940
while(b < e)
13941
{
13942
int r = VirtualQuery((LPCVOID)b, &B, sizeof(B));
13943
if(!r)
13944
break;
13945
switch(B.Protect)
13946
{
13947
case PAGE_EXECUTE:
13948
case PAGE_EXECUTE_READ:
13949
case PAGE_EXECUTE_READWRITE:
13950
case PAGE_EXECUTE_WRITECOPY:
13951
nBytes += B.RegionSize;
13952
// uprintf("RANGE %p, %p .. %5.2fkb %08x, %08x\n", B.BaseAddress, (void*)(intptr_t(B.BaseAddress) + B.RegionSize), B.RegionSize / 1024.f, B.State, B.Protect);
13953
}
13954
b = intptr_t(B.BaseAddress) + B.RegionSize;
13955
}
13956
}
13957
S.SymbolModules[nModule].nProgressTarget = nBytes;
13958
13959
char pdbPath[MAX_PATH];
13960
HMODULE Module = (HMODULE)S.SymbolModules[nModule].nModuleBase;
13961
S.nSymbolsDirty++;
13962
S.SymbolModules[nModule].bDownloading = true;
13963
if(MicroProfileDownloadPDB(Module, hProcess, pdbPath))
13964
{
13965
S.SymbolModules[nModule].bDownloading = false;
13966
S.nSymbolsDirty++;
13967
MicroProfileLoadRawPDB<Callback>(CB, pdbPath, S.SymbolModules[nModule].nModuleBase, nModule);
13968
}
13969
S.SymbolModules[nModule].bDownloading = false;
13970
S.nSymbolsDirty++;
13971
S.SymbolModules[nModule].nProgress = S.SymbolModules[nModule].nProgressTarget;
13972
S.SymbolModules[nModule].nModuleLoadFinished.exchange(1);
13973
}
13974
13975
uint64_t t1 = MP_TICK();
13976
float fTime = float(MicroProfileTickToMsMultiplierCpu()) * (t1 - t0);
13977
uprintf("load symbol time %6.2fms\n", fTime);
13978
}
13979
MicroProfileSymCleanup();
13980
}
13981
}
13982
13983
static int MicroProfileWin32SymInitCount = 0;
13984
static int MicroProfileWin32SymInitSuccess = 0;
13985
13986
bool MicroProfileSymInit()
13987
{
13988
if(0 == MicroProfileWin32SymInitCount++)
13989
{
13990
auto h = GetCurrentProcess();
13991
SymCleanup(h);
13992
SymSetOptions(SYMOPT_DEFERRED_LOADS);
13993
if(SymInitialize(h, 0, FALSE))
13994
{
13995
13996
MicroProfileWin32SymInitSuccess = 1;
13997
char Path[MAX_PATH];
13998
bool PathValid = SymGetSearchPath(h, Path, MAX_PATH) > 0;
13999
if(PathValid)
14000
{
14001
PathValid = strlen(Path) > 3;
14002
}
14003
if(!PathValid)
14004
{
14005
SymSetSearchPath(h, "srv*C:\\symbols*https://msdl.microsoft.com/download/symbols");
14006
}
14007
}
14008
else
14009
{
14010
MicroProfileWin32SymInitSuccess = 0;
14011
}
14012
}
14013
return MicroProfileWin32SymInitSuccess != 0;
14014
}
14015
void MicroProfileSymCleanup()
14016
{
14017
if(0 == --MicroProfileWin32SymInitCount)
14018
{
14019
MicroProfileWin32SymInitSuccess = 0;
14020
SymCleanup(GetCurrentProcess());
14021
}
14022
}
14023
14024
static void* g_pFunctionFoundHack = 0;
14025
static const char* g_pFunctionpNameFound = 0;
14026
static char g_Demangled[512];
14027
14028
// SymEnumSymbols callback: stops at the first symbol whose Tag is 5 or 10, logs it through
// OutputDebugStringA and publishes its name/address in g_pFunctionpNameFound /
// g_pFunctionFoundHack. Returns FALSE for that symbol and TRUE for every other one.
// NOTE(review): 5 and 10 look like SymTagFunction and SymTagPublicSymbol from the DIA
// SymTagEnum - confirm against cvconst.h.
BOOL MicroProfileQueryContextEnumSymbols1(_In_ PSYMBOL_INFO pSymInfo, _In_ ULONG SymbolSize, _In_opt_ PVOID UserContext)
{
	const bool bWanted = (pSymInfo->Tag == 5) || (pSymInfo->Tag == 10);
	if(!bWanted)
	{
		return TRUE;
	}

	char Line[200];
	stbsp_snprintf(Line, sizeof(Line) - 1, "%s : %p\n", pSymInfo->Name, (void*)pSymInfo->Address);
	OutputDebugStringA(Line);

	g_pFunctionpNameFound = pSymInfo->Name;
	g_pFunctionFoundHack = (void*)pSymInfo->Address;
	return FALSE;
}
14041
14042
// Win32 demangling is currently a pass-through: the input pointer is returned unchanged.
const char* MicroProfileDemangleSymbol(const char* pSymbol)
{
	// todo: for some reason all symbols seen so far are already undecorated
	const char* pResult = pSymbol;
	return pResult;
}
14046
14047
void MicroProfileInstrumentWithoutSymbols(const char** pModules, const char** pSymbols, uint32_t nNumSymbols)
14048
{
14049
char SymString[512];
14050
const char* pStr = 0;
14051
if(MicroProfileSymInit())
14052
{
14053
HANDLE h = GetCurrentProcess();
14054
for(uint32_t i = 0; i < nNumSymbols; ++i)
14055
{
14056
int nCount = stbsp_snprintf(SymString, sizeof(SymString) - 1, "%s!%s", pModules[i], pSymbols[i]);
14057
if(nCount <= sizeof(SymString) - 1)
14058
{
14059
g_pFunctionFoundHack = 0;
14060
if(SymEnumSymbols(h, 0, SymString, MicroProfileQueryContextEnumSymbols1, 0))
14061
{
14062
if(g_pFunctionFoundHack)
14063
{
14064
uint32_t nColor = MicroProfileColorFromString(pSymbols[i]);
14065
const char* pDemangled = pSymbols[i]; // MicroProfileDemangleSymbol(pSymbols[i]);
14066
MicroProfileInstrumentFunction(g_pFunctionFoundHack, pModules[i], pDemangled, nColor);
14067
}
14068
}
14069
}
14070
}
14071
MicroProfileSymCleanup();
14072
}
14073
}
14074
14075
void MicroProfileSymbolEnumModules()
14076
{
14077
HMODULE modules[1024];
14078
DWORD needed;
14079
HANDLE h = GetCurrentProcess();
14080
14081
if(EnumProcessModules(h, modules, sizeof(modules), &needed))
14082
{
14083
int count = needed / sizeof(HMODULE);
14084
for(int i = 0; i < count; i++)
14085
{
14086
char moduleName[MAX_PATH];
14087
if(GetModuleFileNameEx(h, modules[i], moduleName, MAX_PATH))
14088
{
14089
MODULEINFO mi = {};
14090
if(GetModuleInformation(h, modules[i], &mi, sizeof(mi)))
14091
{
14092
MicroProfileEnumModules(moduleName, (DWORD64)mi.lpBaseOfDll, 0);
14093
}
14094
}
14095
}
14096
}
14097
}
14098
14099
void MicroProfileSymbolUpdateModuleList()
14100
{
14101
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSymbolUpdateModuleList", MP_PINK3);
14102
// QueryCallbackImpl<Callback> Context(CB);
14103
if(MicroProfileSymInit())
14104
{
14105
uprintf("symbols loaded!\n");
14106
API_VERSION* pv = ImagehlpApiVersion();
14107
uprintf("VERSION %d.%d.%d\n", pv->MajorVersion, pv->MinorVersion, pv->Revision);
14108
14109
nLastModuleBaseWin32 = -1;
14110
MicroProfileSymbolEnumModules();
14111
MicroProfileSymCleanup();
14112
}
14113
}
14114
14115
#endif
14116
14117
#if defined(__APPLE__) && defined(__MACH__)
14118
// '##::::'##::'#######:::'#######::'##:::'##:::::'#######:::'######::'##::::'##:
14119
// ##:::: ##:'##.... ##:'##.... ##: ##::'##:::::'##.... ##:'##... ##:. ##::'##::
14120
// ##:::: ##: ##:::: ##: ##:::: ##: ##:'##:::::: ##:::: ##: ##:::..:::. ##'##:::
14121
// #########: ##:::: ##: ##:::: ##: #####::::::: ##:::: ##:. ######::::. ###::::
14122
// ##.... ##: ##:::: ##: ##:::: ##: ##. ##:::::: ##:::: ##::..... ##::: ## ##:::
14123
// ##:::: ##: ##:::: ##: ##:::: ##: ##:. ##::::: ##:::: ##:'##::: ##:: ##:. ##::
14124
// ##:::: ##:. #######::. #######:: ##::. ##::::. #######::. ######:: ##:::. ##:
14125
// ..:::::..:::.......::::.......:::..::::..::::::.......::::......:::..:::::..::
14126
14127
#include <cxxabi.h>
14128
#include <distorm.h>
14129
#include <dlfcn.h>
14130
#include <mach/mach.h>
14131
#include <mach/mach_vm.h>
14132
#include <mnemonics.h>
14133
#include <sys/mman.h>
14134
#include <unistd.h>
14135
14136
static void* MicroProfileAllocExecutableMemory(void* f, size_t s);
14137
static void MicroProfileMakeWriteable(void* p_);
14138
14139
extern "C" void microprofile_tramp_enter_patch();
14140
extern "C" void microprofile_tramp_enter();
14141
extern "C" void microprofile_tramp_code_begin();
14142
extern "C" void microprofile_tramp_code_end();
14143
extern "C" void microprofile_tramp_intercept0();
14144
extern "C" void microprofile_tramp_end();
14145
extern "C" void microprofile_tramp_exit();
14146
extern "C" void microprofile_tramp_leave();
14147
extern "C" void microprofile_tramp_trunk();
14148
extern "C" void microprofile_tramp_call_patch_pop();
14149
extern "C" void microprofile_tramp_call_patch_push();
14150
14151
bool MicroProfilePatchFunction(void* f, int Argument, MicroProfileHookFunc enter, MicroProfileHookFunc leave, MicroProfilePatchError* pError) __attribute__((optnone))
14152
{
14153
if(pError)
14154
{
14155
memcpy(&pError->Code[0], f, 12);
14156
}
14157
14158
intptr_t t_enter = (intptr_t)microprofile_tramp_enter;
14159
intptr_t t_enter_patch_offset = (intptr_t)microprofile_tramp_enter_patch - t_enter;
14160
intptr_t t_code_begin_offset = (intptr_t)microprofile_tramp_code_begin - t_enter;
14161
intptr_t t_code_end_offset = (intptr_t)microprofile_tramp_code_end - t_enter;
14162
intptr_t t_code_intercept0_offset = (intptr_t)microprofile_tramp_intercept0 - t_enter;
14163
intptr_t t_code_exit_offset = (intptr_t)microprofile_tramp_exit - t_enter;
14164
intptr_t t_code_leave_offset = (intptr_t)microprofile_tramp_leave - t_enter;
14165
14166
intptr_t t_code_call_patch_push_offset = (intptr_t)microprofile_tramp_call_patch_push - t_enter;
14167
intptr_t t_code_call_patch_pop_offset = (intptr_t)microprofile_tramp_call_patch_pop - t_enter;
14168
intptr_t codemaxsize = t_code_end_offset - t_code_begin_offset;
14169
intptr_t t_end_offset = (intptr_t)microprofile_tramp_end - t_enter;
14170
intptr_t t_trunk_offset = (intptr_t)microprofile_tramp_trunk - t_enter;
14171
intptr_t t_trunk_size = (intptr_t)microprofile_tramp_end - (intptr_t)microprofile_tramp_trunk;
14172
14173
char* ptramp = (char*)MicroProfileAllocExecutableMemory(f, t_end_offset);
14174
14175
intptr_t offset = ((intptr_t)f + 6 - (intptr_t)ptramp);
14176
14177
uint32_t nBytesToCopy = 14;
14178
if(offset < 0x80000000 && offset > -0x7fffffff)
14179
{
14180
/// offset is small enough to insert a relative jump
14181
nBytesToCopy = 5;
14182
}
14183
14184
memcpy(ptramp, (void*)t_enter, t_end_offset);
14185
14186
int nInstructionBytesDest = 0;
14187
char* pInstructionMoveDest = ptramp + t_code_begin_offset;
14188
char* pTrunk = ptramp + t_trunk_offset;
14189
14190
int nInstructionBytesSrc = 0;
14191
14192
uint32_t nRegsWritten = 0;
14193
uint32_t nRetSafe = 0;
14194
uint32_t nUsableJumpRegs = (1 << R_RAX) | (1 << R_R10) | (1 << R_R11); // scratch && !parameter register
14195
if(!MicroProfileCopyInstructionBytes(
14196
pInstructionMoveDest, f, nBytesToCopy, codemaxsize, pTrunk, t_trunk_size, nUsableJumpRegs, &nInstructionBytesDest, &nInstructionBytesSrc, &nRegsWritten, &nRetSafe))
14197
{
14198
if(pError)
14199
{
14200
const char* pCode = (const char*)f;
14201
memset(pError->Code, 0, sizeof(pError->Code));
14202
memcpy(pError->Code, pCode, nInstructionBytesSrc);
14203
int off = stbsp_snprintf(pError->Message, sizeof(pError->Message), "Failed to move %d code bytes ", nInstructionBytesSrc);
14204
pError->nCodeSize = nInstructionBytesSrc;
14205
for(int i = 0; i < nInstructionBytesSrc; ++i)
14206
{
14207
off += stbsp_snprintf(off + pError->Message, sizeof(pError->Message) - off, "%02x ", 0xff & pCode[i]);
14208
}
14209
uprintf("%s\n", pError->Message);
14210
}
14211
return false;
14212
}
14213
intptr_t phome = nInstructionBytesSrc + (intptr_t)f;
14214
uint32_t reg = nUsableJumpRegs & ~nRegsWritten;
14215
static_assert(R_RAX == 0, "R_RAX must be 0");
14216
if(0 == reg)
14217
{
14218
if(nRetSafe == 0)
14219
{
14220
MP_BREAK(); // shout fail earlier
14221
}
14222
MicroProfileInsertRetJump(pInstructionMoveDest + nInstructionBytesDest, phome);
14223
}
14224
else
14225
{
14226
int r = R_RAX;
14227
while((reg & 1) == 0)
14228
{
14229
reg >>= 1;
14230
r++;
14231
}
14232
MicroProfileInsertRegisterJump(pInstructionMoveDest + nInstructionBytesDest, phome, r);
14233
}
14234
14235
// PATCH 1 TRAMP EXIT
14236
intptr_t microprofile_tramp_exit = (intptr_t)ptramp + t_code_exit_offset;
14237
memcpy(ptramp + t_enter_patch_offset + 2, (void*)&microprofile_tramp_exit, 8);
14238
14239
char* pintercept = t_code_intercept0_offset + ptramp;
14240
14241
// PATCH 1.5 Argument
14242
memcpy(pintercept - 4, (void*)&Argument, 4);
14243
14244
// PATCH 2 INTERCEPT0
14245
intptr_t addr = (intptr_t)enter; //&intercept0;
14246
memcpy(pintercept + 2, (void*)&addr, 8);
14247
14248
// PATHC 2.5 argument
14249
memcpy(ptramp + t_code_exit_offset + 3, (void*)&Argument, 4);
14250
14251
intptr_t microprofile_tramp_leave = (intptr_t)ptramp + t_code_leave_offset;
14252
// PATCH 3 INTERCEPT1
14253
intptr_t addr1 = (intptr_t)leave; //&intercept1;
14254
memcpy((char*)microprofile_tramp_leave + 2, (void*)&addr1, 8);
14255
14256
intptr_t patch_push_addr = (intptr_t)(&MicroProfile_Patch_TLS_PUSH);
14257
intptr_t patch_pop_addr = (intptr_t)(&MicroProfile_Patch_TLS_POP);
14258
memcpy((char*)ptramp + t_code_call_patch_push_offset + 2, &patch_push_addr, 8);
14259
memcpy((char*)ptramp + t_code_call_patch_pop_offset + 2, &patch_pop_addr, 8);
14260
14261
{
14262
// PATCH 4 DEST FUNC
14263
14264
MicroProfileMakeWriteable(f);
14265
char* pp = (char*)f;
14266
char* ppend = pp + nInstructionBytesSrc;
14267
if(nInstructionBytesSrc < 14)
14268
{
14269
uprintf("inserting 5b jump\n");
14270
pp = MicroProfileInsertRelativeJump((char*)pp, (intptr_t)ptramp);
14271
}
14272
else
14273
{
14274
uprintf("inserting 14b jump\n");
14275
pp = MicroProfileInsertRegisterJump(pp, (intptr_t)ptramp, R_RAX);
14276
}
14277
while(pp != ppend)
14278
{
14279
*pp++ = 0x90;
14280
}
14281
}
14282
return true;
14283
}
14284
14285
static void MicroProfileMakeWriteable(void* p_)
14286
{
14287
#ifdef _PATCH_TEST
14288
// for testing..
14289
static const uint32_t WritableSize = 16;
14290
static uint32_t WritableCount = 0;
14291
static intptr_t WritableStart[WritableSize] = { 0 };
14292
static intptr_t WritableEnd[WritableSize] = { 0 };
14293
for(uint32_t i = 0; i < WritableCount; ++i)
14294
{
14295
intptr_t x = (intptr_t)p_;
14296
if(x >= WritableStart[i] && x < WritableEnd[i])
14297
{
14298
return;
14299
}
14300
}
14301
14302
#endif
14303
14304
intptr_t p = (intptr_t)p_;
14305
// uprintf("MicroProfilemakewriteable %lx\n", p);
14306
mach_port_name_t task = mach_task_self();
14307
vm_map_offset_t vmoffset = 0;
14308
mach_vm_size_t vmsize = 0;
14309
uint32_t nd;
14310
kern_return_t kr;
14311
vm_region_submap_info_64 vbr;
14312
mach_msg_type_number_t vbrcount = sizeof(vbr) / 4;
14313
14314
while(KERN_SUCCESS == (kr = mach_vm_region_recurse(task, &vmoffset, &vmsize, &nd, (vm_region_recurse_info_t)&vbr, &vbrcount)))
14315
{
14316
if(p >= (intptr_t)vmoffset && p <= intptr_t(vmoffset + vmsize))
14317
{
14318
if(0 == (vbr.protection & VM_PROT_WRITE))
14319
{
14320
// uprintf("region match .. enabling write\n");
14321
int x = mprotect((void*)vmoffset, vmsize, PROT_WRITE | PROT_READ | PROT_EXEC);
14322
if(x)
14323
{
14324
// uprintf("mprotect failed ... err %d:: %d %s\n", errno, x, strerror(errno));
14325
}
14326
else
14327
{
14328
uprintf("region is [%llx,%llx] .. %08llx %d", vmoffset, vmoffset + vmsize, vmsize, vbr.is_submap);
14329
uprintf("prot: %c%c%c %c%c%c\n",
14330
vbr.protection & VM_PROT_READ ? 'r' : '-',
14331
vbr.protection & VM_PROT_WRITE ? 'w' : '-',
14332
vbr.protection & VM_PROT_EXECUTE ? 'x' : '-',
14333
14334
vbr.max_protection & VM_PROT_READ ? 'r' : '-',
14335
vbr.max_protection & VM_PROT_WRITE ? 'w' : '-',
14336
vbr.max_protection & VM_PROT_EXECUTE ? 'x' : '-');
14337
continue;
14338
}
14339
}
14340
else
14341
{
14342
#ifdef _PATCH_TEST
14343
if(WritableCount < WritableSize)
14344
{
14345
WritableStart[WritableCount] = vmoffset;
14346
WritableEnd[WritableCount] = vmoffset + vmsize;
14347
WritableCount++;
14348
}
14349
14350
#endif
14351
}
14352
}
14353
14354
vmoffset += vmsize;
14355
vbrcount = sizeof(vbr) / 4;
14356
}
14357
}
14358
14359
// Copy `pStr` into [pOutBegin, pOutEnd) while dropping everything nested inside '(' ... ')'
// or '<' ... '>' (the brackets themselves included).
//
// Nothing is written and 0 is returned when the whole input plus terminator would not fit in
// the output range. Otherwise the output is null-terminated and the number of characters
// written (terminator excluded) is returned.
int MicroProfileTrimFunctionName(const char* pStr, char* pOutBegin, char* pOutEnd)
{
	const int nLastIndex = (int)(strlen(pStr) - 1);
	char* const pLimit = pOutEnd - 1; // last slot is reserved for the terminator
	if(nLastIndex >= pLimit - pOutBegin || pOutBegin == pLimit)
	{
		return 0;
	}

	int nWritten = 0;
	int nDepth = 0;
	char* pWrite = pOutBegin;
	const char* const pReadEnd = pStr + nLastIndex + 1;
	for(const char* pRead = pStr; pRead != pReadEnd && pWrite != pLimit; ++pRead)
	{
		const char ch = *pRead;
		switch(ch)
		{
		case '(':
		case '<':
			++nDepth;
			break;
		case ')':
		case '>':
			--nDepth;
			continue; // closing brackets are never emitted
		default:
			break;
		}

		if(nDepth == 0)
		{
			*pWrite++ = ch;
			++nWritten;
		}
	}

	*pWrite = '\0';
	return nWritten;
}
14393
14394
int MicroProfileFindFunctionName(const char* pStr, const char** ppStart)
14395
{
14396
int l = strlen(pStr) - 1;
14397
if(l < 1024)
14398
{
14399
char b[1024] = { 0 };
14400
char* put = &b[0];
14401
14402
const char* p = pStr;
14403
const char* pEnd = pStr + l + 1;
14404
int in = 0;
14405
while(p != pEnd)
14406
{
14407
char c = *p++;
14408
if(c == '(' || c == '<')
14409
{
14410
in++;
14411
}
14412
else if(c == ')' || c == '>')
14413
{
14414
in--;
14415
continue;
14416
}
14417
14418
if(in == 0)
14419
{
14420
*put++ = c;
14421
}
14422
}
14423
14424
*put++ = '\0';
14425
uprintf("trimmed %s\n", b);
14426
}
14427
14428
// int nFirstParen = l;
14429
int nNumParen = 0;
14430
int c = 0;
14431
14432
while(l >= 0 && pStr[l] != ')' && c++ < (int)(sizeof(" const") - 1))
14433
{
14434
l--;
14435
}
14436
if(pStr[l] == ')')
14437
{
14438
do
14439
{
14440
if(pStr[l] == ')')
14441
{
14442
nNumParen++;
14443
}
14444
else if(pStr[l] == '(')
14445
{
14446
nNumParen--;
14447
}
14448
l--;
14449
} while(nNumParen > 0 && l >= 0);
14450
}
14451
else
14452
{
14453
*ppStart = pStr;
14454
return 0;
14455
}
14456
while(l >= 0 && isspace(pStr[l]))
14457
{
14458
--l;
14459
}
14460
int nLast = l;
14461
while(l >= 0 && !isspace(pStr[l]))
14462
{
14463
l--;
14464
}
14465
int nFirst = l;
14466
if(nFirst == nLast)
14467
return 0;
14468
int nCount = nLast - nFirst + 1;
14469
*ppStart = pStr + nFirst;
14470
return nCount;
14471
}
14472
14473
// Demangle `pSymbol` with abi::__cxa_demangle.
//
// On success the result lives in a function-static, malloc'ed buffer that is reused (and
// possibly replaced by the demangler) on every call, so the returned pointer is only valid
// until the next call. When demangling fails the input pointer is returned unchanged.
const char* MicroProfileDemangleSymbol(const char* pSymbol)
{
	static unsigned long s_nCapacity = 128;
	// needs to be malloc because demangle function might realloc it.
	static char* s_pScratch = (char*)malloc(s_nCapacity);

	unsigned long nLength = s_nCapacity;
	int nStatus = 0;
	char* pDemangled = abi::__cxa_demangle(pSymbol, s_pScratch, &nLength, &nStatus);
	if(nStatus != 0)
	{
		return pSymbol;
	}

	if(pDemangled != s_pScratch)
	{
		// the demangler handed back a different buffer: adopt it together with its size
		s_pScratch = pDemangled;
		if(nLength < s_nCapacity)
			__builtin_trap();
		s_nCapacity = nLength;
	}
	return s_pScratch;
}
14497
14498
template <typename Callback>
14499
void MicroProfileIterateSymbols(Callback CB, uint32_t* nModules, uint32_t nNumModules)
14500
{
14501
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileIterateSymbols", MP_PINK3);
14502
char FunctionName[1024];
14503
(void)FunctionName;
14504
mach_port_name_t task = mach_task_self();
14505
vm_map_offset_t vmoffset = 0;
14506
mach_vm_size_t vmsize = 0;
14507
uint32_t nd;
14508
kern_return_t kr;
14509
vm_region_submap_info_64 vbr;
14510
mach_msg_type_number_t vbrcount = sizeof(vbr) / 4;
14511
14512
intptr_t nCurrentModule = -1;
14513
uint32_t nCurrentModuleId = -1;
14514
14515
auto OnFunction = [&](void* addr, void* addrend, const char* pSymbol, const char* pModuleName, void* pModuleAddr) -> bool
14516
{
14517
const char* pStr = MicroProfileDemangleSymbol(pSymbol);
14518
;
14519
int l = MicroProfileTrimFunctionName(pStr, &FunctionName[0], &FunctionName[1024]);
14520
if(nCurrentModule != (intptr_t)pModuleAddr)
14521
{
14522
nCurrentModule = (intptr_t)pModuleAddr;
14523
nCurrentModuleId = MicroProfileSymbolGetModule(pModuleName, nCurrentModule);
14524
}
14525
14526
CB(l ? &FunctionName[0] : pStr, l ? &FunctionName[0] : 0, (intptr_t)addr, (intptr_t)addrend, nCurrentModuleId);
14527
return true;
14528
};
14529
vm_offset_t addr_prev = 0;
14530
14531
while(KERN_SUCCESS == (kr = mach_vm_region_recurse(task, &vmoffset, &vmsize, &nd, (vm_region_recurse_info_t)&vbr, &vbrcount)))
14532
{
14533
{
14534
addr_prev = vmoffset + vmsize;
14535
if(0 != (vbr.protection & VM_PROT_EXECUTE))
14536
{
14537
bool bProcessModule = true;
14538
int nModule = -1;
14539
if(nNumModules)
14540
{
14541
bProcessModule = false;
14542
for(uint32_t i = 0; i < nNumModules; ++i)
14543
{
14544
intptr_t nBase = S.SymbolModules[nModules[i]].Regions[0].nBegin;
14545
if((intptr_t)vmoffset == nBase)
14546
{
14547
bProcessModule = true;
14548
nModule = nModules[i];
14549
break;
14550
}
14551
}
14552
}
14553
if(bProcessModule)
14554
{
14555
S.SymbolModules[nModule].nProgressTarget = S.SymbolModules[nModule].Regions[0].nEnd - S.SymbolModules[nModule].Regions[0].nBegin;
14556
dl_info di;
14557
int r = 0;
14558
r = dladdr((void*)vmoffset, &di);
14559
if(r)
14560
{
14561
OnFunction(di.dli_saddr, (void*)addr_prev, di.dli_sname, di.dli_fname, di.dli_fbase);
14562
}
14563
intptr_t addr = vmoffset + vmsize - 1;
14564
while(1)
14565
{
14566
r = dladdr((void*)(addr), &di);
14567
if(r)
14568
{
14569
if(!di.dli_sname)
14570
{
14571
break;
14572
}
14573
OnFunction(di.dli_saddr, (void*)addr_prev, di.dli_sname, di.dli_fname, di.dli_fbase);
14574
}
14575
else
14576
{
14577
break;
14578
}
14579
addr_prev = (vm_offset_t)di.dli_saddr;
14580
addr = (intptr_t)di.dli_saddr - 1;
14581
if(di.dli_saddr < (void*)vmoffset)
14582
{
14583
break;
14584
}
14585
}
14586
for(int i = 0; i < S.SymbolNumModules; ++i)
14587
{
14588
if(S.SymbolModules[i].Regions[0].nBegin == (intptr_t)vmoffset)
14589
{
14590
S.SymbolModules[i].nModuleLoadFinished.store(1);
14591
}
14592
}
14593
}
14594
}
14595
}
14596
vmoffset += vmsize;
14597
vbrcount = sizeof(vbr) / 4;
14598
}
14599
}
14600
14601
void MicroProfileSymbolUpdateModuleList()
14602
{
14603
char FunctionName[1024];
14604
(void)FunctionName;
14605
mach_port_name_t task = mach_task_self();
14606
vm_map_offset_t vmoffset = 0;
14607
mach_vm_size_t vmsize = 0;
14608
uint32_t nd;
14609
kern_return_t kr;
14610
vm_region_submap_info_64 vbr;
14611
mach_msg_type_number_t vbrcount = sizeof(vbr) / 4;
14612
14613
while(KERN_SUCCESS == (kr = mach_vm_region_recurse(task, &vmoffset, &vmsize, &nd, (vm_region_recurse_info_t)&vbr, &vbrcount)))
14614
{
14615
{
14616
if(0 != (vbr.protection & VM_PROT_EXECUTE))
14617
{
14618
dl_info di;
14619
int r = 0;
14620
r = dladdr((void*)vmoffset, &di);
14621
if(r)
14622
{
14623
uprintf("[0x%p-0x%p] (0x%p) %s %s\n", (void*)vmoffset, (void*)addr_prev, di.dli_fbase, di.dli_fname, di.dli_sname);
14624
MicroProfileSymbolInitModule(di.dli_fname, (intptr_t)vmoffset, (intptr_t)vmoffset + vmsize);
14625
}
14626
}
14627
}
14628
vmoffset += vmsize;
14629
vbrcount = sizeof(vbr) / 4;
14630
}
14631
}
14632
14633
// Maps an anonymous, private read/write/execute region of at least `s` bytes
// (rounded up to whole pages), passing `f` to mmap as the placement hint.
// Returns whatever mmap returns, i.e. MAP_FAILED on error.
static void* MicroProfileAllocExecutableMemory(void* f, size_t s)
{
	// The page size is looked up once and cached for later calls.
	static uint64_t nCachedPageSize = 0;
	if(0 == nCachedPageSize)
	{
		nCachedPageSize = getpagesize();
	}
	const uint64_t nPageMask = nCachedPageSize - 1;
	s = (s + nPageMask) & (~nPageMask);

	return mmap((void*)f, s, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, 0, 0);
}
14647
14648
// Demangling is not implemented on this platform: the input name is copied to
// OutName unchanged. At most Size - 1 characters are copied and the result is
// always zero terminated, so an over-long name is truncated instead of
// overflowing the caller's buffer.
// Returns false only when there is nowhere to write (null OutName or Size == 0)
// or when pName is null; otherwise true.
bool MicroProfileDemangleName(const char* pName, char* OutName, uint32_t Size)
{
	if(!OutName || 0 == Size)
	{
		return false;
	}
	if(!pName)
	{
		OutName[0] = '\0';
		return false;
	}
	size_t nLen = strlen(pName);
	if(nLen >= Size)
	{
		nLen = Size - 1; // leave room for the terminator
	}
	memcpy(OutName, pName, nLen);
	OutName[nLen] = '\0';
	return true;
}
14654
14655
// Placeholder: this platform variant does no work before patching and always
// reports success.
bool MicroProfilePatchBeginSuspend()
{
	const bool bSuspended = true; // Not implemented
	return bSuspended;
}
14660
14661
// Counterpart of MicroProfilePatchBeginSuspend; intentionally empty in this
// platform variant.
void MicroProfilePatchEndSuspend()
{
	return; // Not implemented
}
14665
14666
void MicroProfileInstrumentWithoutSymbols(const char** pModules, const char** pSymbols, uint32_t nNumSymbols)
14667
{
14668
void* M = dlopen(0, 0);
14669
for(uint32_t i = 0; i < nNumSymbols; ++i)
14670
{
14671
// uprintf("trying to find symbol %s\n", pSym);
14672
void* s = dlsym(M, pSymbols[i]);
14673
uprintf("sym returned %p\n", s);
14674
if(s)
14675
{
14676
uint32_t nColor = MicroProfileColorFromString(pSymbols[i]);
14677
const char* pDemangled = MicroProfileDemangleSymbol(pSymbols[i]);
14678
MicroProfileInstrumentFunction(s, pModules[i], pDemangled, nColor);
14679
}
14680
}
14681
dlclose(M);
14682
}
14683
#endif
14684
14685
#if defined(__unix__) && defined(__x86_64__)
14686
// '##::::'##::'#######:::'#######::'##:::'##::::'##:::::::'####:'##::: ##:'##::::'##:'##::::'##:
14687
// ##:::: ##:'##.... ##:'##.... ##: ##::'##::::: ##:::::::. ##:: ###:: ##: ##:::: ##:. ##::'##::
14688
// ##:::: ##: ##:::: ##: ##:::: ##: ##:'##:::::: ##:::::::: ##:: ####: ##: ##:::: ##::. ##'##:::
14689
// #########: ##:::: ##: ##:::: ##: #####::::::: ##:::::::: ##:: ## ## ##: ##:::: ##:::. ###::::
14690
// ##.... ##: ##:::: ##: ##:::: ##: ##. ##:::::: ##:::::::: ##:: ##. ####: ##:::: ##::: ## ##:::
14691
// ##:::: ##: ##:::: ##: ##:::: ##: ##:. ##::::: ##:::::::: ##:: ##:. ###: ##:::: ##:: ##:. ##::
14692
// ##:::: ##:. #######::. #######:: ##::. ##:::: ########:'####: ##::. ##:. #######:: ##:::. ##:
14693
// ..:::::..:::.......::::.......:::..::::..:::::........::....::..::::..:::.......:::..:::::..::
14694
14695
#include <cxxabi.h>
14696
#include <distorm.h>
14697
#include <dlfcn.h>
14698
#include <mnemonics.h>
14699
#include <sys/mman.h>
14700
#include <unistd.h>
14701
14702
static void* MicroProfileAllocExecutableMemory(void* f, size_t s);
14703
static void MicroProfileMakeWriteable(void* p_);
14704
14705
extern "C" void microprofile_tramp_enter_patch() asm("_microprofile_tramp_enter_patch");
14706
extern "C" void microprofile_tramp_enter() asm("_microprofile_tramp_enter");
14707
extern "C" void microprofile_tramp_code_begin() asm("_microprofile_tramp_code_begin");
14708
extern "C" void microprofile_tramp_code_end() asm("_microprofile_tramp_code_end");
14709
extern "C" void microprofile_tramp_intercept0() asm("_microprofile_tramp_intercept0");
14710
extern "C" void microprofile_tramp_end() asm("_microprofile_tramp_end");
14711
extern "C" void microprofile_tramp_exit() asm("_microprofile_tramp_exit");
14712
extern "C" void microprofile_tramp_leave() asm("_microprofile_tramp_leave");
14713
extern "C" void microprofile_tramp_trunk() asm("_microprofile_tramp_trunk");
14714
extern "C" void microprofile_tramp_call_patch_pop() asm("_microprofile_tramp_call_patch_pop");
14715
extern "C" void microprofile_tramp_call_patch_push() asm("_microprofile_tramp_call_patch_push");
14716
14717
bool MicroProfilePatchFunction(void* f, int Argument, MicroProfileHookFunc enter, MicroProfileHookFunc leave, MicroProfilePatchError* pError)
14718
{
14719
if(pError)
14720
{
14721
memcpy(&pError->Code[0], f, 12);
14722
}
14723
14724
intptr_t t_enter = (intptr_t)microprofile_tramp_enter;
14725
intptr_t t_enter_patch_offset = (intptr_t)microprofile_tramp_enter_patch - t_enter;
14726
intptr_t t_code_begin_offset = (intptr_t)microprofile_tramp_code_begin - t_enter;
14727
intptr_t t_code_end_offset = (intptr_t)microprofile_tramp_code_end - t_enter;
14728
intptr_t t_code_intercept0_offset = (intptr_t)microprofile_tramp_intercept0 - t_enter;
14729
intptr_t t_code_exit_offset = (intptr_t)microprofile_tramp_exit - t_enter;
14730
intptr_t t_code_leave_offset = (intptr_t)microprofile_tramp_leave - t_enter;
14731
14732
intptr_t t_code_call_patch_push_offset = (intptr_t)microprofile_tramp_call_patch_push - t_enter;
14733
intptr_t t_code_call_patch_pop_offset = (intptr_t)microprofile_tramp_call_patch_pop - t_enter;
14734
intptr_t codemaxsize = t_code_end_offset - t_code_begin_offset;
14735
intptr_t t_end_offset = (intptr_t)microprofile_tramp_end - t_enter;
14736
intptr_t t_trunk_offset = (intptr_t)microprofile_tramp_trunk - t_enter;
14737
intptr_t t_trunk_size = (intptr_t)microprofile_tramp_end - (intptr_t)microprofile_tramp_trunk;
14738
14739
char* ptramp = (char*)MicroProfileAllocExecutableMemory(f, t_end_offset);
14740
14741
intptr_t offset = ((intptr_t)f + 6 - (intptr_t)ptramp);
14742
14743
uint32_t nBytesToCopy = 14;
14744
if(offset < 0x80000000 && offset > -0x7fffffff)
14745
{
14746
/// offset is small enough to insert a relative jump
14747
nBytesToCopy = 5;
14748
}
14749
14750
memcpy(ptramp, (void*)t_enter, t_end_offset);
14751
14752
int nInstructionBytesDest = 0;
14753
char* pInstructionMoveDest = ptramp + t_code_begin_offset;
14754
char* pTrunk = ptramp + t_trunk_offset;
14755
14756
int nInstructionBytesSrc = 0;
14757
14758
uint32_t nRegsWritten = 0;
14759
uint32_t nRetSafe = 0;
14760
uint32_t nUsableJumpRegs = (1 << R_RAX) | (1 << R_R10) | (1 << R_R11); // scratch && !parameter register
14761
if(!MicroProfileCopyInstructionBytes(
14762
pInstructionMoveDest, f, nBytesToCopy, codemaxsize, pTrunk, t_trunk_size, nUsableJumpRegs, &nInstructionBytesDest, &nInstructionBytesSrc, &nRegsWritten, &nRetSafe))
14763
{
14764
if(pError)
14765
{
14766
const char* pCode = (const char*)f;
14767
memset(pError->Code, 0, sizeof(pError->Code));
14768
memcpy(pError->Code, pCode, nInstructionBytesSrc);
14769
int off = stbsp_snprintf(pError->Message, sizeof(pError->Message), "Failed to move %d code bytes ", nInstructionBytesSrc);
14770
pError->nCodeSize = nInstructionBytesSrc;
14771
for(int i = 0; i < nInstructionBytesSrc; ++i)
14772
{
14773
off += stbsp_snprintf(off + pError->Message, sizeof(pError->Message) - off, "%02x ", 0xff & pCode[i]);
14774
}
14775
uprintf("%s\n", pError->Message);
14776
}
14777
return false;
14778
}
14779
intptr_t phome = nInstructionBytesSrc + (intptr_t)f;
14780
uint32_t reg = nUsableJumpRegs & ~nRegsWritten;
14781
static_assert(R_RAX == 0, "R_RAX must be 0");
14782
if(0 == reg)
14783
{
14784
if(nRetSafe == 0)
14785
{
14786
MP_BREAK(); // shout fail earlier
14787
}
14788
MicroProfileInsertRetJump(pInstructionMoveDest + nInstructionBytesDest, phome);
14789
}
14790
else
14791
{
14792
int r = R_RAX;
14793
while((reg & 1) == 0)
14794
{
14795
reg >>= 1;
14796
r++;
14797
}
14798
MicroProfileInsertRegisterJump(pInstructionMoveDest + nInstructionBytesDest, phome, r);
14799
}
14800
14801
// PATCH 1 TRAMP EXIT
14802
intptr_t microprofile_tramp_exit = (intptr_t)ptramp + t_code_exit_offset;
14803
memcpy(ptramp + t_enter_patch_offset + 2, (void*)&microprofile_tramp_exit, 8);
14804
14805
char* pintercept = t_code_intercept0_offset + ptramp;
14806
14807
// PATCH 1.5 Argument
14808
memcpy(pintercept - 4, (void*)&Argument, 4);
14809
14810
// PATCH 2 INTERCEPT0
14811
intptr_t addr = (intptr_t)enter; //&intercept0;
14812
memcpy(pintercept + 2, (void*)&addr, 8);
14813
14814
// PATHC 2.5 argument
14815
memcpy(ptramp + t_code_exit_offset + 3, (void*)&Argument, 4);
14816
14817
intptr_t microprofile_tramp_leave = (intptr_t)ptramp + t_code_leave_offset;
14818
// PATCH 3 INTERCEPT1
14819
intptr_t addr1 = (intptr_t)leave; //&intercept1;
14820
memcpy((char*)microprofile_tramp_leave + 2, (void*)&addr1, 8);
14821
14822
intptr_t patch_push_addr = (intptr_t)(&MicroProfile_Patch_TLS_PUSH);
14823
intptr_t patch_pop_addr = (intptr_t)(&MicroProfile_Patch_TLS_POP);
14824
memcpy((char*)ptramp + t_code_call_patch_push_offset + 2, &patch_push_addr, 8);
14825
memcpy((char*)ptramp + t_code_call_patch_pop_offset + 2, &patch_pop_addr, 8);
14826
14827
{
14828
// PATCH 4 DEST FUNC
14829
14830
MicroProfileMakeWriteable(f);
14831
char* pp = (char*)f;
14832
char* ppend = pp + nInstructionBytesSrc;
14833
14834
if(nInstructionBytesSrc < 14)
14835
{
14836
uprintf("inserting 5b jump\n");
14837
pp = MicroProfileInsertRelativeJump((char*)pp, (intptr_t)ptramp);
14838
}
14839
else
14840
{
14841
uprintf("inserting 14b jump\n");
14842
pp = MicroProfileInsertRegisterJump(pp, (intptr_t)ptramp, R_RAX);
14843
}
14844
while(pp != ppend)
14845
{
14846
*pp++ = 0x90;
14847
}
14848
}
14849
return true;
14850
}
14851
14852
// Marks the page(s) covering the 14 bytes starting at p_ as
// readable, writable and executable. The mprotect result is not inspected.
static void MicroProfileMakeWriteable(void* p_)
{
	const intptr_t nPageSize = (intptr_t)getpagesize();
	const intptr_t nPageMask = ~(nPageSize - 1);
	const intptr_t nTarget = (intptr_t)p_;

	const intptr_t nRangeBegin = nTarget & nPageMask;
	// One page past the page that holds byte p_ + 14.
	const intptr_t nRangeEnd = nPageSize + ((14 + nTarget) & nPageMask);
	const size_t nRangeSize = nRangeEnd - nRangeBegin;

	mprotect((void*)nRangeBegin, nRangeSize, PROT_READ | PROT_WRITE | PROT_EXEC);
}
14860
14861
// Copies pStr into [pOutBegin, pOutEnd) while dropping everything enclosed in
// '(' ')' or '<' '>' (brackets included), then zero terminates the output.
// Nothing is written, and 0 is returned, unless strlen(pStr) - 1 is smaller
// than the output capacity minus one.
// Returns the number of characters written, excluding the terminator.
int MicroProfileTrimFunctionName(const char* pStr, char* pOutBegin, char* pOutEnd)
{
	const int nLastIndex = strlen(pStr) - 1;
	int nWritten = 0;
	char* const pLimit = pOutEnd - 1; // last slot is reserved for the terminator

	const bool bFits = nLastIndex < pLimit - pOutBegin && pOutBegin != pLimit;
	if(!bFits)
	{
		return nWritten;
	}

	const char* const pInputEnd = pStr + nLastIndex + 1;
	char* pOut = pOutBegin;
	int nDepth = 0; // current bracket nesting level
	for(const char* pIn = pStr; pIn != pInputEnd && pOut != pLimit; ++pIn)
	{
		const char ch = *pIn;
		switch(ch)
		{
		case '(':
		case '<':
			nDepth++;
			break;
		case ')':
		case '>':
			nDepth--;
			continue; // closing brackets are never emitted
		default:
			break;
		}
		if(0 == nDepth)
		{
			*pOut++ = ch;
			nWritten++;
		}
	}
	*pOut = '\0';
	return nWritten;
}
14895
14896
int MicroProfileFindFunctionName(const char* pStr, const char** ppStart)
14897
{
14898
int l = strlen(pStr) - 1;
14899
if(l < 1024)
14900
{
14901
char b[1024] = { 0 };
14902
char* put = &b[0];
14903
14904
const char* p = pStr;
14905
const char* pEnd = pStr + l + 1;
14906
int in = 0;
14907
while(p != pEnd)
14908
{
14909
char c = *p++;
14910
if(c == '(' || c == '<')
14911
{
14912
in++;
14913
}
14914
else if(c == ')' || c == '>')
14915
{
14916
in--;
14917
continue;
14918
}
14919
14920
if(in == 0)
14921
{
14922
*put++ = c;
14923
}
14924
}
14925
14926
*put++ = '\0';
14927
uprintf("trimmed %s\n", b);
14928
}
14929
14930
// int nFirstParen = l;
14931
int nNumParen = 0;
14932
int c = 0;
14933
14934
while(l >= 0 && pStr[l] != ')' && c++ < (int)(sizeof(" const") - 1))
14935
{
14936
l--;
14937
}
14938
if(pStr[l] == ')')
14939
{
14940
do
14941
{
14942
if(pStr[l] == ')')
14943
{
14944
nNumParen++;
14945
}
14946
else if(pStr[l] == '(')
14947
{
14948
nNumParen--;
14949
}
14950
l--;
14951
} while(nNumParen > 0 && l >= 0);
14952
}
14953
else
14954
{
14955
*ppStart = pStr;
14956
return 0;
14957
}
14958
while(l >= 0 && isspace(pStr[l]))
14959
{
14960
--l;
14961
}
14962
int nLast = l;
14963
while(l >= 0 && !isspace(pStr[l]))
14964
{
14965
l--;
14966
}
14967
int nFirst = l;
14968
if(nFirst == nLast)
14969
return 0;
14970
int nCount = nLast - nFirst + 1;
14971
*ppStart = pStr + nFirst;
14972
return nCount;
14973
}
14974
14975
// Demangles pSymbol with abi::__cxa_demangle.
// On success the result lives in a function-static scratch buffer that is
// reused (and possibly reallocated) by the next call. When the status reported
// by the demangler is non-zero, pSymbol itself is returned.
const char* MicroProfileDemangleSymbol(const char* pSymbol)
{
	static unsigned long nScratchSize = 128;
	static char* pScratch = (char*)malloc(nScratchSize); // needs to be malloc because demangle function might realloc it.

	unsigned long nResultSize = nScratchSize;
	int nStatus = 0;
	char* pDemangled = abi::__cxa_demangle(pSymbol, pScratch, &nResultSize, &nStatus);
	if(0 != nStatus)
	{
		return pSymbol;
	}

	if(pDemangled != pScratch)
	{
		// The demangler moved the buffer: adopt it together with its new size.
		pScratch = pDemangled;
		if(nResultSize < nScratchSize)
			__builtin_trap();
		nScratchSize = nResultSize;
	}
	return pScratch;
}
14999
15000
template <typename Callback>
15001
void MicroProfileIterateSymbols(Callback CB, uint32_t* nModules, uint32_t nNumModules)
15002
{
15003
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileIterateSymbols", MP_PINK3);
15004
char FunctionName[1024];
15005
15006
intptr_t nCurrentModule = -1;
15007
uint32_t nCurrentModuleId = -1;
15008
15009
auto OnFunction = [&](void* addr, void* addrend, const char* pSymbol, const char* pModuleName, void* pModuleAddr) -> bool
15010
{
15011
const char* pStr = MicroProfileDemangleSymbol(pSymbol);
15012
;
15013
int l = MicroProfileTrimFunctionName(pStr, &FunctionName[0], &FunctionName[1024]);
15014
MP_ASSERT(nCurrentModule == (intptr_t)pModuleAddr);
15015
CB(l ? &FunctionName[0] : pStr, l ? &FunctionName[0] : 0, (intptr_t)addr, (intptr_t)addrend, nCurrentModuleId);
15016
return true;
15017
};
15018
15019
for(int i = 0; i < S.SymbolNumModules; ++i)
15020
{
15021
auto& M = S.SymbolModules[i];
15022
if(0 != nNumModules)
15023
{
15024
bool bProcess = false;
15025
for(uint32_t j = 0; j < nNumModules; ++j)
15026
{
15027
if(nModules[j] == (uint32_t)i)
15028
{
15029
bProcess = true;
15030
break;
15031
}
15032
}
15033
if(!bProcess)
15034
continue;
15035
}
15036
nCurrentModuleId = i;
15037
Dl_info di;
15038
int r = 0;
15039
r = dladdr((void*)(M.Regions[0].nBegin), &di);
15040
if(r)
15041
{
15042
nCurrentModule = (intptr_t)di.dli_fbase;
15043
M.nProgressTarget = 0;
15044
for(int j = 0; j < M.nNumExecutableRegions; ++j)
15045
{
15046
M.nProgressTarget += M.Regions[j].nEnd - M.Regions[j].nBegin;
15047
}
15048
for(int j = 0; j < M.nNumExecutableRegions; ++j)
15049
{
15050
const intptr_t nBegin = M.Regions[j].nBegin;
15051
const intptr_t nEnd = M.Regions[j].nEnd;
15052
int r = 0;
15053
intptr_t nAddr = (nEnd - 8) & ~7;
15054
intptr_t nAddrPrev = nEnd;
15055
while(1)
15056
{
15057
r = dladdr((void*)(nAddr), &di);
15058
if(r && di.dli_sname)
15059
{
15060
OnFunction(di.dli_saddr, (void*)nAddrPrev, di.dli_sname, di.dli_fname, di.dli_fbase);
15061
nAddrPrev = (intptr_t)di.dli_saddr;
15062
nAddr = (intptr_t)di.dli_saddr - 1;
15063
}
15064
else
15065
{
15066
nAddr = (nAddr - 7) & ~7; // pretty ineffecient, but it seems linux just returns 0 when there is no symbols, making this the only option I can come up with?
15067
}
15068
if(nAddr < nBegin)
15069
{
15070
break;
15071
}
15072
}
15073
}
15074
M.nProgress = M.nProgressTarget;
15075
M.nModuleLoadFinished.store(1);
15076
}
15077
}
15078
}
15079
15080
void MicroProfileSymbolUpdateModuleList()
15081
{
15082
// So, this was the only way I could find to do this..
15083
// Is this seriously how they want this to be done?
15084
FILE* F = fopen("/proc/self/maps", "r");
15085
char* line = 0;
15086
size_t len;
15087
ssize_t read;
15088
Dl_info di;
15089
while((read = getline(&line, &len, F)) != -1)
15090
{
15091
void* pBase = 0;
15092
void* pEnd = 0;
15093
char c, r, w, x, p;
15094
15095
if(8 == sscanf(line, "%p%c%p%c%c%c%c%c", &pBase, &c, &pEnd, &c, &r, &w, &x, &p))
15096
{
15097
if('x' == x)
15098
{
15099
int r = 0;
15100
r = dladdr(pBase, &di);
15101
if(r)
15102
{
15103
if('[' != di.dli_fname[0])
15104
{
15105
MicroProfileSymbolInitModule(di.dli_fname, (intptr_t)pBase, (intptr_t)pEnd);
15106
}
15107
}
15108
}
15109
}
15110
}
15111
fclose(F);
15112
MicroProfileSymbolMergeExecutableRegions();
15113
}
15114
15115
// Maps an anonymous, private read/write/execute region of at least `s` bytes
// (rounded up to whole pages), passing `f` to mmap as the placement hint.
// Returns the mmap result unchanged: callers must compare against MAP_FAILED,
// not null, to detect failure.
static void* MicroProfileAllocExecutableMemory(void* f, size_t s)
{
	static uint64_t nPageSize = 0;
	if(!nPageSize)
	{
		nPageSize = getpagesize();
	}
	s = (s + (nPageSize - 1)) & (~(nPageSize - 1));

	// Anonymous mappings take fd -1: Linux ignores the descriptor, but other
	// unix systems reject any other value when MAP_ANON is given.
	void* pMem = mmap(f, s, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
	return pMem;
}
15127
15128
// Demangling is not implemented on this platform: the input name is copied to
// OutName unchanged. At most Size - 1 characters are copied and the result is
// always zero terminated, so an over-long name is truncated instead of
// overflowing the caller's buffer.
// Returns false only when there is nowhere to write (null OutName or Size == 0)
// or when pName is null; otherwise true.
bool MicroProfileDemangleName(const char* pName, char* OutName, uint32_t Size)
{
	if(!OutName || 0 == Size)
	{
		return false;
	}
	if(!pName)
	{
		OutName[0] = '\0';
		return false;
	}
	size_t nLen = strlen(pName);
	if(nLen >= Size)
	{
		nLen = Size - 1; // leave room for the terminator
	}
	memcpy(OutName, pName, nLen);
	OutName[nLen] = '\0';
	return true;
}
15134
15135
// Placeholder: this platform variant does no work before patching and always
// reports success.
bool MicroProfilePatchBeginSuspend()
{
	const bool bSuspended = true; // Not implemented
	return bSuspended;
}
15140
15141
// Counterpart of MicroProfilePatchBeginSuspend; intentionally empty in this
// platform variant.
void MicroProfilePatchEndSuspend()
{
	return; // Not implemented
}
15145
15146
// not yet tested.
15147
void MicroProfileInstrumentWithoutSymbols(const char** pModules, const char** pSymbols, uint32_t nNumSymbols)
15148
{
15149
void* M = dlopen(0, 0);
15150
for(uint32_t i = 0; i < nNumSymbols; ++i)
15151
{
15152
// uprintf("trying to find symbol %s\n", pSym);
15153
void* s = dlsym(M, pSymbols[i]);
15154
uprintf("sym returned %p\n", s);
15155
if(s)
15156
{
15157
uint32_t nColor = MicroProfileColorFromString(pSymbols[i]);
15158
const char* pDemangled = MicroProfileDemangleSymbol(pSymbols[i]);
15159
MicroProfileInstrumentFunction(s, pModules[i], pDemangled, nColor);
15160
}
15161
}
15162
dlclose(M);
15163
}
15164
15165
#endif
15166
15167
#endif
15168
15169
// Initializes pTable with nInitialSize zeroed entries and the given compare
// and hash callbacks. The probe limit nLim becomes a fifth of the capacity,
// capped at nSearchLimit.
void MicroProfileHashTableInit(MicroProfileHashTable* pTable, uint32_t nInitialSize, uint32_t nSearchLimit, MicroProfileHashCompareFunction CompareFunc, MicroProfileHashFunction HashFunc)
{
	const uint32_t nBytes = nInitialSize * sizeof(MicroProfileHashTableEntry);

	pTable->nAllocated = nInitialSize;
	pTable->nUsed = 0;
	pTable->pEntries = (MicroProfileHashTableEntry*)MICROPROFILE_ALLOC(nBytes, 8);
	pTable->CompareFunc = CompareFunc;
	pTable->HashFunc = HashFunc;
	pTable->nSearchLimit = nSearchLimit;

	pTable->nLim = pTable->nAllocated / 5;
	if(pTable->nSearchLimit < pTable->nLim)
	{
		pTable->nLim = pTable->nSearchLimit;
	}

	memset(pTable->pEntries, 0, nBytes);
}
15183
// Releases the entry array of pTable. The other fields are left untouched.
void MicroProfileHashTableDestroy(MicroProfileHashTable* pTable)
{
	MicroProfileHashTableEntry* const pOwnedEntries = pTable->pEntries;
	MICROPROFILE_FREE(pOwnedEntries);
}
15187
15188
// Hashes key K with the table's hash callback, or uses K itself when no
// callback is set. A result of 0 is remapped to 1 because a stored hash of 0
// marks an empty slot.
uint64_t MicroProfileHashTableHash(MicroProfileHashTable* pTable, uint64_t K)
{
	uint64_t nHash = K;
	if(pTable->HashFunc)
	{
		nHash = (*pTable->HashFunc)(K);
	}
	if(0 == nHash)
	{
		nHash = 1;
	}
	return nHash;
}
15193
15194
// Doubles the capacity of pTable: builds a new table with the same callbacks
// and search limit, re-inserts every occupied entry using its stored hash,
// frees the old entry array and replaces *pTable with the new table.
void MicroProfileHashTableGrow(MicroProfileHashTable* pTable)
{
	const uint32_t nOldCount = pTable->nAllocated;
	const uint32_t nNewCount = nOldCount * 2;
	uprintf("GROW %d -> %d\n", nOldCount, nNewCount);

	MicroProfileHashTable Bigger;
	MicroProfileHashTableInit(&Bigger, nNewCount, pTable->nSearchLimit, pTable->CompareFunc, pTable->HashFunc);

	MicroProfileHashTableEntry* pOld = pTable->pEntries;
	MicroProfileHashTableEntry* const pOldEnd = pOld + nOldCount;
	for(; pOld != pOldEnd; ++pOld)
	{
		if(0 == pOld->Hash)
		{
			continue; // empty slot
		}
		MicroProfileHashTableSet(&Bigger, pOld->Key, pOld->Value, pOld->Hash, false);
	}

	MicroProfileHashTableDestroy(pTable);
	*pTable = Bigger;
}
15213
15214
// Inserts or overwrites Key -> Value, hashing the key with the table's hash
// function and allowing the table to grow.
bool MicroProfileHashTableSet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t Value)
{
	const uint64_t nKeyHash = MicroProfileHashTableHash(pTable, Key);
	const bool bStored = MicroProfileHashTableSet(pTable, Key, Value, nKeyHash, true);
	return bStored;
}
15219
15220
// Builds an iterator over HashTable constructed with index 0.
MicroProfileHashTableIterator MicroProfileGetHashTableIteratorBegin(MicroProfileHashTable* HashTable)
{
	MicroProfileHashTableIterator First(0, HashTable);
	return First;
}
15224
15225
// Builds an iterator over HashTable constructed with index nAllocated, i.e.
// one past the last slot.
MicroProfileHashTableIterator MicroProfileGetHashTableIteratorEnd(MicroProfileHashTable* HashTable)
{
	MicroProfileHashTableIterator PastLast(HashTable->nAllocated, HashTable);
	return PastLast;
}
15229
15230
// Inserts or overwrites Key -> Value using the precomputed hash H (0 is not
// supported). Slots are probed linearly from H % nAllocated. Probing gives up
// once the probe index exceeds the table's nLim or every slot has been
// visited; the table is then grown and the insert retried when bAllowGrow is
// set, otherwise MP_BREAK() is hit.
// Returns true once the value has been stored.
bool MicroProfileHashTableSet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t Value, uint64_t H, bool bAllowGrow)
{
	if(H == 0)
		MP_BREAK(); // not supported.
	MicroProfileHashCompareFunction Cmp = pTable->CompareFunc;
	for(;;)
	{
		const uint32_t nProbeLimit = pTable->nLim;
		const uint32_t nHome = H % pTable->nAllocated;
		MicroProfileHashTableEntry* pEntries = pTable->pEntries;

		for(uint32_t nProbe = 0; nProbe < pTable->nAllocated; ++nProbe)
		{
			MicroProfileHashTableEntry& Slot = pEntries[(nHome + nProbe) % pTable->nAllocated];
			if(Slot.Hash == 0)
			{
				// Free slot: claim it.
				Slot.Hash = H;
				Slot.Key = Key;
				Slot.Value = Value;
				return true;
			}
			if(Slot.Hash == H && (Cmp ? (Cmp)(Key, Slot.Key) : Key == Slot.Key))
			{
				// Key already present: overwrite its value.
				Slot.Value = Value;
				return true;
			}
			if(nProbe > nProbeLimit)
			{
				break;
			}
		}

		if(!bAllowGrow)
		{
			MP_BREAK();
		}
		else
		{
			MicroProfileHashTableGrow(pTable);
		}
	}
	MP_BREAK();
}
15272
15273
// Looks up Key by probing linearly from its home slot. Stores the value in
// *pValue and returns true on a match; returns false when an empty slot is
// reached or every slot has been inspected.
bool MicroProfileHashTableGet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t* pValue)
{
	const uint64_t nKeyHash = MicroProfileHashTableHash(pTable, Key);
	const uint32_t nHome = nKeyHash % pTable->nAllocated;
	MicroProfileHashTableEntry* pEntries = pTable->pEntries;
	MicroProfileHashCompareFunction Cmp = pTable->CompareFunc;

	for(uint32_t nProbe = 0; nProbe < pTable->nAllocated; ++nProbe)
	{
		const MicroProfileHashTableEntry& Slot = pEntries[(nHome + nProbe) % pTable->nAllocated];
		if(Slot.Hash == 0)
		{
			return false;
		}
		const bool bSameKey = Slot.Hash == nKeyHash && (Cmp ? (Cmp)(Key, Slot.Key) : Key == Slot.Key);
		if(bSameKey)
		{
			*pValue = Slot.Value;
			return true;
		}
	}
	return false;
}
15294
15295
// Removes Key from the table.
// After clearing the matching slot, every entry in the run of occupied slots
// that follows it is taken out and re-inserted, so entries that were displaced
// past the removed slot stay reachable by linear probing.
// Returns true when the key was found and removed, false otherwise.
bool MicroProfileHashTableRemove(MicroProfileHashTable* pTable, uint64_t Key)
{
	uint32_t nAllocated = pTable->nAllocated;
	if(0 == nAllocated)
	{
		return false; // nothing stored; also avoids the modulo by zero below
	}

	uint64_t H = MicroProfileHashTableHash(pTable, Key);
	uint32_t B = H % nAllocated;
	MicroProfileHashTableEntry* pEntries = pTable->pEntries;
	MicroProfileHashCompareFunction Cmp = pTable->CompareFunc;
	uint32_t nBase = (uint32_t)-1;
	for(uint32_t i = 0; i < nAllocated; ++i)
	{
		uint32_t Idx = (B + i) % nAllocated;
		if(pEntries[Idx].Hash == 0)
		{
			return false;
		}
		else if(pEntries[Idx].Hash == H && (Cmp ? (Cmp)(Key, pEntries[Idx].Key) : Key == pEntries[Idx].Key))
		{
			nBase = Idx;
			break;
		}
	}
	if(nBase == (uint32_t)-1)
	{
		// Every slot is occupied and none matches: the key is not present.
		// Without this check the code below would write to pEntries[-1].
		return false;
	}

	pEntries[nBase].Hash = 0;
	pEntries[nBase].Key = 0;
	pEntries[nBase].Value = 0;
	nBase++;
	for(uint32_t i = 0; i < nAllocated; ++i)
	{
		uint32_t Idx = (nBase + i) % nAllocated;
		if(pEntries[Idx].Hash == 0)
		{
			break;
		}
		else
		{
			MicroProfileHashTableEntry E = pEntries[Idx];
			pEntries[Idx] = {};
			MicroProfileHashTableSet(pTable, E.Key, E.Value, E.Hash, false);
		}
	}
	return true;
}
15337
uint64_t MicroProfileHashTableHashString(uint64_t pString)
15338
{
15339
return MicroProfileStringHash((const char*)pString);
15340
}
15341
15342
// Compare callback for string keys: both keys are C string pointers stored in
// uint64_t values; they are equal when strcmp reports no difference.
bool MicroProfileHashTableCompareString(uint64_t L, uint64_t R)
{
	const char* const pLeft = (const char*)L;
	const char* const pRight = (const char*)R;
	return strcmp(pLeft, pRight) == 0;
}
15346
// Hash callback for pointer keys: mixes the 64-bit value with two
// xor-shift/multiply rounds followed by a final xor-shift.
uint64_t MicroProfileHashTableHashPtr(uint64_t x)
{
	static const uint64_t kMultipliers[2] = { 0xff51afd7ed558ccdULL, 0xc4ceb9fe1a85ec53ULL };
	for(int nRound = 0; nRound < 2; ++nRound)
	{
		x ^= x >> 33;
		x *= kMultipliers[nRound];
	}
	x ^= x >> 33;
	return x;
}
15355
// Compare callback for pointer keys: plain value equality.
bool MicroProfileHashTableComparePtr(uint64_t L, uint64_t R)
{
	const bool bDifferent = L != R;
	return !bDifferent;
}
15359
15360
// String-typed wrapper around MicroProfileHashTableSet: both pointers are
// stored as integers, the strings themselves are not copied.
bool MicroProfileHashTableSetString(MicroProfileHashTable* pTable, const char* pKey, const char* pValue)
{
	const uint64_t nKey = (uint64_t)pKey;
	const uintptr_t nValue = (uintptr_t)pValue;
	return MicroProfileHashTableSet(pTable, nKey, nValue);
}
15364
15365
// String-typed wrapper around MicroProfileHashTableGet: on a hit the stored
// pointer is written to *pValue.
bool MicroProfileHashTableGetString(MicroProfileHashTable* pTable, const char* pKey, const char** pValue)
{
	const uint64_t nKey = (uint64_t)pKey;
	uintptr_t* const pOut = (uintptr_t*)pValue;
	return MicroProfileHashTableGet(pTable, nKey, pOut);
}
15369
15370
// String-typed wrapper around MicroProfileHashTableRemove.
bool MicroProfileHashTableRemoveString(MicroProfileHashTable* pTable, const char* pKey)
{
	const uint64_t nKey = (uint64_t)pKey;
	return MicroProfileHashTableRemove(pTable, nKey);
}
15374
15375
// Pointer-typed wrapper around MicroProfileHashTableSet: key and value are
// stored as the integer values of the pointers.
bool MicroProfileHashTableSetPtr(MicroProfileHashTable* pTable, const void* pKey, void* pValue)
{
	const uint64_t nKey = (uint64_t)pKey;
	const uintptr_t nValue = (uintptr_t)pValue;
	return MicroProfileHashTableSet(pTable, nKey, nValue);
}
15379
15380
template <typename T>
15381
bool MicroProfileHashTableGetPtr(MicroProfileHashTable* pTable, const void* pKey, T** pValue)
15382
{
15383
uintptr_t Dummy;
15384
uintptr_t* Arg = pValue ? (uintptr_t*)pValue : &Dummy;
15385
return MicroProfileHashTableGet(pTable, (uint64_t)pKey, Arg);
15386
}
15387
15388
// Pointer-typed wrapper around MicroProfileHashTableRemove.
bool MicroProfileHashTableRemovePtr(MicroProfileHashTable* pTable, const char* pKey)
{
	const uint64_t nKey = (uint64_t)pKey;
	return MicroProfileHashTableRemove(pTable, nKey);
}
15392
15393
template <typename T>
15394
T& MicroProfileArray<T>::operator[](const uint32_t Index)
15395
{
15396
return Data[Index];
15397
}
15398
15399
template <typename T>
15400
const T& MicroProfileArray<T>::operator[](const uint32_t Index) const
15401
{
15402
MP_ASSERT(Index < Size);
15403
return Data[Index];
15404
}
15405
template <typename T>
15406
T* MicroProfileArray<T>::begin()
15407
{
15408
return Data;
15409
}
15410
template <typename T>
15411
T* MicroProfileArray<T>::end()
15412
{
15413
return Data + Size;
15414
}
15415
15416
template <typename T>
15417
void MicroProfileArrayInit(MicroProfileArray<T>& Array, uint32_t InitialCapacity)
15418
{
15419
MP_ASSERT(Array.Data == nullptr);
15420
MP_ASSERT(Array.Size == 0);
15421
MP_ASSERT(Array.Capacity == 0);
15422
Array.Capacity = InitialCapacity;
15423
Array.Data = MP_ALLOC_OBJECT_ARRAY(T, InitialCapacity);
15424
Array.Size = 0;
15425
}
15426
template <typename T>
15427
void MicroProfileArrayDestroy(MicroProfileArray<T>& Array, uint32_t InitialCapacity)
15428
{
15429
if(Array.Data)
15430
MP_FREE(Array.Data);
15431
memset(Array, 0, sizeof(*Array));
15432
}
15433
template <typename T>
15434
void MicroProfileArrayClear(MicroProfileArray<T>& Array)
15435
{
15436
Array.Size = 0;
15437
}
15438
15439
template <typename T>
15440
void MicroProfileArrayPushBack(MicroProfileArray<T>& Array, const T& v)
15441
{
15442
uint32_t& Size = Array.Size;
15443
uint32_t& Capacity = Array.Capacity;
15444
if(Size >= Capacity)
15445
{
15446
uint32_t NewCapacity = (MicroProfileMax<uint32_t>(1u, Capacity) + 1) * 3 / 2;
15447
T* NewData = MP_ALLOC_OBJECT_ARRAY(T, NewCapacity);
15448
memcpy(NewData, Array.Data, Size * sizeof(T));
15449
if(Array.Data)
15450
{
15451
MP_FREE(Array.Data);
15452
}
15453
Array.Data = NewData;
15454
Capacity = NewCapacity;
15455
}
15456
Array.Data[Size++] = v;
15457
}
15458
15459
// Frees one string block and subtracts it (payload plus header) from the
// string-block count and memory counters.
void MicroProfileStringBlockFree(MicroProfileStringBlock* pBlock)
{
	const int64_t nReleasedBytes = -(int64_t)(pBlock->nSize + sizeof(MicroProfileStringBlock));
	MicroProfileCounterAdd(S.CounterToken_StringBlock_Count, -1);
	MicroProfileCounterAdd(S.CounterToken_StringBlock_Memory, nReleasedBytes);

	MP_FREE(pBlock);
}
15466
// Allocates a string block able to hold at least nSize bytes of string data.
// The payload is never smaller than DEFAULT_SIZE minus the block header. The
// string-block count and memory counters are increased accordingly. The
// returned block is empty and unlinked.
MicroProfileStringBlock* MicroProfileStringBlockAlloc(uint32_t nSize)
{
	const uint32_t nMinPayload = (uint32_t)(MicroProfileStringBlock::DEFAULT_SIZE - sizeof(MicroProfileStringBlock));
	nSize = MicroProfileMax(nSize, nMinPayload);
	nSize += sizeof(MicroProfileStringBlock); // from here on nSize is the full allocation size

	MicroProfileCounterAdd(S.CounterToken_StringBlock_Count, 1);
	MicroProfileCounterAdd(S.CounterToken_StringBlock_Memory, nSize);
	// uprintf("alloc string block %d sizeof strings is %d\n", nSize, (int)sizeof(MicroProfileStringBlock));

	MicroProfileStringBlock* pFresh = (MicroProfileStringBlock*)MP_ALLOC(nSize, 8);
	pFresh->nUsed = 0;
	pFresh->nSize = nSize - sizeof(MicroProfileStringBlock);
	pFresh->pNext = 0;
	return pFresh;
}
15479
15480
// Prepares an interned-string store: an empty block list plus a string-keyed
// hash table with initial size 1 and search limit 25.
void MicroProfileStringsInit(MicroProfileStrings* pStrings)
{
	pStrings->pFirst = 0;
	pStrings->pLast = 0;
	MicroProfileHashTableInit(&pStrings->HashTable, 1, 25, MicroProfileHashTableCompareString, MicroProfileHashTableHashString);
}
15486
// Tears down an interned-string store: frees every string block, releases the
// hash table's entry array, resets the waste/strings counters and zeroes the
// whole structure.
void MicroProfileStringsDestroy(MicroProfileStrings* pStrings)
{
	MicroProfileStringBlock* pBlock = pStrings->pFirst;
	while(pBlock)
	{
		// Read the link before the block is freed.
		MicroProfileStringBlock* pNext = pBlock->pNext;
		MicroProfileStringBlockFree(pBlock);
		pBlock = pNext;
	}
	MicroProfileCounterSet(S.CounterToken_StringBlock_Waste, 0);
	MicroProfileCounterSet(S.CounterToken_StringBlock_Strings, 0);

	// The entry array allocated by MicroProfileStringsInit would otherwise be
	// lost when the structure is zeroed below.
	if(pStrings->HashTable.pEntries)
	{
		MicroProfileHashTableDestroy(&pStrings->HashTable);
	}

	memset(pStrings, 0, sizeof(*pStrings));
}
15500
15501
const char* MicroProfileStringIntern(const char* pStr)
15502
{
15503
return MicroProfileStringIntern(pStr, (uint32_t)strlen(pStr), 0);
15504
}
15505
15506
const char* MicroProfileStringInternLower(const char* pStr)
15507
{
15508
return MicroProfileStringIntern(pStr, (uint32_t)strlen(pStr), ESTRINGINTERN_LOWERCASE);
15509
}
15510
const char* MicroProfileStringInternSlash(const char* pStr)
15511
{
15512
return MicroProfileStringIntern(pStr, (uint32_t)strlen(pStr), ESTRINGINTERN_FORCEFORWARDSLASH);
15513
}
15514
15515
const char* MicroProfileStringIntern(const char* pStr_, uint32_t nLen, uint32_t nFlags)
15516
{
15517
MicroProfileStrings* pStrings = &S.Strings;
15518
const char* pStr = pStr_;
15519
char* pLowerCaseStr = (char*)alloca(nLen + 1);
15520
if(0 != (nFlags & (ESTRINGINTERN_FORCEFORWARDSLASH | ESTRINGINTERN_LOWERCASE)))
15521
{
15522
for(uint32_t i = 0; i < nLen; ++i)
15523
{
15524
char c = pStr[i];
15525
if(nFlags & ESTRINGINTERN_LOWERCASE)
15526
{
15527
c = tolower(c);
15528
}
15529
if(nFlags & ESTRINGINTERN_FORCEFORWARDSLASH)
15530
{
15531
if(c == '\\')
15532
c = '/';
15533
}
15534
pLowerCaseStr[i] = c;
15535
}
15536
pLowerCaseStr[nLen] = '\0';
15537
pStr = pLowerCaseStr;
15538
}
15539
const char* pRet;
15540
if(MicroProfileHashTableGetString(&pStrings->HashTable, pStr, &pRet))
15541
{
15542
if(0 != strcmp(pStr, pRet))
15543
{
15544
MP_BREAK();
15545
}
15546
return pRet;
15547
}
15548
else
15549
{
15550
if(pStr[nLen] != '\0')
15551
MP_BREAK(); // string should be 0 terminated.
15552
nLen += 1;
15553
MicroProfileStringBlock* pBlock = pStrings->pLast;
15554
if(0 == pBlock || pBlock->nUsed + nLen > pBlock->nSize)
15555
{
15556
MicroProfileStringBlock* pNewBlock = MicroProfileStringBlockAlloc(nLen);
15557
if(pBlock)
15558
{
15559
pBlock->pNext = pNewBlock;
15560
pStrings->pLast = pNewBlock;
15561
MicroProfileCounterAdd(S.CounterToken_StringBlock_Waste, pBlock->nSize - pBlock->nUsed);
15562
}
15563
else
15564
{
15565
pStrings->pLast = pStrings->pFirst = pNewBlock;
15566
}
15567
pBlock = pNewBlock;
15568
}
15569
MicroProfileCounterAdd(S.CounterToken_StringBlock_Strings, 1);
15570
char* pDest = &pBlock->Memory[pBlock->nUsed];
15571
pBlock->nUsed += nLen;
15572
MP_ASSERT(pBlock->nUsed <= pBlock->nSize);
15573
memcpy(pDest, pStr, nLen);
15574
MicroProfileHashTableSetString(&pStrings->HashTable, pDest, pDest);
15575
15576
#if 0
15577
void DumpTableStr(MicroProfileHashTable* pTable);
15578
DumpTableStr(&pStrings->HashTable);
15579
#endif
15580
15581
return pDest;
15582
}
15583
}
15584
15585
// Debug helper: prints slot index, home slot, key, value and hash of every
// occupied slot of pTable.
void DumpTable(MicroProfileHashTable* pTable)
{
	const uint32_t nSlots = pTable->nAllocated;
	for(uint32_t i = 0; i < nSlots; ++i)
	{
		if(0 == pTable->pEntries[i].Hash)
		{
			continue; // empty slot
		}
		uprintf("[%05d,%05" PRIu64 "] ::::%" PRIx64 ", %p .. hash %" PRIx64 "\n",
				i,
				pTable->pEntries[i].Hash % pTable->nAllocated,
				pTable->pEntries[i].Key,
				(void*)pTable->pEntries[i].Value,
				pTable->pEntries[i].Hash);
	}
}
15600
// Debug helper for string tables: prints every occupied slot with key and
// value shown as C strings, followed by the fill percentage.
void DumpTableStr(MicroProfileHashTable* pTable)
{
	int c = 0; // running count of occupied slots; only advanced inside the uprintf arguments
	(void)c;
	const uint32_t nSlots = pTable->nAllocated;
	for(uint32_t i = 0; i < nSlots; ++i)
	{
		if(0 == pTable->pEntries[i].Hash)
		{
			continue; // empty slot
		}
		uprintf("%03d [%05d,%05" PRIu64 "] ::::%s, %s .. hash %" PRIx64 "\n",
				c++,
				i,
				pTable->pEntries[i].Hash % pTable->nAllocated,
				(const char*)pTable->pEntries[i].Key,
				(const char*)pTable->pEntries[i].Value,
				pTable->pEntries[i].Hash);
	}
	uprintf("FillPrc %f\n", 100.f * c / (float)pTable->nAllocated);
}
15619
15620
// Fixed word list used as test data by MicroProfileStringInternTest and MicroProfileHashTableTest.
// The hash table test consumes the entries pairwise (txt[i] as key, txt[i + 1] as value),
// so the number of entries has to stay even.
static const char* txt[] = {
	"gaudy", "chilly", "obtain", "suspend", "jelly", "peel", "nauseating", "complain", "cave", "practise",
	"sail", "close", "drawer", "mature", "impossible", "exist", "sister", "poke", "ancient", "paddle",
	"ask", "shallow", "outrageous", "healthy", "reading", "obey", "water", "elbow", "abnormal", "trap",
	"wholesale", "lovely", "stupid", "comparison", "swim", "brash", "towering", "accept", "invention", "plantation",
	"spooky", "tiger", "knot", "literate", "awake", "itch", "medical", "ticket", "tawdry", "correct",
	"mine", "accidental", "dinner", "produce", "protective", "red", "dreary", "toe", "drain", "zesty",
	"inform", "boundless", "ghost", "attend", "rely", "fill", "liquid", "pump", "continue", "spark",
	"church", "fortunate", "truthful", "conscious", "possible", "motion", "evanescent", "branch", "skirt", "number",
	"meek", "hour", "form", "work", "car", "post", "talk", "fear", "tightfisted", "dress",
	"perform", "fry", "courageous", "dysfunctional", "page", "one", "annoy", "abrasive", "dependent", "payment",
};
15629
15630
void MicroProfileStringInternTest()
15631
{
15632
MicroProfileStringsInit(&S.Strings);
15633
uint32_t nCount = sizeof(txt) / sizeof(txt[0]);
15634
const char* pStrings[100];
15635
const char* pStrings2[100];
15636
15637
DumpTableStr(&S.Strings.HashTable);
15638
for(uint32_t i = 0; i < nCount; ++i)
15639
{
15640
pStrings[i] = MicroProfileStringIntern(txt[i]);
15641
pStrings2[i] = MicroProfileStrDup(txt[i]);
15642
}
15643
15644
for(uint32_t i = 0; i < nCount; ++i)
15645
{
15646
const char* pStr = MicroProfileStringIntern(pStrings2[i]);
15647
if(pStr != pStrings[i])
15648
{
15649
MP_BREAK();
15650
}
15651
}
15652
DumpTableStr(&S.Strings.HashTable);
15653
15654
MicroProfileStringsDestroy(&S.Strings);
15655
}
15656
15657
void MicroProfileHashTableTest()
15658
{
15659
MicroProfileStringInternTest();
15660
15661
MicroProfileHashTable T;
15662
MicroProfileHashTable* pTable = &T;
15663
MicroProfileHashTableInit(pTable, 1, 100, 0, 0);
15664
15665
#define NUM_ITEMS 100
15666
15667
uint64_t Keys[NUM_ITEMS];
15668
uint64_t Values[NUM_ITEMS];
15669
memset(Keys, 0xff, sizeof(Keys));
15670
memset(Values, 0xff, sizeof(Values));
15671
15672
static int l = 0;
15673
auto RR = [&]() -> uint64_t
15674
{
15675
if(l++ % 4 < 2)
15676
{
15677
return l;
15678
}
15679
uint64_t l2 = rand();
15680
uint64_t u = rand();
15681
return l2 | (u << 32);
15682
};
15683
auto RRUnique = [&]()
15684
{
15685
bool bFound = false;
15686
uint64_t V = 0;
15687
do
15688
{
15689
V = RR();
15690
for(uint32_t i = 0; i != NUM_ITEMS; ++i)
15691
{
15692
if(V == Keys[i])
15693
{
15694
bFound = true;
15695
}
15696
}
15697
if(!bFound)
15698
{
15699
return V;
15700
}
15701
} while(bFound);
15702
MP_BREAK();
15703
return (uint64_t)0;
15704
};
15705
15706
Keys[0] = 0;
15707
Values[0] = 42;
15708
for(uint32_t i = 1; i < NUM_ITEMS; ++i)
15709
{
15710
Keys[i] = RRUnique();
15711
Values[i] = RR();
15712
}
15713
15714
for(uint32_t i = 0; i < NUM_ITEMS; ++i)
15715
{
15716
MicroProfileHashTableSet(pTable, Keys[i], Values[i]);
15717
}
15718
15719
for(uint32_t i = 0; i < NUM_ITEMS; ++i)
15720
{
15721
uintptr_t V;
15722
if(MicroProfileHashTableGet(pTable, Keys[i], &V))
15723
{
15724
if(V != Values[i])
15725
{
15726
MP_BREAK();
15727
}
15728
}
15729
else
15730
{
15731
MP_BREAK();
15732
}
15733
uint64_t nonkey = RRUnique();
15734
if(MicroProfileHashTableGet(pTable, nonkey, &V))
15735
{
15736
MP_BREAK();
15737
}
15738
}
15739
15740
DumpTable(pTable);
15741
if(!MicroProfileHashTableRemove(pTable, 0))
15742
{
15743
MP_BREAK();
15744
}
15745
uprintf("removed\n");
15746
DumpTable(pTable);
15747
uintptr_t v;
15748
if(MicroProfileHashTableGet(pTable, 0, &v))
15749
{
15750
MP_BREAK();
15751
}
15752
if(MicroProfileHashTableGet(pTable, 1, &v))
15753
{
15754
if(v != 2)
15755
MP_BREAK();
15756
}
15757
15758
MicroProfileHashTableDestroy(pTable);
15759
15760
MicroProfileHashTable Strings;
15761
MicroProfileHashTableInit(&Strings, 1, 25, MicroProfileHashTableCompareString, MicroProfileHashTableHashString);
15762
uint32_t nCount = sizeof(txt) / sizeof(txt[0]);
15763
for(uint32_t i = 0; i < nCount; i += 2)
15764
{
15765
MicroProfileHashTableSetString(&Strings, txt[i], txt[i + 1]);
15766
}
15767
DumpTableStr(&Strings);
15768
15769
for(uint32_t i = 0; i < nCount; i += 2)
15770
{
15771
const char* pKey = txt[i];
15772
const char* pValue = txt[i + 1];
15773
const char* pRes = 0;
15774
if(MicroProfileHashTableGetString(&Strings, pKey, &pRes))
15775
{
15776
if(pRes != pValue)
15777
{
15778
MP_BREAK();
15779
}
15780
}
15781
else
15782
{
15783
MP_BREAK();
15784
}
15785
}
15786
uint32_t nRem = nCount / 2;
15787
for(uint32_t i = 0; i < nRem; i += 2)
15788
{
15789
const char* pKey = txt[i];
15790
const char* pValue = txt[i + 1];
15791
15792
if(!MicroProfileHashTableRemoveString(&Strings, pKey))
15793
{
15794
MP_BREAK();
15795
}
15796
if(MicroProfileHashTableRemoveString(&Strings, pValue))
15797
{
15798
MP_BREAK();
15799
}
15800
}
15801
for(uint32_t i = 0; i < nRem; i += 2)
15802
{
15803
const char* pKey = txt[i];
15804
if(MicroProfileHashTableRemoveString(&Strings, pKey))
15805
{
15806
MP_BREAK();
15807
}
15808
}
15809
15810
for(uint32_t i = 0; i < nCount; i += 2)
15811
{
15812
const char* pKey = txt[i];
15813
const char* pValue = txt[i + 1];
15814
const char* V;
15815
if(MicroProfileHashTableGetString(&Strings, pKey, &V))
15816
{
15817
if(i < nRem)
15818
{
15819
MP_BREAK();
15820
}
15821
else
15822
{
15823
if(V != pValue)
15824
MP_BREAK();
15825
}
15826
}
15827
else
15828
{
15829
if(i >= nRem)
15830
MP_BREAK();
15831
}
15832
}
15833
15834
DumpTableStr(&Strings);
15835
MicroProfileHashTableDestroy(&Strings);
15836
}
15837
15838
uint32_t MicroProfileGetColor(uint32_t TimerIndex)
15839
{
15840
MicroProfileTimerInfo& TI = S.TimerInfo[TimerIndex];
15841
if(TI.nColor == MP_AUTO)
15842
{
15843
return MicroProfileColorFromString(TI.pName);
15844
}
15845
else
15846
{
15847
return TI.nColor;
15848
}
15849
}
15850
15851
#if MICROPROFILE_IMGUI
15852
#include "imgui.h"
15853
#ifndef MICROPROFILE_IMGUI_MAX_GRAPHS
15854
#define MICROPROFILE_IMGUI_MAX_GRAPHS 64
15855
#endif
15856
15857
#define MICROPROFILE_IMGUI_GRAPH_SIZE 256
15858
15859
struct MicroProfileImguiTimerState
15860
{
15861
int TimerIndex = -1;
15862
uint64_t FrameFetched = (uint64_t)-1;
15863
uint32_t nColor = 0;
15864
float fValues[MICROPROFILE_IMGUI_GRAPH_SIZE];
15865
};
15866
15867
struct MicroProfileImguiState
15868
{
15869
MicroProfileImguiTimerState Timers[MICROPROFILE_IMGUI_MAX_GRAPHS];
15870
uint32_t NumTimers = 0;
15871
uint32_t GraphPut;
15872
};
15873
15874
static MicroProfileImguiState ImguiState;
15875
15876
void MicroProfileImguiGather()
15877
{
15878
MICROPROFILE_SCOPEI("MicroProfile", "ImguiGather", MP_AUTO);
15879
uint32_t Put = ImguiState.GraphPut;
15880
for(uint32_t i = 0; i < ImguiState.NumTimers; ++i)
15881
{
15882
MicroProfileImguiTimerState* pGraphInfo = &ImguiState.Timers[i];
15883
uint64_t Ticks = S.Frame[pGraphInfo->TimerIndex].nTicks;
15884
float fToMs = S.TimerInfo[pGraphInfo->TimerIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTickToMsMultiplierGpu() : MicroProfileTickToMsMultiplierCpu();
15885
pGraphInfo->fValues[Put] = fToMs * Ticks;
15886
}
15887
ImguiState.GraphPut = (ImguiState.GraphPut + 1) % MICROPROFILE_IMGUI_GRAPH_SIZE;
15888
}
15889
15890
uint32_t MicroProfileImGuiColor(uint32_t Color)
15891
{
15892
uint32_t A = 0xff;
15893
uint32_t R = 0xff & (Color >> 16);
15894
uint32_t G = 0xff & (Color >> 8);
15895
uint32_t B = 0xff & (Color);
15896
15897
return (A << IM_COL32_A_SHIFT) | (R << IM_COL32_R_SHIFT) | (G << IM_COL32_G_SHIFT) | (B << IM_COL32_B_SHIFT);
15898
}
15899
15900
void MicroProfileImguiControls()
15901
{
15902
using namespace ImGui;
15903
uint32_t IdCounter = 42;
15904
{
15905
PushID(IdCounter++);
15906
int Aggr = MicroProfileGetAggregateFrames();
15907
Text("Aggregate Frames %7d", MicroProfileGetCurrentAggregateFrames());
15908
SameLine();
15909
if(RadioButton("Inf", Aggr == 0))
15910
MicroProfileSetAggregateFrames(0);
15911
int AggrFrameOptions[] = {
15912
30,
15913
60,
15914
100,
15915
1000,
15916
};
15917
for(int i = 0; i < sizeof(AggrFrameOptions) / sizeof(AggrFrameOptions[0]); ++i)
15918
{
15919
int v = AggrFrameOptions[i];
15920
char Buffer[32];
15921
stbsp_snprintf(Buffer, sizeof(Buffer) - 1, "%d", v);
15922
SameLine();
15923
if(RadioButton(Buffer, Aggr == v))
15924
MicroProfileSetAggregateFrames(v);
15925
}
15926
15927
if(Aggr == 0)
15928
{
15929
if(Button("Clear Inf Aggregate"))
15930
S.nAggregateClear = 1;
15931
}
15932
15933
PopID();
15934
}
15935
Separator();
15936
{
15937
PushID(IdCounter++);
15938
Text("Categories");
15939
if(BeginTable("CategoryTable", 3, 0))
15940
{
15941
TableSetupColumn("Name", ImGuiTableColumnFlags_WidthStretch);
15942
TableSetupColumn("On", ImGuiTableColumnFlags_WidthFixed, 70);
15943
TableSetupColumn("Off", ImGuiTableColumnFlags_WidthFixed, 70);
15944
for(uint32_t i = 0; i < S.nCategoryCount; ++i)
15945
{
15946
PushID(i);
15947
TableNextRow();
15948
TableSetColumnIndex(0);
15949
Text(S.CategoryInfo[i].pName);
15950
bool bEnabled = MicroProfileCategoryEnabled(i);
15951
bool bDisabled = MicroProfileCategoryDisabled(i);
15952
TableSetColumnIndex(1);
15953
if(RadioButton("On", bEnabled))
15954
MicroProfileEnableCategory(S.CategoryInfo[i].pName);
15955
TableSetColumnIndex(2);
15956
if(RadioButton("Off", bDisabled))
15957
MicroProfileDisableCategory(S.CategoryInfo[i].pName);
15958
15959
PopID();
15960
}
15961
EndTable();
15962
}
15963
PopID();
15964
}
15965
Separator();
15966
{
15967
PushID(IdCounter++);
15968
Text("Groups");
15969
if(BeginTable("GroupTable", 3, 0))
15970
{
15971
TableSetupColumn("Name", ImGuiTableColumnFlags_WidthStretch);
15972
TableSetupColumn("On", ImGuiTableColumnFlags_WidthFixed, 70);
15973
TableSetupColumn("Off", ImGuiTableColumnFlags_WidthFixed, 70);
15974
15975
for(uint32_t i = 0; i < S.nGroupCount; ++i)
15976
{
15977
TableNextRow();
15978
PushID(i);
15979
const char* pName = S.GroupInfo[i].pName;
15980
bool bEnabled = MicroProfileGroupEnabled(i);
15981
TableSetColumnIndex(0);
15982
Text(pName);
15983
TableSetColumnIndex(1);
15984
if(RadioButton("On", bEnabled))
15985
MicroProfileToggleGroup(i);
15986
TableSetColumnIndex(2);
15987
if(RadioButton("Off", !bEnabled))
15988
MicroProfileToggleGroup(i);
15989
PopID();
15990
}
15991
EndTable();
15992
}
15993
PopID();
15994
}
15995
}
15996
15997
// Returns the graph state tracking TimerIndex, registering a new one on first use.
// A newly registered state gets the timer's color and a zeroed sample history.
// Returns nullptr when the timer is not registered yet and all
// MICROPROFILE_IMGUI_MAX_GRAPHS slots are already taken - callers must check for that.
MicroProfileImguiTimerState* MicroProfileImguiGetTimerState(int TimerIndex)
{
	for(uint32_t i = 0; i < ImguiState.NumTimers; ++i)
	{
		if(ImguiState.Timers[i].TimerIndex == TimerIndex)
			return &ImguiState.Timers[i];
	}

	if(ImguiState.NumTimers < MICROPROFILE_IMGUI_MAX_GRAPHS)
	{
		MicroProfileImguiTimerState* pState = &ImguiState.Timers[ImguiState.NumTimers++];
		pState->TimerIndex = TimerIndex;
		pState->nColor = MicroProfileGetColor(TimerIndex);
		memset(&pState->fValues[0], 0, sizeof(pState->fValues));
		return pState;
	}

	return nullptr;
}
16015
16016
void MicroProfileImguiTable(const MicroProfileImguiWindowDesc& Window, const MicroProfileImguiEntryDesc* Entries, uint32_t NumEntries)
16017
{
16018
using namespace ImGui;
16019
const uint32_t NumColumns = 6;
16020
ImGuiIO& io = ImGui::GetIO();
16021
16022
float Padding = GetStyle().CellPadding.x * 2;
16023
float GroupWidth = CalcTextSize("Group").x;
16024
float NameWidth = CalcTextSize("Name").x;
16025
float BaseWidth = CalcTextSize("100000.00").x;
16026
float Height = CalcTextSize("G").y + Padding;
16027
16028
for(uint32_t i = 0; i < NumEntries; ++i)
16029
{
16030
uint32_t TimerIndex = MicroProfileGetTimerIndex(Entries[i].GraphTimer);
16031
const MicroProfileTimerInfo& TI = S.TimerInfo[TimerIndex];
16032
const MicroProfileGroupInfo& GI = S.GroupInfo[TI.nGroupIndex];
16033
GroupWidth = MicroProfileMax(GroupWidth, CalcTextSize(GI.pName).x);
16034
NameWidth = MicroProfileMax(NameWidth, CalcTextSize(TI.pName).x);
16035
}
16036
16037
float TableWidth = GroupWidth + NameWidth + BaseWidth * 4 + NumColumns * Padding + (NumColumns - 1) * GetStyle().ItemSpacing.x;
16038
float TableHeight = Height * (NumEntries + 1);
16039
16040
ImVec2 TablePos = ImVec2(0.f, 0.f);
16041
if(Window.Align == MICROPROFILE_IMGUI_ALIGN_TOP_RIGHT || Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_RIGHT)
16042
TablePos.x = io.DisplaySize.x - TableWidth;
16043
16044
if(Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_LEFT || Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_RIGHT)
16045
TablePos.y = io.DisplaySize.y - TableHeight;
16046
TablePos.x += Window.OffsetX;
16047
TablePos.y += Window.OffsetY;
16048
16049
SetCursorScreenPos(TablePos);
16050
16051
if(BeginTable("MicroProfileImguiTable", NumColumns, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_NoHostExtendX))
16052
{
16053
TableSetupColumn("Group", ImGuiTableColumnFlags_WidthFixed, GroupWidth);
16054
TableSetupColumn("Name", ImGuiTableColumnFlags_WidthFixed, NameWidth);
16055
16056
TableSetupColumn("Max", ImGuiTableColumnFlags_WidthFixed, BaseWidth);
16057
TableSetupColumn("Min", ImGuiTableColumnFlags_WidthFixed, BaseWidth);
16058
TableSetupColumn("Avg", ImGuiTableColumnFlags_WidthFixed, BaseWidth);
16059
TableSetupColumn("Time", ImGuiTableColumnFlags_WidthFixed, BaseWidth);
16060
16061
TableHeadersRow();
16062
16063
for(uint32_t i = 0; i < NumEntries; ++i)
16064
{
16065
uint32_t TimerIndex = MicroProfileGetTimerIndex(Entries[i].GraphTimer);
16066
16067
MicroProfileTimerValues Values;
16068
MicroProfileCalcTimers(TimerIndex, Values);
16069
const MicroProfileTimerInfo& TI = S.TimerInfo[TimerIndex];
16070
const MicroProfileGroupInfo& GI = S.GroupInfo[TI.nGroupIndex];
16071
TableNextRow();
16072
ImU32 RowBGColor = GetColorU32((i % 2) ? ImVec4(0.1f, 0.1f, 0.1f, 0.85f) : ImVec4(0.2f, 0.2f, 0.2f, 0.85f));
16073
TableSetBgColor(ImGuiTableBgTarget_RowBg1, RowBGColor);
16074
PushID(i);
16075
float fMax = 0.f, fMin = 0.f, fAvg = 0.f, fTime = 0.f;
16076
16077
auto RightAlignedFloat = [](float f)
16078
{
16079
float CellWidth = GetContentRegionAvail().x;
16080
char Buffer[32];
16081
stbsp_snprintf(Buffer, sizeof(Buffer) - 1, "%.2f", f);
16082
ImVec2 TextSize = CalcTextSize(Buffer);
16083
SetCursorPosX(GetCursorPosX() + (CellWidth - TextSize.x));
16084
TextUnformatted(Buffer);
16085
};
16086
16087
TableSetColumnIndex(0);
16088
Text(GI.pName);
16089
TableSetColumnIndex(1);
16090
Text(TI.pName);
16091
TableSetColumnIndex(2);
16092
RightAlignedFloat(Values.MaxMs);
16093
TableSetColumnIndex(3);
16094
RightAlignedFloat(Values.MinMs);
16095
TableSetColumnIndex(4);
16096
RightAlignedFloat(Values.AverageMs);
16097
TableSetColumnIndex(5);
16098
RightAlignedFloat(Values.TimeMs);
16099
PopID();
16100
}
16101
EndTable();
16102
}
16103
}
16104
16105
void MicroProfileImguiGraphs(const MicroProfileImguiWindowDesc& Window, const MicroProfileImguiEntryDesc* Entries, uint32_t NumEntries)
16106
{
16107
using namespace ImGui;
16108
ImGuiIO& io = ImGui::GetIO();
16109
uint32_t Width = Window.GraphWidth;
16110
uint32_t Height = (Window.GraphHeight + GetStyle().ItemSpacing.y) * NumEntries;
16111
16112
ImVec2 Pos = ImVec2(0.f, 0.f);
16113
if(Window.Align == MICROPROFILE_IMGUI_ALIGN_TOP_RIGHT || Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_RIGHT)
16114
Pos.x = io.DisplaySize.x - Width;
16115
16116
if(Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_LEFT || Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_RIGHT)
16117
Pos.y = io.DisplaySize.y - Height;
16118
16119
Pos.x += Window.OffsetX;
16120
Pos.y += Window.OffsetY;
16121
for(uint32_t i = 0; i < NumEntries; ++i)
16122
{
16123
SetCursorScreenPos(Pos);
16124
uint32_t TimerIndex = MicroProfileGetTimerIndex(Entries[i].GraphTimer);
16125
float GraphMax = Entries[i].GraphMax;
16126
const MicroProfileTimerInfo& TI = S.TimerInfo[TimerIndex];
16127
16128
MicroProfileImguiTimerState* TimerState = MicroProfileImguiGetTimerState(TimerIndex);
16129
16130
PushID(i << 16 | TimerIndex);
16131
if(TimerState->nColor == 0)
16132
TimerState->nColor = MicroProfileGetColor(TimerIndex);
16133
ImVec4 FrameBg = GetStyleColorVec4(ImGuiCol_FrameBg);
16134
FrameBg.x = 0.15f;
16135
FrameBg.y = 0.15f;
16136
FrameBg.z = 0.15f;
16137
FrameBg.w = 0.8f;
16138
16139
PushStyleColor(ImGuiCol_PlotLines, MicroProfileImGuiColor(TimerState->nColor));
16140
PushStyleColor(ImGuiCol_FrameBg, FrameBg);
16141
uint32_t Start = (ImguiState.GraphPut) % MICROPROFILE_IMGUI_GRAPH_SIZE;
16142
uint32_t Last = (ImguiState.GraphPut + MICROPROFILE_IMGUI_GRAPH_SIZE - 1) % MICROPROFILE_IMGUI_GRAPH_SIZE;
16143
PlotLines("", &TimerState->fValues[0], MICROPROFILE_IMGUI_GRAPH_SIZE, Start, nullptr, 0.f, GraphMax, ImVec2(Window.GraphWidth, Window.GraphHeight));
16144
16145
char TimeStr[32];
16146
stbsp_snprintf(TimeStr, sizeof(TimeStr) - 1, "%.3fms", TimerState->fValues[Last]);
16147
ImVec2 PlotMin = GetItemRectMin();
16148
ImVec2 PlotMax = GetItemRectMax();
16149
ImVec2 NameSize = CalcTextSize(TI.pName);
16150
ImVec2 NamePos = ImVec2(PlotMin.x + 1, PlotMax.y - NameSize.y - 1);
16151
ImVec2 TimeSize = CalcTextSize(TimeStr);
16152
ImVec2 TimePos = ImVec2(PlotMax.x - TimeSize.x - 1, PlotMax.y - TimeSize.y - 1);
16153
GetWindowDrawList()->AddText(NamePos, GetColorU32(ImGuiCol_Text), TI.pName);
16154
GetWindowDrawList()->AddText(TimePos, GetColorU32(ImGuiCol_Text), TimeStr);
16155
16156
PopStyleColor();
16157
PopStyleColor();
16158
PopID();
16159
Pos.y += Window.GraphHeight + GetStyle().ItemSpacing.y;
16160
}
16161
}
16162
16163
#endif
16164
16165
#undef uprintf
16166
16167
#undef S
16168
#ifdef _WIN32
16169
#pragma warning(pop)
16170
#undef microprofile_fopen_helper
16171
#endif
16172
16173
#ifdef MICROPROFILE_PS4
16174
#define MICROPROFILE_PS4_IMPL
16175
#include "microprofile_ps4.h"
16176
#endif
16177
#ifdef MICROPROFILE_XBOXONE
16178
#define MICROPROFILE_XBOXONE_IMPL
16179
#include "microprofile_xboxone.h"
16180
#endif
16181
16182
#endif // #if MICROPROFILE_ENABLED
16183
16184
#include "microprofile_html.h"
16185
#include "microprofile_icons.h"
16186
16187