Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/winpixeventruntime/include/WinPixEventRuntime/PIXEventsCommon.h
4261 views
1
// Copyright (c) Microsoft Corporation. All rights reserved.
2
3
/*==========================================================================;
4
*
5
* Copyright (C) Microsoft Corporation. All Rights Reserved.
6
*
7
* File: PIXEventsCommon.h
8
* Content: PIX include file
9
* Don't include this file directly - use pix3.h
10
*
11
****************************************************************************/
12
#pragma once
13
14
#ifndef _PIXEventsCommon_H_
15
#define _PIXEventsCommon_H_
16
17
// PIX_XBOX is defined when any of the Xbox platform macros below is defined.
#if defined(XBOX) || defined(_XBOX_ONE) || defined(_DURANGO) || defined(_GAMING_XBOX) || defined(_GAMING_XBOX_SCARLETT)
#define PIX_XBOX
#endif
20
21
#include <cstdint>
22
23
#if defined(_M_X64) || defined(_M_IX86)
24
#include <emmintrin.h>
25
#endif
26
27
//
// The PIXBeginEvent and PIXSetMarker functions have an optimized path for
// copying strings that works by copying 128 or 64 bits at a time. In some
// circumstances this may result in PIX logging the memory that follows the
// null terminator.
//
// By default this optimization is enabled unless Address Sanitizer is enabled,
// since this optimization can trigger a global-buffer-overflow when copying
// string literals.
//
// The PIX_ENABLE_BLOCK_ARGUMENT_COPY macro controls whether or not this
// optimization is enabled. Applications may also explicitly set this macro
// to 0 to disable the optimization if necessary.
//
41
42
// Check for Address Sanitizer on either Clang or MSVC.
// __has_feature is consulted when the compiler provides it; otherwise the
// __SANITIZE_ADDRESS__ macro is used.
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
#define PIX_ASAN_ENABLED
#endif
#elif defined(__SANITIZE_ADDRESS__)
#define PIX_ASAN_ENABLED
#endif

// PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET is 1 when this header chose the value of
// PIX_ENABLE_BLOCK_ARGUMENT_COPY and 0 when the value was supplied from outside.
#if defined(PIX_ENABLE_BLOCK_ARGUMENT_COPY)
// Previously set values override everything
#define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 0
#elif defined(PIX_ASAN_ENABLED)
// Disable block argument copy when address sanitizer is enabled
#define PIX_ENABLE_BLOCK_ARGUMENT_COPY 0
#define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 1
#endif

#if !defined(PIX_ENABLE_BLOCK_ARGUMENT_COPY)
// Default to enabled.
#define PIX_ENABLE_BLOCK_ARGUMENT_COPY 1
#define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 1
#endif
66
67
struct PIXEventsBlockInfo;
68
69
struct PIXEventsThreadInfo
70
{
71
PIXEventsBlockInfo* block;
72
UINT64* biasedLimit;
73
UINT64* destination;
74
};
75
76
#ifdef PIX_XBOX
77
extern "C" UINT64 WINAPI PIXEventsReplaceBlock(bool getEarliestTime) noexcept;
78
#else
79
extern "C" UINT64 WINAPI PIXEventsReplaceBlock(PIXEventsThreadInfo * threadInfo, bool getEarliestTime) noexcept;
80
#endif
81
82
// Event type codes stored in the type field of an encoded event-info qword.
// The *_OnContext values equal the plain values plus 0x010.
enum PIXEventType
{
    PIXEvent_EndEvent                     = 0x000,
    PIXEvent_BeginEvent_VarArgs           = 0x001,
    PIXEvent_BeginEvent_NoArgs            = 0x002,
    PIXEvent_SetMarker_VarArgs            = 0x007,
    PIXEvent_SetMarker_NoArgs             = 0x008,

    PIXEvent_EndEvent_OnContext           = 0x010,
    PIXEvent_BeginEvent_OnContext_VarArgs = 0x011,
    PIXEvent_BeginEvent_OnContext_NoArgs  = 0x012,
    PIXEvent_SetMarker_OnContext_VarArgs  = 0x017,
    PIXEvent_SetMarker_OnContext_NoArgs   = 0x018,
};
96
97
static const UINT64 PIXEventsReservedRecordSpaceQwords = 64;

// The reserved tail space makes sure an SSE string copy always ends its 16-byte
// write inside the current block. This way only a `destination < limit` check
// has to be performed, instead of `destination < limit - 1`. Since both are
// UINT64* and SSE writes in 16-byte chunks, 8 bytes are kept in reserve, so even
// if SSE overwrites 8 extra bytes they still belong to the correct block; on the
// next iteration the check sees that destination is greater than limit.
// The same reserve is used for fixed-size UMD events and PIXEndEvent, since
// these require less space than other variable-length user events and do not
// need a big reserved space.
static const UINT64 PIXEventsReservedTailSpaceQwords = 2;
static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64;
108
109
//Bits 7-19 (13 bits)
110
static const UINT64 PIXEventsBlockEndMarker = 0x00000000000FFF80;
111
112
//Bits 10-19 (10 bits)
113
static const UINT64 PIXEventsTypeReadMask = 0x00000000000FFC00;
114
static const UINT64 PIXEventsTypeWriteMask = 0x00000000000003FF;
115
static const UINT64 PIXEventsTypeBitShift = 10;
116
117
//Bits 20-63 (44 bits)
118
static const UINT64 PIXEventsTimestampReadMask = 0xFFFFFFFFFFF00000;
119
static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF;
120
static const UINT64 PIXEventsTimestampBitShift = 20;
121
122
// Packs a timestamp and an event type into one event-info qword.
// Each value is first truncated by its write mask (44 bits for the timestamp,
// 10 bits for the type) and then shifted to bit 20 and bit 10 respectively.
inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType)
{
    const UINT64 timestampBits = (timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift;
    const UINT64 typeBits = (static_cast<UINT64>(eventType) & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift;
    return timestampBits | typeBits;
}
127
128
//Bits 60-63 (4)
129
static const UINT64 PIXEventsStringAlignmentWriteMask = 0x000000000000000F;
130
static const UINT64 PIXEventsStringAlignmentReadMask = 0xF000000000000000;
131
static const UINT64 PIXEventsStringAlignmentBitShift = 60;
132
133
//Bits 55-59 (5)
134
static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F;
135
static const UINT64 PIXEventsStringCopyChunkSizeReadMask = 0x0F80000000000000;
136
static const UINT64 PIXEventsStringCopyChunkSizeBitShift = 55;
137
138
//Bit 54
139
static const UINT64 PIXEventsStringIsANSIWriteMask = 0x0000000000000001;
140
static const UINT64 PIXEventsStringIsANSIReadMask = 0x0040000000000000;
141
static const UINT64 PIXEventsStringIsANSIBitShift = 54;
142
143
//Bit 53
144
static const UINT64 PIXEventsStringIsShortcutWriteMask = 0x0000000000000001;
145
static const UINT64 PIXEventsStringIsShortcutReadMask = 0x0020000000000000;
146
static const UINT64 PIXEventsStringIsShortcutBitShift = 53;
147
148
// Builds the string-info qword written in front of a copied string.
// Each argument is truncated by its write mask and shifted into its field:
// alignment -> bits 60-63, copyChunkSize -> bits 55-59,
// isANSI -> bit 54, isShortcut -> bit 53.
inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut)
{
    const UINT64 alignmentBits = (alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift;
    const UINT64 chunkSizeBits = (copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift;
    const UINT64 ansiBit = (static_cast<UINT64>(isANSI) & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift;
    const UINT64 shortcutBit = (static_cast<UINT64>(isShortcut) & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift;
    return alignmentBits | chunkSizeBits | ansiBit | shortcutBit;
}
155
156
template<UINT alignment, class T>
157
inline bool PIXIsPointerAligned(T* pointer)
158
{
159
return !(((UINT64)pointer) & (alignment - 1));
160
}
161
162
// Generic template version slower because of the additional clear write
163
template<class T>
164
inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument)
165
{
166
if (destination < limit)
167
{
168
*destination = 0ull;
169
*((T*)destination) = argument;
170
++destination;
171
}
172
}
173
174
// int32 specialization to avoid slower double memory writes
175
template<>
176
inline void PIXCopyEventArgument<INT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT32 argument)
177
{
178
if (destination < limit)
179
{
180
*reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument);
181
++destination;
182
}
183
}
184
185
// unsigned int32 specialization to avoid slower double memory writes
186
template<>
187
inline void PIXCopyEventArgument<UINT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT32 argument)
188
{
189
if (destination < limit)
190
{
191
*destination = static_cast<UINT64>(argument);
192
++destination;
193
}
194
}
195
196
// int64 specialization to avoid slower double memory writes
197
template<>
198
inline void PIXCopyEventArgument<INT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT64 argument)
199
{
200
if (destination < limit)
201
{
202
*reinterpret_cast<INT64*>(destination) = argument;
203
++destination;
204
}
205
}
206
207
// unsigned int64 specialization to avoid slower double memory writes
208
template<>
209
inline void PIXCopyEventArgument<UINT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT64 argument)
210
{
211
if (destination < limit)
212
{
213
*destination = argument;
214
++destination;
215
}
216
}
217
218
//floats must be cast to double during writing the data to be properly printed later when reading the data
219
//this is needed because when float is passed to varargs function it's cast to double
220
template<>
221
inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument)
222
{
223
if (destination < limit)
224
{
225
*reinterpret_cast<double*>(destination) = static_cast<double>(argument);
226
++destination;
227
}
228
}
229
230
//char has to be cast to a longer signed integer type
231
//this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
232
template<>
233
inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument)
234
{
235
if (destination < limit)
236
{
237
*reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument);
238
++destination;
239
}
240
}
241
242
//unsigned char has to be cast to a longer unsigned integer type
243
//this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
244
template<>
245
inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument)
246
{
247
if (destination < limit)
248
{
249
*destination = static_cast<UINT64>(argument);
250
++destination;
251
}
252
}
253
254
//bool has to be cast to an integer since it's not explicitly supported by string format routines
255
//there's no format specifier for bool type, but it should work with integer format specifiers
256
template<>
257
inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument)
258
{
259
if (destination < limit)
260
{
261
*destination = static_cast<UINT64>(argument);
262
++destination;
263
}
264
}
265
266
// Character-by-character ANSI string copy, used when no block-copy path applies.
//
// Writes a string-info qword (alignment 0, chunk size 8, ANSI, not a shortcut),
// then packs the characters of `argument` eight per qword with the first
// character in the lowest byte. Copying stops after the qword that holds the
// terminating zero, or when `destination` reaches `limit`; in the latter case
// no terminator is written.
//
// The string-info qword is written without comparing `destination` to `limit`;
// the caller is expected to have performed that check.
inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
{
    *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
    while (destination < limit)
    {
        UINT64 packed = 0;
        for (unsigned int index = 0; index < 8; ++index)
        {
            // Go through uint8_t so a negative char does not sign-extend.
            const UINT64 c = static_cast<uint8_t>(argument[index]);
            if (!c)
            {
                // Flush what was gathered so far; the remaining bytes are zero
                // and serve as the terminator. Nothing past the terminator is read.
                *destination++ = packed;
                return;
            }
            packed |= c << (8 * index);
        }
        *destination++ = packed;
        argument += 8;
    }
}
331
332
// ANSI string copy, one qword at a time, for sources that are 8-byte aligned.
//
// When PIX_ENABLE_BLOCK_ARGUMENT_COPY is non-zero and `argument` is 8-byte
// aligned, writes a string-info qword (chunk size 8, ANSI) and then copies whole
// qwords until one of them contains a zero byte or `destination` reaches
// `limit`. The qword containing the terminator is copied in full, so bytes that
// follow the terminator in memory are copied too.
// In every other case the work is delegated to PIXCopyEventArgumentSlowest.
inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
{
#if PIX_ENABLE_BLOCK_ARGUMENT_COPY
    if (PIXIsPointerAligned<8>(argument))
    {
        *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
        const UINT64* source = reinterpret_cast<const UINT64*>(argument);
        while (destination < limit)
        {
            const UINT64 qword = *source++;
            *destination++ = qword;
            // check if any of the characters is a terminating zero
            const bool allBytesNonZero =
                (qword & 0xFF00000000000000) &&
                (qword & 0xFF000000000000) &&
                (qword & 0xFF0000000000) &&
                (qword & 0xFF00000000) &&
                (qword & 0xFF000000) &&
                (qword & 0xFF0000) &&
                (qword & 0xFF00) &&
                (qword & 0xFF);
            if (!allBytesNonZero)
            {
                break;
            }
        }
    }
    else
#endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY
    {
        PIXCopyEventArgumentSlowest(destination, limit, argument);
    }
}
363
364
template<>
365
inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
366
{
367
if (destination < limit)
368
{
369
if (argument != nullptr)
370
{
371
#if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
372
if (PIXIsPointerAligned<16>(argument))
373
{
374
*destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE);
375
__m128i zero = _mm_setzero_si128();
376
if (PIXIsPointerAligned<16>(destination))
377
{
378
while (destination < limit)
379
{
380
__m128i mem = _mm_load_si128((__m128i*)argument);
381
_mm_store_si128((__m128i*)destination, mem);
382
//check if any of the characters is a terminating zero
383
__m128i res = _mm_cmpeq_epi8(mem, zero);
384
destination += 2;
385
if (_mm_movemask_epi8(res))
386
break;
387
argument += 16;
388
}
389
}
390
else
391
{
392
while (destination < limit)
393
{
394
__m128i mem = _mm_load_si128((__m128i*)argument);
395
_mm_storeu_si128((__m128i*)destination, mem);
396
//check if any of the characters is a terminating zero
397
__m128i res = _mm_cmpeq_epi8(mem, zero);
398
destination += 2;
399
if (_mm_movemask_epi8(res))
400
break;
401
argument += 16;
402
}
403
}
404
}
405
else
406
#endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
407
{
408
PIXCopyEventArgumentSlow(destination, limit, argument);
409
}
410
}
411
else
412
{
413
*destination++ = 0ull;
414
}
415
}
416
}
417
418
template<>
419
inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument)
420
{
421
PIXCopyEventArgument(destination, limit, (PCSTR)argument);
422
}
423
424
// Character-by-character wide string copy, used when no block-copy path applies.
//
// Writes a string-info qword (alignment 0, chunk size 8, not ANSI, not a
// shortcut), then packs the 16-bit characters of `argument` four per qword with
// the first character in the lowest 16 bits. Copying stops after the qword that
// holds the terminating zero, or when `destination` reaches `limit`; in the
// latter case no terminator is written.
//
// The string-info qword is written without comparing `destination` to `limit`;
// the caller is expected to have performed that check.
inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
{
    *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
    while (destination < limit)
    {
        UINT64 packed = 0;
        for (unsigned int index = 0; index < 4; ++index)
        {
            const UINT64 c = static_cast<uint16_t>(argument[index]);
            if (!c)
            {
                // Flush what was gathered so far; the remaining bits are zero
                // and serve as the terminator. Nothing past the terminator is read.
                *destination++ = packed;
                return;
            }
            packed |= c << (16 * index);
        }
        *destination++ = packed;
        argument += 4;
    }
}
461
462
// Wide string copy, one qword at a time, for sources that are 8-byte aligned.
//
// When PIX_ENABLE_BLOCK_ARGUMENT_COPY is non-zero and `argument` is 8-byte
// aligned, writes a string-info qword (chunk size 8, not ANSI) and then copies
// whole qwords until one of them contains a zero 16-bit character or
// `destination` reaches `limit`. The qword containing the terminator is copied
// in full, so characters that follow the terminator in memory are copied too.
// In every other case the work is delegated to PIXCopyEventArgumentSlowest.
inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
{
#if PIX_ENABLE_BLOCK_ARGUMENT_COPY
    if (PIXIsPointerAligned<8>(argument))
    {
        *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
        const UINT64* source = reinterpret_cast<const UINT64*>(argument);
        while (destination < limit)
        {
            const UINT64 qword = *source++;
            *destination++ = qword;
            // check if any of the characters is a terminating zero
            // TODO: check if reversed condition is faster
            const bool allCharsNonZero =
                (qword & 0xFFFF000000000000) &&
                (qword & 0xFFFF00000000) &&
                (qword & 0xFFFF0000) &&
                (qword & 0xFFFF);
            if (!allCharsNonZero)
            {
                break;
            }
        }
    }
    else
#endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY
    {
        PIXCopyEventArgumentSlowest(destination, limit, argument);
    }
}
490
491
template<>
492
inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
493
{
494
if (destination < limit)
495
{
496
if (argument != nullptr)
497
{
498
#if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
499
if (PIXIsPointerAligned<16>(argument))
500
{
501
*destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE);
502
__m128i zero = _mm_setzero_si128();
503
if (PIXIsPointerAligned<16>(destination))
504
{
505
while (destination < limit)
506
{
507
__m128i mem = _mm_load_si128((__m128i*)argument);
508
_mm_store_si128((__m128i*)destination, mem);
509
//check if any of the characters is a terminating zero
510
__m128i res = _mm_cmpeq_epi16(mem, zero);
511
destination += 2;
512
if (_mm_movemask_epi8(res))
513
break;
514
argument += 8;
515
}
516
}
517
else
518
{
519
while (destination < limit)
520
{
521
__m128i mem = _mm_load_si128((__m128i*)argument);
522
_mm_storeu_si128((__m128i*)destination, mem);
523
//check if any of the characters is a terminating zero
524
__m128i res = _mm_cmpeq_epi16(mem, zero);
525
destination += 2;
526
if (_mm_movemask_epi8(res))
527
break;
528
argument += 8;
529
}
530
}
531
}
532
else
533
#endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
534
{
535
PIXCopyEventArgumentSlow(destination, limit, argument);
536
}
537
}
538
else
539
{
540
*destination++ = 0ull;
541
}
542
}
543
}
544
545
template<>
546
inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument)
547
{
548
PIXCopyEventArgument(destination, limit, (PCWSTR)argument);
549
};
550
551
// Overloads that forward PIX event data to a D3D12 command list or command
// queue. They are compiled only when one of the macros tested below is defined.
// NOTE(review): these macros are presumably the include guards of the D3D12
// headers, meaning the D3D12 header must be included first - confirm.
#if defined(__d3d12_x_h__) || defined(__d3d12_xs_h__) || defined(__d3d12_h__)

// Calls SetMarker on the command list with D3D12_EVENT_METADATA and the given buffer.
inline void PIXSetGPUMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->SetMarker(D3D12_EVENT_METADATA, data, size);
}

// Calls SetMarker on the command queue with D3D12_EVENT_METADATA and the given buffer.
inline void PIXSetGPUMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size);
}

// Calls BeginEvent on the command list with D3D12_EVENT_METADATA and the given buffer.
inline void PIXBeginGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

// Calls BeginEvent on the command queue with D3D12_EVENT_METADATA and the given buffer.
inline void PIXBeginGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

// Calls EndEvent on the command list.
inline void PIXEndGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList)
{
    commandList->EndEvent();
}

// Calls EndEvent on the command queue.
inline void PIXEndGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue)
{
    commandQueue->EndEvent();
}

#endif // defined(__d3d12_x_h__) || defined(__d3d12_xs_h__) || defined(__d3d12_h__)
584
585
template<class T> struct PIXInferScopedEventType { typedef T Type; };
586
template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; };
587
template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; };
588
template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; };
589
template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; };
590
template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; };
591
template<> struct PIXInferScopedEventType<INT64> { typedef void Type; };
592
template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; };
593
template<> struct PIXInferScopedEventType<UINT> { typedef void Type; };
594
template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; };
595
template<> struct PIXInferScopedEventType<INT> { typedef void Type; };
596
template<> struct PIXInferScopedEventType<const INT> { typedef void Type; };
597
598
599
// Remove PIX_ENABLE_BLOCK_ARGUMENT_COPY again when PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET
// is non-zero, so a value supplied from outside this header is left in place.
#if PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET
#undef PIX_ENABLE_BLOCK_ARGUMENT_COPY
#endif

#undef PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET
604
605
#endif //_PIXEventsCommon_H_
606
607