// Path: blob/master/dep/winpixeventruntime/include/WinPixEventRuntime/PIXEventsCommon.h
// 4261 views
// Copyright (c) Microsoft Corporation. All rights reserved.12/*==========================================================================;3*4* Copyright (C) Microsoft Corporation. All Rights Reserved.5*6* File: PIXEventsCommon.h7* Content: PIX include file8* Don't include this file directly - use pix3.h9*10****************************************************************************/11#pragma once1213#ifndef _PIXEventsCommon_H_14#define _PIXEventsCommon_H_1516#if defined(XBOX) || defined(_XBOX_ONE) || defined(_DURANGO) || defined(_GAMING_XBOX) || defined(_GAMING_XBOX_SCARLETT)17#define PIX_XBOX18#endif1920#include <cstdint>2122#if defined(_M_X64) || defined(_M_IX86)23#include <emmintrin.h>24#endif2526//27// The PIXBeginEvent and PIXSetMarker functions have an optimized path for28// copying strings that work by copying 128-bit or 64-bits at a time. In some29// circumstances this may result in PIX logging the remaining memory after the30// null terminator.31//32// By default this optimization is enabled unless Address Sanitizer is enabled,33// since this optimization can trigger a global-buffer-overflow when copying34// string literals.35//36// The PIX_ENABLE_BLOCK_ARGUMENT_COPY controls whether or not this optimization37// is enabled. 
Applications may also explicitly set this macro to 0 to disable38// the optimization if necessary.39//4041// Check for Address Sanitizer on either Clang or MSVC4243#if defined(__has_feature)44#if __has_feature(address_sanitizer)45#define PIX_ASAN_ENABLED46#endif47#elif defined(__SANITIZE_ADDRESS__)48#define PIX_ASAN_ENABLED49#endif5051#if defined(PIX_ENABLE_BLOCK_ARGUMENT_COPY)52// Previously set values override everything53# define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 054#elif defined(PIX_ASAN_ENABLED)55// Disable block argument copy when address sanitizer is enabled56#define PIX_ENABLE_BLOCK_ARGUMENT_COPY 057#define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 158#endif5960#if !defined(PIX_ENABLE_BLOCK_ARGUMENT_COPY)61// Default to enabled.62#define PIX_ENABLE_BLOCK_ARGUMENT_COPY 163#define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 164#endif6566struct PIXEventsBlockInfo;6768struct PIXEventsThreadInfo69{70PIXEventsBlockInfo* block;71UINT64* biasedLimit;72UINT64* destination;73};7475#ifdef PIX_XBOX76extern "C" UINT64 WINAPI PIXEventsReplaceBlock(bool getEarliestTime) noexcept;77#else78extern "C" UINT64 WINAPI PIXEventsReplaceBlock(PIXEventsThreadInfo * threadInfo, bool getEarliestTime) noexcept;79#endif8081enum PIXEventType82{83PIXEvent_EndEvent = 0x000,84PIXEvent_BeginEvent_VarArgs = 0x001,85PIXEvent_BeginEvent_NoArgs = 0x002,86PIXEvent_SetMarker_VarArgs = 0x007,87PIXEvent_SetMarker_NoArgs = 0x008,8889PIXEvent_EndEvent_OnContext = 0x010,90PIXEvent_BeginEvent_OnContext_VarArgs = 0x011,91PIXEvent_BeginEvent_OnContext_NoArgs = 0x012,92PIXEvent_SetMarker_OnContext_VarArgs = 0x017,93PIXEvent_SetMarker_OnContext_NoArgs = 0x018,94};9596static const UINT64 PIXEventsReservedRecordSpaceQwords = 64;97//this is used to make sure SSE string copy always will end 16-byte write in the current block98//this way only a check if destination < limit can be performed, instead of destination < limit - 199//since both these are UINT64* and SSE writes in 16 byte chunks, 8 bytes are kept in reserve100//so 
even if SSE overwrites 8 extra bytes, those will still belong to the correct block101//on next iteration check destination will be greater than limit102//this is used as well for fixed size UMD events and PIXEndEvent since these require less space103//than other variable length user events and do not need big reserved space104static const UINT64 PIXEventsReservedTailSpaceQwords = 2;105static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;106static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64;107108//Bits 7-19 (13 bits)109static const UINT64 PIXEventsBlockEndMarker = 0x00000000000FFF80;110111//Bits 10-19 (10 bits)112static const UINT64 PIXEventsTypeReadMask = 0x00000000000FFC00;113static const UINT64 PIXEventsTypeWriteMask = 0x00000000000003FF;114static const UINT64 PIXEventsTypeBitShift = 10;115116//Bits 20-63 (44 bits)117static const UINT64 PIXEventsTimestampReadMask = 0xFFFFFFFFFFF00000;118static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF;119static const UINT64 PIXEventsTimestampBitShift = 20;120121inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType)122{123return ((timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift) |124(((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift);125}126127//Bits 60-63 (4)128static const UINT64 PIXEventsStringAlignmentWriteMask = 0x000000000000000F;129static const UINT64 PIXEventsStringAlignmentReadMask = 0xF000000000000000;130static const UINT64 PIXEventsStringAlignmentBitShift = 60;131132//Bits 55-59 (5)133static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F;134static const UINT64 PIXEventsStringCopyChunkSizeReadMask = 0x0F80000000000000;135static const UINT64 PIXEventsStringCopyChunkSizeBitShift = 55;136137//Bit 54138static const UINT64 PIXEventsStringIsANSIWriteMask = 0x0000000000000001;139static const UINT64 PIXEventsStringIsANSIReadMask = 
0x0040000000000000;140static const UINT64 PIXEventsStringIsANSIBitShift = 54;141142//Bit 53143static const UINT64 PIXEventsStringIsShortcutWriteMask = 0x0000000000000001;144static const UINT64 PIXEventsStringIsShortcutReadMask = 0x0020000000000000;145static const UINT64 PIXEventsStringIsShortcutBitShift = 53;146147inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut)148{149return ((alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift) |150((copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift) |151(((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift) |152(((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift);153}154155template<UINT alignment, class T>156inline bool PIXIsPointerAligned(T* pointer)157{158return !(((UINT64)pointer) & (alignment - 1));159}160161// Generic template version slower because of the additional clear write162template<class T>163inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument)164{165if (destination < limit)166{167*destination = 0ull;168*((T*)destination) = argument;169++destination;170}171}172173// int32 specialization to avoid slower double memory writes174template<>175inline void PIXCopyEventArgument<INT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT32 argument)176{177if (destination < limit)178{179*reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument);180++destination;181}182}183184// unsigned int32 specialization to avoid slower double memory writes185template<>186inline void PIXCopyEventArgument<UINT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT32 argument)187{188if (destination < limit)189{190*destination = static_cast<UINT64>(argument);191++destination;192}193}194195// int64 specialization 
to avoid slower double memory writes196template<>197inline void PIXCopyEventArgument<INT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT64 argument)198{199if (destination < limit)200{201*reinterpret_cast<INT64*>(destination) = argument;202++destination;203}204}205206// unsigned int64 specialization to avoid slower double memory writes207template<>208inline void PIXCopyEventArgument<UINT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT64 argument)209{210if (destination < limit)211{212*destination = argument;213++destination;214}215}216217//floats must be cast to double during writing the data to be properly printed later when reading the data218//this is needed because when float is passed to varargs function it's cast to double219template<>220inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument)221{222if (destination < limit)223{224*reinterpret_cast<double*>(destination) = static_cast<double>(argument);225++destination;226}227}228229//char has to be cast to a longer signed integer type230//this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier231template<>232inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument)233{234if (destination < limit)235{236*reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument);237++destination;238}239}240241//unsigned char has to be cast to a longer unsigned integer type242//this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier243template<>244inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument)245{246if (destination < limit)247{248*destination = 
static_cast<UINT64>(argument);249++destination;250}251}252253//bool has to be cast to an integer since it's not explicitly supported by string format routines254//there's no format specifier for bool type, but it should work with integer format specifiers255template<>256inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument)257{258if (destination < limit)259{260*destination = static_cast<UINT64>(argument);261++destination;262}263}264265inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)266{267*destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);268while (destination < limit)269{270UINT64 c = static_cast<uint8_t>(argument[0]);271if (!c)272{273*destination++ = 0;274return;275}276UINT64 x = c;277c = static_cast<uint8_t>(argument[1]);278if (!c)279{280*destination++ = x;281return;282}283x |= c << 8;284c = static_cast<uint8_t>(argument[2]);285if (!c)286{287*destination++ = x;288return;289}290x |= c << 16;291c = static_cast<uint8_t>(argument[3]);292if (!c)293{294*destination++ = x;295return;296}297x |= c << 24;298c = static_cast<uint8_t>(argument[4]);299if (!c)300{301*destination++ = x;302return;303}304x |= c << 32;305c = static_cast<uint8_t>(argument[5]);306if (!c)307{308*destination++ = x;309return;310}311x |= c << 40;312c = static_cast<uint8_t>(argument[6]);313if (!c)314{315*destination++ = x;316return;317}318x |= c << 48;319c = static_cast<uint8_t>(argument[7]);320if (!c)321{322*destination++ = x;323return;324}325x |= c << 56;326*destination++ = x;327argument += 8;328}329}330331inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)332{333#if PIX_ENABLE_BLOCK_ARGUMENT_COPY334if (PIXIsPointerAligned<8>(argument))335{336*destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);337UINT64* source = (UINT64*)argument;338while (destination < 
limit)339{340UINT64 qword = *source++;341*destination++ = qword;342//check if any of the characters is a terminating zero343if (!((qword & 0xFF00000000000000) &&344(qword & 0xFF000000000000) &&345(qword & 0xFF0000000000) &&346(qword & 0xFF00000000) &&347(qword & 0xFF000000) &&348(qword & 0xFF0000) &&349(qword & 0xFF00) &&350(qword & 0xFF)))351{352break;353}354}355}356else357#endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY358{359PIXCopyEventArgumentSlowest(destination, limit, argument);360}361}362363template<>364inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)365{366if (destination < limit)367{368if (argument != nullptr)369{370#if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY371if (PIXIsPointerAligned<16>(argument))372{373*destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE);374__m128i zero = _mm_setzero_si128();375if (PIXIsPointerAligned<16>(destination))376{377while (destination < limit)378{379__m128i mem = _mm_load_si128((__m128i*)argument);380_mm_store_si128((__m128i*)destination, mem);381//check if any of the characters is a terminating zero382__m128i res = _mm_cmpeq_epi8(mem, zero);383destination += 2;384if (_mm_movemask_epi8(res))385break;386argument += 16;387}388}389else390{391while (destination < limit)392{393__m128i mem = _mm_load_si128((__m128i*)argument);394_mm_storeu_si128((__m128i*)destination, mem);395//check if any of the characters is a terminating zero396__m128i res = _mm_cmpeq_epi8(mem, zero);397destination += 2;398if (_mm_movemask_epi8(res))399break;400argument += 16;401}402}403}404else405#endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY406{407PIXCopyEventArgumentSlow(destination, limit, argument);408}409}410else411{412*destination++ = 0ull;413}414}415}416417template<>418inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR 
argument)419{420PIXCopyEventArgument(destination, limit, (PCSTR)argument);421}422423inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)424{425*destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);426while (destination < limit)427{428UINT64 c = static_cast<uint16_t>(argument[0]);429if (!c)430{431*destination++ = 0;432return;433}434UINT64 x = c;435c = static_cast<uint16_t>(argument[1]);436if (!c)437{438*destination++ = x;439return;440}441x |= c << 16;442c = static_cast<uint16_t>(argument[2]);443if (!c)444{445*destination++ = x;446return;447}448x |= c << 32;449c = static_cast<uint16_t>(argument[3]);450if (!c)451{452*destination++ = x;453return;454}455x |= c << 48;456*destination++ = x;457argument += 4;458}459}460461inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)462{463#if PIX_ENABLE_BLOCK_ARGUMENT_COPY464if (PIXIsPointerAligned<8>(argument))465{466*destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);467UINT64* source = (UINT64*)argument;468while (destination < limit)469{470UINT64 qword = *source++;471*destination++ = qword;472//check if any of the characters is a terminating zero473//TODO: check if reversed condition is faster474if (!((qword & 0xFFFF000000000000) &&475(qword & 0xFFFF00000000) &&476(qword & 0xFFFF0000) &&477(qword & 0xFFFF)))478{479break;480}481}482}483else484#endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY485{486PIXCopyEventArgumentSlowest(destination, limit, argument);487}488}489490template<>491inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)492{493if (destination < limit)494{495if (argument != nullptr)496{497#if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY498if (PIXIsPointerAligned<16>(argument))499{500*destination++ = PIXEncodeStringInfo(0, 16, FALSE, 
FALSE);501__m128i zero = _mm_setzero_si128();502if (PIXIsPointerAligned<16>(destination))503{504while (destination < limit)505{506__m128i mem = _mm_load_si128((__m128i*)argument);507_mm_store_si128((__m128i*)destination, mem);508//check if any of the characters is a terminating zero509__m128i res = _mm_cmpeq_epi16(mem, zero);510destination += 2;511if (_mm_movemask_epi8(res))512break;513argument += 8;514}515}516else517{518while (destination < limit)519{520__m128i mem = _mm_load_si128((__m128i*)argument);521_mm_storeu_si128((__m128i*)destination, mem);522//check if any of the characters is a terminating zero523__m128i res = _mm_cmpeq_epi16(mem, zero);524destination += 2;525if (_mm_movemask_epi8(res))526break;527argument += 8;528}529}530}531else532#endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY533{534PIXCopyEventArgumentSlow(destination, limit, argument);535}536}537else538{539*destination++ = 0ull;540}541}542}543544template<>545inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument)546{547PIXCopyEventArgument(destination, limit, (PCWSTR)argument);548};549550#if defined(__d3d12_x_h__) || defined(__d3d12_xs_h__) || defined(__d3d12_h__)551552inline void PIXSetGPUMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)553{554commandList->SetMarker(D3D12_EVENT_METADATA, data, size);555}556557inline void PIXSetGPUMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)558{559commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size);560}561562inline void PIXBeginGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)563{564commandList->BeginEvent(D3D12_EVENT_METADATA, data, size);565}566567inline void PIXBeginGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT 
size)568{569commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size);570}571572inline void PIXEndGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList)573{574commandList->EndEvent();575}576577inline void PIXEndGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue)578{579commandQueue->EndEvent();580}581582#endif //__d3d12_h__583584template<class T> struct PIXInferScopedEventType { typedef T Type; };585template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; };586template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; };587template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; };588template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; };589template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; };590template<> struct PIXInferScopedEventType<INT64> { typedef void Type; };591template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; };592template<> struct PIXInferScopedEventType<UINT> { typedef void Type; };593template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; };594template<> struct PIXInferScopedEventType<INT> { typedef void Type; };595template<> struct PIXInferScopedEventType<const INT> { typedef void Type; };596597598#if PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET599#undef PIX_ENABLE_BLOCK_ARGUMENT_COPY600#endif601602#undef PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET603604#endif //_PIXEventsCommon_H_605606607