// Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
// 4574 views
/****************************************************************************1* Copyright (C) 2016 Intel Corporation. All Rights Reserved.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22* @file archrast.cpp23*24* @brief Implementation for archrast.25*26******************************************************************************/27#include <sys/stat.h>2829#include <atomic>30#include <map>3132#include "common/os.h"33#include "archrast/archrast.h"34#include "archrast/eventmanager.h"35#include "gen_ar_event.hpp"36#include "gen_ar_eventhandlerfile.hpp"3738namespace ArchRast39{40//////////////////////////////////////////////////////////////////////////41/// @brief struct that keeps track of depth and stencil event information42struct DepthStencilStats43{44uint32_t earlyZTestPassCount = 0;45uint32_t earlyZTestFailCount = 0;46uint32_t lateZTestPassCount = 0;47uint32_t lateZTestFailCount = 0;48uint32_t earlyStencilTestPassCount = 
0;49uint32_t earlyStencilTestFailCount = 0;50uint32_t lateStencilTestPassCount = 0;51uint32_t lateStencilTestFailCount = 0;52};5354struct CStats55{56uint32_t trivialRejectCount;57uint32_t trivialAcceptCount;58uint32_t mustClipCount;59};6061struct TEStats62{63uint32_t inputPrims = 0;64//@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.65};6667struct GSStateInfo68{69uint32_t inputPrimCount;70uint32_t primGeneratedCount;71uint32_t vertsInput;72};7374struct RastStats75{76uint32_t rasterTiles = 0;77};7879struct CullStats80{81uint32_t degeneratePrimCount = 0;82uint32_t backfacePrimCount = 0;83};8485struct AlphaStats86{87uint32_t alphaTestCount = 0;88uint32_t alphaBlendCount = 0;89};909192//////////////////////////////////////////////////////////////////////////93/// @brief Event handler that handles API thread events. This is shared94/// between the API and its caller (e.g. driver shim) but typically95/// there is only a single API thread per context. So you can save96/// information in the class to be used for other events.97class EventHandlerApiStats : public EventHandlerFile98{99public:100EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)101{102#if defined(_WIN32)103// Attempt to copy the events.proto file to the ArchRast output dir. It's common for104// tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it105// exists, this will attempt to copy it the first time we get here to package it with106// the stats. 
Otherwise, the user would need to specify the events.proto location when107// parsing the stats in post.108std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;109eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;110eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)111<< "\\events.proto" << std::ends;112113// If event.proto already exists, we're done; else do the copy114struct stat buf; // Use a Posix stat for file existence check115if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0)116{117// Now check to make sure the events.proto source exists118if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)119{120std::ifstream srcFile;121srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);122if (srcFile.is_open())123{124// Just do a binary buffer copy125std::ofstream dstFile;126dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary);127dstFile << srcFile.rdbuf();128dstFile.close();129}130srcFile.close();131}132}133#endif134}135136virtual void Handle(const DrawInstancedEvent& event)137{138DrawInfoEvent e(event.data.drawId,139ArchRast::Instanced,140event.data.topology,141event.data.numVertices,1420,1430,144event.data.startVertex,145event.data.numInstances,146event.data.startInstance,147event.data.tsEnable,148event.data.gsEnable,149event.data.soEnable,150event.data.soTopology,151event.data.splitId);152153EventHandlerFile::Handle(e);154}155156virtual void Handle(const DrawIndexedInstancedEvent& event)157{158DrawInfoEvent e(event.data.drawId,159ArchRast::IndexedInstanced,160event.data.topology,1610,162event.data.numIndices,163event.data.indexOffset,164event.data.baseVertex,165event.data.numInstances,166event.data.startInstance,167event.data.tsEnable,168event.data.gsEnable,169event.data.soEnable,170event.data.soTopology,171event.data.splitId);172173EventHandlerFile::Handle(e);174}175};176177//////////////////////////////////////////////////////////////////////////178/// 
@brief Event handler that handles worker thread events. There is one179/// event handler per thread. The python script will need to sum180/// up counters across all of the threads.181class EventHandlerWorkerStats : public EventHandlerFile182{183public:184EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)185{186memset(mShaderStats, 0, sizeof(mShaderStats));187}188189virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)190{191// earlyZ test compute192mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);193mDSSingleSample.earlyZTestFailCount +=194_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);195196// earlyStencil test compute197mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);198mDSSingleSample.earlyStencilTestFailCount +=199_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);200201// earlyZ test single and multi sample202mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);203mDSCombined.earlyZTestFailCount +=204_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);205206// earlyStencil test single and multi sample207mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);208mDSCombined.earlyStencilTestFailCount +=209_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);210211mNeedFlush = true;212}213214virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)215{216// earlyZ test compute217mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);218mDSSampleRate.earlyZTestFailCount +=219_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);220221// earlyStencil test compute222mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);223mDSSampleRate.earlyStencilTestFailCount +=224_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);225226// 
earlyZ test single and multi sample227mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);228mDSCombined.earlyZTestFailCount +=229_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);230231// earlyStencil test single and multi sample232mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);233mDSCombined.earlyStencilTestFailCount +=234_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);235236mNeedFlush = true;237}238239virtual void Handle(const EarlyDepthStencilInfoNullPS& event)240{241// earlyZ test compute242mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);243mDSNullPS.earlyZTestFailCount +=244_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);245246// earlyStencil test compute247mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);248mDSNullPS.earlyStencilTestFailCount +=249_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);250mNeedFlush = true;251}252253virtual void Handle(const LateDepthStencilInfoSingleSample& event)254{255// lateZ test compute256mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);257mDSSingleSample.lateZTestFailCount +=258_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);259260// lateStencil test compute261mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);262mDSSingleSample.lateStencilTestFailCount +=263_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);264265// lateZ test single and multi sample266mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);267mDSCombined.lateZTestFailCount +=268_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);269270// lateStencil test single and multi sample271mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);272mDSCombined.lateStencilTestFailCount 
+=273_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);274275mNeedFlush = true;276}277278virtual void Handle(const LateDepthStencilInfoSampleRate& event)279{280// lateZ test compute281mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);282mDSSampleRate.lateZTestFailCount +=283_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);284285// lateStencil test compute286mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);287mDSSampleRate.lateStencilTestFailCount +=288_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);289290// lateZ test single and multi sample291mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);292mDSCombined.lateZTestFailCount +=293_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);294295// lateStencil test single and multi sample296mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);297mDSCombined.lateStencilTestFailCount +=298_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);299300mNeedFlush = true;301}302303virtual void Handle(const LateDepthStencilInfoNullPS& event)304{305// lateZ test compute306mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);307mDSNullPS.lateZTestFailCount +=308_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);309310// lateStencil test compute311mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);312mDSNullPS.lateStencilTestFailCount +=313_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);314mNeedFlush = true;315}316317virtual void Handle(const EarlyDepthInfoPixelRate& event)318{319// earlyZ test compute320mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;321mDSPixelRate.earlyZTestFailCount +=322(_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);323mNeedFlush = true;324}325326327virtual void 
Handle(const LateDepthInfoPixelRate& event)328{329// lateZ test compute330mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;331mDSPixelRate.lateZTestFailCount +=332(_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);333mNeedFlush = true;334}335336337virtual void Handle(const ClipInfoEvent& event)338{339mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);340mClipper.trivialRejectCount +=341event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);342mClipper.trivialAcceptCount +=343_mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);344}345346void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)347{348pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;349pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;350pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;351pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;352pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;353pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;354pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;355pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;356pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;357pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;358pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;359pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;360}361362virtual void Handle(const VSStats& event)363{364SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;365UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);366}367368virtual void Handle(const GSStats& event)369{370SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;371UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);372}373374virtual void Handle(const DSStats& event)375{376SWR_SHADER_STATS* pStats = 
(SWR_SHADER_STATS*)event.data.hStats;377UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);378}379380virtual void Handle(const HSStats& event)381{382SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;383UpdateStats(&mShaderStats[SHADER_HULL], pStats);384}385386virtual void Handle(const PSStats& event)387{388SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;389UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);390mNeedFlush = true;391}392393virtual void Handle(const CSStats& event)394{395SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;396UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);397mNeedFlush = true;398}399400// Flush cached events for this draw401virtual void FlushDraw(uint32_t drawId)402{403if (mNeedFlush == false)404return;405406EventHandlerFile::Handle(PSInfo(drawId,407mShaderStats[SHADER_PIXEL].numInstExecuted,408mShaderStats[SHADER_PIXEL].numSampleExecuted,409mShaderStats[SHADER_PIXEL].numSampleLExecuted,410mShaderStats[SHADER_PIXEL].numSampleBExecuted,411mShaderStats[SHADER_PIXEL].numSampleCExecuted,412mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,413mShaderStats[SHADER_PIXEL].numSampleCDExecuted,414mShaderStats[SHADER_PIXEL].numGather4Executed,415mShaderStats[SHADER_PIXEL].numGather4CExecuted,416mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,417mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,418mShaderStats[SHADER_PIXEL].numLodExecuted));419EventHandlerFile::Handle(CSInfo(drawId,420mShaderStats[SHADER_COMPUTE].numInstExecuted,421mShaderStats[SHADER_COMPUTE].numSampleExecuted,422mShaderStats[SHADER_COMPUTE].numSampleLExecuted,423mShaderStats[SHADER_COMPUTE].numSampleBExecuted,424mShaderStats[SHADER_COMPUTE].numSampleCExecuted,425mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,426mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,427mShaderStats[SHADER_COMPUTE].numGather4Executed,428mShaderStats[SHADER_COMPUTE].numGather4CExecuted,429mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,430mShaderStats[SHADER_
COMPUTE].numGather4CPOCExecuted,431mShaderStats[SHADER_COMPUTE].numLodExecuted));432433// singleSample434EventHandlerFile::Handle(EarlyZSingleSample(435drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));436EventHandlerFile::Handle(LateZSingleSample(437drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));438EventHandlerFile::Handle(439EarlyStencilSingleSample(drawId,440mDSSingleSample.earlyStencilTestPassCount,441mDSSingleSample.earlyStencilTestFailCount));442EventHandlerFile::Handle(443LateStencilSingleSample(drawId,444mDSSingleSample.lateStencilTestPassCount,445mDSSingleSample.lateStencilTestFailCount));446447// sampleRate448EventHandlerFile::Handle(EarlyZSampleRate(449drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));450EventHandlerFile::Handle(LateZSampleRate(451drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));452EventHandlerFile::Handle(453EarlyStencilSampleRate(drawId,454mDSSampleRate.earlyStencilTestPassCount,455mDSSampleRate.earlyStencilTestFailCount));456EventHandlerFile::Handle(LateStencilSampleRate(drawId,457mDSSampleRate.lateStencilTestPassCount,458mDSSampleRate.lateStencilTestFailCount));459460// combined461EventHandlerFile::Handle(462EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));463EventHandlerFile::Handle(464LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));465EventHandlerFile::Handle(EarlyStencil(drawId,466mDSCombined.earlyStencilTestPassCount,467mDSCombined.earlyStencilTestFailCount));468EventHandlerFile::Handle(LateStencil(drawId,469mDSCombined.lateStencilTestPassCount,470mDSCombined.lateStencilTestFailCount));471472// pixelRate473EventHandlerFile::Handle(EarlyZPixelRate(474drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));475EventHandlerFile::Handle(LateZPixelRate(476drawId, mDSPixelRate.lateZTestPassCount, 
mDSPixelRate.lateZTestFailCount));477478479// NullPS480EventHandlerFile::Handle(481EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));482EventHandlerFile::Handle(EarlyStencilNullPS(483drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));484485// Rasterized Subspans486EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));487488// Alpha Subspans489EventHandlerFile::Handle(490AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));491492// Primitive Culling493EventHandlerFile::Handle(494CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));495496mDSSingleSample = {};497mDSSampleRate = {};498mDSCombined = {};499mDSPixelRate = {};500mDSNullPS = {};501502rastStats = {};503mCullStats = {};504mAlphaStats = {};505506mShaderStats[SHADER_PIXEL] = {};507mShaderStats[SHADER_COMPUTE] = {};508509mNeedFlush = false;510}511512virtual void Handle(const FrontendDrawEndEvent& event)513{514// Clipper515EventHandlerFile::Handle(ClipperEvent(event.data.drawId,516mClipper.trivialRejectCount,517mClipper.trivialAcceptCount,518mClipper.mustClipCount));519520// Tesselator521EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));522523// Geometry Shader524EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));525EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));526EventHandlerFile::Handle(GSVertsInput(event.data.drawId, 
mGS.vertsInput));527528EventHandlerFile::Handle(VSInfo(event.data.drawId,529mShaderStats[SHADER_VERTEX].numInstExecuted,530mShaderStats[SHADER_VERTEX].numSampleExecuted,531mShaderStats[SHADER_VERTEX].numSampleLExecuted,532mShaderStats[SHADER_VERTEX].numSampleBExecuted,533mShaderStats[SHADER_VERTEX].numSampleCExecuted,534mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,535mShaderStats[SHADER_VERTEX].numSampleCDExecuted,536mShaderStats[SHADER_VERTEX].numGather4Executed,537mShaderStats[SHADER_VERTEX].numGather4CExecuted,538mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,539mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,540mShaderStats[SHADER_VERTEX].numLodExecuted));541EventHandlerFile::Handle(HSInfo(event.data.drawId,542mShaderStats[SHADER_HULL].numInstExecuted,543mShaderStats[SHADER_HULL].numSampleExecuted,544mShaderStats[SHADER_HULL].numSampleLExecuted,545mShaderStats[SHADER_HULL].numSampleBExecuted,546mShaderStats[SHADER_HULL].numSampleCExecuted,547mShaderStats[SHADER_HULL].numSampleCLZExecuted,548mShaderStats[SHADER_HULL].numSampleCDExecuted,549mShaderStats[SHADER_HULL].numGather4Executed,550mShaderStats[SHADER_HULL].numGather4CExecuted,551mShaderStats[SHADER_HULL].numGather4CPOExecuted,552mShaderStats[SHADER_HULL].numGather4CPOCExecuted,553mShaderStats[SHADER_HULL].numLodExecuted));554EventHandlerFile::Handle(DSInfo(event.data.drawId,555mShaderStats[SHADER_DOMAIN].numInstExecuted,556mShaderStats[SHADER_DOMAIN].numSampleExecuted,557mShaderStats[SHADER_DOMAIN].numSampleLExecuted,558mShaderStats[SHADER_DOMAIN].numSampleBExecuted,559mShaderStats[SHADER_DOMAIN].numSampleCExecuted,560mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,561mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,562mShaderStats[SHADER_DOMAIN].numGather4Executed,563mShaderStats[SHADER_DOMAIN].numGather4CExecuted,564mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,565mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,566mShaderStats[SHADER_DOMAIN].numLodExecuted));567EventHandlerFile::Handle(GS
Info(event.data.drawId,568mShaderStats[SHADER_GEOMETRY].numInstExecuted,569mShaderStats[SHADER_GEOMETRY].numSampleExecuted,570mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,571mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,572mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,573mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,574mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,575mShaderStats[SHADER_GEOMETRY].numGather4Executed,576mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,577mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,578mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,579mShaderStats[SHADER_GEOMETRY].numLodExecuted));580581mShaderStats[SHADER_VERTEX] = {};582mShaderStats[SHADER_HULL] = {};583mShaderStats[SHADER_DOMAIN] = {};584mShaderStats[SHADER_GEOMETRY] = {};585586// Reset Internal Counters587mClipper = {};588mTS = {};589mGS = {};590}591592virtual void Handle(const GSPrimInfo& event)593{594mGS.inputPrimCount += event.data.inputPrimCount;595mGS.primGeneratedCount += event.data.primGeneratedCount;596mGS.vertsInput += event.data.vertsInput;597}598599virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }600601virtual void Handle(const RasterTileCount& event)602{603rastStats.rasterTiles += event.data.rasterTiles;604}605606virtual void Handle(const CullInfoEvent& event)607{608mCullStats.degeneratePrimCount += _mm_popcnt_u32(609event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));610mCullStats.backfacePrimCount += _mm_popcnt_u32(611event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));612}613614virtual void Handle(const AlphaInfoEvent& event)615{616mAlphaStats.alphaTestCount += event.data.alphaTestEnable;617mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;618}619620protected:621bool mNeedFlush;622// Per draw stats623DepthStencilStats mDSSingleSample = {};624DepthStencilStats mDSSampleRate = {};625DepthStencilStats mDSPixelRate = 
{};626DepthStencilStats mDSCombined = {};627DepthStencilStats mDSNullPS = {};628DepthStencilStats mDSOmZ = {};629CStats mClipper = {};630TEStats mTS = {};631GSStateInfo mGS = {};632RastStats rastStats = {};633CullStats mCullStats = {};634AlphaStats mAlphaStats = {};635636SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];637638};639640static EventManager* FromHandle(HANDLE hThreadContext)641{642return reinterpret_cast<EventManager*>(hThreadContext);643}644645// Construct an event manager and associate a handler with it.646HANDLE CreateThreadContext(AR_THREAD type)647{648// Can we assume single threaded here?649static std::atomic<uint32_t> counter(0);650uint32_t id = counter.fetch_add(1);651652EventManager* pManager = new EventManager();653654if (pManager)655{656EventHandlerFile* pHandler = nullptr;657658if (type == AR_THREAD::API)659{660pHandler = new EventHandlerApiStats(id);661pManager->Attach(pHandler);662pHandler->Handle(ThreadStartApiEvent());663}664else665{666pHandler = new EventHandlerWorkerStats(id);667pManager->Attach(pHandler);668pHandler->Handle(ThreadStartWorkerEvent());669}670671pHandler->MarkHeader();672673return pManager;674}675676SWR_INVALID("Failed to register thread.");677return nullptr;678}679680void DestroyThreadContext(HANDLE hThreadContext)681{682EventManager* pManager = FromHandle(hThreadContext);683SWR_ASSERT(pManager != nullptr);684685delete pManager;686}687688// Dispatch event for this thread.689void Dispatch(HANDLE hThreadContext, const Event& event)690{691if (event.IsEnabled())692{693EventManager* pManager = reinterpret_cast<EventManager*>(hThreadContext);694SWR_ASSERT(pManager != nullptr);695pManager->Dispatch(event);696}697}698699// Flush for this thread.700void FlushDraw(HANDLE hThreadContext, uint32_t drawId)701{702EventManager* pManager = FromHandle(hThreadContext);703SWR_ASSERT(pManager != nullptr);704705pManager->FlushDraw(drawId);706}707} // namespace ArchRast708709710